4.2 Functions for reward matrix and querying initiating mutations
Function to create reward matrix
create_reward_matrix <- function(Known_mat, weights){
  num_type <- 2
  num_mutations <- nrow(Known_mat)
  mutant_names <- Known_mat$Genes
  num_clones <- ncol(Known_mat)
  num_states <- num_type^num_mutations

  # Enumerate the genotype values each mutation can take (0 up to its maximum
  # observed value), then build every possible clonal state.
  possible_mut_list <- unlist(apply(as.matrix(Known_mat), 1, function(x){
    list(0:max(unique(as.numeric(x[-1]))))
  }), recursive = FALSE)

  states <- data.frame(expand.grid(possible_mut_list))

  # All pairwise state-to-state combinations, with each state encoded as an
  # underscore-delimited genotype string (e.g. "1_0_1").
  state_interactions <- data.frame(expand.grid(
    apply(states[, 1:num_mutations], 1, function(x){ paste(x, collapse = "_") }),
    apply(states[, 1:num_mutations], 1, function(x){ paste(x, collapse = "_") })))

  # A transition is possible only if the two states differ by at most one
  # mutation acquisition.
  state_interactions$possible <- ifelse(apply(state_interactions, 1, function(x){
    A <- as.numeric(do.call(cbind, strsplit(as.character(x[1]), split = "_")))
    B <- as.numeric(do.call(cbind, strsplit(as.character(x[2]), split = "_")))
    sum(abs(A - B)) <= 1
  }), 0, NA)

  # Label each possible transition with its action: "stay" for self-loops,
  # otherwise the gene whose genotype increments.
  state_interactions$action <- apply(state_interactions, 1, function(x){
    A <- as.numeric(do.call(cbind, strsplit(as.character(x[1]), split = "_")))
    B <- as.numeric(do.call(cbind, strsplit(as.character(x[2]), split = "_")))
    if(!is.na(x["possible"])){
      if(sum(abs(B - A)) == 0){
        return("stay")
      } else{
        return(mutant_names[which((B - A) == 1)])
      }
    }
  })

  dat <- setNames(state_interactions %>% filter(action %in% c(mutant_names, "stay")),
                  c("State", "NextState", "Reward", "Action"))[, c(1, 4, 2, 3)]

  # Reward each transition by the size of the clone it lands in; "stay"
  # actions earn nothing.
  dat$Reward <- as.numeric(apply(dat, 1, function(x){
    ifelse(x["NextState"] %in% names(weights), weights[x["NextState"]], x["Reward"])
  }))
  dat$Reward <- as.numeric(apply(dat, 1, function(x){
    ifelse(x["Action"] %in% "stay", 0, x["Reward"])
  }))
  dat$State <- as.character(dat$State)
  dat$NextState <- as.character(dat$NextState)
  dat$Action <- as.character(dat$Action)

  # Single-pass Q-learning over the enumerated transitions.
  control <- list(alpha = 0.8, gamma = 0.9)
  model <- ReinforcementLearning(data = dat, s = "State", a = "Action",
                                 r = "Reward", s_new = "NextState",
                                 iter = 1, control = control)

  x <- model$Q
  rownames(x) <- substring(rownames(x), 1)
  Q_mat <- setNames(melt(x), c("State", "Action", "Q"))
  set <- inner_join(dat, Q_mat, by = c("State", "Action"))
  set$Valid <- TRUE
  return(set)
}
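The function expects Known_mat as a data frame whose first column, Genes, is followed by one genotype column per clone, and weights as a named numeric vector of clone fractions keyed by the underscore-delimited state strings. A minimal sketch of a call on toy data (all object names below are illustrative; dplyr, reshape2, and ReinforcementLearning must be attached):

library(dplyr)
library(reshape2)
library(ReinforcementLearning)

# Toy input: three mutations across three clones (hypothetical data)
Known_mat_toy <- data.frame(Genes  = c("DNMT3A", "NPM1", "FLT3"),
                            Clone1 = c(1, 0, 0),
                            Clone2 = c(1, 1, 0),
                            Clone3 = c(1, 1, 1))

# Clone fractions keyed by state string; states absent from the sample earn no reward
weights_toy <- c("1_0_0" = 0.2, "1_1_0" = 0.3, "1_1_1" = 0.5)

graph_results_toy <- create_reward_matrix(Known_mat_toy, weights_toy)
head(graph_results_toy)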
Function for retraining with reinforcement learning
create_reward_matrix_retrain <- function(Known_mat, weights){
  num_type <- 2
  num_mutations <- nrow(Known_mat)
  mutant_names <- Known_mat$Genes
  num_clones <- ncol(Known_mat)
  num_states <- num_type^num_mutations

  # State enumeration and reward assignment are identical to
  # create_reward_matrix() above; only the training step differs.
  possible_mut_list <- unlist(apply(as.matrix(Known_mat), 1, function(x){
    list(0:max(unique(as.numeric(x[-1]))))
  }), recursive = FALSE)

  states <- data.frame(expand.grid(possible_mut_list))
  state_interactions <- data.frame(expand.grid(
    apply(states[, 1:num_mutations], 1, function(x){ paste(x, collapse = "_") }),
    apply(states[, 1:num_mutations], 1, function(x){ paste(x, collapse = "_") })))

  state_interactions$possible <- ifelse(apply(state_interactions, 1, function(x){
    A <- as.numeric(do.call(cbind, strsplit(as.character(x[1]), split = "_")))
    B <- as.numeric(do.call(cbind, strsplit(as.character(x[2]), split = "_")))
    sum(abs(A - B)) <= 1
  }), 0, NA)

  state_interactions$action <- apply(state_interactions, 1, function(x){
    A <- as.numeric(do.call(cbind, strsplit(as.character(x[1]), split = "_")))
    B <- as.numeric(do.call(cbind, strsplit(as.character(x[2]), split = "_")))
    if(!is.na(x["possible"])){
      if(sum(abs(B - A)) == 0){
        return("stay")
      } else{
        return(mutant_names[which((B - A) == 1)])
      }
    }
  })

  dat <- setNames(state_interactions %>% filter(action %in% c(mutant_names, "stay")),
                  c("State", "NextState", "Reward", "Action"))[, c(1, 4, 2, 3)]

  dat$Reward <- as.numeric(apply(dat, 1, function(x){
    ifelse(x["NextState"] %in% names(weights), weights[x["NextState"]], x["Reward"])
  }))
  dat$Reward <- as.numeric(apply(dat, 1, function(x){
    ifelse(x["Action"] %in% "stay", 0, x["Reward"])
  }))
  dat$State <- as.character(dat$State)
  dat$NextState <- as.character(dat$NextState)
  dat$Action <- as.character(dat$Action)

  # Initialize with a single pass, then retrain for 1000 iterations under an
  # epsilon-greedy policy so that lower-reward transitions are still explored.
  control <- list(alpha = 0.8, gamma = 0.9)
  model1 <- ReinforcementLearning(data = dat, s = "State", a = "Action",
                                  r = "Reward", s_new = "NextState",
                                  iter = 1, control = control)
  model <- ReinforcementLearning(data = dat, s = "State", a = "Action",
                                 r = "Reward", s_new = "NextState",
                                 iter = 1000,
                                 control = list(alpha = 0.8, gamma = 0.9, epsilon = 0.4),
                                 model = model1)

  x <- model$Q
  rownames(x) <- substring(rownames(x), 1)
  Q_mat <- setNames(melt(x), c("State", "Action", "Q"))
  set <- inner_join(dat, Q_mat, by = c("State", "Action"))
  set$Valid <- TRUE
  return(set)
}
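Retraining takes the same inputs as create_reward_matrix(), so a call is interchangeable with the single-pass version. Continuing the toy example above:

graph_results_retrained_toy <- create_reward_matrix_retrain(Known_mat_toy, weights_toy)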
Query initiating mutations function
query_initiating_mutations <- function(graph_results_test){
  # Note: the argument is immediately replaced by the current sample's reward
  # matrix, so this function relies on `graph_results` and `sample` existing
  # in the calling environment.
  graph_results_test <- graph_results[[sample]]

  # The root state is all zeros (no mutations acquired).
  start_index <- paste(rep(0, length(strsplit(graph_results_test$State[1], split = "_")[[1]])),
                       sep = "_", collapse = "_")
  possible_starting_actions <- graph_results_test %>%
                                  filter(State == start_index & Action != "stay") %>%
                                  pull(Action)
  final_results <- list()
  initiating_action_count <- 0
  for(initiating_action in possible_starting_actions){
    # print(initiating_action)
    set <- graph_results_test
    initiating_action_count <- initiating_action_count + 1
    storage_results <- list()
    branches <- 0
    state_to_kill <- set %>% filter(State == start_index & Action == initiating_action) %>% pull(NextState)
    start_killed <- sum(set %>% filter(State == state_to_kill) %>% pull(Valid))

    # Repeatedly walk greedy trajectories from the root, invalidating each
    # terminal edge, until every path through the initiating state is exhausted.
    while(start_killed > 0){
      # print(branches)
      # print(start_killed)
      branches <- branches + 1
      number_of_mutations <- 0
      state_log <- list()
      optimal_reward <- list()
      action_log <- list()
      current_state <- start_index
      indicator <- TRUE
      nextState <- 0
      while(current_state != nextState) {
        # print(number_of_mutations)
        number_of_mutations <- number_of_mutations + 1
        if(number_of_mutations == 1){
          state_log[[number_of_mutations]] <- start_index
        }
        current_state <- state_log[[number_of_mutations]]
        nextState_indicator <- FALSE

        while(nextState_indicator == FALSE){
          # The first step is forced through the initiating action; afterwards,
          # follow the highest-Q valid action (ties broken at random).
          if(number_of_mutations == 1){
            max_potential_action_index <- set %>%
                      filter(State == current_state & Action == initiating_action)
          } else {
            max_potential_action_index <- set %>%
                      filter(State == current_state & Valid == TRUE) %>%
                      filter(Q == max(Q)) %>% slice_sample(n = 1)
          }
          if(nrow(max_potential_action_index) == 0){
            break
          }
          max_potential_action <- max_potential_action_index %>% pull(NextState)
          next_valid_action <- any(set %>% filter(State == max_potential_action & Action != "stay") %>% pull(Valid))
          if(next_valid_action == TRUE){
            nextState <- max_potential_action
            current_action <- max_potential_action_index %>% pull(Action)
            nextState_indicator <- TRUE
            break
          } else{
            set[set$State %in% max_potential_action_index["State"] &
                set$Action %in% max_potential_action_index["Action"], "Valid"] <- FALSE
          }
        }
        if(nrow(set %>% filter(State == current_state & Action == current_action)) == 0){
          optimal_reward[[number_of_mutations]] <- NA
        } else {
          optimal_reward[[number_of_mutations]] <- set %>%
                      filter(State == current_state & Action == current_action) %>%
                      pull(Reward)
        }
        state_log[[number_of_mutations + 1]] <- nextState
        action_log[[number_of_mutations]] <- current_action
        if(current_action == nextState){
          indicator <- FALSE
          state_log[[number_of_mutations + 1]] <- NULL
          break
        }
      }
      optimal_reward[[number_of_mutations + 1]] <- NA
      action_log[[number_of_mutations + 1]] <- NA
      storage_results[[branches]] <- data.frame("states" = do.call(rbind, state_log), #[1:(length(state_log)-1)]),
                                                "actions" = do.call(rbind, action_log),
                                                "reward" = do.call(rbind, optimal_reward),
                                                "nextState" = do.call(rbind, c(state_log[2:length(state_log)], NA)))
      storage_results[[branches]] <- storage_results[[branches]] %>%
                                        filter(states != nextState)
      storage_results[[branches]]$cumulative_reward <- cumsum(storage_results[[branches]]$reward)
      #storage_results[[branches]] <- storage_results[[branches]][1:which.max(storage_results[[branches]]$cumulative_reward), ]

      # Invalidate the terminal edge of this trajectory so the next walk
      # explores a different branch.
      set[set$State %in% current_state & set$Action %in% current_action, "Valid"] <- FALSE
      start_killed <- sum(set %>% filter(State == state_to_kill) %>% pull(Valid))
    }
    final_results[[initiating_action_count]] <- storage_results[!duplicated(storage_results)]
  }
  names(final_results) <- possible_starting_actions
  return(final_results)
}
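Because the function re-reads graph_results[[sample]] from the calling environment, it is intended to be driven by a loop in which both sample and graph_results are defined, e.g. (names illustrative):

# graph_results: named list of reward matrices from create_reward_matrix()
final_results <- list()
for(sample in names(graph_results)){
  final_results[[sample]] <- query_initiating_mutations(graph_results[[sample]])
}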
Trajectory summarization
trajectory_summarization <- function(sample, optimal_mutants_only = FALSE){
  # Extract the sample of interest
  all_results <- final_results[[sample]]

  # Apply over each potential initiating mutation and identify the stepwise
  # trajectory that accumulates the most reward
  all_results_filtered <- setNames(lapply(names(all_results), function(initiating_mutation){
    # print(initiating_mutation)
    storage_results <- all_results[[initiating_mutation]]

    # Drop branches that never accumulate reward
    storage_results[sapply(storage_results, function(x){ sum(x$reward, na.rm = TRUE) }) == 0] <- NULL
    if(length(storage_results) == 0){
      return(NULL)
    }

    # Truncate each branch at its cumulative reward maximum
    storage_results <- lapply(storage_results, function(x){ x[1:which.max(x$cumulative_reward), ] })

    if(length(storage_results) == 0){
      print("error")
      return(NULL)
    } else {
      # Extract columns of interest
      final <- do.call(rbind, storage_results)[, c("states", "nextState", "reward", "actions")]

      # Remove decisions that do not result in a state change, or terminal
      # nodes that do not exist
      nodes_to_remove <- setdiff(
                            setdiff(final$nextState,
                                    final$states),
                            final_sample_summary[[sample]]$Clones$Clone)
      final <- final %>%
                  filter(!nextState %in% nodes_to_remove) %>%
                  distinct() %>%
                  mutate("initiating_mutation" = initiating_mutation) %>%
                  unite(col = "edge", states, nextState, sep = "->", remove = FALSE) %>%
                  relocate(edge, .after = last_col())
      return(final)
    }
  }), names(all_results))

  if(length(all_results_filtered) == 0){
    return(NULL)
  } else if(all(sapply(all_results_filtered, is.null))){
    return(NULL)
  } else {
    # Identify the initiating mutation whose trajectory earns the most reward
    optimal <- names(which.max(do.call(c, lapply(all_results_filtered, function(x){
      sum(x$reward)
    }))))
    all_mutants <- unique(names(all_results_filtered))

    mutation_output <- if(optimal_mutants_only){
      (optimal)
    } else{
      (all_mutants)
    }

    # Flag transitions between clones that were actually observed in the sample
    final <- do.call(rbind, all_results_filtered) %>%
                filter(initiating_mutation %in% mutation_output) %>%
                mutate(observed = ifelse(states %in% final_sample_summary[[sample]]$Clones$Clone &
                                           nextState %in% final_sample_summary[[sample]]$Clones$Clone,
                                         "Yes", "No"))
    return(final)
  }
}
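A hypothetical call, assuming final_results and final_sample_summary are populated for the sample (unite() requires tidyr; relocate() and slice_sample() require dplyr >= 1.0):

library(tidyr)

trajectories <- trajectory_summarization("Sample1", optimal_mutants_only = FALSE)
head(trajectories)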
Function to plot the optimal trajectory graph
plot_optimal_graph_for_trajectory_new <- function(sample, optimal_mutants_only){
  final <- trajectory_summarization(sample, optimal_mutants_only = TRUE)

  if(is.null(final) || identical(final, "error")){
    return("error")
  } else{
    graph <- graph_from_data_frame(final, directed = TRUE)

    # Node weights are clone fractions; inferred states absent from the
    # observed clones get a nominal size.
    weights <- final_sample_summary[[sample]]$Clones$Count / sum(final_sample_summary[[sample]]$Clones$Count)
    names(weights) <- final_sample_summary[[sample]]$Clones$Clone
    weight_subset <- weights[names(weights) %in% names(V(graph))]
    nodes_to_add_names <- setdiff(names(V(graph)), names(weights))
    nodes_to_add <- rep(0.1, length(nodes_to_add_names))
    names(nodes_to_add) <- nodes_to_add_names
    weight_final <- c(weight_subset, nodes_to_add)[names(V(graph))]

    # Observed clones in red, inferred intermediates in grey; observed
    # transitions in blue.
    clone_colors <- ifelse(names(V(graph)) %in% final_sample_summary[[sample]]$Clones$Clone,
                           brewer.pal(5, "Reds")[5], "grey80")
    observe_edges <- final %>% filter(observed == "Yes") %>% pull(edge)
    plot(graph, layout = layout_as_tree,
         vertex.color = clone_colors,
         vertex.frame.color = clone_colors,
         vertex.size = log2(1 + weight_final * 500),
         vertex.label = NA,
         edge.color = ifelse(edge_attr(graph)$edge %in% observe_edges,
                             brewer.pal(5, "Blues")[5], "grey80"))
  }
}
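Example invocation (the sample name is illustrative; igraph and RColorBrewer supply graph_from_data_frame(), V(), layout_as_tree, and brewer.pal()):

library(igraph)
library(RColorBrewer)

plot_optimal_graph_for_trajectory_new("Sample1", optimal_mutants_only = TRUE)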