6.2 Functions
I’m likely going to wrap all of this into a package soon, but here we will start with a set of packages and functions used throughout the study. I need to go through and annotate with package was used with with function, and that will take a little time. So for the sake of making the code available as soon as possible, I’ll present the intermediate product here.
Packages The following are a list of packages that are used during the study. I will update this list with the minial set and begin to put together a package soon.
library(ape)
library(circlize)
library(cooccur)
library(corrplot)
library(cowplot)
library(data.table)
library(dplyr)
library(forcats)
library(gdata)
library(ggbeeswarm)
library(ggcorrplot)
library(ggnet)
library(ggplotify)
library(ggplot2)
library(ggrepel)
library(ggridges)
library(grid)
library(gridExtra)
library(ggthemes)
library(ggtree)
library(igraph)
library(knitr)
library(miscTools)
library(network)
library(pals)
library(parallel)
library(pheatmap)
library(phylobase)
library(purrr)
library(qgraph)
library(RColorBrewer)
library(rcompanion)
library(ReinforcementLearning)
library(reshape2)
library(sna)
library(tidygraph)
library(tidyr)
library(tidyverse)
library(TRONCO)
library(vegan)
library(widyr)
Custom functions
options(stringsAsFactors=FALSE)
<- function(NGT_to_clone_list,clonal_abundance,sample_to_test,replicates,clone_cutoff){
random_distribution_NGT_CI <- NGT_to_clone[[sample_to_test]][,1:(ncol(NGT_to_clone[[sample_to_test]])-1)]
mat_of_interest <-names(sort(colSums(mat_of_interest),decreasing=TRUE))
bulk_VAF_order <-replicate(n=replicates,apply(mat_of_interest,MARGIN=c(2),function(x){
test<-sample(x,size=length(x),replace=FALSE)
zreturn(as.numeric(z))}),simplify = "array")
<-apply(test,3,function(q){
test2<-data.frame(q[,bulk_VAF_order],
x"Clone"=apply(q[,bulk_VAF_order],1, function(p){paste(p,sep="_",collapse="_")}))
<-data.table(data.frame("Count"=as.matrix(table(x[,"Clone"])),
y"Clone"=names(table(x[,"Clone"]))),
key="Count")
$Clone<-factor(y$Clone,levels=rev(as.character(y$Clone)))
yreturn(data.frame(y))})
if(class(test2)=="list"){y <- lapply(test2,data.frame) %>% reduce(full_join, by = "Clone")}
if(class(test2)=="array"){y <- apply(test2,3,data.frame) %>% reduce(full_join, by = "Clone")}
is.na(y)]<-0
y[rownames(y)<-y[,"Clone"]
<-as.matrix(y[,-2])
ymode(y)<-'numeric'
<-t(apply(y,1,function(p){quantile(p,probs=c(0.025,0.975))}))
z<-data.frame(z,"Clone"=rownames(z))
z_mat<-setNames(data.frame(full_join(data.frame(clonal_abundance[[sample_to_test]]),z_mat,by="Clone")),c("Count","Clone","LCI","UCI"))%>%filter(LCI>=clone_cutoff&!is.na(Count))
setreturn(set)
}
<- function(NGT_to_clone_list,clonal_abundance,sample_to_test,replicates,clone_cutoff){
bootstrap_allele_dropout_NGT_mat <- NGT_to_clone[[sample_to_test]][,1:(ncol(NGT_to_clone[[sample_to_test]])-1)]
mat_of_interest <-names(sort(colSums(mat_of_interest),decreasing=TRUE))
bulk_VAF_order <-replicate(n=replicates,apply(mat_of_interest,MARGIN=c(1,2),function(x){
testif(x==1){z<-x}
if(x==0){z<-sample(x=c(0,1),size=1,prob=c(0.9,0.1))}
if(x==2){z<-sample(x=c(2,1),size=1,prob=c(0.9,0.1))}
return(as.numeric(z))}),simplify = "array")
<-apply(test,3,function(q){
test2<-data.frame(q[,bulk_VAF_order],
x"Clone"=apply(q[,bulk_VAF_order],1, function(p){paste(p,sep="_",collapse="_")}))
<-data.table(data.frame("Count"=as.matrix(table(x[,"Clone"])),
y"Clone"=names(table(x[,"Clone"]))),
key="Count")
$Clone<-factor(y$Clone,levels=rev(as.character(y$Clone)))
yreturn(data.frame(y))})
if(class(test2)=="list"){y <- lapply(test2,data.frame) %>% reduce(full_join, by = "Clone")}
if(class(test2)=="array"){y <- apply(test2,3,data.frame) %>% reduce(full_join, by = "Clone")}
is.na(y)]<-0
y[rownames(y)<-y[,"Clone"]
<-as.matrix(y[,-2])
ymode(y)<-'numeric'
<-t(apply(y,1,function(p){quantile(p,probs=c(0.025,0.975))}))
z<-data.frame(z,"Clone"=rownames(z))
z_mat<-setNames(data.frame(full_join(data.frame(clonal_abundance[[sample_to_test]]),z_mat,by="Clone")),c("Count","Clone","LCI","UCI"))%>%filter(LCI>=clone_cutoff&!is.na(Count))
setreturn(set)
}
<- function(NGT_to_clone_list,clonal_abundance,sample_to_test,replicates,clone_cutoff){
bootstrap_NGT_mat <-replicate(n=replicates,resample_fun(NGT_to_clone[[sample_to_test]]),simplify = "array")
testif(class(test)=="list"){
<- lapply(test,data.frame) %>% reduce(full_join, by = "Clone")
y
}if(class(test)=="array"){
<- apply(test,3,data.frame) %>% reduce(full_join, by = "Clone")
y
}is.na(y)]<-0
y[rownames(y)<-y[,"Clone"]
<-as.matrix(y[,-2])
ymode(y)<-'numeric'
<-t(apply(y,1,function(p){
zquantile(p,probs=c(0.025,0.975))
}))<-data.frame(z,"Clone"=rownames(z))
z_mat<-setNames(data.frame(inner_join(data.frame(clonal_abundance[[sample_to_test]]),z_mat,by="Clone")),c("Count","Clone","LCI","UCI"))%>%filter(LCI>=clone_cutoff)
setreturn(set)
}
<-function(data){
resample_fun<- data[sample(x=1:nrow(data),replace=TRUE),]
x return(as.matrix(data.table(data.frame("Count"= as.matrix(table(x[,"Clone"])),
"Clone"=names(table(x[,"Clone"]))),
key="Count")))
}
<- function(sample,variants){
extract_NGT_files <- read.csv(paste("./",sample,"/","NGT.csv",sep=""))
NGT return(NGT)
}
<- function(sample,variants){
count_unknown_cells<- read.csv(paste("./",sample,"/","NGT.csv",sep=""))
NGT <-NGT[apply(NGT[,variants],MARGIN=1,FUN=function(x){sum(x==3)>0}),"Cell"]
cells_with_unknownreturn(c(length(cells_with_unknown)/nrow(NGT)))
}
<- function(sample,variants){
extract_DP_files <- read.csv(paste("./",sample,"/","DP.csv",sep=""))
DP <-DP[apply(DP[,variants],MARGIN=1,FUN=function(x){sum(x==3)>0}),"Cell"]
cells_with_unknown<-DP[!DP$Cell%in%cells_with_unknown,variants]
matrix_of_interestreturn(matrix_of_interest)
}
<- function(sample){
extract_SNP_info <- read.csv(paste("./",sample,"/","AF.csv",sep=""))
AF if(any(grepl("SNP_INFO.csv",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/","SNP_INFO.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(AF)[-c(1:2)] # - renames SNP info to have the variants as rownames
c("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cdna"]!="",paste(x["gene"],x["cdna"],sep="_"),
ifelse(x["amplicon"]%in%c("MSK_RL_AMP54", "MSK_RL_AMP55", "MSK_RL_AMP56","MSK_RL_AMP57","MSK_RL_AMP58"),paste("FLT3_INS",x["POS"],x["ALT"],sep="_"),"")))
})else {
} <- read.csv(paste("./",sample,"/","Variants.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(AF)[-c(1:2)] # - renames SNP info to have the variants as rownames
colnames(SNP_INFO)[3] <- "protein"
$cDNA <- as.character(SNP_INFO$cDNA)
SNP_INFO$protein <- as.character(SNP_INFO$protein)
SNP_INFO$Variant <- as.character(SNP_INFO$Variant)
SNP_INFO$Gene <- as.character(SNP_INFO$Gene)
SNP_INFOc("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cDNA"]!="",paste(x["Gene"],x["cDNA"],sep="_"),
ifelse(grepl("^chr13",x["Variant"]),paste("FLT3_INS",x["Variant"],sep="_"),"")))
})
}return(SNP_INFO)
}
# run the following just once to load the function
<- function(sample){
plot_variants_and_selection #Load in the data
<- read.csv(paste("./",sample,"/","AF.csv",sep="")) # - variant allele frequency
AF <- read.csv(paste("./",sample,"/","DP.csv",sep="")) # - read depth
DP <- read.csv(paste("./",sample,"/","GQ.csv",sep="")) # - quality scores
GQ <- read.csv(paste("./",sample,"/","NGT.csv",sep="")) # - genotype call converted to categorical (0-reference, 1-heterozygous mutation, 2-homozygous mutation, 3-unknown)
NGT
if(any(grepl("SNP_INFO.csv",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/","SNP_INFO.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(AF)[-c(1:2)] # - renames SNP info to have the variants as rownames
c("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cdna"]!="",paste(x["gene"],x["cdna"],sep="_"),
ifelse(x["amplicon"]%in%c("MSK_RL_AMP54", "MSK_RL_AMP55", "MSK_RL_AMP56","MSK_RL_AMP57","MSK_RL_AMP58"),paste("FLT3_INS",x["POS"],x["ALT"],sep="_"),"")))
# - variant metadata
})else {
} <- read.csv(paste("./",sample,"/","Variants.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(AF)[-c(1:2)] # - renames SNP info to have the variants as rownames
colnames(SNP_INFO)[3] <- "protein"
$cDNA <- as.character(SNP_INFO$cDNA)
SNP_INFO$protein <- as.character(SNP_INFO$protein)
SNP_INFO$Variant <- as.character(SNP_INFO$Variant)
SNP_INFO$Gene <- as.character(SNP_INFO$Gene)
SNP_INFOc("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cDNA"]!="",paste(x["Gene"],x["cDNA"],sep="_"),
ifelse(grepl("^chr13",x["Variant"]),paste("FLT3_INS",x["Variant"],sep="_"),"")))
# - variant metadata
})
}
<- (SNP_INFO %>% filter(protein!="")%>%select(protein))[,1] # select column of variants
protein_changes_of_interest <- rownames(SNP_INFO)[SNP_INFO[,"protein"]%in%protein_changes_of_interest]
SNP_changes_of_interest <- data.frame("Mutant"=SNP_changes_of_interest,"Protein"=protein_changes_of_interest)
SNP_protein_key
# Cell and clone selection and Average depth per cell per allele
<- apply(NGT[,SNP_changes_of_interest],MARGIN=2,table)
distribution_of_unknowns_by_variant <-NGT[,c("Sample","Cell",SNP_changes_of_interest)]
NGT_subset#NGT_interest$Clone <- apply(NGT_interest[,-c(1,2)],1,function(x){paste(x,sep="_",collapse="_")})
#clonal_abundance <- sort(table(NGT_interest$Clone))
<- DP[,c("Sample","Cell",SNP_changes_of_interest)]
DP_subset<-inner_join(melt(DP_subset[,-1],id.var="Cell"),melt(NGT_subset[,-1],id.var="Cell"),by=c("Cell","variable"))
DP_NGT_meltcolnames(DP_NGT_melt) <- c("Cell","Mutant","Depth","Genotype")
$Genotype <- ifelse(DP_NGT_melt$Genotype==3,"Unknown",
DP_NGT_meltifelse(DP_NGT_melt$Genotype==0,"WT",
ifelse(DP_NGT_melt$Genotype==1,"Heterozygous",
ifelse(DP_NGT_melt$Genotype==2,"Homozygous",DP_NGT_melt$Genotype))))
$Genotype <- factor(DP_NGT_melt$Genotype ,levels=c("WT","Heterozygous","Homozygous","Unknown"))
DP_NGT_melt
<-data.frame(DP_NGT_melt%>%group_by(Mutant,Genotype)%>%summarise("Depth"=mean(Depth)))
summarized<- summarized %>% filter(Genotype!="Unknown")
summarized_no_unknown <- group_by(summarized_no_unknown, Mutant)
grouped <- data.frame(summarise(grouped, mean=mean(Depth), sd=sd(Depth)))
output $Include <- rep("Include",length(rownames(output)))
output<- inner_join(SNP_protein_key,output)
output_final
<- ggplot(summarized_no_unknown,aes(x=Mutant,y=Depth,color=Genotype))+geom_point()+coord_flip()+
depth_plotscale_x_discrete(limits = rev(levels(summarized_no_unknown$Mutant)))+theme_bw()#+
write.csv(output_final,paste0("./",sample,"/",sample,"_VARIANT_SELECTION.csv"),row.names=FALSE)
ggsave(paste0("./",sample,"/",sample,"_VARIANT_SELECTION.pdf"),depth_plot,width=10,height=20)
}
<-function(sample,diversity){
run_mbio_analysis#Load in the data
if(any(grepl("VARIANT_SELECTION_LAM",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/",sample,"_VARIANT_SELECTION_LAM.csv",sep=""))
LAM_VARIANTS_MAT <- as.character((LAM_VARIANTS_MAT %>% filter(Include=="Include")%>%select(Mutant))[,1])
LAM_VARIANTS_SNP <- as.character((LAM_VARIANTS_MAT %>% filter(Include=="Include")%>%select(Protein))[,1])
LAM_VARIANTS_Protein else {
} print("No variant selection performed!")
return("next")
}<- read.csv(paste("./",sample,"/","AF.csv",sep="")) # - variant allele frequency
AF <- read.csv(paste("./",sample,"/","DP.csv",sep="")) # - read depth
DP <- read.csv(paste("./",sample,"/","GQ.csv",sep="")) # - quality scores
GQ <- read.csv(paste("./",sample,"/","NGT.csv",sep="")) # - genotype call converted to categorical (0-reference, 1-heterozygous mutation, 2-homozygous mutation, 3-unknown)
NGT
if(any(grepl("SNP_INFO.csv",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/","SNP_INFO.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(AF)[-c(1:2)] # - renames SNP info to have the variants as rownames
c("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cdna"]!="",paste(x["gene"],x["cdna"],sep="_"),
ifelse(x["amplicon"]%in%c("MSK_RL_AMP54", "MSK_RL_AMP55", "MSK_RL_AMP56","MSK_RL_AMP57","MSK_RL_AMP58"),paste("FLT3_INS",x["POS"],x["ALT"],sep="_"),"")))
# - variant metadata
})else {
} <- read.csv(paste("./",sample,"/","Variants.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(AF)[-c(1:2)] # - renames SNP info to have the variants as rownames
colnames(SNP_INFO)[3] <- "protein"
$cDNA <- as.character(SNP_INFO$cDNA)
SNP_INFO$protein <- as.character(SNP_INFO$protein)
SNP_INFO$Variant <- as.character(SNP_INFO$Variant)
SNP_INFO$Gene <- as.character(SNP_INFO$Gene)
SNP_INFOc("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cDNA"]!="",paste(x["Gene"],x["cDNA"],sep="_"),
ifelse(grepl("^chr13",x["Variant"]),paste("FLT3_INS",x["Variant"],sep="_"),"")))
# - variant metadata
})
}
<- LAM_VARIANTS_SNP
SNP_changes_of_interest <- LAM_VARIANTS_Protein
protein_changes_of_interest <- data.frame("Mutant"=SNP_changes_of_interest,"Protein"=protein_changes_of_interest)
SNP_protein_key
# Cell and clone selection
<- apply(NGT[,SNP_changes_of_interest],MARGIN=2,table)
distribution_of_unknowns_by_variant #print(distribution_of_unknowns_by_variant)
<-NGT[apply(NGT[,SNP_changes_of_interest],MARGIN=1,FUN=function(x){sum(x==3)>0}),"Cell"]
cells_with_unknown<-NGT[!NGT$Cell%in%cells_with_unknown,SNP_changes_of_interest]
matrix_of_interest<-SNP_INFO[colnames(matrix_of_interest)[order(colSums(matrix_of_interest),decreasing=TRUE)],"protein"]
bulk_VAF_order<- bulk_VAF_order[!duplicated(bulk_VAF_order)]
bulk_VAF_order $Clone <- apply(matrix_of_interest,1,function(x){paste(x,sep="_",collapse="_")})
matrix_of_interest<- sort(table(matrix_of_interest$Clone))
clonal_abundance <-names(clonal_abundance)[clonal_abundance>length(rownames(matrix_of_interest))*0]
clones_to_include<- matrix_of_interest[matrix_of_interest$Clone%in%clones_to_include,]
matrix_subset_clones
#Average depth per cell per allele
<- DP[-cells_with_unknown,c("Sample","Cell",rownames(SNP_INFO)[SNP_INFO$protein%in%protein_changes_of_interest])]
DP_subset<- NGT[NGT$Cell%in%DP_subset$Cell,colnames(DP_subset)]
NGT_subset <-inner_join(melt(DP_subset[,-1],id.var="Cell"),melt(NGT[,-1],id.var="Cell"),by=c("Cell","variable"))
DP_NGT_meltcolnames(DP_NGT_melt) <- c("Cell","Mutant","Depth","Genotype")
$Genotype <- ifelse(DP_NGT_melt$Genotype==3,"Unknown",
DP_NGT_meltifelse(DP_NGT_melt$Genotype==0,"WT",
ifelse(DP_NGT_melt$Genotype==1,"Heterozygous",
ifelse(DP_NGT_melt$Genotype==2,"Homozygous",DP_NGT_melt$Genotype))))
$Genotype <- factor(DP_NGT_melt$Genotype ,levels=c("WT","Heterozygous","Homozygous","Unknown"))
DP_NGT_melt
<-data.frame(DP_NGT_melt%>%group_by(Mutant,Genotype)%>%summarise("Depth"=mean(Depth)))
summarized
# Verify variants selected are unique and encode protein changes
!any(duplicated(SNP_INFO[colnames(matrix_subset_clones),"protein"])) #Should return TRUE
colnames(matrix_subset_clones)<- c(as.character(SNP_INFO[colnames(matrix_subset_clones)[1:length(SNP_changes_of_interest)],"protein"]),"Clone")
# Aggregate data to get clone counts and architecture.
<-matrix_subset_clones[!duplicated(matrix_subset_clones),]
dedup<- melt(dedup)
clonal_architecture colnames(clonal_architecture) <- c("Clone","Mutant","Genotype")
$Genotype <- ifelse(clonal_architecture$Genotype==3,NA,
clonal_architectureifelse(clonal_architecture$Genotype==0,"WT",
ifelse(clonal_architecture$Genotype==1,"Heterozygous",
ifelse(clonal_architecture$Genotype==2,"Homozygous",clonal_architecture$Genotype))))
$Genotype <- factor(clonal_architecture$Genotype ,levels=c("WT","Heterozygous","Homozygous"))
clonal_architecture<- data.frame("Count"=as.matrix(table(matrix_subset_clones$Clone)))
clonal_abundance $Clone <- rownames(clonal_abundance)
clonal_abundance<- data.table(clonal_abundance,key="Count")
clonal_abundance $Clone <- factor(clonal_abundance$Clone,levels=rev(c(clonal_abundance$Clone)))
clonal_abundance$Clone <- factor(as.character(clonal_architecture$Clone),levels=rev(as.character(clonal_abundance$Clone)))
clonal_architecture$Mutant <-factor(clonal_architecture$Mutant , levels=rev(as.character(bulk_VAF_order)))
clonal_architecture
# Generate clonal architecture heatmap
<- ggplot(data = clonal_architecture, aes(x = Clone, y = factor(Mutant), fill = Genotype)) +
gg_heatmap geom_tile() +
scale_fill_manual(values=c("WT"=brewer.pal(7,"Reds")[1],
"Heterozygous"=brewer.pal(7,"Reds")[3],
"Homozygous"=brewer.pal(7,"Reds")[6],
"Unknown"="grey50"),"Genotype") +
theme_classic() +
ylab("Mutation")+
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.text.x = element_blank(),
axis.line=element_blank(),
axis.title.x=element_blank(),
axis.ticks.x = element_blank(),
plot.margin=unit(c(0,1,1,1),"cm"))
# Generate clonal abundance barplot
<- ggplot(data = data.frame(clonal_abundance), aes(x = factor(Clone), y = Count)) +
gg_clonal_barplot geom_bar(stat = "identity", aes(fill = Count)) + theme_gray() +
theme_classic()+
ylim(0,max(clonal_abundance$Count)*1.3) +
ylab("Cell Count")+
geom_text(aes(label=Count), position=position_dodge(width=0.9), vjust=-0.25)+
scale_fill_distiller(name = "Value", palette = "Reds", direction = 1) +
theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line=element_blank(),
legend.position = "none",
plot.margin=unit(c(1,1,-0.2,1),"cm"))
# Generate genotype calls by allele
<- t(apply(NGT[,SNP_changes_of_interest],MARGIN=2,function(x){table(factor(x,levels=c(0,1,2,3)))}))
distribution_of_unknowns_by_variant rownames(distribution_of_unknowns_by_variant) <- SNP_INFO[rownames(distribution_of_unknowns_by_variant),"protein"]
colnames(distribution_of_unknowns_by_variant) <- c("WT","Heterozygous","Homozygous","Unknown")
<-melt(distribution_of_unknowns_by_variant)
allele_melted colnames(allele_melted) <- c("Mutant","Genotype","Cells")
$Genotype <- factor(allele_melted$Genotype,levels=rev(c("WT","Heterozygous","Homozygous","Unknown")))
allele_melted$Mutant <-factor(allele_melted$Mutant , levels=as.character(bulk_VAF_order))
allele_melted
#Generate allele barplot
<- ggplot(data = data.frame(allele_melted), aes(x = factor(Mutant), y = Cells)) +
gg_alleles geom_bar(stat = "identity", aes(fill = Genotype),position="stack") + theme_gray() +
theme_minimal()+
ylab("Cell Count")+
scale_fill_manual(values=c("WT"=brewer.pal(7,"Reds")[1],
"Heterozygous"=brewer.pal(7,"Reds")[3],
"Homozygous"=brewer.pal(7,"Reds")[6],
"Unknown"="grey50"),"Genotype") +
theme(axis.text.x=element_text(angle=30,hjust=1,vjust=1))
#Generate depth by alelle by mutant
<- ggplot(summarized,aes(x=Mutant,color=Genotype,y=Depth))+
gg_depth geom_point(position=position_dodge(width=0.4))+
scale_color_manual(values=c("WT"=brewer.pal(7,"Reds")[1],
"Heterozygous"=brewer.pal(7,"Reds")[3],
"Homozygous"=brewer.pal(7,"Reds")[6],
"Unknown"="grey50"),"Genotype") +
theme(axis.text.x=element_text(angle=30,hjust=1,vjust=1),
plot.margin=unit(c(1,1,1,2),"cm"))
# Compute spacing of graphs
<- list(gg_depth,gg_clonal_barplot,gg_alleles,gg_heatmap)
plots <- list()
grobs <- list()
widths for (i in 1:length(plots)){
<- ggplotGrob(plots[[i]])
grobs[[i]] <- grobs[[i]]$widths[2:5]
widths[[i]]
}<- do.call(grid::unit.pmax, widths)
maxwidth for (i in 1:length(grobs)){
$widths[2:5] <- as.list(maxwidth)
grobs[[i]]
}
# Generate plot
<- textGrob(sample, hjust = 0.5, vjust = 0.5, gp = gpar(fontsize = 20))
grob.title pdf(paste(output_folder,"Graphs/",sample,".pdf",sep=""), width = 16, height = 8) # Open a new pdf file
grid.arrange(grobs=grobs, ncol = 2, top = grob.title)
graphics.off()
if(diversity==TRUE){
<- clonal_abundance
clonal_abundance_mat $Clone <- rownames(clonal_abundance)
clonal_abundance$Clone <- rownames(clonal_abundance_mat)
clonal_abundance_mat<- data.table(clonal_abundance,key="Count")
clonal_abundance_count <- diversity(clonal_abundance_mat[,"Count"])
shannon write.csv2(shannon,paste(output_folder,"Diversity_score/",sample,".csv",sep=""))
}
}
<-function(sample){
stats_output#Load in the data
if(any(grepl("VARIANT_SELECTION_LAM",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/",sample,"_VARIANT_SELECTION_LAM.csv",sep=""))
LAM_VARIANTS_MAT <- as.character((LAM_VARIANTS_MAT %>% filter(Include=="Include")%>%select(Mutant))[,1])
LAM_VARIANTS_SNP <- as.character((LAM_VARIANTS_MAT %>% filter(Include=="Include")%>%select(Protein))[,1])
LAM_VARIANTS_Protein else {
} print("No variant selection performed!")
return("next")
}<- read.csv(paste("./",sample,"/","AF.csv",sep="")) # - variant allele frequency
AF <- read.csv(paste("./",sample,"/","DP.csv",sep="")) # - read depth
DP <- read.csv(paste("./",sample,"/","GQ.csv",sep="")) # - quality scores
GQ <- read.csv(paste("./",sample,"/","NGT.csv",sep="")) # - genotype call converted to categorical (0-reference, 1-heterozygous mutation, 2-homozygous mutation, 3-unknown)
NGT
if(any(grepl("SNP_INFO.csv",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/","SNP_INFO.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(AF)[-c(1:2)] # - renames SNP info to have the variants as rownames
c("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cdna"]!="",paste(x["gene"],x["cdna"],sep="_"),
ifelse(x["amplicon"]%in%c("MSK_RL_AMP54", "MSK_RL_AMP55", "MSK_RL_AMP56","MSK_RL_AMP57","MSK_RL_AMP58"),paste("FLT3_INS",x["POS"],x["ALT"],sep="_"),"")))
# - variant metadata
})else {
} <- read.csv(paste("./",sample,"/","Variants.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(AF)[-c(1:2)] # - renames SNP info to have the variants as rownames
colnames(SNP_INFO)[3] <- "protein"
$cDNA <- as.character(SNP_INFO$cDNA)
SNP_INFO$protein <- as.character(SNP_INFO$protein)
SNP_INFO$Variant <- as.character(SNP_INFO$Variant)
SNP_INFO$Gene <- as.character(SNP_INFO$Gene)
SNP_INFOc("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cDNA"]!="",paste(x["Gene"],x["cDNA"],sep="_"),
ifelse(grepl("^chr13",x["Variant"]),paste("FLT3_INS",x["Variant"],sep="_"),"")))
# - variant metadata
})
}
<- LAM_VARIANTS_SNP
SNP_changes_of_interest <- LAM_VARIANTS_Protein
protein_changes_of_interest <- data.frame("Mutant"=SNP_changes_of_interest,"Protein"=protein_changes_of_interest)
SNP_protein_key
# Cell and clone selection
<- apply(NGT[,SNP_changes_of_interest],MARGIN=2,table)
distribution_of_unknowns_by_variant #print(distribution_of_unknowns_by_variant)
<-NGT[apply(NGT[,SNP_changes_of_interest],MARGIN=1,FUN=function(x){sum(x==3)>0}),"Cell"]
cells_with_unknown<-NGT[!NGT$Cell%in%cells_with_unknown,SNP_changes_of_interest]
matrix_of_interest<-SNP_INFO[colnames(matrix_of_interest)[order(colSums(matrix_of_interest),decreasing=TRUE)],"protein"]
bulk_VAF_order<- bulk_VAF_order[!duplicated(bulk_VAF_order)]
bulk_VAF_order $Clone <- apply(matrix_of_interest,1,function(x){paste(x,sep="_",collapse="_")})
matrix_of_interest
# Aggregate data to get clone counts and architecture.
<-matrix_of_interest[!duplicated(matrix_of_interest),]
dedup<- data.frame("Count"=as.matrix(table(matrix_of_interest$Clone)))
clonal_abundance $Clone <- factor(rownames(clonal_abundance),levels=rev(c(rownames(clonal_abundance))))
clonal_abundance$Count <- as.numeric(clonal_abundance$Count)
clonal_abundance$Dominance <- apply(clonal_abundance,1,function(x){
clonal_abundanceas.numeric(x["Count"])/sum(clonal_abundance["Count"]) ) / (max(as.numeric(clonal_abundance[,"Count"]))/sum(clonal_abundance["Count"]))
(
})$Mutation_count <- apply(clonal_abundance,1,function(x){
clonal_abundancesum(as.numeric(strsplit( as.character(x[2]),split="_")[[1]]))
})<- diversity(clonal_abundance[,"Count"])
shannon print(shannon)
<- dedup%>%select(-Clone)
matrix_output <- t(matrix_output)
matrix_output_t rownames(matrix_output_t) <- 0:(dim(matrix_output_t)[[1]]-1)
write.table(shannon,paste0(output_folder,"Diversity_score/",sample,".txt"),sep="\t",row.names=FALSE)
write.table(matrix_output_t,paste0(output_folder,"Genotype_matrix/",sample,".txt"),sep=" ",row.names=TRUE,quote=FALSE,col.names = FALSE)
write.table(clonal_abundance,paste0(output_folder,"Summary_mat/",sample,".txt"),sep="\t",row.names=FALSE)
}
<- function(sample,nrun){
permute_data #Load in the data
if(any(grepl("VARIANT_SELECTION_LAM",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/",sample,"_VARIANT_SELECTION_LAM.csv",sep=""))
LAM_VARIANTS_MAT <- as.character((LAM_VARIANTS_MAT %>% filter(Include=="Include")%>%select(Mutant))[,1])
LAM_VARIANTS_SNP <- as.character((LAM_VARIANTS_MAT %>% filter(Include=="Include")%>%select(Protein))[,1])
LAM_VARIANTS_Protein else {
} print("No variant selection performed!")
return("next")
}<- read.csv(paste("./",sample,"/","AF.csv",sep="")) # - variant allele frequency
AF <- read.csv(paste("./",sample,"/","DP.csv",sep="")) # - read depth
DP <- read.csv(paste("./",sample,"/","GQ.csv",sep="")) # - quality scores
GQ <- read.csv(paste("./",sample,"/","NGT.csv",sep="")) # - genotype call converted to categorical (0-reference, 1-heterozygous mutation, 2-homozygous mutation, 3-unknown)
NGT
if(any(grepl("SNP_INFO.csv",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/","SNP_INFO.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(AF)[-c(1:2)] # - renames SNP info to have the variants as rownames
c("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cdna"]!="",paste(x["gene"],x["cdna"],sep="_"),
ifelse(x["amplicon"]%in%c("MSK_RL_AMP54", "MSK_RL_AMP55", "MSK_RL_AMP56","MSK_RL_AMP57","MSK_RL_AMP58"),paste("FLT3_INS",x["POS"],x["ALT"],sep="_"),"")))
# - variant metadata
})else {
} <- read.csv(paste("./",sample,"/","Variants.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(AF)[-c(1:2)] # - renames SNP info to have the variants as rownames
colnames(SNP_INFO)[3] <- "protein"
$cDNA <- as.character(SNP_INFO$cDNA)
SNP_INFO$protein <- as.character(SNP_INFO$protein)
SNP_INFO$Variant <- as.character(SNP_INFO$Variant)
SNP_INFO$Gene <- as.character(SNP_INFO$Gene)
SNP_INFOc("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cDNA"]!="",paste(x["Gene"],x["cDNA"],sep="_"),
ifelse(grepl("^chr13",x["Variant"]),paste("FLT3_INS",x["Variant"],sep="_"),"")))
# - variant metadata
})
}
<- LAM_VARIANTS_SNP
SNP_changes_of_interest <- LAM_VARIANTS_Protein
protein_changes_of_interest <- data.frame("Mutant"=SNP_changes_of_interest,"Protein"=protein_changes_of_interest)
SNP_protein_key
# Cell and clone selection
<- apply(NGT[,SNP_changes_of_interest],MARGIN=2,table)
distribution_of_unknowns_by_variant #print(distribution_of_unknowns_by_variant)
<-NGT[apply(NGT[,SNP_changes_of_interest],MARGIN=1,FUN=function(x){sum(x==3)>0}),"Cell"]
cells_with_unknown<-NGT[!NGT$Cell%in%cells_with_unknown,SNP_changes_of_interest]
matrix_of_interest<-SNP_INFO[colnames(matrix_of_interest)[order(colSums(matrix_of_interest),decreasing=TRUE)],"protein"]
bulk_VAF_order<- bulk_VAF_order[!duplicated(bulk_VAF_order)]
bulk_VAF_order $Clone <- apply(matrix_of_interest,1,function(x){paste(x,sep="_",collapse="_")})
matrix_of_interest
# Aggregate data to get clone counts and architecture.
<-matrix_of_interest[!duplicated(matrix_of_interest),]
dedup<- data.frame("Count"=as.matrix(table(matrix_of_interest$Clone)))
clonal_abundance $Clone <- factor(rownames(clonal_abundance),levels=rev(c(rownames(clonal_abundance))))
clonal_abundance$Count <- as.numeric(clonal_abundance$Count)
clonal_abundance
<- melt(dedup)
clonal_architecture colnames(clonal_architecture) <- c("Clone","Mutant","Genotype")
$Genotype <- ifelse(clonal_architecture$Genotype==3,NA,
clonal_architectureifelse(clonal_architecture$Genotype==0,"WT",
ifelse(clonal_architecture$Genotype==1,"Heterozygous",
ifelse(clonal_architecture$Genotype==2,"Homozygous",clonal_architecture$Genotype))))
$Genotype <- factor(clonal_architecture$Genotype ,levels=c("WT","Heterozygous","Homozygous"))
clonal_architecture
<- unlist(list(1:nrun))
results_holder <-lapply(results_holder,function(x){
randomization_listapply(matrix_of_interest[,1:length(colnames(matrix_of_interest))-1],2,function(x){
<- sample(x,length(x),replace=FALSE)
y
})})
#randomization_list<-plyr::alply(randomization_array,3)
# names(randomization_list) <- NULL
<-suppressMessages(lapply(randomization_list, function(randomization){
empirical_distribution$Clone <- apply(randomization,1,function(x){paste(x,sep="_",collapse="_")})
randomization<-matrix_of_interest[!duplicated(randomization),]
loop_dedup<- melt(loop_dedup)
loop_clonal_architecture colnames(loop_clonal_architecture) <- c("Clone","Mutant","Genotype")
$Genotype <- ifelse(loop_clonal_architecture$Genotype==3,NA,
loop_clonal_architectureifelse(loop_clonal_architecture$Genotype==0,"WT",
ifelse(loop_clonal_architecture$Genotype==1,"Heterozygous",
ifelse(loop_clonal_architecture$Genotype==2,"Homozygous",clonal_architecture$Genotype))))
$Genotype <- factor(loop_clonal_architecture$Genotype ,levels=c("WT","Heterozygous","Homozygous"))
loop_clonal_architecture<- data.frame("Count"=as.matrix(table(randomization$Clone)))
loop_clonal_abundance $Clone <- rownames(loop_clonal_abundance)
loop_clonal_abundancereturn(data.frame(loop_clonal_abundance))
}))
<-empirical_distribution%>%purrr::reduce(dplyr::full_join,by="Clone")
empirical_distribution_matcolnames(empirical_distribution_mat)<-1:length(colnames(empirical_distribution_mat))
rownames(empirical_distribution_mat) <- empirical_distribution_mat[,2]
<- empirical_distribution_mat[,-2]
empirical_distribution_mat is.na(empirical_distribution_mat)]<-0
empirical_distribution_mat[
$Clone <- as.character(clonal_abundance$Clone)
clonal_abundance$Count <- as.numeric(clonal_abundance$Count)
clonal_abundance$pvalue <-apply(clonal_abundance,1,function(x){
clonal_abundance1- sum(as.numeric(x[["Count"]])>empirical_distribution_mat[x[["Clone"]],])/nrun
})
#print(dedup)
colnames(clonal_abundance)[2] <- paste(colnames(dedup)[1:length(SNP_changes_of_interest)],sep="_",collapse="__")
<- clonal_abundance[order(clonal_abundance[,"Count"],decreasing = TRUE),]
clonal_abundance $Dominance <- apply(clonal_abundance,1,function(x){
clonal_abundanceas.numeric(x["Count"])/sum(clonal_abundance["Count"]) ) / (max(as.numeric(clonal_abundance[,"Count"]))/sum(clonal_abundance["Count"]))
(
})$Poisson <- apply(clonal_abundance,1,function(x){
clonal_abundancepoisson.test(as.numeric(x["Count"]),mean(as.numeric(clonal_abundance$Count)),alternative="greater")$p.value
})$Mutation_count <- apply(clonal_abundance,1,function(x){
clonal_abundancesum(as.numeric(strsplit( as.character(x[2]),split="_")[[1]]))
})
write.table(clonal_abundance,paste0(output_folder,"Summary_mat/",sample,".txt"),sep="\t",row.names=FALSE)
}
<- function(sample){
replot_clonal_selection
if(any(grepl("VARIANT_SELECTION_LAM",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/",sample,"_VARIANT_SELECTION_LAM.csv",sep=""))
LAM_VARIANTS_MAT <- as.character((LAM_VARIANTS_MAT %>% filter(Include=="Include")%>%select(Mutant))[,1])
LAM_VARIANTS_SNP <- as.character((LAM_VARIANTS_MAT %>% filter(Include=="Include")%>%select(Protein))[,1])
LAM_VARIANTS_Protein else {
} print("No variant selection performed!")
return("next")
}<- read.csv(paste("./",sample,"/","DP.csv",sep="")) # - read depth
DP <- read.csv(paste("./",sample,"/","NGT.csv",sep="")) # - genotype call converted to categorical (0-reference, 1-heterozygous mutation, 2-homozygous mutation, 3-unknown)
NGT
if(any(grepl("SNP_INFO.csv",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/","SNP_INFO.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(NGT)[-c(1:2)] # - renames SNP info to have the variants as rownames
c("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cdna"]!="",paste(x["gene"],x["cdna"],sep="_"),
ifelse(x["amplicon"]%in%c("MSK_RL_AMP54", "MSK_RL_AMP55", "MSK_RL_AMP56","MSK_RL_AMP57","MSK_RL_AMP58"),paste("FLT3_INS",x["POS"],x["ALT"],sep="_"),"")))
# - variant metadata
})else {
} <- read.csv(paste("./",sample,"/","Variants.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(NGT)[-c(1:2)] # - renames SNP info to have the variants as rownames
colnames(SNP_INFO)[3] <- "protein"
$cDNA <- as.character(SNP_INFO$cDNA)
SNP_INFO$protein <- as.character(SNP_INFO$protein)
SNP_INFO$Variant <- as.character(SNP_INFO$Variant)
SNP_INFO$Gene <- as.character(SNP_INFO$Gene)
SNP_INFOc("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cDNA"]!="",paste(x["Gene"],x["cDNA"],sep="_"),
ifelse(grepl("^chr13",x["Variant"]),paste("FLT3_INS",x["Variant"],sep="_"),"")))
# - variant metadata
})
}
<- LAM_VARIANTS_SNP
SNP_changes_of_interest <- LAM_VARIANTS_Protein
protein_changes_of_interest <- data.frame("Mutant"=SNP_changes_of_interest,"Protein"=protein_changes_of_interest)
SNP_protein_key rownames(SNP_protein_key) <- SNP_protein_key$Mutant
# Cell and clone selection
<- apply(NGT[,SNP_changes_of_interest],MARGIN=2,table)
distribution_of_unknowns_by_variant #print(distribution_of_unknowns_by_variant)
<-NGT[apply(NGT[,SNP_changes_of_interest],MARGIN=1,FUN=function(x){sum(x==3)>0}),"Cell"]
cells_with_unknown<-NGT[!NGT$Cell%in%cells_with_unknown,SNP_changes_of_interest]
matrix_of_interest<-SNP_INFO[colnames(matrix_of_interest)[order(colSums(matrix_of_interest),decreasing=TRUE)],"protein"]
bulk_VAF_order<- bulk_VAF_order[!duplicated(bulk_VAF_order)]
bulk_VAF_order $Clone <- apply(matrix_of_interest,1,function(x){paste(x,sep="_",collapse="_")})
matrix_of_interest
<-matrix_of_interest[!duplicated(matrix_of_interest),]
dedupcolnames(dedup) <- SNP_protein_key[colnames(dedup),"Protein"]
<- melt(dedup)
clonal_architecture colnames(clonal_architecture) <- c("Clone","Mutant","Genotype")
$Genotype <- ifelse(clonal_architecture$Genotype==3,NA,
clonal_architectureifelse(clonal_architecture$Genotype==0,"WT",
ifelse(clonal_architecture$Genotype==1,"Heterozygous",
ifelse(clonal_architecture$Genotype==2,"Homozygous",clonal_architecture$Genotype))))
$Genotype <- factor(clonal_architecture$Genotype ,levels=c("WT","Heterozygous","Homozygous"))
clonal_architecture<- data.frame("Count"=as.matrix(table(matrix_of_interest$Clone)))
clonal_abundance $Clone <- rownames(clonal_abundance)
clonal_abundance<- data.table(clonal_abundance,key="Count")
clonal_abundance $Clone <- factor(clonal_abundance$Clone,levels=rev(c(clonal_abundance$Clone)))
clonal_abundance$Clone <- factor(as.character(clonal_architecture$Clone),levels=rev(as.character(clonal_abundance$Clone)))
clonal_architecture$Mutant <-factor(clonal_architecture$Mutant , levels=rev(as.character(bulk_VAF_order)))
clonal_architecture
<- read.delim(paste0("/Volumes/LevineLab/Levine Lab/Bobby/Collaborations/MissionBio/08_08_19/Summary_mat/",sample,".txt"),sep="\t")
clones_import colnames(clones_import)[2] <- "Clone"
<- clones_import %>% filter(pvalue<0.1|Poisson<0.05)%>%select(Clone)
select_clones_full <- clones_import %>% filter(pvalue<0.1|Poisson<0.05)%>%filter(Dominance>0.05)%>%select(Clone)
select_clones_dominant
if(length(select_clones_dominant[,1])>3){
<- clonal_architecture%>%filter(Clone%in%select_clones_dominant[,1])
clonal_architecture_subset <- clonal_abundance%>%filter(Clone%in%select_clones_dominant[,1])
clonal_abundance_subset <- "VAF Subset"
name_var else if(length(select_clones_full[,1])>20){
} print("Complex clonal picture, skipping")
return(next)
else if(length(select_clones_full[,1])<2){
} print("Simple clonal picture, plotting all clones")
<- clonal_architecture
clonal_architecture_subset <- clonal_abundance
clonal_abundance_subset <- "All Clones"
name_var else {
} <- clonal_architecture%>%filter(Clone%in%select_clones_full[,1])
clonal_architecture_subset <- clonal_abundance%>%filter(Clone%in%select_clones_full[,1])
clonal_abundance_subset <- "Full"
name_var
}# Generate clonal architecture heatmap
<- ggplot(data = clonal_architecture_subset, aes(x = Clone, y = factor(Mutant), fill = Genotype)) +
gg_heatmap geom_tile() +
scale_fill_manual(values=c("WT"=brewer.pal(7,"Reds")[1],
"Heterozygous"=brewer.pal(7,"Reds")[3],
"Homozygous"=brewer.pal(7,"Reds")[6],
"Unknown"="grey50"),"Genotype") +
theme_classic() +
ylab("Mutation")+
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.text.x = element_blank(),
axis.line=element_blank(),
axis.title.x=element_blank(),
axis.ticks.x = element_blank(),
plot.margin=unit(c(0,1,1,1),"cm"))
# Generate clonal abundance barplot
<- ggplot(data = data.frame(clonal_abundance_subset), aes(x = factor(Clone), y = Count)) +
gg_clonal_barplot geom_bar(stat = "identity", aes(fill = Count)) + theme_gray() +
theme_classic()+
ylim(0,max(clonal_abundance$Count)*1.3) +
ylab("Cell Count")+
geom_text(aes(label=Count), position=position_dodge(width=0.9), vjust=-0.25)+
scale_fill_distiller(name = "Value", palette = "Reds", direction = 1) +
theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line=element_blank(),
legend.position = "none",
plot.margin=unit(c(1,1,-0.2,1),"cm"))
# Compute spacing of graphs
<- list(gg_clonal_barplot,gg_heatmap)
plots <- list()
grobs <- list()
widths for (i in 1:length(plots)){
<- ggplotGrob(plots[[i]])
grobs[[i]] <- grobs[[i]]$widths[2:5]
widths[[i]]
}<- do.call(grid::unit.pmax, widths)
maxwidth for (i in 1:length(grobs)){
$widths[2:5] <- as.list(maxwidth)
grobs[[i]]
}
# Generate plot
<- textGrob(paste(sample,"-",name_var), hjust = 0.5, vjust = 0.5, gp = gpar(fontsize = 20))
grob.title pdf(paste(output_folder,"Graphs_significant/",sample,".pdf",sep=""), width = 16, height = 8) # Open a new pdf file
grid.arrange(grobs=grobs, ncol = 1, top = grob.title)
graphics.off()
}
<-function(sample,output_folder,clonal_abundance,clonal_architecture,clone_cell_count,shrink)
plot_clonal_heatmap_and_barplot
{#clonal_abundance_subset <-data.frame( clonal_abundance[[sample]])%>%filter(Count>=5)
#clonal_architecture_subset <- data.frame(clonal_architecture[[sample]])%>%
# filter(Clone%in%as.character(clonal_abundance_subset$Clone))
<-final_sample_summary[[sample]]$Clones#clonal_abundance
clonal_abundance_subset$Clone<-factor(final_sample_summary[[sample]]$Clones[,"Clone"],levels=rev(c(final_sample_summary[[sample]]$Clones[,"Clone"])))#clonal_abundance
clonal_abundance_subset<-final_sample_summary[[sample]]$Architecture%>%
clonal_architecture_subsetfilter(Clone%in%as.character(clonal_abundance_subset$Clone))
$Clone <- factor(clonal_architecture_subset$Clone, levels=levels(clonal_abundance_subset$Clone))
clonal_architecture_subset<- ggplot(data = clonal_architecture_subset, aes(x = Clone, y = factor(Mutant,levels=rev(levels(factor(Mutant)))), fill = Genotype)) +
gg_heatmap geom_tile() +# scale_y_discrete(limits = rev(levels(Mutant)))+
scale_fill_manual(values=c("WT"=brewer.pal(7,"Reds")[1],
"Heterozygous"=brewer.pal(7,"Reds")[3],
"Homozygous"=brewer.pal(7,"Reds")[6],
"Unknown"="grey50"),"Genotype") +
theme_classic(base_size=6) +
ylab("Mutation")+
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.text.x = element_blank(),
axis.line=element_blank(),
axis.title.x=element_blank(),
axis.ticks.x = element_blank(),
plot.margin=unit(c(0,1,1,1),"cm"))
# Generate clonal abundance barplot
<- ggplot(data = data.frame(clonal_abundance_subset), aes(x = Clone, y = Count)) +
gg_clonal_barplot geom_bar(stat = "identity", aes(fill = Count)) + theme_gray() +
theme_classic(base_size=6)+
ylim(0,max(clonal_abundance_subset$Count)*1.3) +
ylab("Cell Count")+
geom_text(aes(label=Count), position=position_dodge(width=0.9), vjust=-0.25,size=1)+
scale_fill_distiller(name = "Value", palette = "Reds", direction = 1) +
theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line=element_blank(),
legend.position = "none",
plot.margin=unit(c(1,1,-0.2,1),"cm"))
<- textGrob(paste(sample,"-"), hjust= 0.5, vjust = 0.5, gp = gpar(fontsize = 12))
grob.title <-plot_grid(grob.title,gg_clonal_barplot,gg_heatmap,ncol=1,align="hv",axis="l",rel_heights = c(0.1,1,0.75))
final_plot
save_plot(paste(output_folder,sample,".pdf",sep=""),final_plot, ncol=1) # Open a new pdf file
}
<- function(mut_mat){
generate_and_plot_cooccurence <- cooccur(mat=t(mut_mat),type="spp_site",only_effects = FALSE,eff_matrix=TRUE,thresh=FALSE,eff_standard=FALSE,spp_names=TRUE)$results
cooccur_mat $score<- ifelse(cooccur_mat[,"p_lt"]<=0.05,-1,ifelse(cooccur_mat[,"p_gt"]<=0.05,1,0))
cooccur_mat<- rbind(cooccur_mat[,c("sp1_name","sp2_name","score")],c(setdiff(cooccur_mat$sp2_name,cooccur_mat$sp1_name),setdiff(cooccur_mat$sp1_name,cooccur_mat$sp2_name),0))
cooccur_mat_subset <- dcast(cooccur_mat_subset, sp1_name ~ sp2_name, value.var="score")
cooccur_data_mat rownames(cooccur_data_mat) <- cooccur_data_mat[,1]
<-cooccur_data_mat[,-1]
cooccur_data_mat2is.na(cooccur_data_mat2)]<-0
cooccur_data_mat2[lower.tri(cooccur_data_mat2)]<-NA
cooccur_data_mat2[$Gene <- rownames(cooccur_data_mat2)
cooccur_data_mat2<- na.omit(melt(cooccur_data_mat2, 'Gene', variable_name='Gene2'))
cooccur_data_mat_melt $variable <- factor(cooccur_data_mat_melt$variable, levels=rev(levels(cooccur_data_mat_melt$variable)))
cooccur_data_mat_melt
# Triangle heatmap to compare cohorts
<-ggplot(cooccur_data_mat_melt, aes(Gene, variable))+
grob_corrplotgeom_tile(aes(fill = factor(value)), color='grey90') +
scale_fill_manual(values=c("-1"="firebrick3","0"="white","1"="steelblue2"),"Correlation",
labels=c("Mutually Exclusive","Not Significant","Mutually Inclusive"))+
theme_classic(base_size=10)+xlab("")+ylab("")+
theme(axis.text.x=element_text(angle=45,hjust=1,vjust=1),
axis.line = element_blank(),
legend.position = c(0.8,1),
legend.justification = c(1, 1),
legend.direction = "vertical")+
theme(legend.key.size = unit(0.5,"line"))
return(list("plot"=grob_corrplot,
"data"=cooccur_mat))
}
<- function(sample){
diversity_from_bulk_VAF if(any(grepl("VARIANT_SELECTION_LAM",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/",sample,"_VARIANT_SELECTION_LAM.csv",sep=""))
LAM_VARIANTS_MAT <- as.character((LAM_VARIANTS_MAT %>% filter(Include=="Include")%>%select(Mutant))[,1])
LAM_VARIANTS_SNP <- as.character((LAM_VARIANTS_MAT %>% filter(Include=="Include")%>%select(Protein))[,1])
LAM_VARIANTS_Protein else {
} print("No variant selection performed!")
return("next")
}#AF <- read.csv(paste("./",sample,"/","AF.csv",sep="")) # - variant allele frequency
#DP <- read.csv(paste("./",sample,"/","DP.csv",sep="")) # - read depth
#GQ <- read.csv(paste("./",sample,"/","GQ.csv",sep="")) # - quality scores
<- read.csv(paste("./",sample,"/","NGT.csv",sep="")) # - genotype call converted to categorical (0-reference, 1-heterozygous mutation, 2-homozygous mutation, 3-unknown)
NGT
if(any(grepl("SNP_INFO.csv",list.files(paste("./",sample,"/",sep=""))))){
<- read.csv(paste("./",sample,"/","SNP_INFO.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(NGT)[-c(1:2)] # - renames SNP info to have the variants as rownames
c("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cdna"]!="",paste(x["gene"],x["cdna"],sep="_"),
ifelse(x["amplicon"]%in%c("MSK_RL_AMP54", "MSK_RL_AMP55", "MSK_RL_AMP56","MSK_RL_AMP57","MSK_RL_AMP58"),paste("FLT3_INS",x["POS"],x["ALT"],sep="_"),"")))
# - variant metadata
})else {
} <- read.csv(paste("./",sample,"/","Variants.csv",sep=""))
SNP_INFO rownames(SNP_INFO) <-colnames(NGT)[-c(1:2)] # - renames SNP info to have the variants as rownames
colnames(SNP_INFO)[3] <- "protein"
$cDNA <- as.character(SNP_INFO$cDNA)
SNP_INFO$protein <- as.character(SNP_INFO$protein)
SNP_INFO$Variant <- as.character(SNP_INFO$Variant)
SNP_INFO$Gene <- as.character(SNP_INFO$Gene)
SNP_INFOc("protein")] <- apply(SNP_INFO,1,function(x){
SNP_INFO[,ifelse(x["protein"]!="",x["protein"],
ifelse(x["cDNA"]!="",paste(x["Gene"],x["cDNA"],sep="_"),
ifelse(grepl("^chr13",x["Variant"]),paste("FLT3_INS",x["Variant"],sep="_"),"")))
# - variant metadata
})
}
<- LAM_VARIANTS_SNP
SNP_changes_of_interest <- LAM_VARIANTS_Protein
protein_changes_of_interest <- data.frame("Mutant"=SNP_changes_of_interest,"Protein"=protein_changes_of_interest)
SNP_protein_key
# Cell and clone selection
<- apply(NGT[,SNP_changes_of_interest],MARGIN=2,table)
distribution_of_unknowns_by_variant #print(distribution_of_unknowns_by_variant)
<-NGT[apply(NGT[,SNP_changes_of_interest],MARGIN=1,FUN=function(x){sum(x==3)>0}),"Cell"]
cells_with_unknown<-NGT[!NGT$Cell%in%cells_with_unknown,SNP_changes_of_interest]
matrix_of_interest<-SNP_INFO[colnames(matrix_of_interest)[order(colSums(matrix_of_interest),decreasing=TRUE)],"protein"]
bulk_VAF_order<- bulk_VAF_order[!duplicated(bulk_VAF_order)]
bulk_VAF_order $Clone <- apply(matrix_of_interest,1,function(x){paste(x,sep="_",collapse="_")})
matrix_of_interest
<-diversity(colSums(matrix_of_interest[colnames(matrix_of_interest)!="Clone"])/length(rownames(matrix_of_interest)))
outputwrite.table(output,paste0(output_folder,"Diversity_score_faux_VAF/",sample,".txt"),sep="\t")
}
## Old Functions
<- function(sample){
plot_variants_AF_and_DP
#Load in the data
<- read.csv(paste("./",sample,"/","AF.csv",sep="")) # - variant allele frequency
AF <- read.csv(paste("./",sample,"/","DP.csv",sep="")) # - read depth
DP <- read.csv(paste("./",sample,"/","GQ.csv",sep="")) # - quality scores
GQ <- read.csv(paste("./",sample,"/","NGT.csv",sep="")) # - genotype call converted to categorical (0-reference, 1-heterozygous mutation, 2-homozygous mutation, 3-unknown)
NGT <- read.csv(paste("./",sample,"/","SNP_INFO.csv",sep="")) # - variant metadata
SNP_INFO rownames(SNP_INFO) <-colnames(AF)[-c(1:2)] # - renames SNP info to have the variants as rownames
<-inner_join(AF %>% unite("Sample_cell",c("Sample","Cell")) %>%melt("Sample_cell"),
AF_NGT_Melt%>% unite("Sample_cell",c("Sample","Cell")) %>%melt("Sample_cell"),
NGT by=c("Sample_cell","variable"))
colnames(AF_NGT_Melt) <- c("Cell","Variant","AF","NGT")
<- ggplot(AF_NGT_Melt,aes(x=as.factor(NGT),y=AF,color=NGT))+ geom_quasirandom()+facet_wrap(~Variant,ncol=3)
gg_AF_NGT
<-inner_join(DP %>% unite("Sample_cell",c("Sample","Cell")) %>%melt("Sample_cell"),
DP_NGT_Melt%>% unite("Sample_cell",c("Sample","Cell")) %>%melt("Sample_cell"),
NGT by=c("Sample_cell","variable"))
colnames(DP_NGT_Melt) <- c("Cell","Variant","DP","NGT")
<- ggplot(DP_NGT_Melt,aes(x=as.factor(NGT),y=DP,fill=NGT))+geom_boxplot()+facet_wrap(~Variant,ncol=3,scale="free_y")
gg_DP_NGT
ggsave(paste0("./",sample,"/",sample,"_DP_NGT.pdf"),gg_DP_NGT,width=10,height=40)
ggsave(paste0("./",sample,"/",sample,"_AF_NGT.pdf"),gg_AF_NGT,width=10,height=40)
}
<-function(sample,output_folder,melted_protein_mat,clonal_abundance,clonal_architecture,clone_cell_count,shrink)
plot_COMPASS_data
{<-melted_protein_mat[[sample]]
melted_protein_mat <-data.frame( clonal_abundance[[sample]])#%>%filter(Count>=5)
clonal_abundance_subset <- data.frame(clonal_architecture[[sample]])#%>%
clonal_architecture_subset # filter(Clone%in%as.character(clonal_abundance_subset$Clone))
<-unique(as.character(clonal_architecture_subset[clonal_architecture_subset$Genotype%in%c("Heterozygous","Homozygous"),"Mutant"]))
genes_to_display
if(shrink==TRUE){
<-clonal_architecture_subset%>%filter(Mutant%in%genes_to_display)
clonal_architecture_subset
}$Clone <- factor(clonal_architecture_subset$Clone, levels=levels(clonal_abundance_subset$Clone))
clonal_architecture_subset<-melted_protein_mat%>%filter(Clone%in%as.character(clonal_architecture_subset$Clone))
melted_protein_mat_subset$Clone <-factor(melted_protein_mat_subset$Clone, levels=levels(clonal_abundance_subset$Clone))
melted_protein_mat_subset<-ggplot(melted_protein_mat_subset, aes(y=variable, x=(Clone))) + geom_tile(aes(fill = value),colour = "white") +
gg_protein_heatmapscale_fill_distiller(palette = "PRGn")+
theme_minimal(base_size=6) +
theme( axis.text.x = element_blank(), axis.title.x = element_blank(),
legend.position = "right",legend.direction = "vertical",
axis.ticks.x = element_blank(),
plot.margin=unit(c(0,0,0,0),"cm"))
<- ggplot(data = clonal_architecture_subset, aes(x = Clone, y = factor(Mutant,levels=rev(levels(factor(Mutant)))), fill = Genotype)) +
gg_heatmap geom_tile() +# scale_y_discrete(limits = rev(levels(Mutant)))+
scale_fill_manual(values=c("WT"=brewer.pal(7,"Reds")[1],
"Heterozygous"=brewer.pal(7,"Reds")[3],
"Homozygous"=brewer.pal(7,"Reds")[6],
"Unknown"="grey50"),"Genotype") +
theme_classic(base_size=6) +
ylab("Mutation")+
theme(legend.position = "right", legend.direction = "vertical",
axis.text.x = element_blank(),
axis.line=element_blank(),
axis.title.x=element_blank(),
axis.ticks.x = element_blank(),
plot.margin=unit(c(0,0,0,0),"cm"))
<- alphabet()[1:length(unique(clonal_abundance_subset$Clone))]
clone_colors names(clone_colors) <-levels(clonal_abundance_subset$Clone)
# Generate clonal abundance barplot
<- ggplot(data = data.frame(clonal_abundance_subset), aes(x = Clone, y = Count)) +
gg_clonal_barplot geom_bar(stat = "identity", aes(fill = Clone)) + theme_gray() +
theme_classic(base_size=6)+
ylim(0,max(clonal_abundance_subset$Count)*1.3) +
ylab("Cell Count")+
geom_text(aes(label=Count), position=position_dodge(width=0.9), vjust=-0.25,size=1)+
scale_fill_manual(values=clone_colors)+
theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line=element_blank(),
legend.position = "none",
plot.margin=unit(c(0,0,0,0),"cm"))
<- textGrob(paste(sample,"-"), hjust= 0.5, vjust = 0.5, gp = gpar(fontsize = 12))
grob.title <-plot_grid(gg_clonal_barplot,addSmallLegend(gg_heatmap),addSmallLegend(gg_protein_heatmap),ncol=1,align="hv",axis="lrtb",rel_heights = c(1,(length(genes_to_display)*0.15),1))
final_plot
save_plot(paste(output_folder,sample,".pdf",sep=""),final_plot, ncol=1) # Open a new pdf file
}
<- function(myPlot, pointSize = 3, textSize = 8, spaceLegend = 0.5) {
addSmallLegend +
myPlot guides(shape = guide_legend(override.aes = list(size = pointSize)),
color = guide_legend(override.aes = list(size = pointSize))) +
theme(legend.title = element_text(size = textSize),
legend.text = element_text(size = textSize),
legend.key.size = unit(spaceLegend, "lines"))
}
<-function(Known_mat,weights){
create_reward_matrix_deprecatedset.seed(68864)
names(weights) <- apply(Known_mat,2,function(x){paste(x,sep="_",collapse="_")})
<- 2
num_type <- nrow(Known_mat);
num_mutations <- ncol(Known_mat)
num_clones <- num_type^num_mutations
num_states
<-data.frame(expand.grid(rep(list(0:num_type), num_mutations)))
states<-expand.grid(apply(states[,1:num_mutations],1,function(x){paste(x,collapse="_",sep="_")}),
state_interactionsapply(states[,1:num_mutations],1,function(x){paste(x,collapse="_",sep="_")}))
$Reward_states<-ifelse(apply(states,1,function(x){
statesany(apply(Known_mat,2,function(y){ all(x==t(y))}))
2,NA)
}),$Clone <- apply(states[,1:num_mutations],1,function(x){paste(x,collapse="_",sep="_")})
states
$possible<-ifelse(apply(state_interactions,1,function(x){
state_interactions<-as.numeric(do.call(cbind,strsplit(as.character(x[1]),split="_")))
A<-as.numeric(do.call(cbind,strsplit(as.character(x[2]),split="_")))
Bsum(abs(A-B))<=1
2,NA)
}),
<-acast(Var1~Var2,value.var="possible",data=data.frame(state_interactions,
dat"value"=-1))
diag(dat)<-0
lowerTriangle(dat)<-NA
<-do.call(cbind,lapply(colnames(dat),function(Clone){
test_datif(Clone%in%names(weights)){
<-ifelse(dat[,Clone]==2,weights[Clone],dat[,Clone])
outputelse{
} <-dat[,Clone]
output
}return(output)
}))colnames(test_dat) <-rownames(test_dat)
<-graph.adjacency(test_dat,mode="directed",weighted=TRUE)
graph<- get.data.frame(graph)%>% drop_na()
graph_mat
<-make_ego_graph(graph_from_data_frame(graph_mat,directed=TRUE), order=1,nodes=names(weights), mode="all")
subgraphs<-do.call(rbind,lapply(subgraphs,get.data.frame)) %>% distinct(to,from,weight, .keep_all = TRUE)
subgraph_bind<-subgraph_bind%>%filter(!to%in%setdiff(setdiff(subgraph_bind$to,subgraph_bind$from),names(weights)))
subgraph_subsetreturn(subgraph_subset)
}
<-function(Known_mat,weights){
create_reward_matrix
set.seed(68864)
names(weights) <- apply(Known_mat,2,function(x){paste(x,sep="_",collapse="_")})
<- 2
num_type <- nrow(Known_mat);
num_mutations <-rownames(Known_mat)
mutant_names<- ncol(Known_mat)
num_clones <- num_type^num_mutations
num_states
<- unlist(apply(Known_mat,1,function(x){list(0:max(unique(x))) }),recursive = FALSE)
possible_mut_list
<-data.frame(expand.grid(possible_mut_list))
states<-data.frame(expand.grid(apply(states[,1:num_mutations],1,function(x){paste(x,collapse="_",sep="_")}),
state_interactionsapply(states[,1:num_mutations],1,function(x){paste(x,collapse="_",sep="_")})))
$possible<-ifelse(apply(state_interactions,1,function(x){
state_interactions<-as.numeric(do.call(cbind,strsplit(as.character(x[1]),split="_")))
A<-as.numeric(do.call(cbind,strsplit(as.character(x[2]),split="_")))
Bsum(abs(A-B))<=1
0,NA)
}),
$action<-apply(state_interactions,1,function(x){
state_interactions<-as.numeric(do.call(cbind,strsplit(as.character(x[1]),split="_")))
A<-as.numeric(do.call(cbind,strsplit(as.character(x[2]),split="_")))
Bif(!is.na(x["possible"])){
if(sum(abs(B-A))==0){
return("stay")
else{
} return(mutant_names[which((B-A)==1)])
}
}
})
<-setNames(state_interactions%>%filter(action%in%c(mutant_names,"stay")),
datc("State","NextState","Reward","Action"))[,c(1,4,2,3)]
$Reward <- as.numeric(apply(dat,1,function(x){
datifelse(x$NextState%in%names(weights),weights[x$NextState],x$Reward)
}))$Reward <- as.numeric(apply(dat,1,function(x){
datifelse(x$Action%in%"stay",0,x$Reward)
}))$State <- as.character(dat$State)
dat$NextState <- as.character(dat$NextState)
dat$Action <- as.character(dat$Action)
dat
<- list(alpha = 0.8, gamma = 0.9)
control <- ReinforcementLearning(data = dat, s = "State", a = "Action", r = "Reward", s_new = "NextState", iter = 1,control=control)
model <- model$Q
xrownames(x) <- substring(rownames(x),2)
<- setNames(melt(x),c("State","Action","Q"))
Q_mat <-inner_join(dat,Q_mat,by=c("State","Action"))
set$Valid <- TRUE
setreturn(set)
}
<-function(sample,output_folder,clonal_abundance,clonal_architecture,clone_cell_count,shrink)
plot_clonal_heatmap_and_barplot_with_SD
{<-data.frame( clonal_abundance[[sample]])#%>%filter(Count>=5)
clonal_abundance_subset <- intersect(as.character(clonal_abundance_subset$Clone),as.character(clonal_architecture[[sample]]$Clone))
clones_to_use <- data.frame(clonal_architecture[[sample]])%>%
clonal_architecture_subset filter(data.frame(clonal_architecture[[sample]])$Clone%in%clones_to_use)
<- data.frame(clonal_abundance_subset)%>%
clonal_abundance_subset filter(Clone%in%clones_to_use)
<-unique(as.character(clonal_architecture_subset[clonal_architecture_subset$Genotype%in%c("Heterozygous","Homozygous"),"Mutant"]))
genes_to_display
if(shrink==TRUE){
<-clonal_architecture_subset%>%filter(Mutant%in%genes_to_display)
clonal_architecture_subset
}$Clone <- factor(clonal_architecture_subset$Clone, levels=rev(clonal_abundance_subset$Clone))
clonal_architecture_subset$Clone <- factor(clonal_abundance_subset$Clone, levels=levels(clonal_architecture_subset$Clone))
clonal_abundance_subset<- ggplot(data = clonal_architecture_subset, aes(x = Clone, y = factor(Mutant,levels=rev(levels(factor(Mutant)))), fill = Genotype)) +
gg_heatmap geom_tile() +# scale_y_discrete(limits = rev(levels(Mutant)))+
scale_fill_manual(values=c("WT"=brewer.pal(7,"Reds")[1],
"Heterozygous"=brewer.pal(7,"Reds")[3],
"Homozygous"=brewer.pal(7,"Reds")[6],
"Unknown"="grey50"),"Genotype") +
theme_classic(base_size=6) +
ylab("Mutation")+
theme(legend.position = "bottom", legend.direction = "horizontal",
axis.text.x = element_blank(),
axis.line=element_blank(),
axis.title.x=element_blank(),
axis.ticks.x = element_blank(),
plot.margin=unit(c(0,1,1,1),"cm"))
# Generate clonal abundance barplot
<- ggplot(data = data.frame(clonal_abundance_subset), aes(x = Clone, y = Count)) +
gg_clonal_barplot geom_bar(stat = "identity", aes(fill = Count)) + theme_gray() +
theme_classic(base_size=6)+
ylim(0,max(clonal_abundance_subset$Count)*1.3) +
ylab("Cell Count")+
geom_errorbar(aes(ymin = X2.5., ymax = X97.5.), width = 0.2)+
geom_text(aes(label=Count), position=position_dodge(width=0.9), vjust=-0.25,size=1)+
scale_fill_distiller(name = "Value", palette = "Reds", direction = 1) +
theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.line=element_blank(),
legend.position = "none",
plot.margin=unit(c(1,1,-0.2,1),"cm"))
<- textGrob(paste(sample,"-"), hjust= 0.5, vjust = 0.5, gp = gpar(fontsize = 12))
grob.title <-plot_grid(grob.title,gg_clonal_barplot,gg_heatmap,ncol=1,align="hv",axis="l",rel_heights = c(0.1,1,0.75))
final_plot
save_plot(paste(output_folder,sample,".pdf",sep=""),final_plot, ncol=1) # Open a new pdf file
}
<-function(graph_results){
query_initiating_mutations<-paste(rep(0,length(strsplit(graph_results$State[1],split="_")[[1]])),sep="_",collapse="_")
start_index<-graph_results%>%filter(State==start_index&Action!="stay")%>%pull(Action)
possible_starting_actions<-list()
final_results<-0
initating_action_countfor(initating_action in possible_starting_actions){
print(initating_action)
<- graph_results
set <-initating_action_count+1
initating_action_count<- list()
storage_results<-0
branches<- set%>%filter(State==start_index&Action==initating_action)%>%pull(NextState)
state_to_kill <- sum(set%>%filter(State==state_to_kill)%>%pull(Valid))
start_killed while(start_killed>0){
#print(branches)
# print(start_killed)
<- branches +1
branches <-0
number_of_mutations<- list()
state_log<-list()
optimal_reward<-list()
action_log<- start_index
current_state<-TRUE
indicator<-0
nextStatewhile(current_state!=nextState) {
# print(number_of_mutations)
<- number_of_mutations+1
number_of_mutations if(number_of_mutations==1){
<- start_index
state_log[[number_of_mutations]]
}<- state_log[[number_of_mutations]]
current_state <- FALSE
nextState_indicator
while(nextState_indicator==FALSE){
if(number_of_mutations==1){
<- set%>%
max_potential_action_indexfilter(State==current_state&Action==initating_action)
else {
} <- set%>%
max_potential_action_index filter(State==current_state&Valid==TRUE)%>%
filter(Q==max(Q))%>%sample_n(1)
}if(nrow(max_potential_action_index)==0){
break
}<- max_potential_action_index%>%pull(NextState)
max_potential_action <- any(set%>%filter(State==max_potential_action&Action!="stay")%>%pull(Valid))
next_valid_action if(next_valid_action==TRUE){
<-max_potential_action
nextState <- max_potential_action_index%>%pull(Action)
current_action ==TRUE
nextState_indicatorbreak
else{
} $State%in%max_potential_action_index["State"]&
set[set$Action%in%max_potential_action_index["Action"],"Valid"] <- FALSE
set
}
}if(nrow(set%>%filter(State==current_state&Action==current_action))==0){
<-NA
optimal_reward[[number_of_mutations]] else {
} <- set%>%
optimal_reward[[number_of_mutations]] filter(State==current_state&Action==current_action)%>%
pull(Reward)
}+1]]<- nextState
state_log[[number_of_mutations<- current_action
action_log[[number_of_mutations]] if(current_action==nextState){
==FALSE
indicator+1]]<-NULL
state_log[[number_of_mutationsbreak
}
}+1]] <- NA
optimal_reward[[number_of_mutations+1]] <- NA
action_log[[number_of_mutations<-data.frame("states"=do.call(rbind,state_log),#[1:(length(state_log)-1)]),
storage_results[[branches]] "actions"=do.call(rbind,action_log),
"reward"=do.call(rbind,optimal_reward),
"nextState"=do.call(rbind,c(state_log[2:length(state_log)],NA)) )
<- storage_results[[branches]]%>%
storage_results[[branches]] filter(states!=nextState)
$cumulative_reward <- cumsum(storage_results[[branches]]$reward)
storage_results[[branches]]
#storage_results[[branches]] <-storage_results[[branches]][1:which.max(storage_results[[branches]]$cumulative_reward), ]
$State%in%current_state&set$Action%in%current_action,"Valid"] <- FALSE
set[set<- sum(set%>%filter(State==state_to_kill)%>%pull(Valid))
start_killed
}<-storage_results[!duplicated(storage_results)]
final_results[[initating_action_count]]
}names(final_results)<-possible_starting_actions
return(final_results)
}