3.1 Figure 1: Cohort characterization
Let’s start by loading in some packages that are recurrently used. Other packages that are only used in one chunk of code are listed appropriately below.
library(dplyr)
library(tidyr)
library(ggplot2)
library(tidyselect)
Here we will focus on the cohort level analysis, and load in the NGT files before they were filtered for clonality.
setwd("/Users/bowmanr/Projects/scDNA")
<-readRDS(file="./data/final_NGTs.rds")
final_NGTs<-readRDS(file="./data/pheno.rds") pheno
One filter we put in was to exclude samples <100 cells.
<-names(final_NGTs)[sapply(names(final_NGTs),function(x){
high_quality_samplesnrow(final_NGTs[[x]])>100
})]
Next we want a catalogue of all mutations so we can determine how many patients were mutated for each gene and how many different mutations were seen in total for each gene.
<-do.call(rbind,lapply(names(final_NGTs),function(x){
final_mut_meltdata.frame("Sample"=x,
"Mutation"=colnames(final_NGTs[[x]]),
"Gene"=do.call(rbind,strsplit(colnames(final_NGTs[[x]]),split="[:_]"))[,1])
}))
Next pages will go into how we made each of the Figures.
3.1.1 Cohort characteristics (EF1A-D)
Now we can start to make the plots that are in Extended Figure 1. Starting with the total number of mutations identified per gene
## Set the levels of the Gene column from most to least prevalent for plotting purposes
$Gene<- factor(final_mut_melt$Gene,levels=names(sort(table(final_mut_melt$Gene), decreasing=TRUE)))
final_mut_melt
<-ggplot(final_mut_melt,aes(x=Gene))+
gg_mut_countgeom_bar(stat="count")+
theme_classic(base_size = 10)+
ylab("Count")+
ggtitle("Number of mutations")+
theme(axis.text.x = element_text(angle=45, hjust=1,vjust=1),
plot.title=element_text(hjust=0.5))+
scale_y_continuous(expand=c(0,0))
Total number of patients mutated for each gene
## tally of how many mutations per patient
<- final_mut_melt%>%dplyr::count(Gene, Sample)
melted_mut_mat
## Set the levels of the Gene column from most to least prevalent for plotting purposes
$Gene<- factor(melted_mut_mat$Gene,levels=names(sort(table(melted_mut_mat$Gene),decreasing=TRUE)))
melted_mut_mat
<-ggplot(melted_mut_mat,aes(x=Gene))+
gg_mut_patientgeom_bar(stat="count")+
theme_classic(base_size =10)+
ylab("Count")+ggtitle("Number of patients with mutation")+
theme(axis.text.x = element_text(angle=45,hjust=1,vjust=1),
plot.title=element_text(hjust=0.5))+
scale_y_continuous(expand=c(0,0))
Number of mutated genes per patient
<-final_mut_melt%>%
gg_mutated_genes_per_patientdistinct(Sample,Gene)%>%
group_by(Sample)%>%
%>%
tallyggplot(aes(x=n))+geom_bar()+
ylab("Count")+
xlab("Number of genes")+
ggtitle("Mutant genes per patient")+
theme_classic(base_size = 10)+
theme(plot.title=element_text(hjust=0.5))+
scale_y_continuous(expand=c(0,0))+
scale_x_continuous(expand=c(0,0),n.breaks=8)
Total number of mutations per patient
<- final_mut_melt%>%
gg_mutations_per_patientgroup_by(Sample)%>%
%>%
tallyggplot(aes(x=n))+geom_bar()+
theme_classic(base_size = 10)+
theme(plot.title=element_text(hjust=0.5))+
ylab("Count")+ggtitle("Variants per patient")+xlab("Number of variants")+
scale_y_continuous(expand=c(0,0))+
scale_x_continuous(expand=c(0,0),n.breaks=6)
library(cowplot)
plot_grid(gg_mut_count,gg_mut_patient,
gg_mutated_genes_per_patient,gg_mutations_per_patient,ncol=2,align="hv",axis="ltrb",
labels = "AUTO")

Figure 3.1: Extended Figure 1A-D
An extra plot worth noting is below, which plots the number of cells per sample, after we filterd out all of the “3” uniformative genotyped cells.
### Cells per sample
data.frame("Cells"=do.call(rbind,lapply(final_NGTs,nrow)))%>%
ggplot(aes(x=Cells))+geom_histogram(binwidth = 100)+
theme_classic(base_size = 10)+
theme(plot.title=element_text(hjust=0.5))+
ylab("Count")+ggtitle("Informative Cells per Sample")+
scale_y_continuous(expand=c(0,0))+
scale_x_continuous(expand=c(0,0),n.breaks=8)
3.1.2 Mutation Co-occurence
Next we want to make the co-occurence matrix on a sample level
library(cooccur)
### create matrix for oncoprint
<- table(melted_mut_mat$Sample,melted_mut_mat$Gene)
mut_mat
### Prepare matrix for co occurence map
<- cooccur(mat=t(mut_mat), type="spp_site",
cooccur_mat only_effects = FALSE,eff_matrix=TRUE,
thresh=FALSE, eff_standard=FALSE,spp_names=TRUE)$results
## Denote which interactions are significantly inclusive or exclusive
# The 'add_row' function generates a new line, but it gets removed later.
# This is helpful for setting the order of the gene labels below.
<- cooccur_mat%>%
cooccur_data_mat mutate(score=ifelse(p_lt<=0.05,-1,
ifelse(p_gt<=0.05,1,0))) %>%
::select(sp1_name,sp2_name,score)%>%
dplyradd_row(sp2_name=setdiff(.$sp1_name,.$sp2_name),
sp1_name=setdiff(.$sp2_name,.$sp1_name),
score=0)
#check out the final that we added so we can remove it later
tail(cooccur_data_mat)
# Order the genes in a coherent pattern for triangle strucutre of graph.
$sp1_name<-factor(cooccur_data_mat$sp1_name,
cooccur_data_matlevels=unique(cooccur_data_mat$sp1_name))
$sp2_name<-factor(cooccur_data_mat$sp2_name,
cooccur_data_matlevels=rev(levels(cooccur_data_mat$sp1_name)))
# Triangle heatmap to compare cohorts
<-ggplot(cooccur_data_mat%>%filter(sp1_name!="BRAF"),aes(x=sp1_name,y=sp2_name))+
grob_corrplotgeom_tile(aes(fill = factor(score)), color='grey90') +
scale_fill_manual(name="Correlation",
values=c("-1"="firebrick3",
"0"="white",
"1"="steelblue2"),
labels=c("Mutually Exclusive",
"Not Significant",
"Mutually Inclusive"))+
theme_classic(base_size=10)+
xlab("")+ylab("")+
theme(axis.text.x=element_text(angle=45,hjust=1,vjust=1),
axis.line = element_blank(),
legend.position = c(0.8,1),
legend.justification = c(1, 1),
legend.direction = "vertical")+
theme(legend.key.size = unit(0.5,"line"))
grob_corrplot
3.1.3 Oncoprint: Figure 1A
Now we are going to make the oncoprint that is in Figure 1A. Check out this example from the ComplexHeatmap package for a better understanding of why things are formatted the way they are.
library(pals) # great package with color palettes in R
library(ComplexHeatmap) #used for making the oncoprint
#Here we will identify whether a variant is an indel, Nonsense or Missense mutation
#Next we group samples together and pivot the matrix into wide format
<-setNames(as.list(rep(0,length(levels(final_mut_melt$Gene)))),levels(final_mut_melt$Gene))
fill_values
<-final_mut_melt%>%
mut_mat_widemutate(Variant_Class=ifelse(grepl("fs\\*|INS_|ins|ext|del",.$Mutation),"Indel",
ifelse(grepl("\\*$",.$Mutation),"Nonsense","Missense")))%>%
# mutate_at(c("Gene","Variant_Class","Sample"),as.character())%>%
group_by(Sample,Gene)%>%
summarize("Variants"=list(Variant_Class))%>%
pivot_wider(id_cols=Sample,
names_from = Gene,
values_from = Variants,
# values_fn = list(Variant_Class=list),
values_fill = list(Variant_Class="")) %>%
#ungroup(Sample,Gene)%>%
data.frame()
# At this point, each column is actually a list, and variants are represnted as a list within a list.
# So we want to unpack it a bit, and turn those variant lists in a ; separated vector
<- list()
mut_mat_wide_storage for(i in 2:ncol(mut_mat_wide)){ # start at 2 to ignore the first column of sample names
<- do.call(rbind,lapply(mut_mat_wide[,i],function(x){
mut_mat_wide_storage[[i]]#if(x==""){
# return(x)
# } else
if(is.null(x)){
return("")
else {
} paste(x,sep=";",collapse=";")
}
}
))
}
## The rest of this just turns this back into a matrix in the format suggested by ComplexHeatmap
1]] <- as.character(mut_mat_wide[,"Sample"])
mut_mat_wide_storage[[<- do.call(cbind,mut_mat_wide_storage)
final_mat colnames(final_mat) <- colnames(mut_mat_wide)
rownames(final_mat) <- final_mat[,1]
<- t(final_mat[,-1])
final_mat
#Now we set up the color schemes for the variants on each row
= c("Indel" = "darkorchid2", "Nonsense" = "black", "Missense" = "darkgreen")
variant_type_colors = list(
alter_functions background = function(x, y, w, h) {
grid.rect(x, y, w, h-unit(0.25, "mm"),
gp = gpar(fill = "#CCCCCC", col = NA))
},Indel = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"),
gp = gpar(fill = variant_type_colors["Indel"], col = NA))
},Nonsense = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h-unit(0.5, "mm"),
gp = gpar(fill = variant_type_colors["Nonsense"], col = NA))
},Missense = function(x, y, w, h) {
grid.rect(x, y, w-unit(0.5, "mm"), h*0.33,
gp = gpar(fill = variant_type_colors["Missense"], col = NA))
}
)
# Establish colors for each disease state and diagnosis
<- list("Dx" = setNames(okabe(n=length(unique(as.character(pheno[,"Dx"])))),
color_set c( "CH","MDS","MPN", "AML", "sAML" ,"tAML","Other") ),
"Disease.Status" = setNames(tol(n=(length(unique(as.character(pheno[,"Disease.Status"]))))),
c("Newly Diagnosed","Relapse/Refractory","Newly Transformed",
"Persistent","Chronic Stage", "Other")))
# Format the annotations at the top of the oncoprint
<- HeatmapAnnotation(cbar = anno_oncoprint_barplot(),
top_annotation df = pheno[,c("Dx","Disease.Status")],
col = color_set,
annotation_name_side = "left")
# Indicate what should be included in the legend
<- list(title = "Alternations",
heatmap_legend_param at = c("Indel", "Nonsense", "Missense"),
labels = c("Indel", "Nonsense", "Missense"))
# Make the oncoprint
oncoPrint(final_mat,
alter_fun = alter_functions,
col = variant_type_colors,
top_annotation = top_annotation,
heatmap_legend_param = heatmap_legend_param)
3.1.4 Sample clonality: Figure 1C,E Figure 2A,B
Now we focused on patient samples that were included in the clonality analysis. This the filtered set o f patients that had >100 cells, more than 1 mutation, and more than 1 clone following bootstrapping and estalbishing 95% confidence inervals > 10 cells.
<-readRDS(file="./data/final_sample_summary.rds")
final_sample_summary<-readRDS(file="./data/pheno.rds") pheno
Next we are going to build a data frame indicating the mutation status of various patients with regard to epigenetic modifiers (DNMT3a,TET2, ASXL1, IDH1/2, DTAI) and signalling genes (FLT3, vs JAK2 vs NRAS/KRAS)
library(magrittr) # for %<>%
#Tabulate presence/absence of a mutation
<-do.call(rbind,lapply(names(final_sample_summary),function(x){
mutants_in_each_sample<-colnames(final_sample_summary[[x]]$NGT)
y<- list()
z $Sample <- x
z$DNMT3A <- ifelse(any(grepl("DNMT3A",y)),1,0)
z$TET2 <- ifelse(any(grepl("TET2",y)),1,0)
z$ASXL1 <- ifelse(any(grepl("ASXL1",y)),1,0)
z$IDH <- ifelse(any(grepl("IDH",y)),1,0)
z$FLT3 <- ifelse(any(grepl("FLT3",y)),1,0)
z$KIT <- ifelse(any(grepl("KIT",y)),1,0) # n=1 sample, we put it in the "signalling category"
z$RAS <- ifelse(any(grepl("RAS",y)),1,0)
z$JAK2 <- ifelse(any(grepl("JAK2",y)),1,0)
z$PTPN11 <- ifelse(any(grepl("PTPN11",y)),1,0)
zdata.frame(t(do.call(rbind,z)))
}))
# Bin into groups based on mutations and disease type
%<>%mutate(Group=case_when(
mutants_in_each_sample==1|DNMT3A==1|IDH==1|ASXL1==1)&(RAS==0&FLT3==0)~'DTAI',
(TET2==1|DNMT3A==1|IDH==1|ASXL1==1)&((RAS==1&FLT3==0)|
(TET2==1&FLT3==0))~'DTAI-RAS',
(PTPN11==1|DNMT3A==1|IDH==1|ASXL1==1)&(RAS==0&FLT3==1)~'DTAI-FLT3',
(TET2==1|DNMT3A==1|IDH==1|ASXL1==1)&((RAS==1&FLT3==1)|
(TET2==1&FLT3==1))~'DTAI-FLT3-RAS',
(PTPN11==0&DNMT3A==0&IDH==0&ASXL1==0)&(RAS==1|FLT3==1|JAK2==1|KIT==1)~'Signaling'))%>%
(TET2left_join(pheno,by="Sample")%>%
mutate(Final_group=case_when(
grepl("AML|Other",Dx)~Group,
!grepl("AML|Other",Dx)~Dx
))
# Order the groups to match how we have them in the paper
$Final_group <- factor(mutants_in_each_sample$Final_group,
mutants_in_each_samplelevels=c("CH","MPN","Signaling","DTAI",
"DTAI-RAS","DTAI-FLT3","DTAI-FLT3-RAS"))
Next we want to calculate a few metrics for each patient sample.
library(vegan)
<-data.frame(do.call(rbind,lapply(names(final_sample_summary),function(y){
clonal_level_info<- final_sample_summary[[y]]$Clones
x data.frame("Sample"=y,
"Shannon"=vegan::diversity(x[,1],index="shannon"),
"Number_of_clones"=length(x[,1]),
"Number_of_mutations"=ncol(final_sample_summary[[y]]$NGT),
"Number_of_mutations_in_dominant_clone"=sum(as.numeric(do.call(rbind,
strsplit(as.character(x[nrow(x),2]),split="_")))),
"Dominant_clone_size"=max(x[,1]/sum(x[,1]))) #,
})))
Now we’ll merge the data frames together and plot some of the data found in Figure 1 and Figure 2
# Combine the data frame
<-mutants_in_each_sample%>%inner_join(clonal_level_info)
test
# Number of mutations
<-ggplot(test%>%group_by(Final_group)%>%
gg_number_of_mutationssummarise(mean=mean(Number_of_mutations),
sd = sd(Number_of_mutations),
sem = sd(Number_of_mutations)/
sqrt(length(Number_of_mutations))),
aes(x=Final_group,y=mean,fill=Final_group))+
geom_bar(stat="identity",color="black")+
geom_errorbar(aes(ymin = mean-sem, ymax = mean+sem),width=0.5,lwd=0.5)+
theme_classic(base_size = 8)+
ylab("Number of mutations")+xlab("")+ggtitle("")+
scale_y_continuous(limits = c(0,9), expand = c(0, 0)) +
theme(axis.text.x = element_text(angle=30,hjust=1)) +
scale_fill_brewer(type="seq",palette = "Reds",aesthetics = "fill",guide=FALSE)
# Number of clones
<-ggplot(test,aes(y=Number_of_clones,x=Final_group,fill=Final_group))+
gg_number_of_clonesgeom_boxplot(outlier.shape = NA)+
geom_jitter(width = 0.1,size=0.5)+
theme_classic(base_size = 8)+
ylab("Number of clones")+
xlab("")+
theme(axis.text.x = element_text(angle=30,hjust=1)) +
scale_fill_brewer(type="seq",palette = "Reds",aesthetics = "fill",guide=FALSE)
plot_grid(gg_number_of_mutations,gg_number_of_clones,ncol=2,align="hv",axis="ltrb",labels=c("C","E"))

Figure 3.2: Miles et al: Figure 1C,E
Compute statistics for the different group comparisons. We used a Benjamini & Hochberg FDR for multiple test correction with a significance cutoff of 0.1.
library(reshape2) #for melt, I need to come up with a better way to do this, if anyone has ideas let me know!
<-test%>%{melt(pairwise.t.test(.$Number_of_clones,g=.$Final_group,
pvalues_Number_of_clonesdata=.,p.adjust.method="fdr")$p.value)}%>%
filter(!is.na(value))%>%filter(value<0.1)
<-test%>%{melt(pairwise.t.test(.$Number_of_mutations,g=.$Final_group,
pvalues_Number_of_mutationsdata=.,p.adjust.method="fdr")$p.value)}%>%
filter(!is.na(value))%>%filter(value<0.1)
Group 1 | Group 2 | FDR |
---|---|---|
DTAI-RAS | CH | 0.0466140 |
DTAI-FLT3 | CH | 0.0001112 |
DTAI-FLT3-RAS | CH | 0.0245701 |
DTAI-RAS | MPN | 0.0899428 |
DTAI-FLT3 | MPN | 0.0001654 |
DTAI-FLT3-RAS | MPN | 0.0459698 |
DTAI-FLT3 | Signaling | 0.0157077 |
DTAI-FLT3 | DTAI | 0.0001112 |
DTAI-FLT3-RAS | DTAI | 0.0637393 |
DTAI-FLT3 | DTAI-RAS | 0.0105738 |
Group 1 | Group 2 | FDR |
---|---|---|
Signaling | CH | 0.0171093 |
DTAI-RAS | CH | 0.0763530 |
DTAI-FLT3 | CH | 0.0000064 |
DTAI-FLT3-RAS | CH | 0.0040371 |
Signaling | MPN | 0.0729728 |
DTAI-FLT3 | MPN | 0.0000448 |
DTAI-FLT3-RAS | MPN | 0.0171093 |
DTAI | Signaling | 0.0017335 |
DTAI-FLT3 | Signaling | 0.0249373 |
DTAI-RAS | DTAI | 0.0058893 |
DTAI-FLT3 | DTAI | 0.0000000 |
DTAI-FLT3-RAS | DTAI | 0.0003139 |
DTAI-FLT3 | DTAI-RAS | 0.0000812 |
DTAI-FLT3-RAS | DTAI-RAS | 0.0658341 |
Now for Figure 2:
# Shannon diversity index
<-ggplot(test,aes(y=Shannon,x=Final_group,fill=Final_group))+
gg_shannongeom_boxplot(outlier.shape = NA)+
geom_jitter(width = 0.1,size=0.5)+
theme_classic(base_size = 8)+
ylab("Shannon diveristy index")+
xlab("")+
theme(axis.text.x = element_text(angle=30,hjust=1)) +
scale_fill_brewer(type="seq",palette = "Reds",aesthetics = "fill",guide=FALSE)
# Number of mutations in each cohort
<-ggplot(test%>%group_by(Final_group)%>%
gg_Number_of_mutations_in_Dclonesummarise(mean=mean(Number_of_mutations_in_dominant_clone),
sd = sd(Number_of_mutations_in_dominant_clone),
sem = sd(Number_of_mutations_in_dominant_clone)/
sqrt(length(Number_of_mutations_in_dominant_clone))),
aes(x=Final_group,y=mean,fill=Final_group))+
geom_bar(stat="identity",color="black")+
geom_errorbar(aes(ymin = mean-sem, ymax = mean+sem),width=0.5,lwd=0.5)+
theme_classic(base_size = 8)+
ylab("Number of mutations \n in dominant clone")+xlab("")+ggtitle("")+
scale_y_continuous(limits = c(0,4.5), expand = c(0, 0)) +
theme(axis.text.x = element_text(angle=30,hjust=1)) +
scale_fill_brewer(type="seq",palette = "Reds",
aesthetics = "fill",guide=FALSE)
plot_grid(gg_shannon,gg_Number_of_mutations_in_Dclone,ncol=2,align="hv",axis="ltrb",labels=c("A","B"))

Figure 3.3: Miles et al: Figure 2A-B
<-test%>%{melt(pairwise.t.test(.$Shannon,g=.$Final_group,
pvalues_Shannondata=.,p.adjust.method="fdr")$p.value)}%>%
filter(!is.na(value))%>%filter(value<0.1)
<-test%>%{melt(pairwise.t.test(
pvalues_Number_of_mutations_in_dominant_clone$Number_of_mutations_in_dominant_clone,
.g=.$Final_group,
data=.,p.adjust.method="fdr")$p.value)}%>%
filter(!is.na(value))%>%filter(value<0.1)
Group 1 | Group 2 | FDR |
---|---|---|
Signaling | CH | 0.0675230 |
DTAI-RAS | CH | 0.0048421 |
DTAI-FLT3 | CH | 0.0000925 |
DTAI-FLT3-RAS | CH | 0.0082978 |
DTAI-RAS | MPN | 0.0327734 |
DTAI-FLT3 | MPN | 0.0006562 |
DTAI-FLT3-RAS | MPN | 0.0342246 |
DTAI-FLT3 | Signaling | 0.0266874 |
DTAI-RAS | DTAI | 0.0153873 |
DTAI-FLT3 | DTAI | 0.0000925 |
DTAI-FLT3-RAS | DTAI | 0.0327734 |
DTAI-FLT3 | DTAI-RAS | 0.0504716 |
Group 1 | Group 2 | FDR |
---|---|---|
MPN | CH | 0.0068538 |
DTAI | CH | 0.0003515 |
DTAI-RAS | CH | 0.0003515 |
DTAI-FLT3 | CH | 0.0000133 |
DTAI-FLT3-RAS | CH | 0.0008256 |
Signaling | MPN | 0.0620536 |
DTAI-FLT3 | MPN | 0.0620536 |
DTAI | Signaling | 0.0084549 |
DTAI-RAS | Signaling | 0.0084549 |
DTAI-FLT3 | Signaling | 0.0003515 |
DTAI-FLT3-RAS | Signaling | 0.0087669 |
DTAI-FLT3 | DTAI | 0.0602450 |
DTAI-FLT3 | DTAI-RAS | 0.0703147 |
A few interesting points in Extended Figure 3
# Dominant clone size
<-ggplot(test,
gg_dominant_clone_sizeaes(y=Dominant_clone_size,x=Final_group,fill=Final_group))+
geom_boxplot(outlier.shape = NA)+
geom_jitter(width = 0.1,size=0.5)+
theme_classic(base_size = 8)+
ylab("Fraction of sample \n in dominant clone")+
xlab("")+
theme(axis.text.x = element_text(angle=30,hjust=1)) +
scale_fill_brewer(type="seq",palette = "Reds",aesthetics = "fill",guide=FALSE)
# determine the number of mutants alleles in each clone
<- do.call(rbind,lapply(final_sample_summary,function(x){
clone_size_by_genetic_density<-x$Clones%>%filter(Clone%in% x$Clones[,"Clone"] )
possible_clones_subset <-possible_clones_subset[,"Clone"]
clones<-x$NGT[!duplicated(x$NGT)&x$NGT[,"Clone"]%in%clones,]
dedup<-full_join(possible_clones_subset[,1:2],dedup)
set_mat<-set_mat[,"Count"]
counts <-set_mat[,"Count"]/sum(set_mat[,"Count"])
weights<- rowSums(set_mat[,-c(1:2)])
genetic_complexity return(data.frame("Clone_size"=weights,
"Genetic_density"=genetic_complexity))
}))
<-ggplot(clone_size_by_genetic_density,
gg_clone_size_by_genetic_densityaes(y=Clone_size,x=factor(Genetic_density),
fill=factor(Genetic_density)))+
geom_jitter(width = 0.1,size=0.5)+
geom_boxplot(outlier.shape = NA)+
theme_bw(base_size = 8)+
ylab("Fraction of sample in clone")+
xlab("Number of mutant alleles")+
scale_fill_brewer(type="seq",palette = "Greens",
aesthetics = "fill",guide=FALSE)
plot_grid(gg_dominant_clone_size,gg_clone_size_by_genetic_density,align="hv",axis="tb",ncol=2,labels=c("A","B"))

Figure 3.4: Miles et al: Extended Figure 3A-B
<-test%>%{melt(pairwise.t.test(.$Dominant_clone_size,g=.$Final_group,
pvalues_Dominant_clone_sizedata=.,p.adjust.method="fdr")$p.value)}%>%
filter(!is.na(value))%>%filter(value<0.1)
Group 1 | Group 2 | FDR |
---|---|---|
DTAI-RAS | CH | 0.0356791 |
DTAI-FLT3 | CH | 0.0123253 |
DTAI-RAS | MPN | 0.0749752 |
DTAI-FLT3 | MPN | 0.0168044 |
DTAI-RAS | DTAI | 0.0168044 |
DTAI-FLT3 | DTAI | 0.0081647 |
3.1.5 Clonograph: Figure 1D
This is probably our favorite way of looking at the data.
library(RColorBrewer)
<-readRDS(file="./data/final_sample_summary.rds")
final_sample_summary
<-"MSK8"
sample <-final_sample_summary
sample_list
# Extract out the sample of interest
<-sample_list[[sample]]$Clones
clonal_abundance <-sample_list[[sample]]$Architecture
clonal_architecture
# Ensure the order of the clone abundance and clone architecture are the same.
$Clone <- factor(clonal_architecture$Clone, levels=rev(clonal_abundance$Clone))
clonal_architecture$Clone <- factor(clonal_abundance$Clone, levels=levels(clonal_architecture$Clone))
clonal_abundance
# Generate clonal abundance barplot
<- ggplot(data=clonal_abundance, aes(x=Clone, y=Count,fill=Count)) +
gg_clonal_barplot geom_col()+
theme_classic(base_size=7)+
scale_y_continuous(expand=c(0.01,0))+
#ylim() +
ylab("Cell Count")+
geom_errorbar(aes(ymin = LCI, ymax = UCI), width = 0.2)+
scale_fill_distiller(name = "Value", palette = "Reds", direction = 1) +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.x =element_blank(),
legend.position = "none",
plot.margin=unit(c(0,0,0,0),"cm"))
# Generate mutation heatmap
<- ggplot(data=clonal_architecture,
gg_heatmap aes(x=Clone, y=Mutant, fill=Genotype))+
geom_tile() +
scale_fill_manual(values=c("WT"=brewer.pal(7,"Reds")[1],
"Heterozygous"=brewer.pal(7,"Reds")[3],
"Homozygous"=brewer.pal(7,"Reds")[6],
"Unknown"="grey50"),name="Genotype")+
theme_classic(base_size=7) +
ylab("Mutation")+
scale_y_discrete(limits = rev(levels(clonal_architecture$Mutant)))+
theme(legend.position = "right", legend.direction = "vertical",
axis.text.x = element_blank(),
axis.line=element_blank(),
axis.title.x=element_blank(),
axis.ticks.x = element_blank(),
plot.margin=unit(c(0,0,0,0),"cm"))
# Put it all together
plot_grid(gg_clonal_barplot,gg_heatmap,ncol=1,align="v",axis="lr",rel_heights = c(1,0.75))

Figure 3.5: Miles et al. Figure 1D