2.5 Assessing clonal abundance
This will be the last code example that proceeds with our limited sample set. After this, the tutorial will procede with the data used in the manuscript. This analysis will focus only on samples that have greater than one mutation.
The general workflow is as follows * Select samples that have at least 2 mutaions * Clone assigment * Tally clonal abundance * Establish a clonal abundance confidence itnerval
# Select samples with at least 2 mutations
<- names(final_NGTs)[do.call(rbind,lapply(final_NGTs,dim))[,2]>2]
clonal_sample_set
# Order columns based on computed_VAF, and assign a clone to each cell
<-lapply(final_NGTs[clonal_sample_set],function(y){
NGT_to_clone<-names(sort(colSums(y[,-1]),decreasing=TRUE))
bulk_VAF_order c("Cell",bulk_VAF_order)] %>%unite("Clone",all_of(`bulk_VAF_order`),sep="_", remove = FALSE)
y[,
})
# Tally clones
<- lapply(NGT_to_clone,function(x){
clonal_abundance%>%count(Clone,name="Count")%>%arrange(Count)
x
})
# Setup a resampling function to generate multiple clonal abundance tallies
<-function(data){
resample_fun<- data[sample(x=1:nrow(data),replace=TRUE),]
x return(as.matrix(x%>%count(Clone,name="Count")%>%arrange(Count)))
}
<- 100 # we did 10,000. Keeping it low here for run time.
replicates <- 10 # minimum number of cells in order to retain a clone
clone_cutoff <- lapply(names(NGT_to_clone),function(sample_to_test){
clonal_abundance_boot_CI <-replicate(n=replicates,resample_fun(NGT_to_clone[[sample_to_test]]),simplify = "array")
testif(class(test)=="list"){
<- setNames(lapply(test,data.frame),1:replicates) %>%
y imap(.x = ., ~ set_names(.x, c("Clone", .y))) %>%
::reduce(full_join, by = "Clone")%>%
purrrmutate_if(names(.)!="Clone",as.numeric)%>%
mutate_each(funs(replace(., is.na(.), 0)))
}if(class(test)=="array"){
<- setNames(apply(test,3,data.frame),1:replicates) %>%
y imap(.x = ., ~ set_names(.x, c("Clone", .y))) %>%
::reduce(full_join, by = "Clone")%>%
purrrmutate_if(names(.)!="Clone",as.numeric)%>%
mutate_each(funs(replace(., is.na(.), 0)))
}<- data.frame(t(apply(y%>%select(-Clone),1,function(p){
z quantile(p,probs=c(0.025,0.975))
"Clone"=y$Clone)
})),<- setNames(data.frame(inner_join(data.frame(clonal_abundance[[sample_to_test]]),z,by="Clone")),
set c("Clone","Count","LCI","UCI"))%>%filter(LCI>=clone_cutoff)
})names(clonal_abundance_boot_CI) <-names(clonal_abundance)
Now that we have a set of clones that we believe reproducibily have at least 10 cells, we remove cells and variants that are no longer represented at sufficient coverage.
<- setNames(lapply(names(clonal_abundance_boot_CI),function(sample_to_test){
clone_filtered_NGTs
# Determine if there are any clones left to process
if(nrow(clonal_abundance_boot_CI[[sample_to_test]])==0) {
return("No clones after boostrapping")
}
# Determine if there are any mutations that are no longer found in a stable clone
<-as.matrix(do.call(rbind,
clone_matrixstrsplit(clonal_abundance_boot_CI[[sample_to_test]][,"Clone"],split="_")))
mode(clone_matrix) <- "numeric"
colnames(clone_matrix) <-colnames(NGT_to_clone[[sample_to_test]])[-c(1,2)]
<-names(which(colSums(clone_matrix)==0))
variants_to_remove
# Check other conditions of interest that might remove sample from further processing
if(nrow(clone_matrix)==1) {
return("Only 1 clone left")
else if(length(setdiff(colnames(clone_matrix),c(variants_to_remove)))<=1){
}return("Removed all but 1 variant")
else {
}# Select only clones that survive the bootstrapping, and remove variants that fall out
<- NGT_to_clone[[sample_to_test]]%>%
NGT_to_clone_subset filter(Clone%in%clonal_abundance_boot_CI[[sample_to_test]]$Clone)%>%
select(!all_of(variants_to_remove))
# Create a key for the new and old clone names after removing variants that are no longer present
<- data.frame("New"=apply(data.frame(clone_matrix)%>%select(!all_of(variants_to_remove)),MARGIN=1,
clone_key function(x){ paste(x,sep="_",collapse="_")}),
"Old"=apply(data.frame(clone_matrix),MARGIN=1,
function(x){ paste(x,sep="_",collapse="_")}))
# If there are any variants to remove and clones that need to be renamed
if(any(clone_key$New!=clone_key$Old)){
$Clone <- sapply(NGT_to_clone_subset$Clone,function(x){
NGT_to_clone_subset$New[match(x,clone_key$Old)]})
clone_key
}return(NGT_to_clone_subset)
}names(clonal_abundance_boot_CI)) }),
One last step I find useful is to explicitly state the genotype of each mutation in each clone. This si useful for the clonotype plots we’ll make later.
<- setNames(lapply(names(clonal_abundance_boot_CI),function(test_sample){
clonal_architecture
<-clone_filtered_NGTs[[test_sample]]%>%
clonal_architecture::select(!Cell)%>%
dplyrdistinct()%>%
pivot_longer(cols=!Clone,
names_to="Mutant",
values_to="Genotype") %>%
mutate(Genotype=ifelse(Genotype==3,NA,
ifelse(Genotype==0,"WT",
ifelse(Genotype==1,"Heterozygous", ifelse(Genotype==2,"Homozygous",NA)))))
names(clonal_abundance_boot_CI)) }),
Lastly, we are going to package everything together into a list format for easy access later.
<-setNames(lapply(names(clonal_architecture),function(sample){
final_sample_summaryreturn(list("Clones"=clonal_abundance_boot_CI[[sample]],
"NGT"=clone_filtered_NGTs[[sample]],
"Architecture"=clonal_architecture[[sample]]))
names(clonal_abundance_boot_CI))
}),
saveRDS(final_sample_summary,file="./analysis/final_sample_summary.rds")