Skip to content

Commit

Permalink
Merge pull request #9 from JohannesGawron/main
Browse files Browse the repository at this point in the history
Adding WBC analyses and more simulations
  • Loading branch information
JohannesGawron authored Jan 16, 2025
2 parents af6ece3 + 127bd63 commit 1d7f2c5
Show file tree
Hide file tree
Showing 2 changed files with 218 additions and 0 deletions.
76 changes: 76 additions & 0 deletions experiments/assessing_cluster_clonality/sandbox/WBC_analysis.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
library(tidyverse)
data <- read_tsv("~/Downloads/splittingSummary_full_final.tsv")
data <- data %>% mutate(WBC = n_wbcs > 0)

data <- data %>% mutate(impact_mutations = high_impact_mutations + medium_impact_mutations)


View(data)
filtered_data <- data %>%
filter(str_detect(`Sample Name`, "Br|Pr|LM2"))


fit <- glm(as.factor(Oligoclonal) ~ n_cells + `Sample Name` + WBC, data = data, family = binomial(link = "logit"))
summary(fit)
#### Not signficant


fit2 <- glm(as.factor(Oligoclonal) ~ n_cells + `Sample Name` + WBC, data = filtered_data, family = binomial(link = "logit"))
summary(fit2)
#### Not signficant


fit3 <- glm(n_wbcs ~ n_cells + `Sample Name` + Oligoclonal, data = filtered_data, family = poisson(link = "log"))
summary(fit3)

fit4 <- glm(n_wbcs ~ n_cells + `Sample Name` + Oligoclonal, data = filtered_data, family = poisson(link = "log"))
summary(fit4)
### Not significant


fit6 <- glm(high_impact_mutations ~ n_cells + `Sample Name` + Oligoclonal + WBC, data = filtered_data, family = poisson(link = "log"))
summary(fit6)
### Not significant


fit7 <- glm(as.factor(Oligoclonal) ~ n_cells + `Sample Name` + impact_mutations + Oligoclonal + WBC, data = filtered_data, family = binomial(link = "logit"))
summary(fit7)


fit5 <- glm(impact_mutations ~ n_cells + `Sample Name` + Oligoclonal + WBC, data = filtered_data, family = poisson(link = "log"))
summary(fit5)
### significant effect of WBC presence on impact mutations

filtered_data %>%
ggplot(aes(x = WBC, y = impact_mutations, group = WBC)) +
geom_boxplot() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 18),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text.y = element_text(size = 18)
) +
ylab("# Mutations with functional impact")



fit8 <- glm(impact_mutations ~ n_cells + `Sample Name` + Oligoclonal + n_wbcs, data = filtered_data, family = poisson(link = "log"))
summary(fit8)


filtered_data %>%
ggplot(aes(x = n_wbcs, y = impact_mutations, group = n_wbcs)) +
geom_boxplot() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 18),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text.y = element_text(size = 18)
) +
ylab("# Mutations with functional impact")



fit9 <- glm(n_cells ~ `Sample Name` + Oligoclonal, data = filtered_data, family = poisson(link = "log"))
summary(fit9)
### Not significant
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
input_folder <- "/Users/jgawron/Documents/projects/CTC_backup/input_folder"

simulation_input_folder <- "/Users/jgawron/Documents/projects/CTC_backup/simulations/simulations2"
tree_name <- "Br16_AC"
n_sampling_events <- 100

source("functions.R")


tree_names <- c("Br11", "Br16_AC", "Br16_B", "Br16_C", "Br23", "Br26", "Br30", "Br37", "Br38", "Br39", "Br44", "Br45", "Br46", "Br53", "Br57", "Br61", "Br7", "Brx50", "LM2", "Lu2", "Lu7", "Ov8", "Pr6", "Pr9")


mean_splitting_scores_mono <- vector()
for (tree_name in tree_names) {
# input <- load_data(input_folder, tree_name)

# description_data <-
# read_delim(
# file.path(
# input$directory,
### input$sampleName,
# paste0(input$sampleName, "_samples_nodeDescription.tsv")
# ),
# delim = "\t",
### col_names = FALSE,
# quote = "none"
# )
# colnames(description_data) <-
# c("sample_name", "total_number_cells", "tumor_cells", "WBCs", "description")


folders <- list.dirs(path = simulation_input_folder, recursive = FALSE)
matching_folders <- folders[grepl(tree_name, basename(folders))]

for (idx1 in 1:length(matching_folders)) {
simulation_instance <- basename(matching_folders[idx1])
input_simulated <- load_data(simulation_input_folder, simulation_instance)
sample_description_simulated <- input_simulated$sample_description
for (color in c(
"orchid", "orchid1", "orchid2",
"orchid3", "orchid4", "darkorchid",
"darkorchid1", "darkorchid2", "darkorchid3",
"darkorchid4", "purple", "purple1",
"purple2", "purple3", "purple4"
)) {
distance_simulated <-
computeClusterSplits(
input_simulated$sample_description, input_simulated$postSampling,
simulation_instance, input_simulated$nCells, input_simulated$nMutations,
input_simulated$nClusters, input_simulated$alleleCount,
input_simulated$mutatedReadCounts, input_simulated$totalReadCounts,
nMutationSamplingEvents = n_sampling_events,
nTreeSamplingEvents = n_sampling_events,
cellPairSelection = c(color)
)
plot(
ggplot(
data.frame(x = distance_simulated$aggregatedBranchingProbabilities), aes(x = x)
) +
geom_histogram(binwidth = 0.01)
)

mean_splitting_scores_mono <- c(mean_splitting_scores_mono, mean(distance_simulated$aggregatedBranchingProbabilities))
}
}
}



involved_cell_indices <- sub(paste0(".*", tree_name, "_"), "", simulation_instance) %>%
strsplit("_") %>%
unlist()
involved_cell_indices <- as.numeric(involved_cell_indices)

involved_single_tumor_cells <- description_data$sample_name[involved_cell_indices]

pairs <- combn(involved_single_tumor_cells, 2, simplify = FALSE)

distance_separate <-
computeClusterSplits(input$sample_description, input$postSampling,
treeName, input$nCells, input$nMutations,
input$nClusters, input$alleleCount,
input$mutatedReadCounts, input$totalReadCounts,
nMutationSamplingEvents = n_sampling_events,
nTreeSamplingEvents = n_sampling_events,
cellPairSelection = pairs
)

plot(
ggplot(
data.frame(x = distance_separate$aggregatedBranchingProbabilities), aes(x = x)
) +
geom_histogram(binwidth = 0.01)
)
mean(distance_separate$aggregatedBranchingProbabilities)




#########

mean_splitting_scores_mono <- mean_splitting_scores_mono[!is.na(mean_splitting_scores_mono)]

data.frame(splitting_probs = mean_splitting_scores_mono, Monoclonal = TRUE) %>%
ggplot(aes(y = splitting_probs)) +
geom_boxplot() +
theme_minimal()

load("~/Documents/projects/CTC_backup/simulations/simulation3/mean_branching_probs_oligo.RData")
load("~/Documents/projects/CTC_backup/simulations/simulation3/mean_branching_probs_mono.RData")
load("~/Documents/projects/CTC_backup/simulations/simulation3/deviance_splitting_score.RData")

splitting_probs <- data.frame(splitting_probs = mean_splitting_scores_mono, Oligoclonal = "Monoclonal")
splitting_probs2 <- data.frame(splitting_probs = mean_branching_probs_oligo, Oligoclonal = "Oligoclonal")
splitting_probs_single_cells <- data.frame(splitting_probs = -deviance_splitting_score + mean_branching_probs_oligo, Oligoclonal = "Genetically distinct single cells")

splitting_probs <- rbind(splitting_probs, splitting_probs2, splitting_probs_single_cells)
splitting_probs <- splitting_probs %>% mutate(Oligoclonal = factor(Oligoclonal, levels = c("Monoclonal", "Oligoclonal", "Genetically distinct single cells")))

splitting_probs %>%
ggplot(aes(y = splitting_probs, x = Oligoclonal, group = Oligoclonal)) +
geom_boxplot() +
ylab("Mean splitting probability") +
xlab("Clonality status of CTC cluster") +
theme_minimal()
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 18),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
axis.text.y = element_text(size = 18)
)

data.frame(y = deviance_splitting_score) %>%
ggplot(aes(y = y)) +
geom_boxplot() +
theme_minimal()


data.frame(y = -deviance_splitting_score + mean_branching_probs_oligo) %>%
ggplot(aes(y = y)) +
geom_boxplot() +
theme_minimal()

0 comments on commit 1d7f2c5

Please sign in to comment.