added WBC analysis markdown file

cbg-ethz · Jan 17, 2025 · 828e4d0 · 828e4d0
1 parent 23d1bd4
commit 828e4d0
Show file tree

Hide file tree

Showing 3 changed files with 102 additions and 5 deletions.
diff --git a/experiments/assessing_cluster_clonality/sandbox/WBC_analysis.R b/experiments/assessing_cluster_clonality/sandbox/WBC_analysis.R
@@ -27,17 +27,14 @@ summary(fit2)
 fit3 <- glm(n_wbcs ~ n_cells + `Sample Name` + Oligoclonal, data = filtered_data, family = poisson(link = "log"))
 summary(fit3)
 
-fit4 <- glm(n_wbcs ~ n_cells + `Sample Name` + Oligoclonal, data = filtered_data, family = poisson(link = "log"))
-summary(fit4)
-### Not significant
 
 
 fit6 <- glm(high_impact_mutations ~ n_cells + `Sample Name` + Oligoclonal + WBC, data = filtered_data, family = poisson(link = "log"))
 summary(fit6)
 ### Not significant
 
 
-fit7 <- glm(as.factor(Oligoclonal) ~ n_cells + `Sample Name` + impact_mutations + Oligoclonal + WBC, data = filtered_data, family = binomial(link = "logit"))
+fit7 <- glm(as.factor(Oligoclonal) ~ n_cells + `Sample Name` + impact_mutations + WBC, data = filtered_data, family = binomial(link = "logit"))
 summary(fit7)
 
 

diff --git a/experiments/assessing_cluster_clonality/sandbox/WBC_analysis.Rmd b/experiments/assessing_cluster_clonality/sandbox/WBC_analysis.Rmd
@@ -0,0 +1,100 @@
+---
+title: "WBC_analysis"
+author: "Johannes Gawron"
+date: "2025-01-17"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+library(tidyverse)
+```
+
+## Role of WBCs in oligoclonality and genetic diversity
+Here, we study the role of white blood cells in shaping the genetic
+diversity and the oligoclonality of CTC clusters.
+
+# Loading data
+
+```{r loading, include=FALSE}
+# Read the annotated splitting summary and derive two helper columns:
+#   WBC              - result of n_wbcs > 0
+#   impact_mutations - sum of high- and medium-impact mutation columns
+data <- read_tsv(
+  "~/work/ctc-data/WES_experiment/splitting_summaries/splittingSummary_full_with_sample_names_annotated.tsv"
+) %>%
+  mutate(
+    WBC = n_wbcs > 0,
+    impact_mutations = high_impact_mutations + medium_impact_mutations
+  )
+```
+Here is an overview of the data:
+
+```{r data overview, include=TRUE}
+# Print the loaded table (including the derived WBC and impact_mutations
+# columns) so that it appears in the rendered report.
+print(data)
+
+```
+# Preprocessing
+
+The data from Br26 will be excluded manually, as this is in fact not a breast
+cancer sample but an ovarian cancer sample (which we don't study here).
+
+```{r filtering, include=TRUE}
+# Remove the Br26 sample from the full table (it stays excluded from `data`).
+data <- dplyr::filter(data, `Sample Name` != "Br26")
+
+# Keep only rows whose sample name matches "Br", "Pr" or "LM2".
+filtered_data <- dplyr::filter(
+  data,
+  str_detect(`Sample Name`, "Br|Pr|LM2")
+)
+```
+
+# Data analysis
+To ensure that our analysis is not confounded, we control for the number of
+cells in the CTC clusters and the sample identity throughout the analyses.
+
+## Predicting oligoclonality from the presence of white blood cells
+
+```{r fit1, include=TRUE}
+# Logistic regression: oligoclonality status explained by WBC presence,
+# adjusting for the number of cells and the sample identity.
+fit <- glm(
+  formula = as.factor(Oligoclonal) ~ n_cells + `Sample Name` + WBC,
+  family = binomial(link = "logit"),
+  data = filtered_data
+)
+summary(fit)
+```
+No significant effect of the presence of white blood cells on the
+oligoclonality status of the CTC clusters was found.
+
+## Effect of the number of white blood cells on Oligoclonality
+
+```{r fit2, include=TRUE}
+# Logistic regression: oligoclonality status explained by the white blood
+# cell count (n_wbcs), adjusting for the number of cells and sample identity.
+fit <- glm(
+  formula = as.factor(Oligoclonal) ~ n_cells + `Sample Name` + n_wbcs,
+  family = binomial(link = "logit"),
+  data = filtered_data
+)
+summary(fit)
+```
+No significant effect of the number of white blood cells on the
+oligoclonality status of the CTC clusters was found.
+
+
+## Predicting the presence of mutations with high functional impact
+```{r fit}
+# Poisson regression of the number of high-impact mutations on WBC presence
+# and oligoclonality status, adjusting for the number of cells and the sample
+# identity. The response is the mutation count (not Oligoclonal), matching
+# the section heading and the interpretation given below the chunk.
+fit <-
+  glm(
+    high_impact_mutations ~ n_cells + `Sample Name` + Oligoclonal + WBC,
+    data = filtered_data,
+    family = poisson(link = "log")
+    )
+summary(fit)
+```
+No significant effect was found for predicting the number of mutations with a
+high functional impact as a function of the presence of white blood cells and of
+the clonality status.
+
+
+## Predicting the presence of mutations with functional impact
+
+```{r fit4}
+# Logistic regression of oligoclonality status on impact_mutations (the sum of
+# high- and medium-impact mutations derived in the loading chunk) and WBC
+# presence, adjusting for the number of cells and the sample identity.
+# impact_mutations is used here rather than high_impact_mutations, which the
+# previous section already covers.
+fit7 <-
+  glm(
+    as.factor(Oligoclonal) ~ n_cells + `Sample Name` + impact_mutations + WBC,
+    data = filtered_data,
+    family = binomial(link = "logit"))
+summary(fit7)
+```
diff --git a/experiments/assessing_cluster_clonality/sandbox/mutation_annotation_summary.R b/experiments/assessing_cluster_clonality/sandbox/mutation_annotation_summary.R
@@ -52,4 +52,4 @@ for(idx in seq_len(nrow(splitting_summary))) {
     called_variants_annotated %>% dplyr::filter(relevant == "HIGH") %>% nrow()
 }
 
-write_delim(splitting_summary, file = "~/work/ctc-data/WES_experiment/splitting_summaries/splittingSummmary_full_with_sample_names_annotated.tsv", delim = "\t")
+write_delim(splitting_summary, file = "~/work/ctc-data/WES_experiment/splitting_summaries/splittingSummary_full_with_sample_names_annotated.tsv", delim = "\t")