Slim Fourati 2024-01-29
suppressPackageStartupMessages(library(package = "knitr"))
suppressPackageStartupMessages(library(package = "readxl"))
suppressPackageStartupMessages(library(package = "zoo"))
suppressPackageStartupMessages(library(package = "ggbeeswarm"))
suppressPackageStartupMessages(library(package = "caTools"))
suppressPackageStartupMessages(library(package = "EDASeq"))
suppressPackageStartupMessages(library(package = "pvca")) # dleelab/pvca
suppressPackageStartupMessages(library(package = "lme4"))
suppressPackageStartupMessages(library(package = "parallelDist"))
suppressPackageStartupMessages(library(package = "parallel"))
suppressPackageStartupMessages(library(package = "edgeR"))
suppressPackageStartupMessages(library(package = "ggrepel"))
suppressPackageStartupMessages(library(package = "ComplexHeatmap"))
suppressPackageStartupMessages(library(package = "tidyverse"))
opts_chunk$set(echo = TRUE, fig.path = "../figure/")
options(readr.show_col_types = FALSE,
dplyr.summarise.inform = FALSE)
workDir <- dirname(getwd())
Reading raw counts
countDF <- read_csv(file = file.path(workDir, "input/joana.genecounts.csv")) %>%
as.data.frame() %>%
column_to_rownames(var = "gene_id")
message("First five rows/columns of count matrix")
## First five rows/columns of count matrix
print(countDF[1:5, 1:5])
## DGAM_wk8_Bcells DGAM_wk8_cDCs DGAM_wk8_monocytes
## ENSMMUG00000000001 305 0 6
## ENSMMUG00000000002 32 0 0
## ENSMMUG00000000005 754 169 399
## ENSMMUG00000000006 0 0 0
## ENSMMUG00000000007 95 0 51
## DGAM_wk8_NKcells DGAM_wk8_pDCs
## ENSMMUG00000000001 33 18
## ENSMMUG00000000002 0 0
## ENSMMUG00000000005 46 927
## ENSMMUG00000000006 2 0
## ENSMMUG00000000007 59 181
message("Dimensions of the count matrix")
## Dimensions of the count matrix
print(dim(countDF))
## [1] 34847 200
Read sample annotation
sampleAnnotFile <- file.path(workDir,
"input",
"Joana_Dias_bulk_RNAf_Sample_tracking_sheet_03082020.xlsx")
# read each plates
sheetLS <- excel_sheets(path = sampleAnnotFile)
sampleAnnotDF <- lapply(sheetLS, function(sheetName) {
sampleAnnotTemp <- read_excel(path = sampleAnnotFile, sheet = sheetName) %>%
mutate(Plate = sheetName)
return(value = sampleAnnotTemp)
}) %>%
do.call(what = rbind)
# append sample id
sampleAnnotDF <- sampleAnnotDF %>%
mutate(rowname = paste(`Monkey ID`,
Timepoint,
`Cell population sorted`,
sep = "_"),
rowname = gsub(pattern = " ", replacement = "", rowname)) %>%
setNames(nm = make.names(names = names(.))) %>%
as.data.frame() %>%
column_to_rownames(var = "rowname")
sampleAnnotDF <- sampleAnnotDF[colnames(countDF), ]
# TotalReads counted
sampleAnnotDF$TotalReads <- colSums(countDF)
message("print header of sample annotation")
## print header of sample annotation
print(sampleAnnotDF[1:5, 1:5])
## Collaborator Sample.
## DGAM_wk8_Bcells Joana Dias/Koup Lab 7
## DGAM_wk8_cDCs Joana Dias/Koup Lab 23
## DGAM_wk8_monocytes Joana Dias/Koup Lab 8
## DGAM_wk8_NKcells Joana Dias/Koup Lab 25
## DGAM_wk8_pDCs Joana Dias/Koup Lab 24
## Sample.Type.Subset Study.. Monkey.ID
## DGAM_wk8_Bcells total RNA from sorted cells in RNAzol 730.2 DGAM
## DGAM_wk8_cDCs total RNA from sorted cells in RNAzol 730.2 DGAM
## DGAM_wk8_monocytes total RNA from sorted cells in RNAzol 730.2 DGAM
## DGAM_wk8_NKcells total RNA from sorted cells in RNAzol 730.2 DGAM
## DGAM_wk8_pDCs total RNA from sorted cells in RNAzol 730.2 DGAM
message("Number of animal w/ RNA-seq data")
## Number of animal w/ RNA-seq data
sampleAnnotDF %>%
.$Monkey.ID %>%
unique() %>%
length() %>%
print()
## [1] 17
message("Number of timepoints and subsets w/ RNA-seq data")
## Number of timepoints and subsets w/ RNA-seq data
sampleAnnotDF %>%
select(Monkey.ID, Timepoint, Cell.population.sorted) %>%
distinct() %>%
group_by(Timepoint, Cell.population.sorted) %>%
summarize(n = n()) %>%
ungroup() %>%
mutate(Timepoint = factor(Timepoint, levels = c("pre", "D14", "wk8"))) %>%
pivot_wider(names_from = Cell.population.sorted, values_from = n) %>%
arrange(Timepoint) %>%
print()
## # A tibble: 3 × 6
## Timepoint `B cells` `NK cells` cDCs monocytes pDCs
## <fct> <int> <int> <int> <int> <int>
## 1 pre 14 14 14 14 14
## 2 D14 10 10 10 10 10
## 3 wk8 16 16 16 16 16
Transcriptomic profiles of 17 animals for 5 subsets at 3 timepoints was performed
Read viral load
vlFile <- file.path(workDir,
"input/Raw_data_for_Slim_JD20200414.xlsx")
vlDF <- read_excel(path = vlFile,
skip = 2,
col_names = paste0("X", 1:8)) %>%
mutate(Treatment = factor(X1,
levels = c("Untreated", "WT bNAb Tx", "DEL bNAb Tx")),
Treatment = na.locf(Treatment)) %>%
filter(X1 != Treatment & !is.na(X1))
by(vlDF, INDICES = vlDF$Treatment, FUN = function(x) {
cNames <- as.vector(unlist(x[1, ]))
cNames[length(cNames)] <- "Treatment"
x <- x[-1, ]
colnames(x) <- cNames
x <- x %>%
pivot_longer(cols = cNames[3:8],
names_to = "Animal ID",
values_to = "Plasma viral loads (copies/mL)")
return(value = x)
}) %>%
do.call(what = rbind) -> vlDF
plotDF <- vlDF %>%
select(-`day post-challenge`) %>%
mutate(`week post-challenge` = as.numeric(`week post-challenge`),
`Plasma viral loads (copies/mL)` =
gsub(pattern = "below |Below ",
replacement = "",
`Plasma viral loads (copies/mL)`),
`Plasma viral loads (copies/mL)` =
as.numeric(`Plasma viral loads (copies/mL)`))
ggplot(data = plotDF,
mapping = aes(x = `week post-challenge`,
y = `Plasma viral loads (copies/mL)`)) +
geom_line(mapping = aes(group = `Animal ID`, color = Treatment), linewidth = 1.1) +
scale_y_log10() +
scale_color_manual(values = c("Untreated" = "grey",
"WT bNAb Tx" = "orange",
"DEL bNAb Tx" = "green")) +
theme_bw()
maxDF <- plotDF %>%
group_by(Treatment, `Animal ID`) %>%
summarize(max.vl = max(`Plasma viral loads (copies/mL)`))
ggplot(data = maxDF,
mapping = aes(x = Treatment, y = max.vl)) +
geom_boxplot(fill = "transparent") +
geom_beeswarm(mapping = aes(color = Treatment), cex = 3, size = 2) +
scale_y_log10() +
labs(y = "Max plasma viral loads (copies/mL)") +
scale_color_manual(values = c("Untreated" = "grey",
"WT bNAb Tx" = "orange",
"DEL bNAb Tx" = "green")) +
theme_bw()
kruskal.test(x = maxDF$max.vl, g = maxDF$Treatment)
##
## Kruskal-Wallis rank sum test
##
## data: maxDF$max.vl and maxDF$Treatment
## Kruskal-Wallis chi-squared = 1.0743, df = 2, p-value = 0.5844
pairwise.wilcox.test(x = maxDF$max.vl, g = maxDF$Treatment, p.adjust.method = "none")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: maxDF$max.vl and maxDF$Treatment
##
## Untreated WT bNAb Tx
## WT bNAb Tx 0.81 -
## DEL bNAb Tx 0.29 0.69
##
## P value adjustment method: none
# append to sample annotation
sampleAnnotDF %>%
rownames_to_column() %>%
merge(y = maxDF, by.x = "Monkey.ID", by.y = "Animal ID") %>%
column_to_rownames() -> sampleAnnotDF
sampleAnnotDF <- sampleAnnotDF[colnames(countDF), ]
time2maxDF <- plotDF %>%
arrange(desc(`week post-challenge`)) %>%
group_by(Treatment, `Animal ID`) %>%
summarize(time2max.vl = `week post-challenge`[which.max(`Plasma viral loads (copies/mL)`)]) %>%
ungroup()
ggplot(data = time2maxDF,
mapping = aes(x = Treatment, y = time2max.vl)) +
geom_boxplot(fill = "transparent") +
geom_beeswarm(mapping = aes(color = Treatment), cex = 3, size = 2) +
labs(y = "Time to max plasma viral loads (weeks)") +
scale_color_manual(values = c("Untreated" = "grey",
"WT bNAb Tx" = "orange",
"DEL bNAb Tx" = "green")) +
theme_bw()
kruskal.test(x = time2maxDF$time2max.vl, g = time2maxDF$Treatment)
##
## Kruskal-Wallis rank sum test
##
## data: time2maxDF$time2max.vl and time2maxDF$Treatment
## Kruskal-Wallis chi-squared = 7.3589, df = 2, p-value = 0.02524
pairwise.wilcox.test(x = time2maxDF$time2max.vl,
g = time2maxDF$Treatment,
p.adjust.method = "none")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: time2maxDF$time2max.vl and time2maxDF$Treatment
##
## Untreated WT bNAb Tx
## WT bNAb Tx 0.033 -
## DEL bNAb Tx 0.062 0.100
##
## P value adjustment method: none
sampleAnnotDF %>%
rownames_to_column() %>%
merge(y = select(time2maxDF, -Treatment),
by.x = "Monkey.ID",
by.y = "Animal ID") %>%
column_to_rownames() -> sampleAnnotDF
sampleAnnotDF <- sampleAnnotDF[colnames(countDF), ]
Read bNAb concentration
bnabFile <- file.path(workDir,
"input/Raw_data_for_Slim_JD20200414.xlsx")
bnabDF <- read_excel(path = bnabFile,
sheet = 2,
skip = 2,
col_names = paste0("X", 1:8)) %>%
mutate(bNab = factor(X1,
levels = c("WT bNAb Tx_[VRC07-523-LS]",
"WT bNAb Tx_[PGT121]",
"DEL bNAb Tx_[VRC07-523-LS/DEL]",
"DEL bNAb Tx_[PGT121/DEL]")),
bNab = na.locf(bNab)) %>%
filter(X1 != bNab & !is.na(X1))
by(bnabDF, INDICES = bnabDF$bNab, FUN = function(x) {
cNames <- as.vector(unlist(x[1, ]))
cNames[length(cNames)] <- "bNab"
x <- x[-1, ]
colnames(x) <- cNames
x <- x %>%
pivot_longer(cols = cNames[3:8],
names_to = "Animal ID",
values_to = "Plasma bNAb concentration (ug/mL)")
return(value = x)
}) %>%
do.call(what = rbind) -> bnabDF
bnabDF <- bnabDF %>%
mutate(`Plasma bNAb concentration (ug/mL)` =
as.numeric(`Plasma bNAb concentration (ug/mL)`),
`week post-challenge` = as.numeric(`week post-challenge`))
ggplot(data = bnabDF,
mapping = aes(x = `week post-challenge`,
y = `Plasma bNAb concentration (ug/mL)`)) +
geom_line(mapping = aes(by = `Animal ID`, color = bNab)) +
scale_color_manual(values = c("WT bNAb Tx_[VRC07-523-LS]" = "red",
"WT bNAb Tx_[PGT121]" = "darkblue",
"DEL bNAb Tx_[VRC07-523-LS/DEL]" = "orange",
"DEL bNAb Tx_[PGT121/DEL]" = "lightblue"),
name = "bNAb") +
theme_bw()
##
## Kruskal-Wallis rank sum test
##
## data: aucDF$auc and aucDF$bNab
## Kruskal-Wallis chi-squared = 19.98, df = 3, p-value = 0.0001714
statDF <- sampleAnnotDF %>%
mutate(log10.max.vl = log10(max.vl)) %>%
select(Monkey.ID, log10.max.vl, time2max.vl, auc.VRC07.523.LS, auc.PGT121,
Treatment) %>%
distinct()
message("correlation between max VL and bNAb in plasma:")
## correlation between max VL and bNAb in plasma:
cor.test(formula = ~log10.max.vl+auc.VRC07.523.LS,
data = statDF,
method = "spearman")
##
## Spearman's rank correlation rho
##
## data: log10.max.vl and auc.VRC07.523.LS
## S = 261.74, p-value = 0.7933
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.08481042
cor.test(formula = ~log10.max.vl+auc.PGT121,
data = statDF,
method = "spearman")
##
## Spearman's rank correlation rho
##
## data: log10.max.vl and auc.PGT121
## S = 253.66, p-value = 0.7264
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1130806
message("correlation between time to max VL and bNAb in plasma:")
## correlation between time to max VL and bNAb in plasma:
cor.test(formula = ~time2max.vl+auc.VRC07.523.LS,
data = statDF,
method = "spearman")
##
## Spearman's rank correlation rho
##
## data: time2max.vl and auc.VRC07.523.LS
## S = 126.77, p-value = 0.06007
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5567549
cor.test(formula = ~time2max.vl+auc.PGT121,
data = statDF,
method = "spearman")
##
## Spearman's rank correlation rho
##
## data: time2max.vl and auc.PGT121
## S = 126.77, p-value = 0.06007
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5567549
plotDF <- statDF %>%
pivot_longer(cols = c(auc.VRC07.523.LS, auc.PGT121),
names_to = "bNAb",
values_to = "AUC")
ggplot(data = plotDF,
mapping = aes(x = time2max.vl, y = AUC)) +
geom_point(mapping = aes(color = bNAb, shape = Treatment)) +
theme_bw()
Read gene annotation
colNames <- c("seqname",
"source",
"feature",
"start",
"end",
"score",
"strand",
"frame",
"attribute")
colTypes <- paste(c(rep("c", times = 3),
rep("i", times = 2),
rep("c", times = 4)),
collapse = "")
featuresAnnotationFile <- file.path(workDir, "input/joana.genes.gtf")
skipNb <- read_lines(file = featuresAnnotationFile)
skipNb <- sum(grepl(pattern = "^#", skipNb))
featuresAnnot <- read_tsv(file = featuresAnnotationFile,
skip = skipNb,
col_names = colNames,
col_types = colTypes)
# extract gene_id and transcript_id from attributes
featAnnot <- featuresAnnot %>%
mutate(gene_id = gsub(pattern = ".*gene_id \"([^;]+)\";.+",
replacement = "\\1",
attribute),
gene_name = ifelse(test = grepl(pattern = "gene_name",
attribute),
yes = gsub(pattern = ".+gene_name \"([^;]+)\";.+",
replacement = "\\1",
attribute),
no = NA),
gene_biotype = ifelse(test = grepl(pattern = "gene_biotype",
attribute),
yes = gsub(pattern = ".+gene_biotype \"([^;]+)\";.*",
replacement = "\\1",
attribute),
no = NA)) %>%
select(seqname, strand, gene_id, gene_name, gene_biotype) %>%
distinct() %>%
as.data.frame()
rownames(featAnnot) <- featAnnot$gene_id
featAnnot <- featAnnot[rownames(countDF), ]
cat("Number of genes annotated")
## Number of genes annotated
featAnnot %>%
group_by(gene_biotype) %>%
summarize(n = n()) %>%
arrange(desc(n))
## # A tibble: 19 × 2
## gene_biotype n
## <chr> <int>
## 1 protein_coding 21356
## 2 lncRNA 4596
## 3 misc_RNA 3385
## 4 snRNA 1839
## 5 snoRNA 1315
## 6 pseudogene 666
## 7 Y_RNA 661
## 8 miRNA 622
## 9 rRNA 103
## 10 IG_V_gene 87
## 11 processed_pseudogene 83
## 12 TR_V_gene 46
## 13 scaRNA 43
## 14 TR_J_gene 21
## 15 TR_C_gene 8
## 16 vaultRNA 7
## 17 ribozyme 4
## 18 IG_C_gene 3
## 19 IG_D_gene 2
Build raw SeqExpressionSet
# build raw ExpressionSet
esetRaw <- newSeqExpressionSet(counts = as.matrix(countDF),
featureData = AnnotatedDataFrame(featAnnot),
phenoData = AnnotatedDataFrame(sampleAnnotDF))
save(esetRaw, file = file.path(workDir, "output/joana.esetRaw.RData"))
print(esetRaw)
## SeqExpressionSet (storageMode: lockedEnvironment)
## assayData: 34847 features, 200 samples
## element names: counts, normalizedCounts, offset
## protocolData: none
## phenoData
## sampleNames: DGAM_wk8_Bcells DGAM_wk8_cDCs ... HZ9_wk8_pDCs (200
## total)
## varLabels: Monkey.ID Collaborator ... auc.PGT121 (32 total)
## varMetadata: labelDescription
## featureData
## featureNames: ENSMMUG00000000001 ENSMMUG00000000002 ...
## ENSMMUG00000065351 (34847 total)
## fvarLabels: seqname strand ... gene_biotype (5 total)
## fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation:
Barplot with read alignment statistics
## number of samples with more than 2e06 counted reads
## # A tibble: 2 × 2
## over10e6 `n()`
## <lgl> <int>
## 1 FALSE 25
## 2 TRUE 175
All samples except 25 have more than 2 million reads and can be used for diff. expression analysis.
Barplot with mapped reads statistics
plotDF <- counts(esetRaw) %>%
as.data.frame() %>%
rownames_to_column() %>%
merge(y = select(fData(esetRaw), gene_id, gene_biotype),
by.x = "rowname",
by.y = "gene_id") %>%
mutate(gene_biotype = ifelse(test = grepl(pattern = "pseudogene",
gene_biotype),
yes = "pseudogene",
no = gene_biotype),
gene_biotype = ifelse(test = gene_biotype %in%
c("miRNA", "lincRNA", "snoRNA",
"sRNA", "snRNA", "vaultRNA",
"3prime_overlapping_ncrna",
"macro_lncRNA", "scaRNA"),
yes = "ncrna",
no = gene_biotype),
gene_biotype = ifelse(test = gene_biotype %in%
c("antisense",
"ncRNA",
"Mt_rRNA",
"Mt_tRNA",
"protein_coding",
"pseudogene",
"ribozyme",
"rRNA"),
yes = gene_biotype,
no = "other")) %>%
gather(sample, value, -rowname, -gene_biotype) %>%
group_by(sample, gene_biotype) %>%
summarize(eta = sum(value),
mu = mean(value),
q2 = median(value)) %>%
ungroup() %>%
arrange(!grepl("Water", `sample`)) %>%
mutate(`sample` = factor(`sample`, levels = unique(`sample`)))
ggplot(data = plotDF,
mapping = aes(x = sample, y = eta, fill = gene_biotype)) +
geom_bar(stat = "identity") +
scale_y_continuous(expand = c(0, 0)) +
labs(x = "Samples", y = "Number of reads") +
theme_bw() +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
message("percentage of reads that are from protein coding")
## percentage of reads that are from protein coding
plotDF %>%
group_by(sample) %>%
summarize(n = sum(eta)) %>%
merge(y = plotDF) %>%
filter(gene_biotype %in% "protein_coding") %>%
mutate(perc_prot_cod = eta/n * 100) %>%
summarize(type = "perc_prot_coding",
min = min(.$perc_prot_cod, na.rm = TRUE),
max = max(.$perc_prot_cod, na.rm = TRUE))
## type min max
## 1 perc_prot_coding 76.30473 98.93777
As expected, most counted reads are mapped to protein coding genes.
Principal variance component analysis (raw)
esetTemp <- ExpressionSet(assayData = log2(counts(esetRaw) + 0.25),
phenoData = AnnotatedDataFrame(data = pData(esetRaw)))
esetTemp$TotalReadsQ3 <- cut(esetTemp$TotalReads,
breaks = quantile(esetTemp$TotalReads,
probs = c(0,0.33, 0.66,1)),
include.lowest = TRUE,
right = TRUE)
esetTemp$NumberCellsQ3 <- cut(esetTemp$Number.of.Cells,
breaks = c(0, 1500, 3000, 5000),
include.lowest = TRUE,
right = TRUE)
fit <- PVCA(counts = exprs(esetTemp),
meta = pData(esetTemp)[, c("Timepoint",
"Treatment",
"Monkey.ID",
"Cell.population.sorted",
"NumberCellsQ3",
"TotalReadsQ3")],
threshold = 0.6,
inter = FALSE)
plotDF <- data.frame(effect = as.vector(fit),
label = names(fit)) %>%
arrange(effect) %>%
mutate(label = factor(label, levels = label))
ggplot(data = plotDF,
mapping = aes(x = label, y = effect)) +
geom_bar(stat = "identity") +
labs(x = NULL, y = "Explained effect") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
Multidimensional scaling plot based on raw counts
rawMat <- counts(esetRaw)
mat <- rawMat[rowSums(rawMat) > 0, ]
mat <- log2(mat + 0.25)
distMat <- parallelDist(t(mat), threads = detectCores() - 1)
attributes(distMat)$Labels <- colnames(mat)
fit <- cmdscale(distMat, k = 2, eig = TRUE)
plotDF <- as.data.frame(fit$points)
plotDF <- plotDF %>%
rownames_to_column() %>%
merge(y = rownames_to_column(pData(esetRaw)),
by = "rowname")
ggplot(data = plotDF,
mapping = aes(x = V1,
y = V2,
shape = Cell.population.sorted,
color = TotalReads)) +
geom_point(size = 4) +
labs(x = paste0("1st dimension (",
round((fit$eig/sum(fit$eig))[1] * 100),
"%)"),
y = paste0("2nd dimension (",
round((fit$eig/sum(fit$eig))[2] * 100),
"%)")) +
scale_color_gradient(low = "lightgreen", high = "darkgreen") +
theme_bw()
Normalize (TMM) count data
# remove low counts samples
esetTemp <- esetRaw[, esetRaw$TotalReads >= 2.1e6]
dge <- DGEList(counts = counts(esetTemp),
remove.zeros = TRUE)
# remove low expressed genes
flag <- filterByExpr(dge, group = esetRaw$Cell.population.sorted)
dge <- dge[flag, , keep.lib.sizes = FALSE]
dge <- calcNormFactors(object = dge, method = "TMM")
normalizedCounts <- cpm(dge, normalized.lib.sizes = TRUE, log = TRUE)
eset <- newSeqExpressionSet(
counts = dge$counts,
normalizedCounts = as.matrix(normalizedCounts),
featureData = fData(esetTemp)[rownames(normalizedCounts), ],
phenoData = pData(esetTemp))
save(eset, file = file.path(workDir, "output/joana.eset.RData"))
print(eset)
## SeqExpressionSet (storageMode: lockedEnvironment)
## assayData: 13901 features, 175 samples
## element names: counts, normalizedCounts, offset
## protocolData: none
## phenoData
## sampleNames: DGAM_wk8_Bcells DGAM_wk8_pDCs ... HZ9_wk8_pDCs (175
## total)
## varLabels: Monkey.ID Collaborator ... auc.PGT121 (32 total)
## varMetadata: labelDescription
## featureData
## featureNames: ENSMMUG00000000001 ENSMMUG00000000002 ...
## ENSMMUG00000065345 (13901 total)
## fvarLabels: seqname strand ... gene_biotype (5 total)
## fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation:
Principal variance component analysis (normalized)
esetTemp <- ExpressionSet(assayData = normCounts(eset),
phenoData = AnnotatedDataFrame(data = pData(eset)))
esetTemp$TotalReadsQ3 <- cut(esetTemp$TotalReads,
breaks = quantile(esetTemp$TotalReads,
probs = c(0,0.33, 0.66,1)),
include.lowest = TRUE,
right = TRUE)
fit <- PVCA(counts = exprs(esetTemp),
meta = pData(esetTemp)[, c("Timepoint",
"Treatment",
"Monkey.ID",
"Cell.population.sorted",
"TotalReadsQ3")],
threshold = 0.6,
inter = FALSE)
plotDF <- data.frame(effect = as.vector(fit),
label = names(fit)) %>%
arrange(effect) %>%
mutate(label = factor(label, levels = unique(label)))
ggplot(data = plotDF,
mapping = aes(x = label, y = effect)) +
geom_bar(stat = "identity") +
labs(x = NULL, y = "Explained effect") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
Multidimensional scaling plot on normalized counts
# mds normalized/cpm counts
mat <- normCounts(eset)
distMat <- parallelDist(t(mat), threads = detectCores() - 1)
attributes(distMat)$Labels <- colnames(mat)
fit <- cmdscale(distMat, k = 2, eig = TRUE)
plotDF <- as.data.frame(fit$points) %>%
rownames_to_column() %>%
merge(y = rownames_to_column(pData(eset)),
by = "rowname")
plotLabel <- round((fit$eig/sum(fit$eig)) * 100)
plotLabel <- plotLabel[1:2]
plotLabel <- paste0(c("1st dimension (", "2nd dimension ("),
plotLabel,
"%)")
ggplot(data = plotDF,
mapping = aes(x = V1,
y = V2,
color = Cell.population.sorted,
shape = Treatment)) +
geom_point(size = 4) +
labs(x = plotLabel[[1]],
y = plotLabel[[2]]) +
theme_bw() +
theme(legend.key = element_blank(),
plot.title = element_text(hjust = 0.5))
message("Potential mislabelled samples")
## Potential mislabelled samples
filter(plotDF,
(Cell.population.sorted %in% "B cells" &
V1 < 0) |
(Cell.population.sorted %in% "monocytes" &
V1 > 0)) %>%
print()
## rowname V1 V2 Monkey.ID Collaborator Sample.
## 1 HH7_wk8_Bcells -120.70289 46.36394 HH7 Joana Dias/Koup Lab 4
## 2 HH7_wk8_monocytes 79.50156 2.87096 HH7 Joana Dias/Koup Lab 5
## Sample.Type.Subset Study.. Timepoint
## 1 total RNA from sorted cells in RNAzol 730.2 wk8
## 2 total RNA from sorted cells in RNAzol 730.2 wk8
## Cell.population.sorted Sample.Well.or.tube.label Number.of.Cells
## 1 B cells D1 5000
## 2 monocytes E1 2240
## Sample.Volume.ul. Method.to.determine.RNA.conc. RIN
## 1 10 NA NA
## 2 10 NA NA
## Library.Preparation.Method Library.Elution.volume i7.index
## 1 NEB Next Ultra II RNA Library Preparation Kit NA P37
## 2 NEB Next Ultra II RNA Library Preparation Kit NA P49
## Index.Primer.sequence Final.library.conc...nM. Final.avg.library.size..bp.
## 1 AGACCGTA NA 430
## 2 ACGGTCTT NA 430
## Date.Received Location.VRC Illumina.Instrument Flowcell.ID Lane.sequenced
## 1 NA NA HiSeq 4000 HGGCYBBXY 1
## 2 NA NA HiSeq 4000 HGGCYBBXY 1
## Date.Sequenced Notes Plate TotalReads Treatment max.vl time2max.vl
## 1 2020-03-25 NA Plate 1 8821941 Untreated 3.3e+07 4
## 2 2020-03-25 NA Plate 1 3877393 Untreated 3.3e+07 4
## auc.VRC07.523.LS auc.PGT121
## 1 NA NA
## 2 NA NA
After normalization (for total number of reads counted per samples),
subset remain the main driver of expression.
Potential swap was detected for animal HH7 between wk8 B cells and
Monocytes.
Jitter plot of genes coding for cell surface markers of B cells and Monocytes
geneLS <- c("CD14", "MS4A1")
plotDF <- counts(eset[fData(eset)$gene_name %in% geneLS, ]) %>%
as.data.frame() %>%
rownames_to_column() %>%
merge(y = select(fData(eset), gene_id, gene_name),
by.x = "rowname",
by.y = "gene_id") %>%
select(-rowname) %>%
gather(SampleID, count, -gene_name) %>%
merge(y = select(rownames_to_column(pData(eset)),
rowname,
Cell.population.sorted),
by.x = "SampleID",
by.y = "rowname") %>%
mutate(value = pmax(count, 0.1))
# swaps
swapLS <- c("HH7_wk8_Bcells", "HH7_wk8_monocytes")
swapDF <- filter(plotDF, SampleID %in% swapLS)
ggplot(data = filter(plotDF, count > 0),
mapping = aes(x = Cell.population.sorted, y = count)) +
geom_boxplot(outlier.color = "transparent") +
geom_beeswarm(mapping = aes(color = Cell.population.sorted)) +
geom_text_repel(data = swapDF,
mapping = aes(label = SampleID)) +
scale_y_log10() +
facet_wrap(facet = ~gene_name, scale = "free", nrow = 3) +
theme_bw() +
theme(axis.text = element_text(size = 5),
legend.pos = "none")
Principal variance component analysis per subset
plotDF <- NULL
for (SUBSET in unique(eset$Cell.population.sorted)) {
esetTemp <- eset[, eset$Cell.population.sorted %in% SUBSET]
esetTemp <- ExpressionSet(assayData = normCounts(esetTemp),
phenoData = AnnotatedDataFrame(data = pData(esetTemp)))
fit <- PVCA(counts = exprs(esetTemp),
meta = pData(esetTemp)[, c("Timepoint",
"Treatment",
"Monkey.ID")],
threshold = 0.6,
inter = FALSE)
plotTemp <- data.frame(effect = as.vector(fit),
label = names(fit)) %>%
arrange(effect) %>%
mutate(label = factor(label, levels = unique(label)),
Cell.population.sorted = SUBSET)
plotDF <- rbind(plotDF, plotTemp)
}
ggplot(data = plotDF,
mapping = aes(x = label, y = effect)) +
geom_bar(stat = "identity") +
labs(x = NULL, y = "Explained effect") +
theme_bw() +
facet_wrap(facets = ~Cell.population.sorted) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
Substract pre expression
## SeqExpressionSet (storageMode: lockedEnvironment)
## assayData: 13901 features, 82 samples
## element names: counts, normalizedCounts, offset
## protocolData: none
## phenoData
## sampleNames: DGBG_D14_Bcells DGBG_wk8_Bcells ... HZ9_wk8_pDCs (82
## total)
## varLabels: Monkey.ID Collaborator ... auc.PGT121 (32 total)
## varMetadata: labelDescription
## featureData
## featureNames: ENSMMUG00000000001 ENSMMUG00000000002 ...
## ENSMMUG00000065345 (13901 total)
## fvarLabels: seqname strand ... gene_biotype (5 total)
## fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
## Annotation:
Principal variance component analysis per subset (on baselined expression)
plotDF <- NULL
for (SUBSET in unique(esetBaselined$Cell.population.sorted)) {
esetTemp <- esetBaselined[, esetBaselined$Cell.population.sorted %in% SUBSET]
esetTemp <- ExpressionSet(assayData = normCounts(esetTemp),
phenoData = AnnotatedDataFrame(data = pData(esetTemp)))
fit <- PVCA(counts = exprs(esetTemp),
meta = pData(esetTemp)[, c("Timepoint",
"Treatment",
"Monkey.ID")],
threshold = 0.6,
inter = FALSE)
plotTemp <- data.frame(effect = as.vector(fit),
label = names(fit)) %>%
arrange(effect) %>%
mutate(label = factor(label, levels = unique(label)),
Cell.population.sorted = SUBSET)
plotDF <- rbind(plotDF, plotTemp)
}
ggplot(data = plotDF,
mapping = aes(x = label, y = effect)) +
geom_bar(stat = "identity") +
labs(x = NULL, y = "Explained effect") +
theme_bw() +
facet_wrap(facets = ~Cell.population.sorted) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
Multidimensional scalling plot on baselined counts by subsets
plotDF <- NULL
for (SUBSET in unique(esetBaselined$Cell.population.sorted)) {
# mds normalized/cpm counts
esetTemp <- esetBaselined[, esetBaselined$Cell.population.sorted %in% SUBSET]
mat <- normCounts(esetTemp)
distMat <- parallelDist(t(mat), threads = detectCores() - 1)
attributes(distMat)$Labels <- colnames(mat)
fit <- cmdscale(distMat, k = 2, eig = TRUE)
as.data.frame(fit$points) %>%
rownames_to_column() %>%
merge(y = rownames_to_column(pData(eset)),
by = "rowname") %>%
rbind(plotDF, .) -> plotDF
}
ggplot(data = plotDF,
mapping = aes(x = V1,
y = V2,
color = Timepoint,
shape = Treatment)) +
geom_point(size = 4) +
facet_wrap(facets = ~Cell.population.sorted, scale = "free") +
theme_bw() +
theme(legend.key = element_blank(),
plot.title = element_text(hjust = 0.5))
Differential expression between timepoints (paired)
message("top 50 genes based on p-values (all FDR<=5%")
## top 50 genes based on p-values (all FDR<=5%
fits <- list()
# vaccine effect
for (SUBSET in unique(eset$Cell.population.sorted)) {
esetTemp <- eset[, eset$Cell.population.sorted %in% SUBSET]
dge <- DGEList(counts = counts(esetTemp),
samples = pData(esetTemp),
genes = fData(esetTemp))
goi <- interaction(gsub(pattern = " .+",
replacement = "",
esetTemp$Treatment),
esetTemp$Timepoint,
drop = TRUE)
donor <- esetTemp$Monkey.ID
designMat <- model.matrix(~0+goi)
rownames(designMat) <- sampleNames(esetTemp)
colnames(designMat) <- gsub(pattern = "goi",
replacement = "",
colnames(designMat))
attr(designMat, "assign") <- attr(designMat, "contrasts") <- NULL
v <- voom(counts = dge, design = designMat)
corfit <- duplicateCorrelation(v$E,
design = designMat,
block = donor)
fit <- lmFit(v,
design = designMat,
block = donor,
correlation = corfit$consensus)
contrastLS <- grep(pattern = "pre", colnames(fit), value = TRUE, invert = TRUE) %>%
paste0(., "-", gsub(pattern = "\\..+", replacement = ".pre", .))
contrastMat <- makeContrasts(contrasts = contrastLS, levels = fit$design)
fit2 <- contrasts.fit(fit = fit, contrasts = contrastMat)
fit2 <- eBayes(fit = fit2)
fits[[paste0(SUBSET, "_vax")]] <- list(fit = fit, fit2 = fit2)
}
# for NK and Monocytes of DEL at wk8 compare low VL vs max VL
for (SUBSET in setdiff(eset$Cell.population.sorted, "cDCs")) {
esetTemp <- eset[, eset$Cell.population.sorted %in% SUBSET &
eset$Treatment != "Untreated"]
dge <- DGEList(counts = counts(esetTemp),
samples = pData(esetTemp),
genes = fData(esetTemp))
goi <- ifelse(test = esetTemp$max.vl >= 1e04,
yes = "highMaxVL",
no = "lowMaxVL") %>%
interaction(gsub(pattern = " .+",
replacement = "",
esetTemp$Treatment),
esetTemp$Timepoint,
.)
designMat <- model.matrix(~0+goi)
rownames(designMat) <- sampleNames(esetTemp)
colnames(designMat) <- gsub(pattern = "goi",
replacement = "",
colnames(designMat))
attr(designMat, "assign") <- attr(designMat, "contrasts") <- NULL
v <- voom(counts = dge, design = designMat)
fit <- lmFit(v, design = designMat)
contrastLS <- grep(pattern = "highMaxVL", colnames(fit), value = TRUE) %>%
paste0(.,
"-",
gsub(pattern = "high", replacement = "low", .))
contrastMat <- makeContrasts(contrasts = contrastLS,
levels = fit$design)
fit2 <- contrasts.fit(fit = fit, contrasts = contrastMat)
fit2 <- eBayes(fit = fit2)
fits[[paste0(SUBSET, "_maxVL")]] <- list(fit = fit, fit2 = fit2)
}
# for NK and Monocytes of DEL at wk8 compare low VL vs max VL
for (SUBSET in setdiff(esetBaselined$Cell.population.sorted, "cDCs")) {
esetTemp <- esetBaselined[, esetBaselined$Cell.population.sorted %in% SUBSET &
esetBaselined$Treatment %in% "DEL bNAb Tx"]
goi <- ifelse(test = esetTemp$max.vl >= 1e04,
yes = "highMaxVL",
no = "lowMaxVL") %>%
interaction(gsub(pattern = " .+",
replacement = "",
esetTemp$Treatment),
esetTemp$Timepoint,
.)
designMat <- model.matrix(~0+goi)
rownames(designMat) <- sampleNames(esetTemp)
colnames(designMat) <- gsub(pattern = "goi",
replacement = "",
colnames(designMat))
attr(designMat, "assign") <- attr(designMat, "contrasts") <- NULL
fit <- lmFit(normCounts(esetTemp), design = designMat)
contrastLS <- grep(pattern = "highMaxVL", colnames(fit), value = TRUE) %>%
paste0(.,
"-",
gsub(pattern = "high", replacement = "low", .))
contrastMat <- makeContrasts(contrasts = contrastLS,
levels = fit$design)
fit2 <- contrasts.fit(fit = fit, contrasts = contrastMat)
fit2 <- eBayes(fit = fit2)
fits[[paste0(SUBSET, "_baselined_maxVL")]] <- list(fit = fit, fit2 = fit2)
}
# save fits
save(fits, file = file.path(workDir, "output/joana.fits.RData"))
# print number of genes differently expressed
degNbDF <- NULL
for (modelName in names(fits)) {
fit2 <- fits[[modelName]][["fit2"]]
for (coefName in colnames(fit2)) {
top <- topTable(fit = fit2, coef = coefName, number = Inf) %>%
as.data.frame()
if (!("gene_id" %in% names(top))) {
top <- top %>%
rownames_to_column(var = "gene_id") %>%
merge(y = select(fData(eset), gene_id, gene_name, gene_biotype), by = "gene_id")
}
top %>%
filter(gene_biotype %in% "protein_coding") %>%
group_by(sign(logFC)) %>%
summarize(p = sum(P.Value <= 0.05),
adj.p = sum(adj.P.Val <= 0.05)) %>%
mutate(modelName = modelName,
contrast = coefName) %>%
rbind(degNbDF) -> degNbDF
}
}
degNbDF %>%
pivot_longer(cols = c(-modelName, -contrast, -`sign(logFC)`),
names_to = "cname",
values_to = "n") %>%
mutate(`sign(logFC)` = c("-1" = "DN", "1" = "UP")[as.character(`sign(logFC)`)],
cname = paste0(cname, `sign(logFC)`)) %>%
select(-`sign(logFC)`) %>%
pivot_wider(names_from = cname, values_from = n) %>%
mutate(p = paste0(pUP, "/", pDN),
adj.p = paste0(adj.pUP, "/", adj.pDN)) %>%
select(modelName, contrast, p, adj.p) %>%
as.data.frame() %>%
kable()
modelName | contrast | p | adj.p |
---|---|---|---|
monocytes_baselined_maxVL | DEL.wk8.highMaxVL-DEL.wk8.lowMaxVL | 866/619 | 134/8 |
monocytes_baselined_maxVL | DEL.D14.highMaxVL-DEL.D14.lowMaxVL | 269/413 | 0/0 |
pDCs_baselined_maxVL | DEL.wk8.highMaxVL-DEL.wk8.lowMaxVL | 694/367 | 0/0 |
pDCs_baselined_maxVL | DEL.D14.highMaxVL-DEL.D14.lowMaxVL | 154/126 | 0/0 |
NK cells_baselined_maxVL | DEL.wk8.highMaxVL-DEL.wk8.lowMaxVL | 715/785 | 2/0 |
NK cells_baselined_maxVL | DEL.D14.highMaxVL-DEL.D14.lowMaxVL | 256/420 | 0/0 |
B cells_baselined_maxVL | DEL.wk8.highMaxVL-DEL.wk8.lowMaxVL | 396/257 | 0/0 |
B cells_baselined_maxVL | DEL.D14.highMaxVL-DEL.D14.lowMaxVL | 292/403 | 0/0 |
monocytes_maxVL | WT.wk8.highMaxVL-WT.wk8.lowMaxVL | 412/637 | 4/56 |
monocytes_maxVL | DEL.wk8.highMaxVL-DEL.wk8.lowMaxVL | 599/928 | 34/9 |
monocytes_maxVL | WT.pre.highMaxVL-WT.pre.lowMaxVL | 341/133 | 0/0 |
monocytes_maxVL | DEL.pre.highMaxVL-DEL.pre.lowMaxVL | 449/255 | 0/0 |
monocytes_maxVL | WT.D14.highMaxVL-WT.D14.lowMaxVL | 1310/675 | 35/35 |
monocytes_maxVL | DEL.D14.highMaxVL-DEL.D14.lowMaxVL | 238/190 | 0/0 |
NK cells_maxVL | WT.wk8.highMaxVL-WT.wk8.lowMaxVL | 72/306 | 0/2 |
NK cells_maxVL | DEL.wk8.highMaxVL-DEL.wk8.lowMaxVL | 868/671 | 0/1 |
NK cells_maxVL | WT.pre.highMaxVL-WT.pre.lowMaxVL | 248/225 | 0/0 |
NK cells_maxVL | DEL.pre.highMaxVL-DEL.pre.lowMaxVL | 851/351 | 0/1 |
NK cells_maxVL | WT.D14.highMaxVL-WT.D14.lowMaxVL | 978/662 | 7/16 |
NK cells_maxVL | DEL.D14.highMaxVL-DEL.D14.lowMaxVL | 158/167 | 0/0 |
pDCs_maxVL | WT.wk8.highMaxVL-WT.wk8.lowMaxVL | 287/586 | 10/9 |
pDCs_maxVL | DEL.wk8.highMaxVL-DEL.wk8.lowMaxVL | 893/314 | 1/0 |
pDCs_maxVL | WT.pre.highMaxVL-WT.pre.lowMaxVL | 150/418 | 0/0 |
pDCs_maxVL | DEL.pre.highMaxVL-DEL.pre.lowMaxVL | 198/291 | 0/0 |
pDCs_maxVL | WT.D14.highMaxVL-WT.D14.lowMaxVL | 926/350 | 75/15 |
pDCs_maxVL | DEL.D14.highMaxVL-DEL.D14.lowMaxVL | 112/165 | 0/0 |
B cells_maxVL | WT.wk8.highMaxVL-WT.wk8.lowMaxVL | 166/388 | 0/2 |
B cells_maxVL | DEL.wk8.highMaxVL-DEL.wk8.lowMaxVL | 377/525 | 0/1 |
B cells_maxVL | WT.pre.highMaxVL-WT.pre.lowMaxVL | 319/261 | 0/0 |
B cells_maxVL | DEL.pre.highMaxVL-DEL.pre.lowMaxVL | 401/103 | 0/0 |
B cells_maxVL | WT.D14.highMaxVL-WT.D14.lowMaxVL | 465/629 | 2/9 |
B cells_maxVL | DEL.D14.highMaxVL-DEL.D14.lowMaxVL | 312/359 | 0/0 |
cDCs_vax | WT.wk8-WT.pre | 254/150 | 0/0 |
cDCs_vax | Untreated.wk8-Untreated.pre | 880/272 | 22/2 |
cDCs_vax | DEL.wk8-DEL.pre | 236/419 | 1/0 |
cDCs_vax | WT.D14-WT.pre | 345/303 | 0/0 |
cDCs_vax | Untreated.D14-Untreated.pre | 1079/1585 | 290/413 |
cDCs_vax | DEL.D14-DEL.pre | 270/302 | 0/0 |
monocytes_vax | WT.wk8-WT.pre | 405/337 | 0/0 |
monocytes_vax | Untreated.wk8-Untreated.pre | 557/871 | 0/0 |
monocytes_vax | DEL.wk8-DEL.pre | 426/983 | 2/2 |
monocytes_vax | WT.D14-WT.pre | 550/674 | 0/0 |
monocytes_vax | Untreated.D14-Untreated.pre | 633/431 | 0/0 |
monocytes_vax | DEL.D14-DEL.pre | 454/571 | 17/2 |
NK cells_vax | WT.wk8-WT.pre | 398/405 | 0/0 |
NK cells_vax | Untreated.wk8-Untreated.pre | 535/406 | 0/0 |
NK cells_vax | DEL.wk8-DEL.pre | 834/1199 | 87/67 |
NK cells_vax | WT.D14-WT.pre | 564/1602 | 71/180 |
NK cells_vax | Untreated.D14-Untreated.pre | 935/371 | 37/2 |
NK cells_vax | DEL.D14-DEL.pre | 773/1189 | 169/67 |
pDCs_vax | WT.wk8-WT.pre | 478/684 | 0/2 |
pDCs_vax | Untreated.wk8-Untreated.pre | 125/177 | 0/0 |
pDCs_vax | DEL.wk8-DEL.pre | 272/348 | 0/0 |
pDCs_vax | WT.D14-WT.pre | 721/710 | 24/4 |
pDCs_vax | Untreated.D14-Untreated.pre | 337/246 | 1/0 |
pDCs_vax | DEL.D14-DEL.pre | 515/275 | 18/2 |
B cells_vax | WT.wk8-WT.pre | 268/237 | 0/0 |
B cells_vax | Untreated.wk8-Untreated.pre | 574/1224 | 0/0 |
B cells_vax | DEL.wk8-DEL.pre | 387/231 | 5/1 |
B cells_vax | WT.D14-WT.pre | 427/377 | 0/0 |
B cells_vax | Untreated.D14-Untreated.pre | 391/197 | 0/0 |
B cells_vax | DEL.D14-DEL.pre | 519/367 | 35/2 |
Heatmaps with DEGs
for (modelName in names(fits)) {
fit2 <- fits[[modelName]][["fit2"]]
for (coefName in colnames(fit2)) {
top <- topTable(fit = fit2, coef = coefName, number = Inf) %>%
as.data.frame()
if (!("gene_id" %in% names(top))) {
top <- top %>%
rownames_to_column(var = "gene_id") %>%
merge(y = select(fData(eset), gene_id, gene_name, gene_biotype), by = "gene_id")
}
top <- top %>%
filter(!is.na(gene_name) &
gene_biotype %in% "protein_coding" &
adj.P.Val <= 0.05)
if (nrow(top) > 0) {
top <- top %>%
top_n(50, wt = abs(t)) %>%
arrange(desc(logFC))
sampleLS <- which(fit2$contrast[, coefName] != 0) %>%
fit2$design[, .] %>%
as.data.frame() %>%
rownames_to_column() %>%
gather(group, value, -rowname) %>%
group_by(rowname) %>%
summarize(value = sum(value)) %>%
filter(value > 0) %>%
.$rowname
mat <- normCounts(eset)[top$gene_id, sampleLS, drop = FALSE] %>%
t() %>%
scale() %>%
t()
mat <- mat[complete.cases(mat), , drop = FALSE]
colorLS <- colorRampPalette(colors = c("blue", "white", "red"))(n = 100)
breakLS <- c(-1 * max(abs(mat), na.rm = TRUE),
seq(from = -1 * min(abs(range(mat, na.rm = TRUE))),
to = min(abs(range(mat, na.rm = TRUE))),
length.out = 99),
max(abs(mat), na.rm = TRUE))
matAnnot <- pData(eset) %>%
rownames_to_column() %>%
mutate(log10.max.vl = log10(max.vl)) %>%
select(rowname, Timepoint, Treatment, Cell.population.sorted,
log10.max.vl, time2max.vl, auc.VRC07.523.LS, auc.PGT121) %>%
column_to_rownames()
matAnnot <- matAnnot[colnames(mat), ]
matAnnot <- matAnnot[, colMeans(is.na(matAnnot)) < 1]
ha <- HeatmapAnnotation(df = matAnnot)
heat <- Heatmap(matrix = mat,
width = unit(0.5, units = "npc"),
height = unit(0.1+0.6*(nrow(mat)/50), units = "npc"),
top_annotation = ha,
name = "zscore",
column_title = paste0(modelName, ": ", coefName),
row_names_gp = gpar(fontsize = 7),
column_labels = pData(eset)[colnames(mat), "Monkey.ID"],
row_labels = top$gene_name)
print(heat)
}
}
}
Heatmaps with all DEGs across time by subset
message("FDR cutoff of 5%")
## FDR cutoff of 5%
for (modelName in grep(pattern = "vax", names(fits), value = TRUE)) {
fit2 <- fits[[modelName]][["fit2"]]
top <- topTableF(fit2, number = Inf, p.value = 0.05) %>%
as.data.frame()
if (!("gene_id" %in% names(top))) {
top <- top %>%
rownames_to_column(var = "gene_id") %>%
merge(y = select(fData(eset), gene_id, gene_name, gene_biotype), by = "gene_id")
}
top <- top %>%
filter(gene_biotype %in% "protein_coding")
if (nrow(top) > 0) {
top <- top %>%
arrange(desc(WT.D14.WT.pre)) %>%
mutate(gene_name = ifelse(test = is.na(gene_name),
yes = gene_id,
no = gene_name))
sampleLS <- rownames(fit2$design)
sampleLS <- intersect(sampleLS, sampleNames(esetBaselined))
mat <- normCounts(esetBaselined)[top$gene_id, sampleLS, drop = FALSE]
mat <- mat[complete.cases(mat), ]
matAnnot <- pData(eset) %>%
rownames_to_column() %>%
mutate(log10.max.vl = log10(max.vl)) %>%
select(rowname, Timepoint, Treatment, Cell.population.sorted,
log10.max.vl, time2max.vl, auc.VRC07.523.LS, auc.PGT121) %>%
column_to_rownames()
matAnnot <- matAnnot[colnames(mat), ]
ha <- HeatmapAnnotation(df = matAnnot)
splitCols <- pData(eset)[colnames(mat),
c("Timepoint", "Treatment")] %>%
apply(MARGIN = 1, FUN = paste, collapse = ".")
heat <- Heatmap(matrix = mat,
width = unit(0.5, units = "npc"),
height = unit(0.75, units = "npc"),
top_annotation = ha,
name = "log2FC",
column_title = modelName,
show_row_names = FALSE,
column_labels = pData(eset)[colnames(mat),
"Monkey.ID"],
column_split = splitCols)
print(heat)
}
}
## topTableF is obsolete and will be removed in a future version of limma. Please considering using topTable instead.
## topTableF is obsolete and will be removed in a future version of limma. Please considering using topTable instead.
## topTableF is obsolete and will be removed in a future version of limma. Please considering using topTable instead.
## topTableF is obsolete and will be removed in a future version of limma. Please considering using topTable instead.
## topTableF is obsolete and will be removed in a future version of limma. Please considering using topTable instead.
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin23.0.0 (64-bit)
## Running under: macOS Sonoma 14.3
##
## Matrix products: default
## BLAS: /opt/homebrew/Cellar/openblas/0.3.26/lib/libopenblasp-r0.3.26.dylib
## LAPACK: /opt/homebrew/Cellar/r/4.3.2/lib/R/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Chicago
## tzcode source: internal
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] lubridate_1.9.3 forcats_1.0.0
## [3] stringr_1.5.1 dplyr_1.1.4
## [5] purrr_1.0.2 readr_2.1.5
## [7] tidyr_1.3.1 tibble_3.2.1
## [9] tidyverse_2.0.0 ComplexHeatmap_2.18.0
## [11] ggrepel_0.9.5 edgeR_4.0.12
## [13] limma_3.58.1 parallelDist_0.2.6
## [15] lme4_1.1-35.1 Matrix_1.6-5
## [17] pvca_0.1.0 EDASeq_2.36.0
## [19] ShortRead_1.60.0 GenomicAlignments_1.38.2
## [21] SummarizedExperiment_1.32.0 MatrixGenerics_1.14.0
## [23] matrixStats_1.2.0 Rsamtools_2.18.0
## [25] GenomicRanges_1.54.1 Biostrings_2.70.1
## [27] GenomeInfoDb_1.38.5 XVector_0.42.0
## [29] IRanges_2.36.0 S4Vectors_0.40.2
## [31] BiocParallel_1.36.0 Biobase_2.62.0
## [33] BiocGenerics_0.48.1 caTools_1.18.2
## [35] ggbeeswarm_0.7.2 ggplot2_3.4.4
## [37] zoo_1.8-12 readxl_1.4.3
## [39] knitr_1.45
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 shape_1.4.6 rstudioapi_0.15.0
## [4] magrittr_2.0.3 magick_2.8.2 GenomicFeatures_1.54.1
## [7] farver_2.1.1 nloptr_2.0.3 rmarkdown_2.25
## [10] GlobalOptions_0.1.2 BiocIO_1.12.0 zlibbioc_1.48.0
## [13] vctrs_0.6.5 memoise_2.0.1 minqa_1.2.6
## [16] RCurl_1.98-1.14 htmltools_0.5.7 S4Arrays_1.2.0
## [19] progress_1.2.3 curl_5.2.0 cellranger_1.1.0
## [22] SparseArray_1.2.3 cachem_1.0.8 iterators_1.0.14
## [25] lifecycle_1.0.4 pkgconfig_2.0.3 R6_2.5.1
## [28] fastmap_1.1.1 clue_0.3-65 GenomeInfoDbData_1.2.11
## [31] digest_0.6.34 colorspace_2.1-0 AnnotationDbi_1.64.1
## [34] RSQLite_2.3.5 hwriter_1.3.2.1 labeling_0.4.3
## [37] filelock_1.0.3 timechange_0.3.0 fansi_1.0.6
## [40] httr_1.4.7 abind_1.4-5 compiler_4.3.2
## [43] doParallel_1.0.17 bit64_4.0.5 withr_3.0.0
## [46] DBI_1.2.1 highr_0.10 R.utils_2.12.3
## [49] biomaRt_2.58.1 MASS_7.3-60.0.1 rappdirs_0.3.3
## [52] DelayedArray_0.28.0 rjson_0.2.21 tools_4.3.2
## [55] vipor_0.4.7 beeswarm_0.4.0 R.oo_1.26.0
## [58] glue_1.7.0 restfulr_0.0.15 nlme_3.1-164
## [61] cluster_2.1.6 generics_0.1.3 gtable_0.3.4
## [64] tzdb_0.4.0 R.methodsS3_1.8.2 hms_1.1.3
## [67] xml2_1.3.6 utf8_1.2.4 foreach_1.5.2
## [70] pillar_1.9.0 vroom_1.6.5 circlize_0.4.15
## [73] splines_4.3.2 BiocFileCache_2.10.1 lattice_0.22-5
## [76] aroma.light_3.32.0 rtracklayer_1.62.0 bit_4.0.5
## [79] deldir_2.0-2 tidyselect_1.2.0 locfit_1.5-9.8
## [82] xfun_0.41 statmod_1.5.0 stringi_1.8.3
## [85] yaml_2.3.8 boot_1.3-28.1 evaluate_0.23
## [88] codetools_0.2-19 interp_1.1-6 cli_3.6.2
## [91] RcppParallel_5.1.7 munsell_0.5.0 Rcpp_1.0.12
## [94] dbplyr_2.4.0 png_0.1-8 XML_3.99-0.16.1
## [97] blob_1.2.4 prettyunits_1.2.0 latticeExtra_0.6-30
## [100] jpeg_0.1-10 bitops_1.0-7 scales_1.3.0
## [103] crayon_1.5.2 GetoptLong_1.0.5 rlang_1.1.3
## [106] KEGGREST_1.42.0