Skip to content

Commit

Permalink
Did work to conclude the bioinformatics to-do items described in the …
Browse files Browse the repository at this point in the history
…Word document of revision plans
  • Loading branch information
Kris Alavattam committed Sep 26, 2023
1 parent 6de56de commit 2d8ee66
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 24 deletions.
90 changes: 82 additions & 8 deletions results/2023-0215/rough-draft_draw_scatter-plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ options(ggrepel.max.overlaps = Inf)

# Initialize functions and themes ============================================
load_RDS_mRNA <- function(
p_RDS = "notebook/KA.2023-0608-0703.rds-data-objects_min-4-cts-all-but-1-samps",
p_RDS = "notebook/KA.2023-0608-0714.rds-data-objects_min-4-cts-all-but-1-samps",
p_mRNA = "rds_mRNA",
f_RDS
) {
Expand All @@ -22,7 +22,7 @@ load_RDS_mRNA <- function(


load_RDS_ncRNA <- function(
p_RDS = "notebook/KA.2023-0608-0703.rds-data-objects_min-4-cts-all-but-1-samps",
p_RDS = "notebook/KA.2023-0608-0714.rds-data-objects_min-4-cts-all-but-1-samps",
p_ncRNAcm = "rds_pa-ncRNA-collapsed-merged",
f_RDS
) {
Expand Down Expand Up @@ -490,7 +490,7 @@ p_exp <- "2022-2023_RRP6-NAB3/results/2023-0215"
setwd(paste(p_base, p_exp, sep = "/"))

# Load RDS files
p_RDS <- "notebook/KA.2023-0608-0703.rds-data-objects_min-4-cts-all-but-1-samps"
p_RDS <- "notebook/KA.2023-0608-0714.rds-data-objects_min-4-cts-all-but-1-samps"
p_mRNA <- "rds_mRNA"
p_mRNA_AS <- "rds_mRNA-AS"
p_ncRNAcm <- "rds_ncRNA-collapsed"
Expand All @@ -507,7 +507,7 @@ if(base::isTRUE(load_mRNA_N_Q_r6n)) {
))
}

load_mRNA_SS_Q_r6n <- FALSE #ARGUMENT
load_mRNA_SS_Q_r6n <- TRUE #ARGUMENT
if(base::isTRUE(load_mRNA_SS_Q_r6n)) {
mRNA_SS_Q_r6n <- readRDS(paste(
p_RDS,
Expand All @@ -527,7 +527,7 @@ if(base::isTRUE(load_mRNA_SS_G1_r6n)) {
))
}

load_mRNA_N_Q_n3d <- FALSE #ARGUMENT
load_mRNA_N_Q_n3d <- TRUE #ARGUMENT
if(base::isTRUE(load_mRNA_N_Q_n3d)) {
mRNA_N_Q_n3d <- readRDS(paste(
p_RDS,
Expand Down Expand Up @@ -559,7 +559,7 @@ if(base::isTRUE(load_ncRNAcm_N_Q_r6n)) {
))
}

load_ncRNAcm_SS_Q_r6n <- FALSE #ARGUMENT
load_ncRNAcm_SS_Q_r6n <- TRUE #ARGUMENT
if(base::isTRUE(load_ncRNAcm_SS_Q_r6n)) {
ncRNAcm_SS_Q_r6n <- readRDS(paste(
p_RDS,
Expand All @@ -579,7 +579,7 @@ if(base::isTRUE(load_ncRNAcm_SS_G1_r6n)) {
))
}

load_ncRNAcm_N_Q_n3d <- FALSE #ARGUMENT
load_ncRNAcm_N_Q_n3d <- TRUE #ARGUMENT
if(base::isTRUE(load_ncRNAcm_N_Q_n3d)) {
ncRNAcm_N_Q_n3d <- readRDS(paste(
p_RDS,
Expand Down Expand Up @@ -709,7 +709,7 @@ if(base::isTRUE(`load_R64-etc_SS_Q_n3d`)) {
rm(list = ls(pattern = "p_"))


#TODO Plot/assess the following comparisons ===================================
#DONE Plot/assess the following comparisons ===================================
#+ 5 - SS_Q_r6n ~ SS_Q_n3d (y ~ x):
#+ 4 - N_Q_n3d ~ N_Q_n3d_AS (y ~ x): mRNA_N_Q_n3d, `mRNA-AS_N_Q_n3d`
#+ 1 - SS_G1_r6n ~ SS_Q_r6n (y ~ x): mRNA_SS_G1_r6n, mRNA_SS_Q_r6n; ncRNAcm_SS_G1_r6n, ncRNAcm_SS_Q_r6n
Expand Down Expand Up @@ -808,6 +808,43 @@ if(base::isTRUE(run)) {
)
}

# Supplemental table related to Fig. 4B (for revision) for mRNA
run <- TRUE
if(base::isTRUE(run)) {
x <- mRNA_N_Q_n3d$`09_lfc_0.58_fc_1.5`$`04_t_DGE_unshrunken`
y <- mRNA_SS_Q_r6n$`09_lfc_0.58_fc_1.5`$`04_t_DGE_unshrunken`

colnames(x)[6:11] <- paste("x", colnames(x)[6:11], sep = "_")
# colnames(x)

colnames(y)[6:11] <- paste("y", colnames(y)[6:11], sep = "_")
# colnames(y)

vec_cols <- c(
"genome", "seqnames", "start", "end", "width", "strand", "type",
"features", "names", "thorough"
)
df <- dplyr::full_join(x, y, by = vec_cols) %>%
dplyr::relocate(vec_cols, .before = "x_baseMean")

model <- lm(
y_log2FoldChange ~ x_log2FoldChange, data = df, na.action = na.omit
)
df$reg_line <- predict(model, newdata = df)
df <- df %>%
dplyr::mutate(
trend = dplyr::case_when(
y_log2FoldChange > reg_line ~ "y value above reg. line",
y_log2FoldChange < reg_line ~ "y value below reg. line",
TRUE ~ NA
)
)
check_trend <- TRUE
if(base::isTRUE(check_trend)) df$trend %>% table() %>% print()

readr::write_tsv(df, "~/Desktop/supp-tbl_Fig-4B_mRNA.tsv")
}

# Fig. 4B: Look at SS Q rrp6∆ ~ N Q nab3d for pa-ncRNA
#+ i.e., ncRNAcm_SS_Q_r6n ~ ncRNAcm_N_Q_n3d
run <- FALSE
Expand All @@ -825,6 +862,43 @@ if(base::isTRUE(run)) {
)
}

# Supplemental table related to Fig. 4B (for revision) for pa-ncRNA
run <- TRUE
if(base::isTRUE(run)) {
x <- ncRNAcm_N_Q_n3d$`09_lfc_0.58_fc_1.5`$`04_t_DGE_unshrunken`
y <- ncRNAcm_SS_Q_r6n$`09_lfc_0.58_fc_1.5`$`04_t_DGE_unshrunken`

colnames(x)[6:11] <- paste("x", colnames(x)[6:11], sep = "_")
# colnames(x)

colnames(y)[6:11] <- paste("y", colnames(y)[6:11], sep = "_")
# colnames(y)

vec_cols <- c(
"genome", "seqnames", "start", "end", "width", "strand", "type",
"features", "names", "thorough"
)
df <- dplyr::full_join(x, y, by = vec_cols) %>%
dplyr::relocate(vec_cols, .before = "x_baseMean")

model <- lm(
y_log2FoldChange ~ x_log2FoldChange, data = df, na.action = na.omit
)
df$reg_line <- predict(model, newdata = df)
df <- df %>%
dplyr::mutate(
trend = dplyr::case_when(
y_log2FoldChange > reg_line ~ "y value above reg. line",
y_log2FoldChange < reg_line ~ "y value below reg. line",
TRUE ~ NA
)
)
check_trend <- TRUE
if(base::isTRUE(check_trend)) df$trend %>% table() %>% print()

readr::write_tsv(df, "~/Desktop/supp-tbl_Fig-4B_pancRNA.tsv")
}

# Fig. 4_: Look at SS Q rrp6∆ ~ SS Q nab3d for mRNA
#+ i.e., mRNA_SS_Q_r6n ~ mRNA_SS_Q_n3d
run <- FALSE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ perform_lm_LOESS_MW_etc <- function(
# Perform debugging
debug <- FALSE
if(base::isTRUE(debug)) {
df <- t_Tr_Q
df <- t_mRNA
degree <- 2
span <- 0.66
draw_density <- FALSE
Expand Down Expand Up @@ -181,7 +181,7 @@ perform_lm_LOESS_MW_etc <- function(
xlab("theoretical quantiles") +
ylab("Q sample quantiles") +
theme_AG_boxed_no_legend

# Initialize objects for sample and theoretical residual quantiles
qq_G1_val <- qqnorm(residuals(lm_G1), plot.it = FALSE)
qq_Q_val <- qqnorm(residuals(lm_Q), plot.it = FALSE)
Expand Down Expand Up @@ -451,10 +451,59 @@ perform_lm_LOESS_MW_etc <- function(
ylim(c(y_low, y_high)) +
labs(x = x_lab, y = y_lab) +
theme_AG_boxed_no_legend

if(base::isTRUE(debug)) lm_fit_plot


# Create table of features above, below, or at the regression line -------
#+ With N on y and SS on x, values above the regression line are "actively
#+ degraded" [higher in N (y), lower in SS (x)]; values below the
#+ regression line are "stabilized" [lower in N (y), higher in SS (x)]
df <- df %>%
dplyr::mutate(
log2_pseudo_G1_N = log2(G1_N + 1),
log2_pseudo_G1_SS = log2(G1_SS + 1),
log2_pseudo_Q_N = log2(Q_N + 1),
log2_pseudo_Q_SS = log2(Q_SS + 1)
)

model_G1 <- lm(log2_pseudo_G1_N ~ log2_pseudo_G1_SS, data = df)
model_Q <- lm(log2_pseudo_Q_N ~ log2_pseudo_Q_SS, data = df)

df$G1_reg_line <- predict(model_G1, newdata = df)
df$Q_reg_line <- predict(model_Q, newdata = df)

df <- df %>%
dplyr::mutate(
est_trans_kin_G1 = dplyr::case_when(
log2(G1_N + 1) > G1_reg_line ~ "degraded",
log2(G1_N + 1) < G1_reg_line ~ "stabilized",
TRUE ~ "same"
),
est_trans_kin_Q = dplyr::case_when(
log2(Q_N + 1) > Q_reg_line ~ "degraded",
log2(Q_N + 1) < Q_reg_line ~ "stabilized",
TRUE ~ "same"
),
est_kin_together = dplyr::case_when(
est_trans_kin_G1 == "stabilized" &
est_trans_kin_Q == "stabilized" ~ "stabilized in both",
est_trans_kin_G1 == "degraded" &
est_trans_kin_Q == "degraded" ~ "degraded in both",
est_trans_kin_G1 == "degraded" &
est_trans_kin_Q == "stabilized" ~ "stabilized in Q",
est_trans_kin_G1 == "stabilized" &
est_trans_kin_Q == "degraded" ~ "degraded in Q"
)
)

check_trans_kin <- FALSE
if(base::isTRUE(check_trans_kin)) {
df$est_trans_kin_G1 %>% table() %>% print()
df$est_trans_kin_Q %>% table() %>% print()
df$est_kin_together %>% table() %>% print()
}


# Fit LOESS models on regularized TPM values -----------------------------
#+ ...where, for the G1 group, we're regressing log2(G1_N + 1) on
#+ log2(G1_SS + 1) and, for the Q group, we're regressing log2(Q_N + 1) on
Expand Down Expand Up @@ -638,7 +687,6 @@ perform_lm_LOESS_MW_etc <- function(
ylim(c(y_low, y_high)) +
labs(x = x_lab, y = y_lab) +
theme_AG_boxed_no_legend

if(base::isTRUE(debug)) loess_fit_plot


Expand All @@ -652,6 +700,7 @@ perform_lm_LOESS_MW_etc <- function(
results_list[["01_p_MWU_G1-SS_Q-SS"]] <- `p_MWU_G1-SS_Q-SS`
results_list[["01_p_MWU_G1-N_G1-SS"]] <- `p_MWU_G1-N_G1-SS`
results_list[["01_p_MWU_Q-N_Q-SS"]] <- `p_MWU_Q-N_Q-SS`
results_list[["02_df"]] <- df
results_list[["02_df_G1"]] <- df_G1
results_list[["02_df_Q"]] <- df_Q
results_list[["03_lm_G1"]] <- lm_G1
Expand Down Expand Up @@ -803,15 +852,17 @@ t_Tr_Q <- calculate_mean_TPMs(t_Tr_Q)
t_Tr_G1 <- calculate_mean_TPMs(t_Tr_G1)


#HERE
# Run linear and LOESS regressions, and statistical tests ====================
run <- FALSE #ARGUMENT
run <- TRUE #ARGUMENT
if(base::isTRUE(run)) {
etc_mRNA <- perform_lm_LOESS_MW_etc(t_mRNA)
etc_pancRNA <- perform_lm_LOESS_MW_etc(t_pancRNA)
etc_Tr_Q <- perform_lm_LOESS_MW_etc(t_Tr_Q)
etc_Tr_G1 <- perform_lm_LOESS_MW_etc(t_Tr_G1)

for(i in 1:4) {
# i <- 1
if(i == 1) {
etc <- etc_mRNA
label <- "mRNA"
Expand Down Expand Up @@ -839,22 +890,46 @@ if(base::isTRUE(run)) {
filename = paste0("lm-fit_", label, ".pdf")
)

print_regression_plot(
object = etc[["12_loess_scatter_G1"]],
filename = paste("loess-scatter", label, "G1.pdf", sep = "_")
)
print_regression_plot(
object = etc[["12_loess_scatter_Q"]],
filename = paste("loess-scatter", label, "Q.pdf", sep = "_")
)
print_regression_plot(
object = etc[["12_loess_fit_plot"]],
filename = paste0("loess-fit_", label, ".pdf")
# print_regression_plot(
# object = etc[["12_loess_scatter_G1"]],
# filename = paste("loess-scatter", label, "G1.pdf", sep = "_")
# )
# print_regression_plot(
# object = etc[["12_loess_scatter_Q"]],
# filename = paste("loess-scatter", label, "Q.pdf", sep = "_")
# )
# print_regression_plot(
# object = etc[["12_loess_fit_plot"]],
# filename = paste0("loess-fit_", label, ".pdf")
# )

supp_tbl <- etc[["02_df"]]
colnames(supp_tbl)[12:27] <- c(
paste("TPM", c(
"G1_N_rep1", "G1_N_rep2", "G1_SS_rep1", "G1_SS_rep2",
"Q_N_rep1", "Q_N_rep2", "Q_SS_rep1", "Q_SS_rep2"
), sep = "_"),
paste("mean_TPM", c("G1_N", "G1_SS", "Q_N", "Q_SS"), sep = "_"),
paste("log2_pseudo_mean_TPM", c(
"G1_N", "G1_SS", "Q_N", "Q_SS")
, sep = "_")
)

supp_tbl_filename <- paste0("~/Desktop/", "supp-tbl_", label, ".tsv")
readr::write_tsv(supp_tbl, supp_tbl_filename, na = "#N/A")
}
}


# etc_mRNA$`02_df`$est_trans_kin_G1 %>% table() %>% print()
# etc_mRNA$`02_df`$est_trans_kin_Q %>% table() %>% print()
# etc_mRNA$`02_df`$est_kin_together %>% table() %>% print()
#
# etc_pancRNA$`02_df`$est_trans_kin_G1 %>% table() %>% print()
# etc_pancRNA$`02_df`$est_trans_kin_Q %>% table() %>% print()
# etc_pancRNA$`02_df`$est_kin_together %>% table() %>% print()


# Examine Nab3-AID/Parent log2(FC) w/r/to Parent TPM =========================
# Load necessary RDS file
p_RDS <- "notebook/KA.2023-0608-0703.rds-data-objects_min-4-cts-all-but-1-samps"
Expand Down

0 comments on commit 2d8ee66

Please sign in to comment.