From 866d89ce4e79e99039f736fb30462357f0f3028a Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 16 Jan 2025 13:53:20 -0500 Subject: [PATCH 1/3] refactor(bootstrapCompartments): collapse cor.bin squeeze bool using group bool --- R/bootstrapCompartments.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/bootstrapCompartments.R b/R/bootstrapCompartments.R index b99e2e6..c0d9270 100644 --- a/R/bootstrapCompartments.R +++ b/R/bootstrapCompartments.R @@ -71,8 +71,7 @@ bootstrapCompartments <- function(obj, original.obj, bootstrap.samples = 1000, colnames(boot.mean) <- "globalMean" s.bins <- shrinkBins(obj, original.obj, prior.means = boot.mean, chr = chr, res = res, assay = assay, genome = genome) - if (group) cor.bins <- getCorMatrix(s.bins, squeeze = FALSE) - if (isFALSE(group)) cor.bins <- getCorMatrix(s.bins, squeeze = TRUE) + cor.bins <- getCorMatrix(s.bins, squeeze = !group) #Stupid check for perfect correlation with global mean if (any(is.na(cor.bins$binmat.cor))) { absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) @@ -90,7 +89,7 @@ bootstrapCompartments <- function(obj, original.obj, bootstrap.samples = 1000, colnames(boot.mean) <- "globalMean" s.bins <- shrinkBins(obj, original.obj, prior.means = boot.mean, chr = chr, res = res, assay = assay, genome = genome) - cor.bins <- getCorMatrix(s.bins, squeeze = TRUE) + cor.bins <- getCorMatrix(s.bins, squeeze = !group) #Stupid check for perfect correlation with global mean if (any(is.na(cor.bins$binmat.cor))) { absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) From a143a39499c815c4db2e9988e18fa56479b88e97 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 16 Jan 2025 13:59:32 -0500 Subject: [PATCH 2/3] refactor(fix_compartments): collapse parallel/serial if into one mclapply --- R/bootstrapCompartments.R | 63 ++++++++++++++------------------------- 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/R/bootstrapCompartments.R b/R/bootstrapCompartments.R index c0d9270..fc56759 100644 --- a/R/bootstrapCompartments.R +++ b/R/bootstrapCompartments.R @@ -57,52 +57,33 @@ bootstrapCompartments <- function(obj, original.obj, bootstrap.samples = 1000, bmeans <- sample.int(bootstrap.means, size = bootstrap.samples, replace = FALSE) colnames(bmeans) <- rep("globalMean", ncol(bmeans)) } - + #if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.") - if (!parallel) { - message("Not bootstrapping in parallel will take a long time...") - #bootstrap and recompute compartments - resamp.compartments <- lapply(1:ncol(bmeans), function(b) { - #resample the global means with replacement - message("Working on bootstrap ", b) - - #get the shrunken bins with new global mean - boot.mean <- as.matrix(bmeans[,b]) - colnames(boot.mean) <- "globalMean" - s.bins <- shrinkBins(obj, original.obj, prior.means = boot.mean, - chr = chr, res = res, assay = assay, genome = genome) - cor.bins <- getCorMatrix(s.bins, squeeze = !group) - #Stupid check for perfect correlation with global mean - if (any(is.na(cor.bins$binmat.cor))) { - absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) - } else { - absig <- getABSignal(cor.bins, assay = assay) - } - return(absig) - }) - } else { + if (parallel) { message("Bootstrapping in parallel with ", cores, " cores.") - #bootstrap and recompute compartments - resamp.compartments <- mclapply(1:ncol(bmeans), function(b) { - #get the shrunken bins with new global mean - boot.mean <- as.matrix(bmeans[,b]) - colnames(boot.mean) <- "globalMean" - s.bins <- shrinkBins(obj, original.obj, prior.means = boot.mean, - chr = chr, res = res, assay = assay, genome = genome) - cor.bins <- getCorMatrix(s.bins, squeeze = !group) - #Stupid check for perfect correlation with global mean - if (any(is.na(cor.bins$binmat.cor))) { - absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) - } else { - absig <- getABSignal(cor.bins, assay = assay) - } - return(absig) - }, mc.cores = cores) + } else { + message("Not bootstrapping in parallel will take a long time...") } + #bootstrap and recompute compartments + resamp.compartments <- mclapply(1:ncol(bmeans), function(b) { + #get the shrunken bins with new global mean + boot.mean <- as.matrix(bmeans[,b]) + colnames(boot.mean) <- "globalMean" + s.bins <- shrinkBins(obj, original.obj, prior.means = boot.mean, + chr = chr, res = res, assay = assay, genome = genome) + cor.bins <- getCorMatrix(s.bins, squeeze = !group) + #Stupid check for perfect correlation with global mean + if (any(is.na(cor.bins$binmat.cor))) { + absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) + } else { + absig <- getABSignal(cor.bins, assay = assay) + } + return(absig) + }, mc.cores = ifelse(parallel, cores, 1)) + #summarize the bootstraps and compute confidence intervals - resamp.compartments <- summarizeBootstraps(resamp.compartments, svd, - q = q, assay = assay) + resamp.compartments <- summarizeBootstraps(resamp.compartments, svd, q = q, assay = assay) return(resamp.compartments) } From 0a9482068ff6338ae86b196c60a42d28806dc3cb Mon Sep 17 00:00:00 2001 From: James Eapen Date: Thu, 16 Jan 2025 15:51:01 -0500 Subject: [PATCH 3/3] chore(bootstrapCompartments): style file --- R/bootstrapCompartments.R | 85 ++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 33 deletions(-) diff --git a/R/bootstrapCompartments.R b/R/bootstrapCompartments.R index fc56759..16d9c0f 100644 --- a/R/bootstrapCompartments.R +++ b/R/bootstrapCompartments.R @@ -22,58 +22,77 @@ #' @import SummarizedExperiment #' #' @examples -#' +#' #' # this needs a good example -#' +#' #' @export -bootstrapCompartments <- function(obj, original.obj, bootstrap.samples = 1000, - chr = "chr14", assay = c("rna", "atac", "array"), - parallel = TRUE, cores = 2, targets = NULL, res = 1e6, - genome = c("hg19", "hg38", "mm9", "mm10"), q = 0.95, - svd = NULL, group = FALSE, bootstrap.means = NULL) { - #function for nonparametric bootstrap of compartments and compute 95% CIs - #check input - #match the assay args +bootstrapCompartments <- function( + obj, + original.obj, + bootstrap.samples = 1000, + chr = "chr14", + assay = c("rna", "atac", "array"), + parallel = TRUE, + cores = 2, + targets = NULL, + res = 1e6, + genome = c("hg19", "hg38", "mm9", "mm10"), + q = 0.95, + svd = NULL, + group = FALSE, + bootstrap.means = NULL +) { + # function for nonparametric bootstrap of compartments and compute 95% CIs + # check input + # match the assay args assay <- match.arg(assay) - #double check the obj class is compatible + # double check the obj class is compatible if (!checkAssayType(original.obj)) stop("Input needs to be a SummarizedExperiment") - - #check the names of the assays + + # check the names of the assays if (!any(getAssayNames(original.obj) %in% c("counts", "Beta"))) { message("The assay slot should contain 'counts' for atac/rna.") stop("The assay slot should contain 'Beta' for methylation arrays.") } - - #if we are using targeted means - if (!is.null(targets)) original.obj <- original.obj[,targets] - - #get the global means we are going to use - #this could theoretically break if you ask for more bootstraps here than were pre-computed... - #let's check for one more optimization + + # if we are using targeted means + if (!is.null(targets)) original.obj <- original.obj[, targets] + + # get the global means we are going to use + # this could theoretically break if you ask for more bootstraps here than were pre-computed... + # let's check for one more optimization if (bootstrap.samples == ncol(bootstrap.means)) { bmeans <- bootstrap.means } else { bmeans <- sample.int(bootstrap.means, size = bootstrap.samples, replace = FALSE) colnames(bmeans) <- rep("globalMean", ncol(bmeans)) - } + } - #if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.") + # if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.") if (parallel) { message("Bootstrapping in parallel with ", cores, " cores.") } else { message("Not bootstrapping in parallel will take a long time...") } - #bootstrap and recompute compartments + # bootstrap and recompute compartments resamp.compartments <- mclapply(1:ncol(bmeans), function(b) { - #get the shrunken bins with new global mean - boot.mean <- as.matrix(bmeans[,b]) + # get the shrunken bins with new global mean + boot.mean <- as.matrix(bmeans[, b]) colnames(boot.mean) <- "globalMean" - s.bins <- shrinkBins(obj, original.obj, prior.means = boot.mean, - chr = chr, res = res, assay = assay, genome = genome) + s.bins <- shrinkBins( + obj, + original.obj, + prior.means = boot.mean, + chr = chr, + res = res, + assay = assay, + genome = genome + ) cor.bins <- getCorMatrix(s.bins, squeeze = !group) - #Stupid check for perfect correlation with global mean + + # Stupid check for perfect correlation with global mean if (any(is.na(cor.bins$binmat.cor))) { absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor))) } else { @@ -82,14 +101,14 @@ bootstrapCompartments <- function(obj, original.obj, bootstrap.samples = 1000, return(absig) }, mc.cores = ifelse(parallel, cores, 1)) - #summarize the bootstraps and compute confidence intervals + # summarize the bootstraps and compute confidence intervals resamp.compartments <- summarizeBootstraps(resamp.compartments, svd, q = q, assay = assay) return(resamp.compartments) } -#helper function to re-sample -#this was inspired by https://github.com/sgibb/bootstrap/blob/master/R/helper-functions.R -.resampleMatrix <- function(x, size=ncol(x)) { - samp.to.select <- sample.int(ncol(x), size=size, replace=TRUE) +# helper function to re-sample +# this was inspired by https://github.com/sgibb/bootstrap/blob/master/R/helper-functions.R +.resampleMatrix <- function(x, size = ncol(x)) { + samp.to.select <- sample.int(ncol(x), size = size, replace = TRUE) return(x[, samp.to.select]) }