Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor bootstrapCompartments #35

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 68 additions & 69 deletions R/bootstrapCompartments.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,94 +22,93 @@
#' @import SummarizedExperiment
#'
#' @examples
#'
#'
#' # this needs a good example
#'
#'
#' @export
bootstrapCompartments <- function(obj, original.obj, bootstrap.samples = 1000,
chr = "chr14", assay = c("rna", "atac", "array"),
parallel = TRUE, cores = 2, targets = NULL, res = 1e6,
genome = c("hg19", "hg38", "mm9", "mm10"), q = 0.95,
svd = NULL, group = FALSE, bootstrap.means = NULL) {
#function for nonparametric bootstrap of compartments and compute 95% CIs
#check input
#match the assay args
bootstrapCompartments <- function(
obj,
original.obj,
bootstrap.samples = 1000,
chr = "chr14",
assay = c("rna", "atac", "array"),
parallel = TRUE,
cores = 2,
targets = NULL,
res = 1e6,
genome = c("hg19", "hg38", "mm9", "mm10"),
q = 0.95,
svd = NULL,
group = FALSE,
bootstrap.means = NULL
) {
# function for nonparametric bootstrap of compartments and compute 95% CIs
# check input
# match the assay args
assay <- match.arg(assay)

#double check the obj class is compatible
# double check the obj class is compatible
if (!checkAssayType(original.obj)) stop("Input needs to be a SummarizedExperiment")
#check the names of the assays

# check the names of the assays
if (!any(getAssayNames(original.obj) %in% c("counts", "Beta"))) {
message("The assay slot should contain 'counts' for atac/rna.")
stop("The assay slot should contain 'Beta' for methylation arrays.")
}
#if we are using targeted means
if (!is.null(targets)) original.obj <- original.obj[,targets]
#get the global means we are going to use
#this could theoretically break if you ask for more bootstraps here than were pre-computed...
#let's check for one more optimization

# if we are using targeted means
if (!is.null(targets)) original.obj <- original.obj[, targets]

# get the global means we are going to use
# this could theoretically break if you ask for more bootstraps here than were pre-computed...
# let's check for one more optimization
if (bootstrap.samples == ncol(bootstrap.means)) {
bmeans <- bootstrap.means
} else {
bmeans <- sample.int(bootstrap.means, size = bootstrap.samples, replace = FALSE)
colnames(bmeans) <- rep("globalMean", ncol(bmeans))
}

#if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.")
if (!parallel) {
message("Not bootstrapping in parallel will take a long time...")
#bootstrap and recompute compartments
resamp.compartments <- lapply(1:ncol(bmeans), function(b) {
#resample the global means with replacement
message("Working on bootstrap ", b)

#get the shrunken bins with new global mean
boot.mean <- as.matrix(bmeans[,b])
colnames(boot.mean) <- "globalMean"
s.bins <- shrinkBins(obj, original.obj, prior.means = boot.mean,
chr = chr, res = res, assay = assay, genome = genome)
if (group) cor.bins <- getCorMatrix(s.bins, squeeze = FALSE)
if (isFALSE(group)) cor.bins <- getCorMatrix(s.bins, squeeze = TRUE)
#Stupid check for perfect correlation with global mean
if (any(is.na(cor.bins$binmat.cor))) {
absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor)))
} else {
absig <- getABSignal(cor.bins, assay = assay)
}
return(absig)
})
} else {
}

# if (ncol(original.obj) < 6) stop("We need more than 5 samples to bootstrap with for the results to be meaningful.")
if (parallel) {
message("Bootstrapping in parallel with ", cores, " cores.")
#bootstrap and recompute compartments
resamp.compartments <- mclapply(1:ncol(bmeans), function(b) {
#get the shrunken bins with new global mean
boot.mean <- as.matrix(bmeans[,b])
colnames(boot.mean) <- "globalMean"
s.bins <- shrinkBins(obj, original.obj, prior.means = boot.mean,
chr = chr, res = res, assay = assay, genome = genome)
cor.bins <- getCorMatrix(s.bins, squeeze = TRUE)
#Stupid check for perfect correlation with global mean
if (any(is.na(cor.bins$binmat.cor))) {
absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor)))
} else {
absig <- getABSignal(cor.bins, assay = assay)
}
return(absig)
}, mc.cores = cores)
} else {
message("Not bootstrapping in parallel will take a long time...")
}

#summarize the bootstraps and compute confidence intervals
resamp.compartments <- summarizeBootstraps(resamp.compartments, svd,
q = q, assay = assay)
# bootstrap and recompute compartments
resamp.compartments <- mclapply(1:ncol(bmeans), function(b) {
# get the shrunken bins with new global mean
boot.mean <- as.matrix(bmeans[, b])
colnames(boot.mean) <- "globalMean"
s.bins <- shrinkBins(
obj,
original.obj,
prior.means = boot.mean,
chr = chr,
res = res,
assay = assay,
genome = genome
)
cor.bins <- getCorMatrix(s.bins, squeeze = !group)

# Stupid check for perfect correlation with global mean
if (any(is.na(cor.bins$binmat.cor))) {
absig <- matrix(rep(NA, nrow(cor.bins$binmat.cor)))
} else {
absig <- getABSignal(cor.bins, assay = assay)
}
return(absig)
}, mc.cores = ifelse(parallel, cores, 1))

# summarize the bootstraps and compute confidence intervals
resamp.compartments <- summarizeBootstraps(resamp.compartments, svd, q = q, assay = assay)
return(resamp.compartments)
}

#helper function to re-sample
#this was inspired by https://github.com/sgibb/bootstrap/blob/master/R/helper-functions.R
.resampleMatrix <- function(x, size=ncol(x)) {
samp.to.select <- sample.int(ncol(x), size=size, replace=TRUE)
# helper function to re-sample
# this was inspired by https://github.com/sgibb/bootstrap/blob/master/R/helper-functions.R
.resampleMatrix <- function(x, size = ncol(x)) {
samp.to.select <- sample.int(ncol(x), size = size, replace = TRUE)
return(x[, samp.to.select])
}