Skip to content
1 change: 1 addition & 0 deletions R/pgx-api.R
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ pgx.getFamilies <- function(pgx, nmin = 10, extended = FALSE) {

#' @export
pgx.getTopDrugs <- function(pgx, ct, n = 10, dir = 1, na.rm = TRUE, db = 1) {
if(is.null(pgx$drugs)) return(NULL)
x <- pgx$drugs[[db]]$X[, ct]
q <- pgx$drugs[[db]]$Q[, ct]
annot <- pgx$drugs[[db]]$annot[, ]
Expand Down
68 changes: 40 additions & 28 deletions R/pgx-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1464,7 +1464,8 @@ pgx.getGeneSetCollections <- function(gsets = rownames(playdata::GSETxGENE)) {
#' @export
filterProbes <- function(annot, genes) {
## check probe name, short probe name or gene name for match
p0 <- (toupper(sub(".*:", "", rownames(annot))) %in% toupper(genes))
a0 <- mofa.strip_prefix(rownames(annot))
p0 <- (toupper(a0) %in% toupper(genes))
p1 <- (toupper(rownames(annot)) %in% toupper(genes))

p_list <- list(p0, p1)
Expand Down Expand Up @@ -1508,10 +1509,10 @@ rename_by2 <- function(counts, annot_table, new_id = "symbol",

##new_id="symbol";na.rm=TRUE;unique=TRUE;keep.prefix=FALSE

## add rownames
## add rownames and extra columns
annot_table$rownames <- rownames(annot_table)
annot_table$rownames2 <- sub("^[A-Za-z]+:", "", rownames(annot_table)) ## strip prefix

if (is.matrix(counts) || inherits(counts, "Matrix") ||
is.data.frame(counts) || !is.null(dim(counts))) {
type <- "matrix"
Expand All @@ -1520,42 +1521,51 @@ rename_by2 <- function(counts, annot_table, new_id = "symbol",
type <- "vector"
probes <- names(counts)
}
probe_match <- apply(annot_table, 2, function(x) sum(probes %in% x))
if (max(probe_match, na.rm = TRUE) == 0) {
return(counts)
}

if( type == "vector") {
counts <- cbind(counts)
}

from_id <- names(which.max(probe_match))
## handle old style annot without symbol column
if(new_id=="symbol" && !"symbol" %in% colnames(annot_table) &&
"gene_name" %in% colnames(annot_table)) new_id <- "gene_name"

## dummy do-noting return
if (new_id == from_id) {
sel <- which(probes %in% annot_table[,from_id])
counts <- counts[sel, , drop=FALSE]
if(type == 'vector') counts <- counts[, 1]
return(counts)
## strip prefix ??
probes0 <- probes
probes <- mofa.strip_prefix(probes0)

## iterative matching of probes.
idx <- rep(NA, length(probes))
names(idx) <- probes0
probe_match <- apply(annot_table, 2, function(x) sum(probes %in% x))
match.cols <- names(sort(probe_match,decreasing=TRUE))
pp <- probes
for (k in match.cols) {
##from_id <- names(which.max(probe_match))
from <- annot_table[, k]
ii <- match(pp, from)
if(any(!is.na(ii))) {
kk <- which(!is.na(ii))
idx[match(pp[kk],probes)] <- ii[kk]
}
pp <- probes[is.na(idx)]
if(length(pp)==0) break
}

if (type == "vector") {
## bail out if no match at all
if(all(is.na(idx))) {
return(counts)
}

## make sure matrix
if( type == "vector") {
counts <- cbind(counts)
}

keep.prefix <- (keep.prefix && all(grepl(":", probes)))

from <- annot_table[, from_id]
## if (!any(duplicated(from)) || unique) {
ii <- match(probes, from)

## create matched counts/data table
keep.prefix <- (keep.prefix && all(grepl(":", probes0)))
if (keep.prefix) {
dt <- mofa.get_prefix(probes)
new.name <- annot_table[ii, new_id]
dt <- mofa.get_prefix(probes0)
new.name <- annot_table[idx, new_id]
new.name <- paste0(dt, ":", new.name)
} else {
new.name <- annot_table[ii, new_id]
new.name <- annot_table[idx, new_id]
}
rownames(counts) <- new.name

Expand Down Expand Up @@ -2606,3 +2616,5 @@ match.dataframe <- function(id, df, parallel=TRUE) {
## ==========================================================================
## ==================== END OF FILE =========================================
## ==========================================================================


65 changes: 43 additions & 22 deletions R/pgx-mofa.R
Original file line number Diff line number Diff line change
Expand Up @@ -899,45 +899,60 @@ mofa.split_data <- function(X, keep.prefix = FALSE) {
xx
}

#' This merges list of multi-omics data to a single matrix. Columns
#' must match. Merge data by row after adding prefix.
#'
#' @export
mofa.merge_data <- function(xx) {
do.call(rbind, mofa.prefix(xx))
}


#' This merges list of multi-omics data to a single matrix. Note that
#' it can handle non-matched data by taking union of rownames or
#' colnames and extending the final matrix. Be careful it can
#' introduce NA in such non-matched cases.
#'
#' @export
mofa.merge_data2 <- function(xdata, prefix.rows=NULL, prefix.cols=NULL) {
mofa.merge_data2 <- function(xdata, merge.rows="prefix", merge.cols="union") {
n1 <- length(Reduce(intersect,lapply(xdata,rownames)))
n2 <- length(Reduce(intersect,lapply(xdata,colnames)))
c(n1,n2)
if(n1 && n2) {
message("WARNING: matrices are overlapping both in rows and columns")
rdim <- sapply(xdata,nrow)
cdim <- sapply(xdata,ncol)
if(n1 < min(rdim) && merge.rows!="prefix") {
message("WARNING: rows do not match")
}
if(is.null(prefix.cols)) prefix.cols <- (n1 > 0 && n2 > 0)
if(is.null(prefix.rows)) prefix.rows <- (n1 > 0 && n2 > 0)
if(n2 < min(cdim) && merge.cols!="prefix") {
message("WARNING: columns do not match")
}
prefix.rows <- (merge.rows=="prefix")
prefix.cols <- (merge.cols=="prefix")
if(prefix.cols) {
## if rows overlap (i.e. same genes), prefix the column names
## (i.e. different datasets)
## prefix the column names. i.e. different datasets.
for(i in 1:length(xdata)) {
nn <- sub("[A-Za-z]+:","",colnames(xdata[[i]]))
nn <- sub("^[A-Za-z]+:","",colnames(xdata[[i]]))
colnames(xdata[[i]]) <- paste0(names(xdata)[i],":",nn)
}
merge.cols <- "union"
}
if(prefix.rows) {
## if columns overlap (i.e. same samples), prefix the feature
## names.
for(i in 1:length(xdata)) {
nn <- sub("[A-Za-z]+:","",rownames(xdata[[i]]))
nn <- sub("^[A-Za-z]+:","",rownames(xdata[[i]]))
rownames(xdata[[i]]) <- paste0(names(xdata)[i],":",nn)
}
merge.rows <- "union"
}
if(merge.rows == "intersect") {
allfeatures <- Reduce(intersect,lapply(xdata,rownames))
} else {
allfeatures <- unique(unlist(lapply(xdata, rownames)))
}
if(merge.cols == "intersect") {
allsamples <- Reduce(intersect,lapply(xdata,colnames))
} else {
allsamples <- unique(unlist(lapply(xdata, colnames)))
}
allfeatures <- unique(unlist(lapply(xdata, rownames)))
allsamples <- unique(unlist(lapply(xdata, colnames)))
D <- matrix(0, length(allfeatures), length(allsamples))
nn <- matrix(0, length(allfeatures), length(allsamples))
rownames(D) <- allfeatures
Expand Down Expand Up @@ -987,11 +1002,11 @@ mofa.get_prefix <- function(x) {
#' @export
mofa.strip_prefix <- function(xx) {
if (class(xx) == "character") {
xx <- sub("[A-Za-z0-9]+:", "", xx)
xx <- sub("^[A-Za-z0-9]+:", "", xx)
return(xx)
}
if (class(xx) == "matrix") {
rownames(xx) <- sub("[A-Za-z0-9]+:", "", rownames(xx))
rownames(xx) <- sub("^[A-Za-z0-9]+:", "", rownames(xx))
return(xx)
}
if (class(xx) %in% c("list", "array") || is.list(xx)) {
Expand Down Expand Up @@ -2652,6 +2667,13 @@ lasagna.create_model <- function(data, pheno="pheno", ntop=1000, nc=20,
add.sink=FALSE, intra=TRUE, fully_connect=FALSE,
add.revpheno = TRUE, condition.edges=TRUE
) {
if(0) {
pheno="pheno"; ntop=1000; nc=20;
annot=NULL; use.gmt=TRUE; use.graphite=TRUE;
add.sink=FALSE; intra=TRUE; fully_connect=FALSE;
add.revpheno = TRUE; condition.edges=TRUE
}

if (pheno == "pheno") {
Y <- expandPhenoMatrix(data$samples, drop.ref=FALSE)
} else if (pheno == "expanded") {
Expand Down Expand Up @@ -2686,7 +2708,8 @@ lasagna.create_model <- function(data, pheno="pheno", ntop=1000, nc=20,
}

## what about not overlapping samples??
X <- mofa.merge_data2(xx, prefix.rows=TRUE, prefix.cols=FALSE)
#X <- mofa.merge_data(xx)
X <- mofa.merge_data2(xx, merge.rows="prefix", merge.cols="union")
##remove(xx)
kk <- intersect(colnames(X),rownames(Y))
X <- X[,kk]
Expand All @@ -2700,7 +2723,7 @@ lasagna.create_model <- function(data, pheno="pheno", ntop=1000, nc=20,
## Compute BIG correlation matrix. WARNING can become huge! NOTE:
## Needs optimization using SPARSE matrix.
suppressWarnings( R <- cor(t(X), use = "pairwise") )

## Sink/source need to be connected allways
ii <- grep("SINK|SOURCE",rownames(R))
if(length(ii)) {
Expand Down Expand Up @@ -2763,6 +2786,7 @@ lasagna.create_model <- function(data, pheno="pheno", ntop=1000, nc=20,

## mask for GSETS/pathways connections???
if (use.gmt) {
### fill me
}

## define layers
Expand Down Expand Up @@ -3012,7 +3036,8 @@ lasagna.plot3D <- function(graph, pos) {
#' @export
lasagna.prune_graph <- function(graph, ntop = 100, layers = NULL,
normalize.edges = FALSE, min.rho = 0.3,
edge.sign = "both", edge.type = "both",
edge.sign = c("both","pos","neg","consensus")[1],
edge.type = c("both","inter","intra","both2")[1],
filter = NULL,
prune = TRUE) {

Expand Down Expand Up @@ -3565,10 +3590,6 @@ mofa.normalizeExpression <- function(X, method1="maxMedian", method2="none") {
return(normX)
}





## ======================================================================
## ======================================================================
## ======================================================================
Loading
Loading