Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 52 additions & 39 deletions R/pgx-annot.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ getProbeAnnotation <- function(organism,
info("[getProbeAnnotation] annotating with custom annotation")
genes <- getCustomAnnotation2(probes0, annot_table)
} else if (datatype == "metabolomics") {
dbg("[getProbeAnnotation] annotating for metabolomics")
mx.check <- mx.check_mapping(
probes,
all.db = c("playdata", "annothub", "refmet"), check.first = TRUE
Expand All @@ -144,23 +143,19 @@ getProbeAnnotation <- function(organism,
dbg("[getProbeAnnotation] WARNING: not able to map metabolomics probes")
}
} else if (datatype == "lipidomics") {
dbg("[getProbeAnnotation] annotating for lipidomics")
## Directly annotate if probes are recognized
genes <- getLipidAnnotation(
probes,
extra_annot = TRUE,
annot_table = annot_table
)
} else if (datatype == "multi-omics") {
dbg("[getProbeAnnotation] annotating for multi-omics")
genes <- getMultiOmicsProbeAnnotation(organism, probes)
} else {
if (datatype == "proteomics") {
dbg("[getProbeAnnotation] annotating for proteomics")
is.phospho <- annotate_phospho_residue(probes, detect.only = TRUE)
genes <- getGeneAnnotation(organism = organism, probes = probes, is.phospho = is.phospho)
} else {
dbg("[getProbeAnnotation] annotating for transcriptomics")
genes <- getGeneAnnotation(organism = organism, probes = probes)
}
}
Expand Down Expand Up @@ -391,9 +386,6 @@ getGeneAnnotation.ANNOTHUB <- function(
cols <- unique(c(cols, "ENTREZID"))
}

dbg("[getGeneAnnotation.ANNOTHUB] annotation columns:", cols)
dbg("[getGeneAnnotation.ANNOTHUB] probe_type = ", probe_type)
dbg("[getGeneAnnotation.ANNOTHUB] retrieving annotation for ", length(probes), "features.")
suppressMessages(suppressWarnings(
annot <- AnnotationDbi::select(
orgdb,
Expand Down Expand Up @@ -465,17 +457,12 @@ getGeneAnnotation.ANNOTHUB <- function(
missing.probes <- missing.probes[!is.na(missing.probes)]
length(missing.probes)
if (second.pass && length(missing.probes)) {
dbg(
"[getGeneAnnotation.ANNOTHUB] second pass: retrying missing",
length(missing.probes), "symbols..."
)
missing.probe_type <- try(suppressWarnings(suppressMessages(
detect_probetype(organism, missing.probes, orgdb = orgdb)
)), silent = TRUE)
if (inherits(missing.probe_type, "try-error")) {
missing.probe_type <- NULL
}
dbg("[getGeneAnnotation.ANNOTHUB] missing.probe_type=", missing.probe_type)

## only do second try if missing.probetype is different

Expand Down Expand Up @@ -965,10 +952,6 @@ getCustomAnnotation2 <- function(probes, custom_annot, feature.col = "feature",
), 1)
if (length(ortholog.col) == 0) ortholog.col <- NA
}
dbg("[getCustomAnnotation2] feature.col = ", feature.col)
dbg("[getCustomAnnotation2] symbol.col = ", symbol.col)
dbg("[getCustomAnnotation2] title.col = ", gene_title.col)
dbg("[getCustomAnnotation2] ortholog.col = ", ortholog.col)

features <- custom_annot[, feature.col]
custom_annot <- custom_annot[match(probes, features), ]
Expand Down Expand Up @@ -1020,13 +1003,40 @@ getCustomAnnotation2 <- function(probes, custom_annot, feature.col = "feature",
## ================== GET ORTHOLOG FUNCTIONS ======================================
## ================================================================================

biomaRt.useEnsembl <- function(biomart, dataset=NULL,
mirrors = c("www","useast","asia")) {
mirror.ok <- rep(NA, length(mirrors))
names(mirror.ok) <- mirrors
m="www"
for(m in mirrors) {
resp <- NULL
resp <- biomaRt::useEnsembl(biomart = biomart, mirror = m)
mirror.ok[m] <- !(resp@host %in% c(NA,"",NULL))
}
mirror.ok
if(!any(mirror.ok)) {
message("WARNING: could not reach any biomart mirror")
return(NULL)
}
sel.mirror <- head(names(which(mirror.ok)),1)
sel.mirror
if(is.null(dataset)) {
mart <- biomaRt::useEnsembl(biomart = biomart, mirror = sel.mirror)
} else {
mart <- biomaRt::useEnsembl(biomart = biomart, dataset = dataset, mirror = sel.mirror)
}
return(mart)
}


#' @title Get human ortholog from given symbols of organism by using
#' orthogene package. This package needs internet connection.
#'
#' @export
getHumanOrtholog <- function(organism, symbols,
ortho.methods = c("homologene", "gprofiler", "babelgene"),
verbose = 1) {

orthogenes <- rep(NA, length(symbols))
orthosource <- rep(NA, length(symbols))

Expand Down Expand Up @@ -1096,8 +1106,7 @@ getHumanOrtholog <- function(organism, symbols,
nrow(ortho.out) > 0)
results.ok
if (results.ok) {
ii <- which(is.na(orthogenes))
## ii <- head(which(is.na(orthogenes)),10)
ii <- which(is.na(orthogenes)) ## still unmapped
jj <- match(symbols[ii], ortho.out$input_gene)
kk <- ii[which(!is.na(jj))]
jj <- jj[which(!is.na(jj))]
Expand All @@ -1110,26 +1119,24 @@ getHumanOrtholog <- function(organism, symbols,
ortho.found <- all(!is.na(orthogenes))
i <- i + 1
}

table(is.na(orthogenes))
mean.mapped <- round(100 * mean(!is.na(orthogenes)), digits = 4)
orthogene.failed <- (mean.mapped < 10.0)
orthogene.failed
if (orthogene.failed) {
if (verbose > 0) message("[getHumanOrtholog] ratio mapped using orthogene = ", mean.mapped, "%")
if (verbose > 0) message("[getHumanOrtholog] ratio mapped using orthogene = ", mean.mapped, "%")

if (any(is.na(orthogenes))) {
if (verbose > 0) message("[getHumanOrtholog] Trying biomart...")
## test if biomart is reachable
ii <- which(is.na(symbols))
ii <- which(is.na(orthogenes))
res.biomart <- try(getHumanOrtholog.biomart(organism, symbols[ii]), silent = TRUE)
class(res.biomart)
if (!"try-error" %in% class(res.biomart)) {
jj <- which(is.na(res.biomart))
ii <- ii[jj]
orthogenes[ii] <- res.biomart[jj]
orthosource[ii] <- "biomart"
} else {
if (verbose > 0) message("[getHumanOrtholog] biomart failed...")
}
} else {
if (verbose > 0) message("[getHumanOrtholog] skipping biomart...")
mean.mapped <- round(100 * mean(!is.na(orthogenes)), digits = 4)
message("[getHumanOrtholog] ratio mapped after biomart = ", mean.mapped, "%")
}

## Map any missing symbols that look like human genes
Expand Down Expand Up @@ -1166,7 +1173,13 @@ getHumanOrtholog.biomart <- function(organism, symbols, verbose = 1) {
organism0 <- paste0(s1, s2)
organism0
if (verbose > 0) message("[getHumanOrtholog.biomart] Searching biomart for '", organism0, "'")
dd <- listDatasets(useEnsembl(biomart = "genes"))

mart <- biomaRt.useEnsembl(biomart = "genes")
if(is.null(mart)) {
message("ERROR: no biomart mirrors reachable. Exiting.")
return(NULL)
}
dd <- listDatasets(mart)
hh <- grep(organism0, dd$dataset)
hh
if (length(hh) == 0) {
Expand All @@ -1178,8 +1191,8 @@ getHumanOrtholog.biomart <- function(organism, symbols, verbose = 1) {
if (verbose > 0) message("[getHumanOrtholog.biomart] found matching dataset '", dataset, "'")
organism_mart <- NULL
human_mart <- NULL
organism_mart <- biomaRt::useEnsembl(biomart = "genes", dataset = dataset)
human_mart <- biomaRt::useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl")
organism_mart <- biomaRt.useEnsembl(biomart = "genes", dataset = dataset)
human_mart <- biomaRt.useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl")
mart1.ok <- (!is.null(organism_mart) && !"try-error" %in% class(organism_mart) &&
class(organism_mart) == "Mart")
mart2.ok <- (!is.null(human_mart) && !"try-error" %in% class(human_mart) &&
Expand Down Expand Up @@ -1422,7 +1435,7 @@ detect_probetype <- function(organism, probes, orgdb = NULL,
if (tolower(organism) == "rat") organism <- "Rattus norvegicus"

if (is.null(datatype) && all(grepl("[:]", probes))) {
dbg("[detect_probetype] datatype is multi-omics")
dbg("[detect_probetype] datatype is multi-omics?")
datatype <- "multi-omics"
}

Expand Down Expand Up @@ -2454,8 +2467,7 @@ getMultiOmicsProbeAnnotation <- function(organism, probes) {
dtype <- ifelse(grepl("chebi|hmdb|kegg|pubchem|lipid|refmet", dtype), "mx", dtype)
table(dtype)
dtype[!dtype %in% c("gx","px","mx")] <- "custom"
table(dtype)
dbg("[getMultiOmicsProbeAnnotation] dtypes = ", unique(dtype))
dbg("[getMultiOmicsProbeAnnotation] detected datatypes = ", unique(dtype))

## populate with defaults
symbol <- sub("^[a-zA-Z]+:", "", probes)
Expand All @@ -2467,7 +2479,7 @@ getMultiOmicsProbeAnnotation <- function(organism, probes) {
aa <- getGeneAnnotation(organism, pp)
head(aa)
aa$data_type <- sub(":.*", "", probes[ii])
rownames(aa) <- probes[ii]
##rownames(aa) <- probes[ii]
aa$feature <- probes[ii]
annot[['gx']] <- aa
}
Expand All @@ -2478,7 +2490,7 @@ getMultiOmicsProbeAnnotation <- function(organism, probes) {
pp <- sub("^[a-zA-Z]+:", "", probes[ii])
aa <- getMetaboliteAnnotation(pp)
aa$data_type <- "mx"
rownames(aa) <- probes[ii]
##rownames(aa) <- probes[ii]
aa$feature <- probes[ii]
annot[['mx']] <- aa
}
Expand All @@ -2488,7 +2500,7 @@ getMultiOmicsProbeAnnotation <- function(organism, probes) {
aa <- getCustomAnnotation(pp, custom_annot = NULL)
head(aa)
aa$data_type <- "custom"
rownames(aa) <- probes[ii]
##rownames(aa) <- probes[ii]
aa$feature <- probes[ii]
annot[['custom']] <- aa
}
Expand All @@ -2507,8 +2519,9 @@ getMultiOmicsProbeAnnotation <- function(organism, probes) {
annot <- annot[match(probes, annot$feature), ]
rownames(annot) <- make_unique(probes)
head(annot)

## fill NA
annot$symbol[annot$symbol %in% c("-","")] <- NA
symbolx <- paste0("{",symbol,"}")
annot$human_ortholog[which(annot$human_ortholog == "")] <- NA
annot$feature <- ifelse(is.na(annot$feature), probes, annot$feature)
Expand Down
Loading