bigomics · ivokwee · Nov 14, 2025 · Nov 14, 2025 · Nov 14, 2025
diff --git a/data-raw/metabolic_pathways/graphite-edges.rda b/data-raw/metabolic_pathways/graphite-edges.rda
diff --git a/data-raw/metabolic_pathways/graphite-nodes.rda b/data-raw/metabolic_pathways/graphite-nodes.rda
diff --git a/data-raw/metabolic_pathways/graphite-pathways.rda b/data-raw/metabolic_pathways/graphite-pathways.rda
diff --git a/...aw/metabolic_pathways/parse_annotation2.R → ...raw/metabolic_pathways/parse_annotation.R b/...aw/metabolic_pathways/parse_annotation2.R → ...raw/metabolic_pathways/parse_annotation.R
@@ -7,11 +7,15 @@ library(dplyr)
 # Read file (from https://ftp.ebi.ac.uk/pub/databases/chebi/). This
 # database has ChEBI ID, name and definition of the metabolite. We
 # restrict our annotation to only those molecules referred in ChEBI.
-#setwd("~/Playground/playdata")
-chebi <- readr::read_tsv("./data-raw/metabolic_pathways/chebi_compounds_20240801_0501.tsv")
+
+if(!file.exists("~/Downloads/compounds.tsv.gz")) {
+  system("cd ~/Downloads && rm -f compounds.tsv.gz && wget https://ftp.ebi.ac.uk/pub/databases/chebi/flat_files/compounds.tsv.gz")  
+}
+chebi <- readr::read_tsv("~/Downloads/compounds.tsv.gz")
 head(data.frame(chebi))
 colnames(chebi)
-chebi <- chebi[c("ID", "CHEBI_ACCESSION", "NAME", "DEFINITION")]
+# Older ChEBI export used upper-case columns: chebi[c("ID", "CHEBI_ACCESSION", "NAME", "DEFINITION")]
+chebi <- chebi[c("id", "chebi_accession", "name", "definition")]
 colnames(chebi) <- c("ID", "CHEBI_ACCESSION", "name", "definition")
 head(chebi)
 dim(chebi)
@@ -40,7 +44,9 @@ table(map$ChEBI %in% chebi$ID)
 match2 <- function(a,b)  ifelse(is.na(a), NA, match(a,b))
 
 # add pathbank ID. We need this for the Pathbank SVG pathway images.
-#system("cd ~/Downloads && wget https://pathbank.org/downloads/pathbank_all_metabolites.csv.zip")
+if(!file.exists("~/Downloads/pathbank_all_metabolites.csv.zip")) {
+  system("cd ~/Downloads && rm -f pathbank_all_metabolites.csv.zip && wget https://pathbank.org/downloads/pathbank_all_metabolites.csv.zip")
+}
 mx <- data.table::fread("~/Downloads/pathbank_all_metabolites.csv.zip")
 sum(setdiff(map$HMDB,c(NA,"","-"))  %in% mx[["HMDB ID"]])
 sum(setdiff(map$ChEBI,c(NA,"","-")) %in% mx[["ChEBI ID"]])
@@ -55,8 +61,14 @@ sum(!is.na(i2) & is.na(i1))
 ii <- ifelse(!is.na(i2), i2, i1)
 map$PATHBANK <- mx[["Metabolite ID"]][ii]
 
-# Add REFMET. Get from  https://www.metabolomicsworkbench.org/databases/refmet/browse.php. It has nice annotation of super/main/sub class. Also has lipidmaps ID.
-REFMET <- read.csv("./data-raw/metabolic_pathways/refmet_241218.csv")
+# Add REFMET. Download refmet.csv manually from
+# https://www.metabolomicsworkbench.org/databases/refmet/browse.php. It
+# has nice annotation of super/main/sub class and also has LIPID MAPS IDs.
+if(!file.exists("~/Downloads/refmet.csv")) {
+  message("Please download refmet.csv from https://www.metabolomicsworkbench.org/databases/refmet/browse.php")
+}
+#REFMET <- read.csv("./data-raw/metabolic_pathways/refmet_250821.csv")
+REFMET <- read.csv("~/Downloads/refmet.csv")
 dim(REFMET)
 head(REFMET)
 tail(sort(table(REFMET$refmet_name)))
@@ -108,7 +120,6 @@ dim(ANNOTATION)
 # Merge names. Notice we use AnnotHub name as default
 ANNOTATION$name[ANNOTATION$name==""] <- NA
 ANNOTATION$refmet_name[ANNOTATION$refmet_name==""] <- NA
-
 ANNOTATION$name <- ifelse(!is.na(ANNOTATION$name),ANNOTATION$name, ANNOTATION$refmet_name)
 ANNOTATION$refmet_name <- NULL
 
@@ -139,8 +150,8 @@ head(ANNOTATION)
 object.size(ANNOTATION)/1e9
 
 ## replace special-empty with NA
-ANNOTATION <- apply(
-  ANNOTATION,2, function(s) {sel=which(s %in% c('','-'));s[sel]=NA;s})
+ANNOTATION <- apply(ANNOTATION,2, function(s) {
+  sel=which(s %in% c('','-'));s[sel]=NA;s})
 ANNOTATION <- data.frame(ANNOTATION)
 
 ## rename and save
@@ -194,7 +205,3 @@ colSums(!is.na(METABOLITE_METADATA))
 dim(METABOLITE_METADATA)
 usethis::use_data(METABOLITE_METADATA, overwrite = TRUE)
 
-head(METABOLITE_ANNOTATION)
-head(METABOLITE_METADATA)
-tail(METABOLITE_METADATA)
-