diff --git a/NAMESPACE b/NAMESPACE
index c6f34d57..b11de313 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -14,12 +14,17 @@ export(abbreviate_pheno)
export(add_opacity)
export(ai.ask)
export(ai.ask_ellmer)
+export(ai.create_ellmer_chat)
+export(ai.create_report)
export(ai.genesets_keywords)
export(ai.genesets_summary)
export(ai.get_models)
export(ai.get_ollama_models)
export(ai.get_remote_models)
export(ai.model_is_available)
+export(ai.tool_get_current_time)
+export(ai.tool_get_expression)
+export(ai.tool_plot_volcano)
export(alias2hugo)
export(allSpecies)
export(allSpecies.ANNOTHUB)
diff --git a/R/ai-llm.R b/R/ai-llm.R
index 4f14bea5..9718ed53 100644
--- a/R/ai-llm.R
+++ b/R/ai-llm.R
@@ -1,3 +1,18 @@
+
+OLLAMA_MODELS = ai.get_ollama_models()
+DEFAULT_LLM = "gpt-5-nano"
+DEFAULT_LLM = NULL
+
+REMOTE_MODELS <- c(
+ "openai:gpt-5-nano",
+ "xai:grok-4-1-fast-non-reasoning",
+ "groq:llama-3.1-8b-instant",
+ "groq:meta-llama/llama-4-scout-17b-16e-instruct",
+ "groq:openai/gpt-oss-20b",
+ "groq:openai/gpt-oss-120b",
+ "google:gemini-2.5-flash-lite"
+)
+
#'
#' @export
ai.get_ollama_models <- function(models=NULL, size=NULL) {
@@ -32,26 +47,6 @@ ai.get_ollama_models <- function(models=NULL, size=NULL) {
return(available.models)
}
-OLLAMA_MODELS = ai.get_ollama_models()
-DEFAULT_LLM = "gpt-5-nano"
-DEFAULT_LLM = NULL
-
-REMOTE_MODELS <- c(
- "openai:gpt-5-nano",
- "xai:grok-4-fast-non-reasoning",
- "groq:llama-3.1-8b-instant",
- "groq:meta-llama/llama-4-scout-17b-16e-instruct",
- "groq:openai/gpt-oss-20b",
- "groq:openai/gpt-oss-120b",
- "google:gemini-2.5-flash-lite"
-)
-
-if(0) {
- model="gpt-5-nano";prompt=NULL
- model="gemma3:1b";prompt=NULL
- model="grok-4-fast-non-reasoning";prompt=NULL
-}
-
#' @export
ai.get_remote_models <- function(models=NULL) {
keys <- NULL
diff --git a/R/ai-report.R b/R/ai-report.R
new file mode 100644
index 00000000..3b96631e
--- /dev/null
+++ b/R/ai-report.R
@@ -0,0 +1,184 @@
+##---------------------------------------------------------------------
+##----------------------- CONTENT CREATION ----------------------------
+##---------------------------------------------------------------------
+
+
+table_to_content <- function(df) {
+ if(is.null(df)) return(NULL)
+ paste(as.character(knitr::kable(df,format="markdown")),collapse="\n")
+}
+
+list_to_content <- function(a, newline=FALSE) {
+ if(is.null(a)) return("")
+ aa <- sapply(a, function(s) paste(unlist(s), collapse='; '))
+ pp <- paste0("- **",names(a),"**: ")
+ if(newline) pp <- paste0(pp, '\n\n')
+ cc <- paste(paste0(pp,aa), collapse='\n')
+ paste(cc,'\n')
+}
+
+collate_as_sections <- function(a, level=2) {
+ if(is.null(a)) return("")
+ hdr <- paste(rep("#",level),collapse="")
+ for(i in 1:length(a)) {
+ a[[i]] <- paste(hdr,names(a)[i],"\n\n",a[[i]],'\n')
+ }
+ paste(a, collapse="\n\n")
+}
+
+#'
+#'
+#' @export
+ai.create_report <- function(pgx, ntop=20, sections=NULL, collate=FALSE) {
+
+ contrasts <- playbase::pgx.getContrasts(pgx)
+ samples <- rownames(pgx$samples)
+ ct <- contrasts[1] ## FOR NOW!!!!
+
+ all.sections <- c("description", "dataset_info","compute_settings",
+ "differential_expression", "geneset_enrichment",
+ "drug_similarity","hub_genes","wgcna_report")
+ if(is.null(sections)) {
+ sections <- all.sections
+ } else {
+ sections <- intersect(all.sections, sections)
+ }
+
+ description <- NULL
+ if("description" %in% sections) {
+ description <- list(
+ name = pgx$name,
+ title = pgx$description,
+ description = pgx$description
+ )
+ }
+
+ dataset_info <- NULL
+ if("dataset_info" %in% sections) {
+ dataset_info <- list(
+ name = pgx$name,
+ organism = pgx$organism,
+ datatype = pgx$datatype,
+ creation_date = pgx$date,
+ num_samples = ncol(pgx$X),
+ num_features = nrow(pgx$X),
+ num_genesets = nrow(pgx$gsetX),
+ num_comparisons = length(contrasts),
+ samples = samples,
+ comparisons = contrasts
+ # features = c(head(rownames(pgx$counts),40),ifelse(nrow(pgx$counts)>40,"...",""))
+ )
+ }
+
+ compute_settings <- NULL
+ if("compute_settings" %in% sections) {
+ compute_settings <- list(
+ gene_tests = colnames(pgx$gx.meta$meta[[1]]$p),
+ geneset_tests = colnames(pgx$gset.meta$meta[[1]]$p),
+ pgx_slots = sort(names(pgx))
+ )
+ }
+
+ ##-------------------------------------------------------------------
+ differential_expression <- NULL
+ if("differential_expression" %in% sections) {
+ F <- playbase::pgx.getMetaMatrix(pgx)$fc
+ F <- playbase::rename_by2(F, pgx$genes, "symbol")
+ ii <- match(rownames(F),pgx$genes$symbol)
+ rownames(F) <- paste0(pgx$genes$gene_title[ii]," (",rownames(F),")")
+ F.up <- head( F[order(-rowMeans(F)),,drop=FALSE], 2*ntop )
+ F.dn <- head( F[order(rowMeans(F)),,drop=FALSE], 2*ntop )
+
+ differential_expression <- list(
+ "Up-regulated genes (top hits). The top most most positively differentially expressed genes are:" = F.up,
+ "Down-regulated genes (top hits). The top most negatively differentially expressed genes are:" = F.dn
+ )
+ differential_expression <- lapply(differential_expression, table_to_content)
+ }
+
+ ##-------------------------------------------------------------------
+ geneset_enrichment <- NULL
+ if("geneset_enrichment" %in% sections) {
+ G <- playbase::pgx.getMetaMatrix(pgx, level = "geneset")$fc
+ G <- G[order(-rowMeans(G)),,drop=FALSE]
+ revtail <- function(A,n) head(A[nrow(A):1,,drop=FALSE],n)
+ BP.up <- head( G[grep("GOBP|GO_BP",rownames(G)),,drop=FALSE], ntop)
+ BP.dn <- revtail( G[grep("GOBP|GO_BP",rownames(G)),,drop=FALSE], ntop)
+ MF.up <- head( G[grep("GOMF|GO_MF",rownames(G)),,drop=FALSE], ntop)
+ MF.dn <- revtail( G[grep("GOMF|GO_MF",rownames(G)),,drop=FALSE], ntop)
+ CC.up <- head( G[grep("GOCC|GO_CC",rownames(G)),,drop=FALSE], ntop)
+ CC.dn <- revtail( G[grep("GOCC|GO_CC",rownames(G)),,drop=FALSE], ntop)
+ PW.up <- head( G[grep("PATHWAY",rownames(G)),,drop=FALSE], ntop)
+ PW.dn <- revtail( G[grep("PATHWAY",rownames(G)),,drop=FALSE], ntop)
+
+ geneset_enrichment <- list(
+ "Top most positively enriched GO biological process (BP) gene sets:" = BP.up,
+ "Top most negatively enriched GO biological process (BP) gene sets:" = BP.dn,
+ "Top most positively enriched GO molecular function (MF) gene sets:" = MF.up,
+ "Top most negatively enriched GO molecular function (MF) gene sets:" = MF.dn,
+ "Top most positively enriched GO cellular component (CC) gene sets:" = CC.up,
+ "Top most negatively enriched GO cellulare component (CC) gene sets:" = CC.dn,
+ "Top most positively enriched pathways:" = PW.up,
+ "Top most negatively enriched pathways:" = PW.dn
+ )
+ geneset_enrichment <- lapply(geneset_enrichment, table_to_content)
+ }
+
+ ##-------------------------------------------------------------------
+ drug_similarity <- NULL
+ if("drug_similarity" %in% sections && !is.null(pgx$drugs)) {
+ D <- playbase::pgx.getTopDrugs(pgx, ct, n=ntop, na.rm=TRUE)
+ drug_similarity <- list(
+ "Drug Mechanism of Action. Drug Connectivity Map (CMap) analysis of selected comparison. Similarity of the mechanism of action (MOA) is based on correlation enrichment with drug perturbation profiles of LINCS L1000 database. The top most similar (i.e. positively correlated) drugs are:" =
+ table_to_content(playbase::pgx.getTopDrugs(pgx, ct, n=ntop, dir=+1, na.rm=TRUE)),
+ "The top most inhibitory (i.e. negative correlated) drugs are:" =
+ table_to_content(playbase::pgx.getTopDrugs(pgx, ct, n=ntop, dir=-1, na.rm=TRUE))
+ )
+ }
+
+ pcsf_report <- NULL
+ if(FALSE && "pcsf_report" %in% sections) {
+ pcsf_report <- list("Identification of hub genes. Hub genes can identify important regulators. The hub score is computed using a page rank network centrality score. The most central genes are:" = table_to_content(playbase::pgx.getPCSFcentrality(pgx, ct, plot = FALSE, n = ntop))
+ )
+ }
+
+ ##-------------------------------------------------------------------
+ wgcna_report <- NULL
+ if("wgcna_report" %in% sections && !is.null(pgx$wgcna)) {
+ out <- playbase::wgcna.describeModules(
+ pgx$wgcna,
+ modules = NULL,
+ multi = FALSE,
+ ntop = 40,
+ annot = pgx$genes,
+ experiment = pgx$description,
+ verbose = FALSE,
+ model = NULL
+ )
+ names(out)
+ wgcna_report <- list_to_content(out$answers)
+ }
+
+ ##-----------------------------------------------------------
+ report <- list(
+ DATASET_INFO = list_to_content(dataset_info)
+ )
+
+ content <- list(
+ description = list_to_content(description),
+ dataset_info = list_to_content(dataset_info),
+ compute_settings = list_to_content(compute_settings),
+ differential_expression = list_to_content(differential_expression),
+ geneset_enrichment = list_to_content(geneset_enrichment),
+ drug_similarity = list_to_content(drug_similarity),
+ pcsf_report = list_to_content(pcsf_report),
+ wgcna_report = wgcna_report
+ )
+
+ if(collate) {
+ content <- collate_as_sections(content,level=2)
+ }
+
+ return(content)
+}
+
diff --git a/R/ai-tools.R b/R/ai-tools.R
new file mode 100644
index 00000000..5be35057
--- /dev/null
+++ b/R/ai-tools.R
@@ -0,0 +1,80 @@
+##======================================================================
+##====================== FUNCTIONS =====================================
+##======================================================================
+
+#' @export
+ai.create_ellmer_chat <- function(model, system_prompt) {
+ chat <- NULL
+ if( grepl("openai:", model) ) {
+ model1 <- sub("^openai:","",model)
+ chat <- ellmer::chat_openai(model = model1, system_prompt = system_prompt)
+ } else if( grepl("^groq:",model)) {
+ model1 <- sub("^groq:","",model)
+ chat <- ellmer::chat_groq(model = model1, system_prompt = system_prompt)
+ } else if( grepl("^ollama:",model)) {
+ model1 <- sub("^ollama:","",model)
+ chat <- ellmer::chat_ollama(model = model1, system_prompt = system_prompt)
+ } else {
+ message("could not connect to model",model)
+ }
+ chat
+}
+
+##======================================================================
+##==================== TOOLS ===========================================
+##======================================================================
+
+#' Plot Volcano
+#'
+#' @export
+ai.tool_plot_volcano <- ellmer::tool(
+ function(contrast, psig = 0.05) {
+ message("calling tool: plot_volcano(). contrast = ", contrast)
+ cmd <- "playbase::pgx.Volcano(pgx, contrast=1, psig=0.05)"
+ paste0("",cmd,"")
+ },
+ "Code using playbase library to create volcano plot for a given contrast",
+ contrast = ellmer::type_string(
+ "The comparison/contrast for the Volcano plot",
+ required = TRUE
+ ),
+ psig = ellmer::type_number(
+ description = "Significance level. Default psig=0.05",
+ required = FALSE
+ )
+)
+
+#' Get expression values for gene
+#'
+#' @export
+ai.tool_get_expression <- ellmer::tool(
+ function(gene) {
+ message("calling tool: get_expression(). gene = ", gene)
+ pgx <- playdata::GEIGER_PGX
+ list(
+ values = pgx$X[gene,],
+ plot_command = "base::barplot(pgx$X[gene,])"
+ )
+ },
+ "Get expression values for a gene",
+ gene = ellmer::type_string(
+ description = "The gene name to retrieve the expression values for",
+ required = TRUE
+ )
+)
+
+#' Gets the current time in the given time zone.
+#'
+#' @export
+ai.tool_get_current_time <- ellmer::tool(
+ function(tz = "UTC") {
+ message("calling tool: get_current_time()")
+ format(Sys.time(), tz = tz, usetz = TRUE)
+ },
+ "Gets the current time in the given time zone.",
+ tz = ellmer::type_string(
+ description = "The time zone to get the current time in. Defaults to `\"UTC\"`.",
+ required = FALSE
+ )
+)
+
diff --git a/man/ai.tool_get_current_time.Rd b/man/ai.tool_get_current_time.Rd
new file mode 100644
index 00000000..6a321a8e
--- /dev/null
+++ b/man/ai.tool_get_current_time.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ai-tools.R
+\name{ai.tool_get_current_time}
+\alias{ai.tool_get_current_time}
+\title{Gets the current time in the given time zone.}
+\usage{
+ai.tool_get_current_time(tz = "UTC")
+}
+\description{
+Gets the current time in the given time zone.
+}
diff --git a/man/ai.tool_get_expression.Rd b/man/ai.tool_get_expression.Rd
new file mode 100644
index 00000000..300c9ba4
--- /dev/null
+++ b/man/ai.tool_get_expression.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ai-tools.R
+\name{ai.tool_get_expression}
+\alias{ai.tool_get_expression}
+\title{Get expression values for gene}
+\usage{
+ai.tool_get_expression(gene)
+}
+\description{
+Get expression values for gene
+}
diff --git a/man/ai.tool_plot_volcano.Rd b/man/ai.tool_plot_volcano.Rd
new file mode 100644
index 00000000..44394be1
--- /dev/null
+++ b/man/ai.tool_plot_volcano.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ai-tools.R
+\name{ai.tool_plot_volcano}
+\alias{ai.tool_plot_volcano}
+\title{Plot Volcano}
+\usage{
+ai.tool_plot_volcano(contrast, psig = 0.05)
+}
+\description{
+Plot Volcano
+}