From ffa97ca9c4743a7b0ad94a524911cdc06781ec4a Mon Sep 17 00:00:00 2001
From: wsjung <wooseokjung0826@gmail.com>
Date: Sun, 3 Mar 2024 21:36:14 -0600
Subject: [PATCH] added process-skipping options

---
 nextflow.config                        |  6 ++
 nextflow_schema.json                   | 51 +++++++++++-----
 workflows/omicsgenetraitassociation.nf | 84 ++++++++++++++++----------
 3 files changed, 95 insertions(+), 46 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 7a87452..04165cb 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -60,6 +60,12 @@ params {
     validationShowHiddenParams       = false
     validate_params                  = true
 
+    // process-skipping options
+    skip_pascal                     = false
+    skip_mmap                       = false
+    skip_staar                      = true
+    skip_MEA                        = false
+    skip_CMA                        = false
 }
 
 // Load base.config by default for all pipelines
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b865a9f..25c9ac1 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -206,14 +206,7 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": [
-                        "symlink",
-                        "rellink",
-                        "link",
-                        "copy",
-                        "copyNoFollow",
-                        "move"
-                    ],
+                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
                     "hidden": true
                 },
                 "email_on_fail": {
@@ -270,6 +263,38 @@
                     "description": "Validation of parameters in lenient more.",
                     "hidden": true,
                     "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+                },
+                "email": {
+                    "type": "string"
+                }
+            }
+        },
+        "process": {
+            "title": "Process",
+            "type": "object",
+            "description": "Options to skip various steps within the workflow.",
+            "default": "",
+            "properties": {
+                "skip_MEA": {
+                    "type": "boolean",
+                    "description": "skips module enrichment analysis"
+                },
+                "skip_pascal": {
+                    "type": "boolean",
+                    "description": "skips PASCAL gene-level GWAS aggregation"
+                },
+                "skip_staar": {
+                    "type": "boolean",
+                    "description": "skips STAAR rare variant analysis",
+                    "default": true
+                },
+                "skip_mmap": {
+                    "type": "boolean",
+                    "description": "skips gene-trait association using MMAP"
+                },
+                "skip_CMA": {
+                    "type": "boolean",
+                    "description": "skips CMA"
                 }
             }
         }
@@ -295,11 +320,9 @@
         },
         {
             "$ref": "#/definitions/generic_options"
+        },
+        {
+            "$ref": "#/definitions/process"
         }
-    ],
-    "properties": {
-        "email": {
-            "type": "string"
-        }
-    }
+    ]
 }
diff --git a/workflows/omicsgenetraitassociation.nf b/workflows/omicsgenetraitassociation.nf
index 1d28997..2fa275f 100644
--- a/workflows/omicsgenetraitassociation.nf
+++ b/workflows/omicsgenetraitassociation.nf
@@ -103,50 +103,70 @@ workflow OMICSGENETRAITASSOCIATION {
     //
     // MODULE: PASCAL
     //
-    PASCAL_SUBWORKFLOW (
-        ch_input.pascal,
-        params.pascal_gene_annotation,
-        params.pascal_ref_panel
-    )
-    ch_pascal_output = PASCAL_SUBWORKFLOW.out.pascal_output
-    ch_pascal_cma_format = PASCAL_SUBWORKFLOW.out.cma_format_output
-    ch_versions = ch_versions.mix(PASCAL_SUBWORKFLOW.out.versions)
+    if (!params.skip_pascal) {
+        PASCAL_SUBWORKFLOW (
+            ch_input.pascal,
+            params.pascal_gene_annotation,
+            params.pascal_ref_panel
+        )
+        ch_pascal_output = PASCAL_SUBWORKFLOW.out.pascal_output
+        ch_pascal_cma_format = PASCAL_SUBWORKFLOW.out.cma_format_output
+        ch_versions = ch_versions.mix(PASCAL_SUBWORKFLOW.out.versions)
+    }
 
-    // ch_pascal_output.view()
 
     //
     // SUBWORKFLOW: MMAP_SUBWORKFLOW
     //
-    MMAP_SUBWORKFLOW (
-        params.mmap_gene_list,
-        params.trait,
-        ch_input.twas,
-        params.mmap_pedigree_file,
-        params.mmap_cov_matrix_file
-    )
-    ch_mmap_parsed = MMAP_SUBWORKFLOW.out.parsed_mmap_output
-    ch_mmap_cma_format = MMAP_SUBWORKFLOW.out.cma_format_output
-    ch_versions = ch_versions.mix(MMAP_SUBWORKFLOW.out.versions)
+    if (!params.skip_mmap) {
+        MMAP_SUBWORKFLOW (
+            params.mmap_gene_list,
+            params.trait,
+            ch_input.twas,
+            params.mmap_pedigree_file,
+            params.mmap_cov_matrix_file
+        )
+        ch_mmap_parsed = MMAP_SUBWORKFLOW.out.parsed_mmap_output
+        ch_mmap_cma_format = MMAP_SUBWORKFLOW.out.cma_format_output
+        ch_versions = ch_versions.mix(MMAP_SUBWORKFLOW.out.versions)
+    }
 
     //
     // MODULE: run CMA
     //
+    // TODO: scalable way to combine cma formatted input data (when there are multiple additional sources)
+    // TODO: scalable way to combine cma formatted input data when skipping upstream processes
+    if (!params.skip_CMA) {
+        // CMA needs at least one upstream source of CMA-formatted input
+        if (params.skip_pascal && params.skip_mmap) {
+            error 'Cannot run CMA when both PASCAL and MMAP are skipped; enable one of them or set --skip_CMA'
+        }
 
-    // ch_pascal_cma_format.view()
-    // ch_mmap_cma_format.view()
+        // mix in only the channels created by upstream steps that actually ran
+        ch_cma_input_files = Channel.empty()
+        if (!params.skip_pascal) {
+            ch_cma_input_files = ch_cma_input_files.mix(ch_pascal_cma_format)
+        }
+        if (!params.skip_mmap) {
+            ch_cma_input_files = ch_cma_input_files.mix(ch_mmap_cma_format)
+        }
+        ch_cma_input_files = ch_cma_input_files.toList()
+    }
 
-    ch_cma_input_files = ch_pascal_cma_format
-        .mix(ch_mmap_cma_format)
-        .toList()
+    if (!params.skip_CMA) {
+        CMA_SUBWORKFLOW (
+            ch_cma_input_files,
+            params.trait,
+            []
+        )
+        ch_pval = CMA_SUBWORKFLOW.out.pval
+            .collect()
+        ch_versions = ch_versions.mix(CMA_SUBWORKFLOW.out.versions)
+    }
 
-    CMA_SUBWORKFLOW (
-        ch_cma_input_files,
-        params.trait,
-        []
-    )
-    ch_pval = CMA_SUBWORKFLOW.out.pval
-        .collect()
-    ch_versions = ch_versions.mix(CMA_SUBWORKFLOW.out.versions)
+    if (params.skip_CMA && !params.skip_pascal) {
+        ch_pval = ch_pascal_cma_format
+    }
 
     //
     // MODULE: PREPROCESSFORPASCAL