From ffa97ca9c4743a7b0ad94a524911cdc06781ec4a Mon Sep 17 00:00:00 2001
From: wsjung
Date: Sun, 3 Mar 2024 21:36:14 -0600
Subject: [PATCH] added process-skipping options

Add the skip_pascal, skip_mmap, skip_staar, skip_MEA and skip_CMA
parameters (skip_staar defaults to true, the others to false), declare
them in the parameter schema under a new "process" definition group, and
guard the PASCAL, MMAP and CMA subworkflow calls with them.

The CMA input is assembled from whichever of PASCAL / MMAP actually ran.
Requesting CMA while both are skipped is rejected with an explicit error
instead of failing on an unassigned channel variable.

The schema key is spelled skip_CMA so that it matches the parameter name
used in nextflow.config and in the workflow.

---
 nextflow.config                        |  6 ++
 nextflow_schema.json                   | 51 +++++++++++-----
 workflows/omicsgenetraitassociation.nf | 81 +++++++++++++++----------
 3 files changed, 92 insertions(+), 46 deletions(-)

diff --git a/nextflow.config b/nextflow.config
index 7a87452..04165cb 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -60,6 +60,12 @@ params {
     validationShowHiddenParams       = false
     validate_params                  = true
 
+    // process-skipping options
+    skip_pascal                      = false
+    skip_mmap                        = false
+    skip_staar                       = true
+    skip_MEA                         = false
+    skip_CMA                         = false
 }
 
 // Load base.config by default for all pipelines
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b865a9f..25c9ac1 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -206,14 +206,7 @@
                     "description": "Method used to save pipeline results to output directory.",
                     "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
                     "fa_icon": "fas fa-copy",
-                    "enum": [
-                        "symlink",
-                        "rellink",
-                        "link",
-                        "copy",
-                        "copyNoFollow",
-                        "move"
-                    ],
+                    "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
                     "hidden": true
                 },
                 "email_on_fail": {
@@ -270,6 +263,38 @@
                     "description": "Validation of parameters in lenient more.",
                     "hidden": true,
                     "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+                },
+                "email": {
+                    "type": "string"
+                }
+            }
+        },
+        "process": {
+            "title": "Process",
+            "type": "object",
+            "description": "Options to skip various steps within the workflow.",
+            "default": "",
+            "properties": {
+                "skip_MEA": {
+                    "type": "boolean",
+                    "description": "skips module enrichment analysis"
+                },
+                "skip_pascal": {
+                    "type": "boolean",
+                    "description": "skips PASCAL gene-level GWAS aggregation"
+                },
+                "skip_staar": {
+                    "type": "boolean",
+                    "description": "skips STAAR rare variant analysis",
+                    "default": true
+                },
+                "skip_mmap": {
+                    "type": "boolean",
+                    "description": "skips gene-trait association using MMAP"
+                },
+                "skip_CMA": {
+                    "type": "boolean",
+                    "description": "skips CMA"
                 }
             }
         }
@@ -295,11 +320,9 @@
         },
         {
             "$ref": "#/definitions/generic_options"
+        },
+        {
+            "$ref": "#/definitions/process"
         }
-    ],
-    "properties": {
-        "email": {
-            "type": "string"
-        }
-    }
+    ]
 }
diff --git a/workflows/omicsgenetraitassociation.nf b/workflows/omicsgenetraitassociation.nf
index 1d28997..2fa275f 100644
--- a/workflows/omicsgenetraitassociation.nf
+++ b/workflows/omicsgenetraitassociation.nf
@@ -103,50 +103,67 @@ workflow OMICSGENETRAITASSOCIATION {
     //
     // MODULE: PASCAL
     //
-    PASCAL_SUBWORKFLOW (
-        ch_input.pascal,
-        params.pascal_gene_annotation,
-        params.pascal_ref_panel
-    )
-    ch_pascal_output = PASCAL_SUBWORKFLOW.out.pascal_output
-    ch_pascal_cma_format = PASCAL_SUBWORKFLOW.out.cma_format_output
-    ch_versions = ch_versions.mix(PASCAL_SUBWORKFLOW.out.versions)
+    if (!params.skip_pascal) {
+        PASCAL_SUBWORKFLOW (
+            ch_input.pascal,
+            params.pascal_gene_annotation,
+            params.pascal_ref_panel
+        )
+        ch_pascal_output = PASCAL_SUBWORKFLOW.out.pascal_output
+        ch_pascal_cma_format = PASCAL_SUBWORKFLOW.out.cma_format_output
+        ch_versions = ch_versions.mix(PASCAL_SUBWORKFLOW.out.versions)
+    }
 
-    // ch_pascal_output.view()
 
     //
     // SUBWORKFLOW: MMAP_SUBWORKFLOW
     //
-    MMAP_SUBWORKFLOW (
-        params.mmap_gene_list,
-        params.trait,
-        ch_input.twas,
-        params.mmap_pedigree_file,
-        params.mmap_cov_matrix_file
-    )
-    ch_mmap_parsed = MMAP_SUBWORKFLOW.out.parsed_mmap_output
-    ch_mmap_cma_format = MMAP_SUBWORKFLOW.out.cma_format_output
-    ch_versions = ch_versions.mix(MMAP_SUBWORKFLOW.out.versions)
+    if (!params.skip_mmap) {
+        MMAP_SUBWORKFLOW (
+            params.mmap_gene_list,
+            params.trait,
+            ch_input.twas,
+            params.mmap_pedigree_file,
+            params.mmap_cov_matrix_file
+        )
+        ch_mmap_parsed = MMAP_SUBWORKFLOW.out.parsed_mmap_output
+        ch_mmap_cma_format = MMAP_SUBWORKFLOW.out.cma_format_output
+        ch_versions = ch_versions.mix(MMAP_SUBWORKFLOW.out.versions)
+    }
 
     //
     // MODULE: run CMA
     //
+    // TODO: scalable way to combine cma formatted input data (when there are multiple additional sources)
+    if (!params.skip_CMA) {
+        // With both upstream steps skipped neither *_cma_format channel is assigned; fail with a clear message
+        if (params.skip_pascal && params.skip_mmap) {
+            error "Cannot run CMA: both PASCAL and MMAP are skipped, so there is no input. Enable one of them or set skip_CMA to true."
+        }
 
-    // ch_pascal_cma_format.view()
-    // ch_mmap_cma_format.view()
+        // Mix in the CMA-formatted output of each subworkflow that actually ran
+        ch_cma_inputs = Channel.empty()
+        if (!params.skip_pascal) {
+            ch_cma_inputs = ch_cma_inputs.mix(ch_pascal_cma_format)
+        }
+        if (!params.skip_mmap) {
+            ch_cma_inputs = ch_cma_inputs.mix(ch_mmap_cma_format)
+        }
+        ch_cma_input_files = ch_cma_inputs.toList()
 
-    ch_cma_input_files = ch_pascal_cma_format
-        .mix(ch_mmap_cma_format)
-        .toList()
+        CMA_SUBWORKFLOW (
+            ch_cma_input_files,
+            params.trait,
+            []
+        )
+        ch_pval = CMA_SUBWORKFLOW.out.pval
+            .collect()
+        ch_versions = ch_versions.mix(CMA_SUBWORKFLOW.out.versions)
+    }
 
-    CMA_SUBWORKFLOW (
-        ch_cma_input_files,
-        params.trait,
-        []
-    )
-    ch_pval = CMA_SUBWORKFLOW.out.pval
-        .collect()
-    ch_versions = ch_versions.mix(CMA_SUBWORKFLOW.out.versions)
+    if (params.skip_CMA && !params.skip_pascal) {
+        ch_pval = ch_pascal_cma_format
+    }
 
     //
     // MODULE: PREPROCESSFORPASCAL