Skip to content

Commit

Permalink
Added nftests to gseagsea (#7266)
Browse files Browse the repository at this point in the history
* Added nftests to gseagsea

* Update input files

* fix nftests

* Address PR comments

* Add seed setting parameter for stabilizing results

* Fix linting

* Add default random seed

* Update datasets paths

---------

Co-authored-by: Simon Pearce <[email protected]>
  • Loading branch information
nschcolnicov and SPPearce authored Jan 13, 2025
1 parent 8dd6297 commit 258399c
Show file tree
Hide file tree
Showing 8 changed files with 853 additions and 749 deletions.
92 changes: 76 additions & 16 deletions modules/nf-core/gsea/gsea/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ process GSEA_GSEA {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/gsea:4.3.2--hdfd78af_0':
'biocontainers/gsea:4.3.2--hdfd78af_0' }"
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0f/0f4fe28961396eeeaa98484cb4f2db5c79abfdf117700df132312fe5c41bff81/data':
'community.wave.seqera.io/library/gsea:4.3.2--a7421d7504fd7c81' }"

input:
tuple val(meta), path(gct), path(cls), path(gene_sets)
Expand All @@ -26,29 +26,27 @@ process GSEA_GSEA {
tuple val(meta), path("*heat_map_1.png") , emit: heatmap
tuple val(meta), path("*pvalues_vs_nes_plot.png") , emit: pvalues_vs_nes_plot
tuple val(meta), path("*ranked_list_corr_2.png") , emit: ranked_list_corr
tuple val(meta), path("*butterfly_plot.png") , emit: butterfly_plot, optional: true
tuple val(meta), path("*[!gene_set_size|gsea_report|ranked_gene_list]*.tsv"), emit: gene_set_tsv, optional: true
tuple val(meta), path("*[!gsea_report|heat_map_corr_plot|index|pos_snapshot|neg_snapshot]*.html"), emit: gene_set_html, optional: true
tuple val(meta), path("*[!butterfly|enplot|global_es_histogram|gset_rnd_es_dist|heat_map|pvalues_vs_nes_plot|ranked_list_corr]*.png"), emit: gene_set_heatmap, optional: true
tuple val(meta), path("*_snapshot*.html") , emit: snapshot, optional: true
tuple val(meta), path("*enplot*.png") , emit: gene_set_enplot, optional: true
tuple val(meta), path("*gset_rnd_es_dist*.png") , emit: gene_set_dist, optional: true
tuple val(meta), path("*.zip") , emit: archive, optional: true
tuple val(meta), path("*butterfly_plot.png") , emit: butterfly_plot , optional: true
tuple val(meta), path("gene_sets_*.tsv") , emit: gene_set_tsv , optional: true
tuple val(meta), path("gene_sets_*.html") , emit: gene_set_html , optional: true
tuple val(meta), path("gene_sets_*.png") , emit: gene_set_heatmap, optional: true
tuple val(meta), path("*_snapshot*.html") , emit: snapshot , optional: true
tuple val(meta), path("*enplot*.png") , emit: gene_set_enplot , optional: true
tuple val(meta), path("*gset_rnd_es_dist*.png") , emit: gene_set_dist , optional: true
tuple val(meta), path("*.zip") , emit: archive , optional: true
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def VERSION = '4.3.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"

// Remove any trailing dots from prefix when passed as report label, so
// GSEA doesn't produce double-dotted top-level outputs
def rpt_label = prefix.replaceAll('\\.$', '')

def rpt_label = prefix.replaceAll('\\.$', '') // Remove any trailing dots from prefix when passed as report label, so GSEA doesn't produce double-dotted top-level outputs
def chip_command = chip ? "-chip $chip -collapse true" : ''
def VERSION = '4.3.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
if (!(args ==~ /.*-rnd_seed.*/)) {args += " -rnd_seed 10"}

"""
# Run GSEA
Expand Down Expand Up @@ -78,9 +76,71 @@ process GSEA_GSEA {
sed -i.bak "s/\$f/${prefix}\${f}/g" *.rpt *.html && rm *.bak
done
# Rename files so that they can be properly referenced by the output channels
# Function to rename files based on the given pattern
rename_files() {
local pattern=\$1
local exclude_patterns=\$2
local extension=\$3
# Find files matching the pattern but not matching the exclusion patterns
find . -type f -name "\$pattern" | while read -r file; do
# Exclude files based on the provided exclusion patterns
if ! echo "\$file" | grep -qE "\$exclude_patterns"; then
# Rename the file by adding the prefix "gene_sets_"
mv "\$file" "\$(dirname "\$file")/gene_sets_\$(basename "\$file")"
fi
done
}
# Tag the per-gene-set outputs with the "gene_sets_" prefix, one file type at
# a time. The second argument lists the name fragments of the summary outputs
# that must keep their original names.

# TSV tables
rename_files "*.tsv" "gene_set_size|gsea_report|ranked_gene_list" ".tsv"

# HTML pages
rename_files "*.html" "gsea_report|heat_map_corr_plot|index|pos_snapshot|neg_snapshot" ".html"

# PNG images
rename_files "*.png" "butterfly|enplot|global_es_histogram|gset_rnd_es_dist|heat_map|pvalues_vs_nes_plot|ranked_list_corr" ".png"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gsea: $VERSION
END_VERSIONS
"""

stub:
// Stub run: create empty placeholder files for the report outputs and write
// versions.yml, without executing GSEA.
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '4.3.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
// NOTE(review): `reference` and `target` are not defined in this section;
// presumably they are process inputs declared outside this view - confirm.
// NOTE(review): the `ranked_gene_list*.tsv` argument to touch contains a glob
// that matches nothing in a fresh work directory, so a file with a literal '*'
// in its name is created. Left unchanged because existing stub snapshots may
// record that name.
"""
touch ${prefix}.rpt
touch ${prefix}.index.html
touch ${prefix}.heat_map_corr_plot.html
touch ${prefix}.gsea_report_for_${reference}.tsv
touch ${prefix}.gsea_report_for_${reference}.html
touch ${prefix}.gsea_report_for_${target}.tsv
touch ${prefix}.gsea_report_for_${target}.html
touch ${prefix}.ranked_gene_list*.tsv
touch ${prefix}.gene_set_sizes.tsv
touch ${prefix}.global_es_histogram.png
touch ${prefix}.heat_map_1.png
touch ${prefix}.pvalues_vs_nes_plot.png
touch ${prefix}.ranked_list_corr_2.png

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    gsea: $VERSION
END_VERSIONS
"""
}
13 changes: 7 additions & 6 deletions modules/nf-core/gsea/gsea/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -182,31 +182,31 @@ output:
type: map
description: |
Groovy Map containing metadata, at a minimum an id e.g. [ id:'test' ]
- "*[!gene_set_size|gsea_report|ranked_gene_list]*.tsv":
- "gene_sets_*.tsv":
type: list
description: Where -make_sets is not set to false, TSV files, one file for each
gene set, with detail on enrichment for each gene
pattern: "[!gene_set_size|gsea_report|ranked_gene_list]*.tsv"
pattern: "gene_sets_*.tsv"
- gene_set_html:
- meta:
type: map
description: |
Groovy Map containing metadata, at a minimum an id e.g. [ id:'test' ]
- "*[!gsea_report|heat_map_corr_plot|index|pos_snapshot|neg_snapshot]*.html":
- "gene_sets_*.html":
type: list
description: Where -make_sets is not set to false, HTML files, one file for
each gene set, with detail on enrichment for each gene
pattern: "[!gsea_report|heat_map_corr_plot|index|pos_snapshot|neg_snapshot]*.html"
pattern: "gene_sets_*.html"
- gene_set_heatmap:
- meta:
type: map
description: |
Groovy Map containing metadata, at a minimum an id e.g. [ id:'test' ]
- "*[!butterfly|enplot|global_es_histogram|gset_rnd_es_dist|heat_map|pvalues_vs_nes_plot|ranked_list_corr]*.png":
- "gene_sets_*.png":
type: list
description: Where -make_sets is not set to false, PNG-format heatmaps, one
file for each gene set, showing expression for each gene
pattern: "[!butterfly|enplot|global_es_histogram|gset_rnd_es_dist|heat_map|pvalues_vs_nes_plot|ranked_list_corr]*.png"
pattern: "gene_sets_*.png"
- snapshot:
- meta:
type: map
Expand Down Expand Up @@ -255,3 +255,4 @@ authors:
- "@pinin4fjords"
maintainers:
- "@pinin4fjords"
- "@nschcolnicov"
78 changes: 78 additions & 0 deletions modules/nf-core/gsea/gsea/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// nf-test suite for the GSEA_GSEA process: one real run and one stub run,
// both fed the same three inputs.
nextflow_process {

    name "Test Process GSEA_GSEA"
    script "../main.nf"
    process "GSEA_GSEA"

    tag "modules"
    tag "modules_nfcore"
    tag "gsea_gsea"
    tag "gsea"
    tag "gsea/gsea"

    // Real run. Outputs flagged as unstable in the snapshot are compared by
    // file name only; all others are snapshotted as emitted.
    test("test") {

        when {
            process {
                """
                input[0] = [['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'], file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_treatment_Control_Treated.gct", checkIfExists:true), file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_genotype_WT_KO.cls", checkIfExists:true), file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists:true)]
                input[1] = ['WT', 'KO']
                input[2] = file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Mus_musculus.anno.feature_metadata.chip", checkIfExists:true)
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    // unstable file: name only
                    process.out.rpt.collect { _meta, path -> file(path).name },
                    // unstable file: name only
                    process.out.index_html.collect { _meta, path -> file(path).name },
                    process.out.heat_map_corr_plot,
                    process.out.report_tsvs_ref,
                    // unstable file: name only
                    process.out.report_htmls_ref.collect { _meta, path -> file(path).name },
                    process.out.report_tsvs_target,
                    // unstable file: name only
                    process.out.report_htmls_target.collect { _meta, path -> file(path).name },
                    process.out.ranked_gene_list,
                    process.out.gene_set_sizes,
                    // unstable files: names only
                    process.out.histogram.collect { _meta, path -> file(path).name },
                    process.out.heatmap.collect { _meta, path -> file(path).name },
                    process.out.pvalues_vs_nes_plot.collect { _meta, path -> file(path).name },
                    process.out.ranked_list_corr.collect { _meta, path -> file(path).name },
                    process.out.butterfly_plot.collect { _meta, path -> file(path).name },
                    process.out.gene_set_tsv,
                    // unstable files: names only
                    process.out.gene_set_html[0][1].collect { path -> file(path).name },
                    // unstable file and unstable name
                    process.out.gene_set_heatmap[0][1].collect { path -> file(path).name },
                    // unstable files: names only
                    process.out.snapshot[0][1].collect { path -> file(path).name },
                    // unstable file and unstable name
                    process.out.gene_set_enplot[0][1].collect { path -> file(path).name },
                    // unstable files: names only
                    process.out.gene_set_dist[0][1].collect { path -> file(path).name },
                    process.out.archive,
                    process.out.versions
                    ).match()
                }
            )
        }
    }

    // Stub run: the whole output map is snapshotted as-is.
    test("stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [['id':'Condition_genotype_WT_KO', 'variable':'Condition genotype', 'reference':'WT', 'target':'KO', 'blocking':'batch'], file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_treatment_Control_Treated.gct", checkIfExists:true), file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Condition_genotype_WT_KO.cls", checkIfExists:true), file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt", checkIfExists:true)]
                input[1] = ['WT', 'KO']
                input[2] = file("https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/mus_musculus/gene_set_analysis/Mus_musculus.anno.feature_metadata.chip", checkIfExists:true)
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
Loading

0 comments on commit 258399c

Please sign in to comment.