diff --git a/.ci_stuff/test_dag.sh b/.ci_stuff/test_dag.sh index 8bb5ae1a2..255ffddbe 100755 --- a/.ci_stuff/test_dag.sh +++ b/.ci_stuff/test_dag.sh @@ -1,12 +1,7 @@ #!/bin/bash set -ex -if [[ ${CI:-"false"} == "true" ]]; then - export PATH="$HOME/miniconda/bin:$PATH" - hash -r - python -m pip install --no-deps --ignore-installed . -fi -# Needed by DNA, HiC, mRNA-seq, WGBS and scRNA-seq workflows +# Needed by DNA, HiC, mRNAseq, WGBS and scRNAseq workflows mkdir -p PE_input touch PE_input/sample1_R1.fastq.gz PE_input/sample1_R2.fastq.gz \ PE_input/sample2_R1.fastq.gz PE_input/sample2_R2.fastq.gz \ @@ -165,282 +160,282 @@ touch /tmp/genes.t2g snakePipes config --tempDir /tmp # createIndices -WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --genome ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtf ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz blah | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 229 ]; then exit 1 ; fi -WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --genome ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtf ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz --rmskURL http://hgdownload.soe.ucsc.edu/goldenPath/dm6/database/rmsk.txt.gz blah | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 237 ]; then exit 1 ; fi -WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --genome ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtf ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz --rmskURL 
http://hgdownload.soe.ucsc.edu/goldenPath/dm6/database/rmsk.txt.gz blah | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 237 ]; then exit 1 ; fi +WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --genome ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtf ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz blah | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 210 ]; then exit 1 ; fi +WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --genome ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtf ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz --rmskURL http://hgdownload.soe.ucsc.edu/goldenPath/dm6/database/rmsk.txt.gz blah | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 218 ]; then exit 1 ; fi +WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --genome ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtf ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz --rmskURL http://hgdownload.soe.ucsc.edu/goldenPath/dm6/database/rmsk.txt.gz blah | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 218 ]; then exit 1 ; fi # spikein -WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --genomeURL ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtfURL 
ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz --spikeinGenomeURL ftp://ftp.ensembl.org/pub/release-79/fasta/drosophila_melanogaster/dna/Drosophila_melanogaster.BDGP6.dna_sm.toplevel.fa.gz --spikeinGtfURL ftp://ftp.ensembl.org/pub/release-96/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.22.96.gtf.gz --rmskURL http://hgdownload.soe.ucsc.edu/goldenPath/dm6/database/rmsk.txt.gz blah | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 284 ]; then exit 1 ; fi +WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --genomeURL ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtfURL ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz --spikeinGenomeURL ftp://ftp.ensembl.org/pub/release-79/fasta/drosophila_melanogaster/dna/Drosophila_melanogaster.BDGP6.dna_sm.toplevel.fa.gz --spikeinGtfURL ftp://ftp.ensembl.org/pub/release-96/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.22.96.gtf.gz --rmskURL http://hgdownload.soe.ucsc.edu/goldenPath/dm6/database/rmsk.txt.gz blah | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 261 ]; then exit 1 ; fi # DNA mapping -WC=`DNA-mapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp " | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1420 ]; then exit 1 ; fi -WC=`DNA-mapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --mapq 20 --dedup --properPairs | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1521 ]; then 
exit 1 ; fi -WC=`DNA-mapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --mapq 20 --dedup --properPairs --bcExtract | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1456 ]; then exit 1 ; fi -WC=`DNA-mapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --mapq 20 --UMIDedup --properPairs --bcExtract | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1557 ]; then exit 1 ; fi -WC=`DNA-mapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --mapq 20 --UMIDedup --properPairs | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1622 ]; then exit 1 ; fi -WC=`DNA-mapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --trim --mapq 20 --UMIDedup --properPairs | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1622 ]; then exit 1 ; fi -WC=`DNA-mapping -i SE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1272 ]; then exit 1 ; fi -WC=`DNA-mapping -i SE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --mapq 20 --dedup --properPairs | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1373 ]; then exit 1 ; fi -WC=`DNA-mapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " 
--dryrun --conda-prefix /tmp" --DAG --trim --aligner bwa | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1402 ]; then exit 1 ; fi -WC=`DNA-mapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --trim --aligner bwa-mem2 | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1402 ]; then exit 1 ; fi +WC=`DNAmapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp " | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1305 ]; then exit 1 ; fi +WC=`DNAmapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --mapq 20 --dedup --properPairs | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1397 ]; then exit 1 ; fi +WC=`DNAmapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --mapq 20 --dedup --properPairs --bcExtract | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1323 ]; then exit 1 ; fi +WC=`DNAmapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --mapq 20 --UMIDedup --properPairs --bcExtract | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1397 ]; then exit 1 ; fi +WC=`DNAmapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --mapq 20 --UMIDedup --properPairs | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed 
'/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1471 ]; then exit 1 ; fi +WC=`DNAmapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --trim --mapq 20 --UMIDedup --properPairs | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1471 ]; then exit 1 ; fi +WC=`DNAmapping -i SE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1157 ]; then exit 1 ; fi +WC=`DNAmapping -i SE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --mapq 20 --dedup --properPairs | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1249 ]; then exit 1 ; fi +WC=`DNAmapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --trim --aligner bwa | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1296 ]; then exit 1 ; fi +WC=`DNAmapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --trim --aligner bwa-mem2 | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1296 ]; then exit 1 ; fi #allelic -WC=`DNA-mapping -m allelic-mapping -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2466 ]; then exit 1 ; fi -WC=`DNA-mapping -m 
allelic-mapping -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2445 ]; then exit 1 ; fi -WC=`DNA-mapping -m allelic-mapping -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2466 ]; then exit 1 ; fi +WC=`DNAmapping -m allelic-mapping -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2237 ]; then exit 1 ; fi +WC=`DNAmapping -m allelic-mapping -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2218 ]; then exit 1 ; fi +WC=`DNAmapping -m allelic-mapping -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2237 ]; then exit 1 ; fi -# ChIP-seq -WC=`ChIP-seq -d BAM_input --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' 
| wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 407 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 399 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 630 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_broad_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 840 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 609 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --singleEnd .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 628 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --bigWigType log2ratio .ci_stuff/organism.yaml 
.ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 562 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 927 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR --useSpikeInForNorm .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1371 ]; then exit 1 ; fi +# ChIPseq +WC=`ChIPseq -d BAM_input --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 368 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 347 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 568 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv 
--snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_broad_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 736 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 537 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --singleEnd .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 566 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --bigWigType log2ratio .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 506 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 823 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR --useSpikeInForNorm .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f 
.ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1235 ]; then exit 1 ; fi #noInput -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 403 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 349 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 637 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR --useSpikeInForNorm .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 864 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 365 ]; then exit 1 ; fi 
+WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 307 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 569 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR --useSpikeInForNorm .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 781 ]; then exit 1 ; fi # fromBAM -WC=`ChIP-seq -d outdir --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1187 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1092 ]; then exit 1 ; fi # fromBam and noInput -WC=`ChIP-seq -d outdir --sampleSheet .ci_stuff/test_sampleSheet.tsv 
--snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 801 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 739 ]; then exit 1 ; fi # spikein -WC=`ChIP-seq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1142 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1033 ]; then exit 1 ; fi # spikein and noInput -WC=`ChIP-seq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 698 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" 
.ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 633 ]; then exit 1 ; fi # fromBAM and spikein -WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1447 ]; then exit 1 ; fi -WC=`ChIP-seq -d outdir --useSpikeInForNorm --getSizeFactorsFrom TSS --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1178 ]; then exit 1 ; fi -WC=`ChIP-seq -d outdir --useSpikeInForNorm --getSizeFactorsFrom input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1149 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1329 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --useSpikeInForNorm --getSizeFactorsFrom TSS 
--sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1086 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --useSpikeInForNorm --getSizeFactorsFrom input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1058 ]; then exit 1 ; fi # allelic -WC=`ChIP-seq -d allelic_BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_short_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 423 ]; then exit 1 ; fi +WC=`ChIPseq -d allelic_BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_short_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 385 ]; then exit 1 ; fi #multiComp -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 842 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix 
/tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 861 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_broad_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1052 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 757 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 760 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_broad_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 925 ]; then exit 1 ; fi #multiComp and fromBam -WC=`ChIP-seq -d outdir --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | 
sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1399 ]; then exit 1 ; fi -WC=`ChIP-seq -d outdir --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1418 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1281 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1284 ]; then exit 1 ; fi #multiComp and spikein -WC=`ChIP-seq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1335 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | 
sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1354 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1205 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1208 ]; then exit 1 ; fi #multiComp and spikein and noInput -WC=`ChIP-seq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 853 ]; then exit 1 ; fi -WC=`ChIP-seq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 839 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f 
.ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 771 ]; then exit 1 ; fi +WC=`ChIPseq -d BAM_input --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 747 ]; then exit 1 ; fi #multiComp and spikein and fromBam -WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1640 ]; then exit 1 ; fi -WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1045 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1501 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" 
--peakCaller Genrich --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 947 ]; then exit 1 ; fi #multiComp and spikein and fromBam and noInput -WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1059 ]; then exit 1 ; fi -WC=`ChIP-seq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1045 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 971 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --useSpikeInForNorm --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if 
[ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 947 ]; then exit 1 ; fi #multiComp and noInput and fromBam -WC=`ChIP-seq -d outdir --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1190 ]; then exit 1 ; fi +WC=`ChIPseq -d outdir --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller SEACR --fromBAM BAM_input/filtered_bam/ .ci_stuff/spikein_organism.yaml .ci_stuff/ChIP.sample_noControl_config.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1081 ]; then exit 1 ; fi -# mRNA-seq -WC=`mRNA-seq -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1673 ]; then exit 1 ; fi -WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1685 ]; then exit 1 ; fi -WC=`mRNA-seq -i PE_input -o output --rMats --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1705 ]; then exit 1 ; fi -WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix 
/tmp" -m "alignment" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1201 ]; then exit 1 ; fi -WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1786 ]; then exit 1 ; fi -WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment-free,deepTools_qc" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1876 ]; then exit 1 ; fi -WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --bcExtract --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1721 ]; then exit 1 ; fi -WC=`mRNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --bcExtract --UMIDedup --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1813 ]; then exit 1 ; fi -WC=`mRNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1526 ]; then exit 1 ; fi -WC=`mRNA-seq -i 
SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1053 ]; then exit 1 ; fi -WC=`mRNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1627 ]; then exit 1 ; fi -WC=`mRNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment-free,deepTools_qc" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1717 ]; then exit 1 ; fi -WC=`mRNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --fastqc .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1829 ]; then exit 1 ; fi -WC=`mRNA-seq -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1151 ]; then exit 1 ; fi +# mRNAseq +WC=`mRNAseq -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1536 ]; then exit 1 ; fi +WC=`mRNAseq -i 
PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1547 ]; then exit 1 ; fi +WC=`mRNAseq -i PE_input -o output --rMats --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1566 ]; then exit 1 ; fi +WC=`mRNAseq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1106 ]; then exit 1 ; fi +WC=`mRNAseq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1639 ]; then exit 1 ; fi +WC=`mRNAseq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment-free,deepTools_qc" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1736 ]; then exit 1 ; fi +WC=`mRNAseq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --bcExtract --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1565 
]; then exit 1 ; fi +WC=`mRNAseq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --bcExtract --UMIDedup --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1639 ]; then exit 1 ; fi +WC=`mRNAseq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1389 ]; then exit 1 ; fi +WC=`mRNAseq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 958 ]; then exit 1 ; fi +WC=`mRNAseq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment,deepTools_qc" --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1481 ]; then exit 1 ; fi +WC=`mRNAseq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" -m "alignment-free,deepTools_qc" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1578 ]; then exit 1 ; fi +WC=`mRNAseq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --fastqc .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc 
-l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1665 ]; then exit 1 ; fi +WC=`mRNAseq -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1058 ]; then exit 1 ; fi #multiple comparison groups -WC=`mRNA-seq --mode alignment,alignment-free -i PE_input -o output --rMats --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1598 ]; then exit 1 ; fi +WC=`mRNAseq --mode alignment,alignment-free -i PE_input -o output --rMats --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1476 ]; then exit 1 ; fi # three prime sequencing -WC=`mRNA-seq -i PE_input -o output --mode three-prime-seq --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1775 ]; then exit 1 ; fi -WC=`mRNA-seq -i PE_input -o output --mode three-prime-seq,deepTools_qc --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2259 ]; then exit 1 ; fi +WC=`mRNAseq -i PE_input -o output --mode three-prime-seq --sampleSheet 
.ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1668 ]; then exit 1 ; fi +WC=`mRNAseq -i PE_input -o output --mode three-prime-seq,deepTools_qc --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2109 ]; then exit 1 ; fi #allelic -WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2527 ]; then exit 1 ; fi -WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i allelic_BAM_input/filtered_bam --fromBAM --bamExt '.filtered.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1408 ]; then exit 1 ; fi -WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2539 ]; then exit 1 ; fi -WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet 
.ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2518 ]; then exit 1 ; fi -WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2539 ]; then exit 1 ; fi -WC=`mRNA-seq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 3294 ]; then exit 1 ; fi -WC=`mRNA-seq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 659 ]; then exit 1 ; fi -WC=`mRNA-seq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 659 ]; then exit 1 ; fi +WC=`mRNAseq -m allelic-mapping,deepTools_qc 
-i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2304 ]; then exit 1 ; fi +WC=`mRNAseq -m allelic-mapping,deepTools_qc -i allelic_BAM_input/filtered_bam --fromBAM --bamExt '.filtered.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1285 ]; then exit 1 ; fi +WC=`mRNAseq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2315 ]; then exit 1 ; fi +WC=`mRNAseq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 2296 ]; then exit 1 ; fi +WC=`mRNAseq -m allelic-mapping,deepTools_qc -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ 
$WC -ne 2315 ]; then exit 1 ; fi +WC=`mRNAseq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 3012 ]; then exit 1 ; fi +WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 638 ]; then exit 1 ; fi +WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 638 ]; then exit 1 ; fi #allelic+multicomp -WC=`mRNA-seq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 690 ]; then exit 1 ; fi -WC=`mRNA-seq -m allelic-mapping,deepTools_qc -i allelic_BAM_input/filtered_bam --fromBAM --bamExt '.filtered.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | 
grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1439 ]; then exit 1 ; fi -WC=`mRNA-seq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 3302 ]; then exit 1 ; fi +WC=`mRNAseq -m allelic-counting -i allelic_BAM_input/allelic_bams --fromBAM --bamExt '.sorted.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 668 ]; then exit 1 ; fi +WC=`mRNAseq -m allelic-mapping,deepTools_qc -i allelic_BAM_input/filtered_bam --fromBAM --bamExt '.filtered.bam' -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --SNPfile allelic_input/snpfile.txt --NMaskedIndex allelic_input/Ngenome .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1315 ]; then exit 1 ; fi +WC=`mRNAseq -m allelic-mapping,deepTools_qc,alignment-free -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 3021 ]; then exit 1 ; fi -#noncoding RNA-seq -WC=`noncoding-RNA-seq -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix 
/tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1370 ]; then exit 1 ; fi -WC=`noncoding-RNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1390 ]; then exit 1 ; fi -WC=`noncoding-RNA-seq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1231 ]; then exit 1 ; fi -WC=`noncoding-RNA-seq -i BAM_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 984 ]; then exit 1 ; fi +#ncRNAseq +WC=`ncRNAseq -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1269 ]; then exit 1 ; fi +WC=`ncRNAseq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1289 ]; then exit 1 ; fi +WC=`ncRNAseq -i SE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f 
.ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1131 ]; then exit 1 ; fi +WC=`ncRNAseq -i BAM_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 901 ]; then exit 1 ; fi #multiple comparison groups -WC=`noncoding-RNA-seq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1418 ]; then exit 1 ; fi +WC=`ncRNAseq -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1317 ]; then exit 1 ; fi -# scRNA-seq -#WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` +# scRNAseq +#WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` #if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1038 ]; then exit 1 ; fi -#WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " --dryrun --conda-prefix /tmp" --skipRaceID --splitLib .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` +#WC=`scRNAseq -i PE_input -o output --mode Gruen --snakemakeOptions " 
--dryrun --conda-prefix /tmp" --skipRaceID --splitLib .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` #if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1015 ]; then exit 1 ; fi -WC=`scRNAseq -i PE_input -o output --mode STARsolo --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1808 ]; then exit 1 ; fi -WC=`scRNAseq -i PE_input -o output --mode STARsolo --skipVelocyto --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1614 ]; then exit 1 ; fi -WC=`scRNAseq -i PE_input -o output --mode Alevin --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 761 ]; then exit 1 ; fi -WC=`scRNAseq -i PE_input -o output --mode Alevin --skipVelocyto --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 641 ]; then exit 1 ; fi +WC=`scRNAseq -i PE_input -o output --mode STARsolo --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1642 ]; then exit 1 ; fi +WC=`scRNAseq -i PE_input -o output --mode STARsolo --skipVelocyto --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1467 ]; 
then exit 1 ; fi +WC=`scRNAseq -i PE_input -o output --mode Alevin --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 714 ]; then exit 1 ; fi +WC=`scRNAseq -i PE_input -o output --mode Alevin --skipVelocyto --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 604 ]; then exit 1 ; fi # WGBS -WC=`WGBS -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1430 ]; then exit 1 ; fi -WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1475 ]; then exit 1 ; fi -WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --aligner bwameth2 --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1475 ]; then exit 1 ; fi -WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1486 ]; then exit 1 ; fi -WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --snakemakeOptions " --dryrun 
--conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1062 ]; then exit 1 ; fi -WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --fastqc --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1062 ]; then exit 1 ; fi -WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --skipBamQC --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 559 ]; then exit 1 ; fi +WC=`WGBS -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1300 ]; then exit 1 ; fi +WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1343 ]; then exit 1 ; fi +WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --aligner bwameth2 --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1343 ]; then exit 1 ; fi +WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --GCbias 
.ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1353 ]; then exit 1 ; fi +WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 974 ]; then exit 1 ; fi +WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --fastqc --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 974 ]; then exit 1 ; fi +WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --skipBamQC --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 530 ]; then exit 1 ; fi -# ATAC-seq -WC=`ATAC-seq -d BAM_input --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 524 ]; then exit 1 ; fi -WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 686 ]; then exit 1 ; fi -WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml | tee 
>(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 807 ]; then exit 1 ; fi -WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller HMMRATAC .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 789 ]; then exit 1 ; fi -WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --maxFragmentSize 120 --qval 0.1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 686 ]; then exit 1 ; fi -WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1160 ]; then exit 1 ; fi +# ATACseq +WC=`ATACseq -d BAM_input --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 460 ]; then exit 1 ; fi +WC=`ATACseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 605 ]; then exit 1 ; fi +WC=`ATACseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed 
'/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 706 ]; then exit 1 ; fi +WC=`ATACseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller HMMRATAC .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 697 ]; then exit 1 ; fi +WC=`ATACseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --maxFragmentSize 120 --qval 0.1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 605 ]; then exit 1 ; fi +WC=`ATACseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1055 ]; then exit 1 ; fi #multicomp -WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 841 ]; then exit 1 ; fi -WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 980 ]; then exit 1 ; fi -WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller HMMRATAC .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda 
environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 944 ]; then exit 1 ; fi -WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --maxFragmentSize 120 --qval 0.1 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 841 ]; then exit 1 ; fi -WC=`ATAC-seq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1315 ]; then exit 1 ; fi +WC=`ATACseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 743 ]; then exit 1 ; fi +WC=`ATACseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller Genrich .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 860 ]; then exit 1 ; fi +WC=`ATACseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --peakCaller HMMRATAC .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 835 ]; then exit 1 ; fi +WC=`ATACseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --maxFragmentSize 120 --qval 0.1 .ci_stuff/organism.yaml | tee 
>(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 743 ]; then exit 1 ; fi +WC=`ATACseq -d BAM_input --sampleSheet .ci_stuff/test_sampleSheet_multiComp.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --fromBAM BAM_input/filtered_bam/ .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1193 ]; then exit 1 ; fi # HiC -WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --correctionMethod ICE .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1012 ]; then exit 1 ; fi -WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 947 ]; then exit 1 ; fi -WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1048 ]; then exit 1 ; fi -WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --enzyme DpnII .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 947 ]; then exit 1 ; fi -WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --noTAD .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 855 ]; then exit 1 ; fi -WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --aligner 
bwa-mem2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 947 ]; then exit 1 ; fi +WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --correctionMethod ICE .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 965 ]; then exit 1 ; fi +WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 891 ]; then exit 1 ; fi +WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 983 ]; then exit 1 ; fi +WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --enzyme DpnII .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 891 ]; then exit 1 ; fi +WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --noTAD .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 808 ]; then exit 1 ; fi +WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --aligner bwa-mem2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 891 ]; then exit 1 ; fi # preprocessing -WC=`preprocessing -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --fastqc --optDedupDist 2500 
| tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 807 ]; then exit 1 ; fi -WC=`preprocessing -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --fastqc --optDedupDist 2500 | tee >(cat 1>&2) | grep -v "conda installation\|Conda environment" | sed '/^\s*$/d' | wc -l` -if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 807 ]; then exit 1 ; fi +WC=`preprocessing -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --fastqc --optDedupDist 2500 | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 761 ]; then exit 1 ; fi +WC=`preprocessing -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --fastqc --optDedupDist 2500 | tee >(cat 1>&2) | grep -v -f .ci_stuff/test_ignore_patterns.txt | sed '/^\s*$/d' | wc -l` +if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 761 ]; then exit 1 ; fi rm -rf SE_input PE_input BAM_input output allelic_input allelic_BAM_input /tmp/genes.gtf /tmp/genome.fa /tmp/genome.fa.fai /tmp/rmsk.txt /tmp/genes.bed /tmp/spikein_genes.gtf diff --git a/.ci_stuff/test_ignore_patterns.txt b/.ci_stuff/test_ignore_patterns.txt new file mode 100644 index 000000000..2e0fe8cc0 --- /dev/null +++ b/.ci_stuff/test_ignore_patterns.txt @@ -0,0 +1,10 @@ +run involves checkpoint jobs +conda installation +Conda environment +Using profile +Execute +sanitize_local_storage_copies +tracemalloc +wildcard_constraints +SyntaxWarning: invalid escape sequence +Would remove temporary \ No newline at end of file diff --git a/.github/conda_ci.yml b/.github/conda_ci.yml new file mode 100644 index 000000000..43cafdbde --- /dev/null +++ b/.github/conda_ci.yml @@ -0,0 +1,6 @@ +name: conda_ci +dependencies: + - python=3.11 + - anaconda-client + - conda-build + - conda-verify \ No newline at end of file diff --git a/.github/condarc.yml b/.github/condarc.yml new 
file mode 100644 index 000000000..1458d5e0a --- /dev/null +++ b/.github/condarc.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +channel_priority: 'strict' +show_channel_urls: True \ No newline at end of file diff --git a/.github/snakePipesEnvCI.yml b/.github/snakePipesEnvCI.yml deleted file mode 100644 index 4a273b9d2..000000000 --- a/.github/snakePipesEnvCI.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: snakePipes_CI -dependencies: - - python=3.9 - - snakemake=7.18.2 - - thefuzz[speedup] - - mock - - sphinx=4.2.0 - - sphinx-argparse - - sphinx_rtd_theme=1.0.0 - - flake8 - - coreutils - - mamba - - graphviz diff --git a/.github/workflows/conda-build.yml b/.github/workflows/conda-build.yml new file mode 100644 index 000000000..54b6f2f9d --- /dev/null +++ b/.github/workflows/conda-build.yml @@ -0,0 +1,19 @@ +name: conda-build + +on: [pull_request, push] + +jobs: + condaBuild: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: conda-incubator/setup-miniconda@v3 + with: + environment-file: .github/conda_ci.yml + condarc-file: .github/condarc.yml + - name: buildSnakePipes + uses: uibcdf/action-build-and-upload-conda-packages@v1.3.0 + with: + meta_yaml_dir: conda-recipe + python-version: 3.11 + upload: false diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index d30591063..49964a1a6 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -8,71 +8,71 @@ defaults: jobs: + pip: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.11', '3.12'] + optdeps: [".", ".[actions]", ".[docs]"] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: pip + run: | + pip install ${{ matrix.optdeps }} docs: + needs: pip runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: mamba-org/setup-micromamba@main + - uses: actions/setup-python@v5 with: - environment-file: .github/snakePipesEnvCI.yml - 
condarc: | - channels: - - conda-forge - - bioconda - - defaults - channel_priority: 'strict' - cache-downloads: true + python-version: 3.11 - name: Install snakePipes run: | - micromamba run -n snakePipes_CI python -m pip install . --no-deps --ignore-installed -vvv + pip install .[docs] - name: docs run: | - micromamba activate snakePipes_CI snakePipes config --tempDir /tmp cd docs make html - flake: + lint: + needs: pip runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: mamba-org/setup-micromamba@main + - uses: actions/setup-python@v5 with: - environment-file: .github/snakePipesEnvCI.yml - condarc: | - channels: - - conda-forge - - bioconda - - defaults - channel_priority: 'strict' - cache-downloads: true + python-version: 3.11 - name: Install snakePipes run: | - micromamba run -n snakePipes_CI python -m pip install . --no-deps --ignore-installed -vvv - - name: flake + pip install .[actions] + - name: ruff run: | - micromamba activate snakePipes_CI - snakePipes config --tempDir /tmp - flake8 --ignore=E501,E722,E402 --exclude tests,docs/conf.py,build/lib/snakePipes/shared/tools/three_prime_seq,snakePipes/shared/tools/three_prime_seq . + ruff check . CI: + needs: pip runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: mamba-org/setup-micromamba@main + - uses: conda-incubator/setup-miniconda@v3 with: - environment-file: .github/snakePipesEnvCI.yml - condarc: | - channels: - - conda-forge - - bioconda - - defaults - channel_priority: 'strict' - cache-downloads: true + environment-file: .github/conda_ci.yml + activate-environment: conda_ci + condarc-file: .github/condarc.yml - name: Install snakePipes run: | - micromamba run -n snakePipes_CI python -m pip install . --no-deps --ignore-installed -vvv + pip install . 
+ - name: print_conda_env_export + run: | + conda env export + - name: print_pip_list + run: | + pip list - name: CI run: | - micromamba activate snakePipes_CI snakePipes config --tempDir /tmp ./.ci_stuff/test_dag.sh createEnvs: @@ -104,22 +104,16 @@ jobs: ] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: mamba-org/setup-micromamba@main + - uses: actions/checkout@v4 + - uses: conda-incubator/setup-miniconda@v3 with: - environment-file: .github/snakePipesEnvCI.yml - condarc: | - channels: - - conda-forge - - bioconda - - defaults - channel_priority: 'strict' - cache-downloads: true + environment-file: .github/conda_ci.yml + activate-environment: conda_ci + condarc-file: .github/condarc.yml - name: install snakePipes run: | - micromamba run -n snakePipes_CI python -m pip install . --no-deps --ignore-installed -vvv + pip install . - name: createEnvs run: | - micromamba activate snakePipes_CI snakePipes config --tempDir /tmp - snakePipes createEnvs --autodetectCondaEnvDir --only ${{matrix.envs}} + snakePipes createEnvs --only ${{matrix.envs}} diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index 72115e2c7..e08831e48 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -36,22 +36,16 @@ jobs: runs-on: macos-latest steps: - uses: actions/checkout@v4 - - uses: mamba-org/setup-micromamba@main + - uses: conda-incubator/setup-miniconda@v3 with: - environment-file: .github/snakePipesEnvCI.yml - condarc: | - channels: - - conda-forge - - bioconda - - defaults - channel_priority: 'strict' - cache-downloads: true + environment-file: .github/conda_ci.yml + activate-environment: conda_ci + condarc-file: .github/condarc.yml - name: install snakePipes_OSX run: | - micromamba run -n snakePipes_CI python -m pip install . --no-deps --ignore-installed -vvv + pip install . 
- name: createEnvsOSX run: | - micromamba activate snakePipes_CI conda config --add subdirs osx-64 - snakePipes createEnvs --autodetectCondaEnvDir --only ${{matrix.envs}} + snakePipes createEnvs --only ${{matrix.envs}} diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index fe58ba226..65ba45630 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -11,40 +11,26 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: mamba-org/setup-micromamba@main + - uses: conda-incubator/setup-miniconda@v3 with: environment-file: snakePipes/shared/rules/envs/rna_seq.yaml - environment-name: rnaseqenv - condarc: | - channels: - - conda-forge - - bioconda - - defaults - channel_priority: 'strict' - cache-downloads: true - post-cleanup: 'none' + activate-environment: snakepipes_RNAseq_environment_3.0 + condarc-file: .github/condarc.yml - name: create_starix run: | - micromamba activate rnaseqenv gunzip -c tests/data/genomes/genome_chr17.fa.gz > genome_chr17.fa gunzip -c tests/data/genomes/genes_chr17.gtf.gz > genes_chr17.gtf STAR --runThreadN 4 --runMode genomeGenerate --genomeDir tests/data/mRNA_STAR --genomeFastaFiles genome_chr17.fa --sjdbGTFfile genes_chr17.gtf --sjdbOverhang 100 --genomeSAindexNbases 12 - - uses: mamba-org/setup-micromamba@main + - uses: conda-incubator/setup-miniconda@v3 with: - environment-file: .github/snakePipesEnvCI.yml - condarc: | - channels: - - conda-forge - - bioconda - - defaults - channel_priority: 'strict' - cache-downloads: true + environment-file: .github/conda_ci.yml + activate-environment: conda_ci + condarc-file: .github/condarc.yml - name: Install snakePipes run: | - micromamba run -n snakePipes_CI python -m pip install . 
--no-deps --ignore-installed -vvv + pip install .[actions] - name: pytest run: | - micromamba activate snakePipes_CI snakePipes config --tempDir /tmp --condaEnvDir ./ snakePipes createEnvs --only CONDA_SHARED_ENV CONDA_RNASEQ_ENV pytest --verbosity=2 -rP tests/test_mRNA.py @@ -52,21 +38,15 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: mamba-org/setup-micromamba@main + - uses: conda-incubator/setup-miniconda@v3 with: - environment-file: .github/snakePipesEnvCI.yml - condarc: | - channels: - - conda-forge - - bioconda - - defaults - channel_priority: 'strict' - cache-downloads: true + environment-file: .github/conda_ci.yml + activate-environment: conda_ci + condarc-file: .github/condarc.yml - name: Install snakePipes run: | - micromamba run -n snakePipes_CI python -m pip install . --no-deps --ignore-installed -vvv + pip install .[actions] - name: pytest run: | - micromamba activate snakePipes_CI snakePipes config --tempDir /tmp --condaEnvDir ./ - pytest --verbosity=2 -rP tests/test_jobcounts.py + pytest -n 4 --verbosity=2 -rP tests/test_jobcounts.py diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 28af0721e..5d24820a4 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,7 +3,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.7" + python: "3.12" sphinx: configuration: docs/conf.py @@ -12,4 +12,5 @@ python: install: - method: pip path: . 
- - requirements: docs/requirements.txt + extra_requirements: + - docs diff --git a/.snakemake-workflow-catalog.yml b/.snakemake-workflow-catalog.yml deleted file mode 100644 index c276e9a09..000000000 --- a/.snakemake-workflow-catalog.yml +++ /dev/null @@ -1,4 +0,0 @@ -usage: - software-stack-deployment: - conda: true - report: true diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 99020f458..000000000 --- a/LICENSE +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2019 Max Planck Institute for Immunobiology and Epigenetics - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index bcb05484d..000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -include *.yaml -recursive-include snakePipes/shared * -recursive-include snakePipes/workflows * diff --git a/README.md b/README.md new file mode 100644 index 000000000..473d9a6a1 --- /dev/null +++ b/README.md @@ -0,0 +1,56 @@ +[![linux](https://github.com/maxplanck-ie/snakepipes/actions/workflows/linux.yml/badge.svg)](https://github.com/maxplanck-ie/snakepipes/actions/workflows/linux.yml) +[![osx](https://github.com/maxplanck-ie/snakepipes/actions/workflows/osx.yml/badge.svg)](https://github.com/maxplanck-ie/snakepipes/actions/workflows/osx.yml) +[![pytest](https://github.com/maxplanck-ie/snakepipes/actions/workflows/pytest.yml/badge.svg)](https://github.com/maxplanck-ie/snakepipes/actions/workflows/pytest.yml) +[![readthedocs](https://readthedocs.org/projects/snakepipes/badge/?version=latest)](https://snakepipes.readthedocs.io/en/latest/) +[![citation](https://zenodo.org/badge/54579435.svg)](https://zenodo.org/badge/latestdoi/54579435) + +# SnakePipes + +snakePipes are flexible and powerful workflows built using [snakemake](https://github.com/snakemake/snakemake) that simplify the analysis of NGS data. +![snakePipes](docs/content/images/snakePipes_small.png) + +## Workflows + +- DNAmapping* +- ChIPseq* +- mRNAseq* +- ncRNAseq* +- ATACseq* +- scRNAseq +- HiC +- makePairs* +- Whole Genome Bisulfite Seq/WGBS + +(* also available in allele-specific mode) + +## Installation + +[Conda](https://docs.conda.io/en/latest/#) is a prerequisite for snakePipes, so make sure it is [installed](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) before proceeding.
+ +Afterwards you can create a snakePipes environment containing the installation by running: + + > conda create -n snakepipes -c mpi-ie -c bioconda -c conda-forge snakePipes + +In case you'd like a development version, you can install snakePipes directly from GitHub using pip: + + > git clone git@github.com:maxplanck-ie/snakepipes.git + > cd snakepipes + > pip install . + +Make sure the environment you are installing this version into has Python version 3.11 or later. + +After the installation, some configuration options have to be set; please refer to the documentation for details. + +## Documentation + +For detailed documentation on setup and usage, please visit the [documentation](https://snakepipes.readthedocs.io/en/latest/). + +## Citation + +If you adopt/run snakePipes for your analysis, please cite it as follows: + +Bhardwaj, Vivek, Steffen Heyne, Katarzyna Sikora, Leily Rabbani, Michael Rauer, Fabian Kilpert, Andreas S. Richter, Devon P. Ryan, and Thomas Manke. 2019. “snakePipes: Facilitating Flexible, Scalable and Integrative Epigenomic Analysis.” Bioinformatics, May. [doi:10.1093/bioinformatics/btz436](https://doi.org/10.1093/bioinformatics/btz436). + +## Note + +SnakePipes are under active development. We appreciate your help in improving it further. Please open an issue in the GitHub repository for feature requests or bug reports. diff --git a/README.rst b/README.rst deleted file mode 100644 index d3812a17b..000000000 --- a/README.rst +++ /dev/null @@ -1,87 +0,0 @@ -=========================================================== -snakePipes -=========================================================== - -.. image:: https://readthedocs.org/projects/snakepipes/badge/?version=latest - :target: http://snakepipes.readthedocs.io/en/latest/?badge=latest - :alt: Documentation Status - -.. image:: https://travis-ci.org/maxplanck-ie/snakepipes.svg?branch=develop - :target: https://travis-ci.org/maxplanck-ie/snakepipes - :alt: Build Staus - -.. 
image:: https://zenodo.org/badge/54579435.svg - :target: https://zenodo.org/badge/latestdoi/54579435 - :alt: Citation - - -snakePipes are flexible and powerful workflows built using `Snakemake `__ that simplify the analysis of NGS data. - -.. image:: ./docs/content/images/snakePipes_small.png - :scale: 20 % - :width: 100 px - :height: 100 px - :align: right - -Workflows available --------------------- - -- DNA-mapping* -- ChIP-seq* -- mRNA-seq* -- noncoding-RNA-seq* -- ATAC-seq* -- scRNA-seq -- Hi-C -- Whole Genome Bisulfite Seq/WGBS - -**(*Also available in "allele-specific" mode)** - -Installation -------------- - -Snakepipes is a set of Snakemake workflows which use conda for installation and dependency resolution, so you will need to `install conda `__ first. - -Afterward, simply run the following: - -``conda install mamba -c conda-forge && mamba create -n snakePipes -c mpi-ie -c bioconda -c conda-forge snakePipes`` - -This will create a new conda environment called "snakePipes" into which snakePipes is installed. You will then need to create the conda environments needed by the various workflows. To facilitate this we provide the `snakePipes` commands: - -* ``conda activate snakePipes`` to activate the appropriate conda environment. -* ``snakePipes createEnvs`` to create the various environments. - -Indices and annotations needed to run the workflows could be created by a simple command : - -``createIndices --genomeURL --gtfURL -o `` - -where `name` refers to the name/id of your genome (specify as you wish). - -A few additional steps you can then take: - -1. **Modify/remove/add the organism yaml files appropriately** : these yaml files would contain location of appropriate -GTF files and genome indexes corresponding to different organisms. The location of these files after installation can be -found using ``snakePipes info`` command. - -2. **Modify the cluster.yaml file appropriately** : This yaml file contains settings for your cluster scheduler (SGE/slurm). 
-Location revealed using ``snakePipes info`` command. - - -Documentation --------------- - -For detailed documentation on setup and usage, please visit our `read the docs page `__. - - -Citation -------------- - -If you adopt/run snakePipes for your analysis, cite it as follows : - -Bhardwaj, Vivek, Steffen Heyne, Katarzyna Sikora, Leily Rabbani, Michael Rauer, Fabian Kilpert, Andreas S. Richter, Devon P. Ryan, and Thomas Manke. 2019. “snakePipes: Facilitating Flexible, Scalable and Integrative Epigenomic Analysis.” Bioinformatics , May. `doi:10.1093/bioinformatics/btz436 `__ - - -Note -------------- - -SnakePipes are under active development. We appreciate your help in improving it further. Please use issues to the GitHub repository for feature requests or bug reports. diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh deleted file mode 100755 index 89481145a..000000000 --- a/conda-recipe/build.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -$PYTHON setup.py install --single-version-externally-managed --record=record.txt diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index cb477b30e..77afba14c 100755 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -1,6 +1,8 @@ +{% set version = "3.0.0" %} + package: name: snakepipes - version: 2.9.0 + version: {{ version }} source: path: ../ @@ -8,26 +10,26 @@ source: build: number: 0 noarch: python + script: pip install . 
requirements: host: - python >=3 + - pip + - seaborn run: - - python >=3.7.0 - - snakemake >=6.2.1, <7.19.0 - - tabulate <0.9 + - python >=3.11 + - snakemake >=8 + - snakemake-executor-plugin-cluster-generic >=1.0.9 - pandas - - graphviz - thefuzz - pyyaml >=5.1 - - coreutils test: commands: - - DNA-mapping --help + - DNAmapping --help about: home: 'https://snakepipes.readthedocs.org' license: MIT summary: NGS processing pipelines from the MPI-IE - license_file: LICENSE diff --git a/docs/conf.py b/docs/conf.py index 0c296325f..3a0ab11de 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,6 +14,8 @@ import sys import os +from importlib.metadata import version as importlibversion +import sphinx_rtd_theme # to allow readthedocs to compile without installing some dependencies import mock @@ -82,9 +84,9 @@ exec(open('../snakePipes/__init__.py').read()) # The short X.Y version. -version = __version__ +version = importlibversion("snakePipes") # The full version, including alpha/beta/rc tags. -release = __version__ +release = importlibversion("snakePipes") # An rst epilog to apper at the end of every page rst_epilog = """ @@ -142,12 +144,13 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -on_rtd = os.environ.get('READTHEDOCS', None) == 'True' +# on_rtd = os.environ.get('READTHEDOCS', None) == 'True' -if not on_rtd: # only import and set the theme if we're building docs locally - import sphinx_rtd_theme - html_theme = 'sphinx_rtd_theme' # 'alabaster' 'sphinx_rtd_theme' - html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +#if not on_rtd: # only import and set the theme if we're building docs locally + +# import them both locally and on rtd +html_theme = 'sphinx_rtd_theme' # 'alabaster' 'sphinx_rtd_theme' +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. 
For a list of options available for each theme, see the @@ -176,7 +179,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -#html_static_path = ['_static'] +# html_static_path = ['_static'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied @@ -261,7 +264,15 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). -latex_documents = [(master_doc, 'snakePipes.tex', u'snakePipes Documentation', u'MPI-IE', 'manual'), ] +latex_documents = [ + ( + master_doc, + 'snakePipes.tex', + u'snakePipes Documentation', + u'MPI-IE', + 'manual' + ), +] # The name of an image file (relative to this directory) to place at the top of # the title page. @@ -288,7 +299,15 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [(master_doc, 'snakepipes', u'snakePipes Documentation', [author], 1)] +man_pages = [ + ( + master_doc, + 'snakepipes', + u'snakePipes Documentation', + [author], + 1 + ) +] # If true, show URL addresses after external links. # man_show_urls = False @@ -300,7 +319,16 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'snakePipes', u'snakePipes Documentation', author, 'snakePipes', 'One line description of project.', 'Miscellaneous'), ] + ( + master_doc, + 'snakePipes', + u'snakePipes Documentation', + author, + 'snakePipes', + 'One line description of project.', + 'Miscellaneous' + ), +] # Documents to append as an appendix to all manuals. # texinfo_appendices = [] @@ -326,8 +354,9 @@ # The basename for the epub file. 
It defaults to the project name. # epub_basename = project -# The HTML theme for the epub output. Since the default themes are not optimized -# for small screen space, using the same theme for HTML and epub output is +# The HTML theme for the epub output. +# Since the default themes are not optimized for small screen space, +# using the same theme for HTML and epub output is # usually not wise. This defaults to 'epub', a theme designed to save visual # space. # epub_theme = 'epub' @@ -386,4 +415,4 @@ # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} diff --git a/docs/content/News.rst b/docs/content/News.rst index 925b61ac7..933e6fc00 100644 --- a/docs/content/News.rst +++ b/docs/content/News.rst @@ -156,7 +156,7 @@ snakePipes 2.4.0 ---------------- * Added support for multiple pairwise comparisons for DESeq2, sleuth, and rMats in the mRNA-seq workflow, as well as for DESeq2 in the noncoding-RNA-seq workflow. -* Loompy from conda is now used in mode STARsolo in scRNA-seq workflow. +* Loompy from conda is now used in mode STARsolo in scRNAseq workflow. * Added bamExt to mRNA-seq and noncoding-RNA-seq commandline arguments. * Added multi-thread support to rMats in mRNA-seq workflow. * Fixed deepTools GC bias command with SE reads. 
diff --git a/docs/content/advanced_usage.rst b/docs/content/advanced_usage.rst index 1bee1a3b9..b8002db8b 100644 --- a/docs/content/advanced_usage.rst +++ b/docs/content/advanced_usage.rst @@ -32,13 +32,13 @@ All files needed to be modified in order to extend/modify a workflow, are availa │   ├── rules │   └── tools └── workflows - ├── ATAC-seq - ├── ChIP-seq + ├── ATACseq + ├── ChIPseq ├── createIndices - ├── DNA-mapping + ├── DNAmapping ├── HiC - ├── mRNA-seq - ├── noncoding-RNA-seq + ├── mRNAseq + ├── ncRNAseq ├── preprocessing ├── scRNAseq └──WGBS @@ -84,8 +84,8 @@ Finally, provide an adjusted config via ``--configfile`` parameter to snakemake! example call:: - snakemake --snakefile /path/to/snakemake_workflows/workflows/ATAC-seq/Snakefile - --configfile /path/to/(snakemake_workflows/workflows/ATAC-seq/)defaults.yaml + snakemake --snakefile /path/to/snakemake_workflows/workflows/ATACseq/Snakefile + --configfile /path/to/(snakemake_workflows/workflows/ATACseq/)defaults.yaml --directory /path/to/outputdir --cores 32 @@ -149,17 +149,17 @@ Therefore in order to change or upgrade a tool version, all you need to do is to Modifying or adding new rules to the workflows ------------------------------------------------ -Modifying or adding new rules to snakePipes workflows is relatively easy. Considering you want to add a new Rscript that performs a downstream analysis on the DESeq2 output in mRNA-seq workflow. These would be the steps needed: +Modifying or adding new rules to snakePipes workflows is relatively easy. Considering you want to add a new Rscript that performs a downstream analysis on the DESeq2 output in mRNAseq workflow. These would be the steps needed: * Test the Rscript on command line first, then move it in the ``shared/rscripts`` folder. * Add a rule that called the Rscript and put it under ``shared/rules`` folder. 
- * Add the corresponding ``rule all``, that defines the expected output into ``workflows/mRNA-seq/Snakefile`` + * Add the corresponding ``rule all``, that defines the expected output into ``workflows/mRNAseq/Snakefile`` * Now, for easy and reproducible execution of the rule, add a ``conda`` directive and point it to the relevant conda env under ``shared/rules/envs``. Since your rule might need a new R package, `search whether it's available `__ in one of the conda channels and add the package name (as indicated in the conda channel) and version under the ``dependencies`` key. - * Finally, modify the command line wrapper (``workflows/mRNA-seq/mRNA-seq``) to make this new feature available to the users! + * Finally, modify the command line wrapper (``workflows/mRNAseq/mRNAseq``) to make this new feature available to the users! Using AWS or other cloud platforms @@ -172,7 +172,7 @@ There is nothing particularly special about performing computations on AWS or ot 3. Ensure that you install snakePipes on a separate EBS (or equivalent) storage block. We found that a 200GB ``/data`` partition was most convenient. This absolutely must not be the ``/`` partition, as mounting such a persistent image on other instances will result in paths being changed, which result in needing to modify large numbers of files. 4. It's usually sufficient to use a single large (e.g., ``m5.24xlarge``) compute node, with 100+ cores and a few hundred GB RAM. This allows one to use the ``--local`` option and not have to deal with the hassle of setting up a proper cluster on AWS. Make sure the then set ``-j`` to the number of available cores on the node, so snakePipes can make the most efficient use of the resources (and minimize your bill). -Below is an example of running the mRNA-seq pipeline on AWS using the resources outlined above. Note that it's best to store your input/output data on a separate storage block, since its lifetime is likely to be shorter than that of the indices. 
+Below is an example of running the mRNAseq pipeline on AWS using the resources outlined above. Note that it's best to store your input/output data on a separate storage block, since its lifetime is likely to be shorter than that of the indices. .. code:: bash @@ -215,7 +215,7 @@ Below is an example of running the mRNA-seq pipeline on AWS using the resources # Update defaults.yaml to use /data/tmp for temporary space -Then a larger instance can be spun up and the `mRNA-seq` pipeline run as normal. +Then a larger instance can be spun up and the `mRNAseq` pipeline run as normal. .. code:: bash @@ -224,7 +224,7 @@ Then a larger instance can be spun up and the `mRNA-seq` pipeline run as normal. chown ec2-user /data export PATH=/data/snakePipes/bin:$PATH conda activate snakePipes - mRNA-seq -m alignment -i /data/data -o /data/output --local -j 192 /data/indices/GRCm28.yaml + mRNAseq -m alignment -i /data/data -o /data/output --local -j 192 /data/indices/GRCm28.yaml Receiving emails upon pipeline completion ----------------------------------------- diff --git a/docs/content/images/makePairs_pipeline.png b/docs/content/images/makePairs_pipeline.png new file mode 100644 index 000000000..f077fbef1 Binary files /dev/null and b/docs/content/images/makePairs_pipeline.png differ diff --git a/docs/content/running_snakePipes.rst b/docs/content/running_snakePipes.rst index 0e38bb89a..d268a7062 100644 --- a/docs/content/running_snakePipes.rst +++ b/docs/content/running_snakePipes.rst @@ -6,10 +6,10 @@ Running snakePipes Pipelines under snakePipes are designed in a way such that all workflows are configured and ran in a similar way. -An example with ChIP-seq data +An example with ChIPseq data ------------------------------ -A **typical ChIP-seq analysis** of human samples starts from paired-end FASTQ files in the directory ``input-dir``: +A **typical ChIPseq analysis** of human samples starts from paired-end FASTQ files in the directory ``input-dir``: .. 
code:: bash @@ -17,15 +17,15 @@ A **typical ChIP-seq analysis** of human samples starts from paired-end FASTQ fi my_H3K27ac_sample_R1.fastq.gz my_H3K27me3_sample_R1.fastq.gz my_Input_sample_R1.fastq.gz my_H3K27ac_sample_R2.fastq.gz my_H3K27me3_sample_R2.fastq.gz my_Input_sample_R2.fastq.gz -The :ref:`ChIP-seq` workflow requires the files to be processed via the :ref:`DNA-mapping` workflow first. We therefore run the DNA-mapping workflow : +The :ref:`ChIPseq` workflow requires the files to be processed via the :ref:`DNAmapping` workflow first. We therefore run the DNAmapping workflow : .. code:: bash - $ DNA-mapping -i /path/to/input-dir -o /path/to/output-dir --mapq 5 -j 10 --dedup hs37d5 + $ DNAmapping -i /path/to/input-dir -o /path/to/output-dir --mapq 5 -j 10 --dedup hs37d5 * ``--mapq 5`` would filter mapped reads for a minimum mapping quality of 5. This would keep only primary alignments from bowtie2, sufficient for downstream analysis. -* ``--dedup`` would remove PCR duplicates (reads with matching 5' position in the genome), a typical step in ChIP-Seq analysis. +* ``--dedup`` would remove PCR duplicates (reads with matching 5' position in the genome), a typical step in ChIPseq analysis. * ``-j 10`` defines 10 jobs to be run in parallel on the cluster (see below). @@ -37,19 +37,19 @@ All individual jobs of the workflow will be submitted to the Grid engine using t **For single-end FASTQ files**, Note that single end data still needs a valid suffix (e.g. sample1_R1.fastq.gz). With a proper suffix, single end mode is detected by default. When executing some workflows with the ``--fromBAM`` flag, it is still necessary to set ``--singleEnd``. -Once the DNA-mapping run is finished sucessfully. We can run the ChIP-seq analysis in the same directory. +Once the DNAmapping run is finished sucessfully. We can run the ChIPseq analysis in the same directory. .. 
code:: bash - $ ChIP-seq -d /path/to/dna-mapping-output/ hs37d5 chip-samples.yaml + $ ChIPseq -d /path/to/dnamapping-output/ hs37d5 chip-samples.yaml -* ``-d`` specifies the directory where the output of DNA-mapping workflow lies. The ChIP-seq workflow would also write it's output there. +* ``-d`` specifies the directory where the output of DNAmapping workflow lies. The ChIPseq workflow would also write its output there. * ``hs37d5`` is the name of the genome (keyword for the yaml). -* ``chip-samples.yaml`` is a yaml file that defines for each ChIP sample, the corresponding control (input) sample and the type of mark (broad/sharp). See :ref:`ChIP-seq` for more details on how to setup this yaml file. +* ``chip-samples.yaml`` is a yaml file that defines for each ChIP sample, the corresponding control (input) sample and the type of mark (broad/sharp). See :ref:`ChIPseq` for more details on how to set up this yaml file. -The ChIP-seq workflow would follow up from the DNA-mapping outputs and perform peak calling, create ChIP-input normalized coverage files and also perform differential (control-test) analysis if a sample information file is provided (see below). +The ChIPseq workflow would follow up from the DNAmapping outputs and perform peak calling, create ChIP-input normalized coverage files and also perform differential (control-test) analysis if a sample information file is provided (see below). .. _sampleinfo: @@ -57,8 +57,8 @@ The sample sheet ---------------- Most of the workflows allow users to perform grouped operations as an option, for example -differential expression analysis in mRNA-seq workflow, differential binding analysis in -ChIP-Seq workflow, differential open-chromatin analysis in ATAC-seq workflow or merging of +differential expression analysis in mRNAseq workflow, differential binding analysis in +ChIPseq workflow, differential open-chromatin analysis in ATACseq workflow or merging of groups in Hi-C workflow.
For all this analysis, snakePipes needs a ``sampleSheet.tsv`` file (file name is not important, but it has to be tab-separated) that contains sample grouping information. In most cases users would want to groups samples by replicates. The format of the file is as follows: :: @@ -89,7 +89,7 @@ All of the snakePipes workflows that begin with a FASTQ file, perform the same p * **Linking/downsampling the FASTQ file** : The FASTQ rule in the workflows links the input FASTQ file into the FASTQ folder in the output directory. If ``downsampling`` is specified, the FASTQ folder would contain the downsampled FASTQ file. -.. note:: The DNA-mapping and RNA-mapping pipelines can take either single, or paired-end FASTQ files. For paired-end data, the reads ``R1`` and ``R2`` are expected to have the suffix ``_R1`` and ``_R2`` respectively, which can be modified in the ``defaults.yaml`` file using the ``reads`` key, to your needs. For example, files downloaded from NCBI would normally have the extention ``.1.fastq.gz`` and ``.2.fastq.gz``. Also, please check the ``ext`` key in the configuration file if you wish to modify the read extension (default is ``.fastq.gz``). +.. note:: The DNAmapping and RNA-mapping pipelines can take either single, or paired-end FASTQ files. For paired-end data, the reads ``R1`` and ``R2`` are expected to have the suffix ``_R1`` and ``_R2`` respectively, which can be modified in the ``defaults.yaml`` file using the ``reads`` key, to your needs. For example, files downloaded from NCBI would normally have the extention ``.1.fastq.gz`` and ``.2.fastq.gz``. Also, please check the ``ext`` key in the configuration file if you wish to modify the read extension (default is ``.fastq.gz``). 
* **Quality/adapter trimming** (optional): If ``--trim`` is selected, the ``trimming`` rule would run the selected program (either `Trimgalore `__, or `Cutadapt `__) on the files in the FASTQ folder, and would produce another folder with name ``FASTQ_``, where is either ``Cutadapt`` or ``Trimgalore``. diff --git a/docs/content/setting_up.rst b/docs/content/setting_up.rst index b9a76bf2f..697a150ed 100644 --- a/docs/content/setting_up.rst +++ b/docs/content/setting_up.rst @@ -3,17 +3,15 @@ Setting up snakePipes ===================== -Unlike many other pipelines, setting up snakePipes is easy! All you need is a *linux/OSX system* with *python3-mamba* installation. In past versions, snakePipes was using conda. We are now moving forward with mamba: a Python-based CLI conceived as a drop-in replacement for conda, offering higher speed and more reliable environment solutions to our snakePipes workflows thanks to the bindings over _libsolv_. +Unlike many other pipelines, setting up snakePipes is easy! All you need is a *linux/OSX system* with a working *conda* installation. -Installing conda & mamba ------------------------- - -Follow the instructions `here `__ to install either miniconda or anaconda first. Once you have already installed either miniconda or anaconda, you may simply `add mamba to your base environment `__ - -.. code-block:: bash +Installing conda +---------------- - $ conda install mamba -c conda-forge +.. note:: + Latest snakePipes versions (3.0 onwards) require conda >= 23.10.0. If you have an older version of conda, please don't use it. You may try installing the extension (conda-libmamba-solver) in your base environment, and setting this as default (conda config --set solver libmamba) but this has not been tested by us, hence YMMV. Mamba used to be a pre-requisite for snakePipes (versions 2.5.3 through 2.8.1), refer to older docs in such versions. +Follow the instructions `here `__ to install either miniconda or anaconda first.
After installation, check your python path and version : .. code-block:: bash @@ -24,10 +22,6 @@ After installation, check your python path and version : $ python --version # anything above 3.5 is ok! $ Python 3.6.5 :: Anaconda, Inc. - -Now we are ready to install snakePipes latest release using ``mamba``. - - Installing snakePipes --------------------- @@ -35,7 +29,7 @@ The easiest way to install snakePipes is via our conda channel. The following co .. code:: bash - mamba create -n snakePipes -c mpi-ie -c conda-forge -c bioconda snakePipes + conda create -n snakePipes -c mpi-ie -c conda-forge -c bioconda snakePipes This way, the software used within snakePipes do not conflict with the software pre-installed on your terminal or in your python environment. @@ -45,58 +39,93 @@ Now, we should activate this environment: conda activate snakePipes -Finally, in order to create the workflow environments, we'll need to adjust the path under ``defaults.yaml`` settings file. Please continue reading about all global options. -Modify global options ---------------------- - -To see the location of the various YAML files so you can manually inspect them, you can use: +Alternatively, snakePipes can also be installed using pip. You can clone a branch (given that it's version 3.0.0 or later) and just install with pip: .. code:: bash - snakePipes info + pip install . -This would show the locations of: +Just make sure you have python 3.11 or later (cap by snakemake) in your environment. +In case you'd like to develop snakePipes, extra dependencies for the documentation: - * **defaults.yaml** Defines default tool and file paths. See :ref:`conda` - * **cluster.yaml** Defines execution command for the cluster. See :ref:`cluster` - * **organisms/.yaml** : Defines genome indices and annotations for various organisms. See :ref:`organisms` - * Workflow-specific defaults : Defines default options for our command line wrappers. See :ref:`workflowOpts` +.. 
code:: bash -It is a good idea to keep a copy of your defaults.yaml, cluster.yaml and the whole organism folder in a dedicated location e.g. some folder *outside the snakePipes installation folder* named "snakePipes_configs" . -You can configure snakePipes to use these files after a fresh installation or update with ``snakePipes config --organismsDir my_organisms_dir --clusterConfig my_cluster_config`` . This will also work if you add ``--configMode recycle``. + pip install .[docs] +or for the pytests: -.. _conda: +.. code:: bash -Create the conda environments ------------------------------ + pip install .[actions] -All the tools required for running various pipelines are installed via various conda repositories -(mainly bioconda). The following commands installs the tools and creates the respective conda environments. + +Configuring snakePipes +---------------------- + +Finally, at least one file (``defaults.yaml``) should be modified to match your compute infrastructure. The location of this file can be found out by executing: .. code:: bash - snakePipes createEnvs + snakePipes info -.. note:: +This would return you where the global configuration file is located. +Two fields are important to set: - ``snakePipes createEnvs`` will also set the ``snakemakeOptions:`` line in the global snakePipes - ``defaults.yaml`` files. If you have already modified this then use the ``--keepCondaDir`` option. +++++++++++++++++ +snakemakeProfile +++++++++++++++++ +Defines a `snakemake profile `__ to use. +By default this translates to a pre-shipped 'local' profile (and points to a location relative to the snakePipes package directory). +The local profile runs all jobs without a submission system. -The place where the conda envs are created (and therefore the tools are installed) is defined in ``snakePipes/defaults.yaml`` -file on our GitHub repository. You can modify it to suite your needs. 
+Another profile shipped within the repository is a default slurm profile (using snakemake-executor-plugin-cluster-generic). +In case you want to use this you can set the snakemakeProfile value to ``shared/profiles/snakepipes_genericprofile``. +After changing the value of snakemakeProfile, you should re-run ``snakePipes info``, which will also print out the full directory of the profile used. +If you want to use the snakepipes_genericprofile, make sure to review the following entries in the profile yaml file with respect to your infrastructure: -Here are the content of *defaults.yaml*:: + * ``module load slurm &&`` - could be omitted + * ``resources.partition`` - set to your slurm partition + * ``conda-prefix`` - set to your preferred location where snakePipes environments should be stored + * ``resources`` - make sure default resources make sense for your infrastructure + * ``ccancel.sh`` - refers to the ccancel.sh file inside the profile directory and contains instructions on how to kill submitted jobs (on failure / interruption of snakemake). The module command could be omitted here as before - snakemakeOptions: '--use-conda --conda-prefix /data/general/scratch/conda_envs' + In case you are using your own snakemake profile already, you can define them here as well. Acceptable values in snakemakeProfile are: + + * absolute path to a snakemake profile directory + * a relative path to a snakemake profile (relative to the package directory) + * The name of a `global snakemake profile `__ -.. note:: + If you use your own profile, just make sure that at least these values are set in your profile: - Whenever you change the `snakemakeOptions:` line in `defaults.yaml`, you should run - `snakePipes createEnvs` to ensure that the conda environments are then created. 
+ * use-conda: true + * conda-prefix: /path/to/prefix + * conda-frontend: conda -Running ``snakePipes createEnvs`` is not strictly required, but facilitates multiple users using the same snakePipes installation. +Additionally, rule resources are defined in the pre-shipped profiles. +In case you use your own you'd want to have these set in your profile as well. + ++++++++ +tempDir ++++++++ +The temp directory to use. Defaults to /scratch/local. + +After setting the defaults, the conda environments can be created. + +.. _conda: + +Create the conda environments +----------------------------- + +All the tools required for running various pipelines are installed via various conda repositories +(mainly bioconda). The following command installs the tools and creates the respective conda environments. +Note that the conda-prefix is defined in your profile (and defaults to /tmp). Thus make sure you have set your profile appropriately. +It is important that the conda-prefix is a location that is accessible by your compute nodes as well. +Finally, make sure you have a conda installation with libmamba as the solver (conda version 23.10.0 or later), as this speeds up the process. + +.. code:: bash + + snakePipes createEnvs .. _organisms: @@ -107,6 +136,8 @@ Configure the organisms For each organism of your choice, create a file called ``.yaml`` in the folder specified by ``organismsDir`` in **defaults.yaml** and fill the paths to the required files next to the corresponding yaml entry. For common organisms, the required files are downloaded and the yaml entries can be created automatically via the workflow ``createIndices``. +Note that the organism yamls that come with the installation are only appropriate internally for MPI-IE, and as an external user you need to create / download your own. + The yaml files look like this after the setup (an example from drosophila genome ``dm3``) : ..
parsed-literal:: @@ -119,7 +150,7 @@ The yaml files look like this after the setup (an example from drosophila genome genome_index: "/data/repository/organisms/dm3_ensembl/genome_fasta/genome.fa.fai" # OPTIONAL. Needed for GC bias estimation by deepTools genome_2bit: "/data/repository/organisms/dm3_ensembl/genome_fasta/genome.2bit" - # Needed for DNA-mapping workflow + # Needed for DNAmapping workflow bowtie2_index: "/data/repository/organisms/dm3_ensembl/BowtieIndex/genome" # index of the genome.fasta using HISAT2, needed for RNA-seq workflow hisat2_index: "/data/repository/organisms/dm3_ensembl/HISAT2Index/genome" @@ -128,9 +159,9 @@ The yaml files look like this after the setup (an example from drosophila genome bwa_index: "/data/repository/organisms/dm3_ensembl/BWAindex/genome.fa" # index of the genome.fasta using STAR, needed for RNA-seq workflow star_index: "/data/repository/organisms/dm3_ensembl/STARIndex/" - # Needed for QC and annotation in DNA-mapping/RNA-Seq workflows + # Needed for QC and annotation in DNAmapping/RNA-Seq workflows genes_bed: "/data/repository/organisms/dm3_ensembl/Ensembl/release-78/genes.bed" - # Needed for QC and annotation in DNA-mapping/RNA-Seq workflows + # Needed for QC and annotation in DNAmapping/RNA-Seq workflows genes_gtf: "/data/repository/organisms/dm3_ensembl/Ensembl/release-78/genes.gtf" # OPTIONAL. For QC and filtering of regions in multiple workflows. blacklist_bed: @@ -161,58 +192,6 @@ For the sake of convenience, we provide premade indices for the following organi To use these, simply download and extract them. You will then need to modify the provided YAML file to indicate exactly where the indices are located (i.e., replace ``/data/processing/ryan`` with whatever is appropriate). -.. 
_cluster: - -Configure your cluster ----------------------- - -The ``cluster.yaml`` file contains both the default memory requirements as well as two options passed to snakemake that control how jobs are submitted to the cluster and files are retrieved:: - - snakemake_latency_wait: 300 - snakemake_cluster_cmd: module load slurm; SlurmEasy --mem-per-cpu {cluster.memory} --threads {threads} --log {snakePipes_cluster_logDir} --name {rule}.snakemake - snakePipes_cluster_logDir: cluster_logs - __default__: - memory: 8G - snp_split: - memory: 10G - -The location of this file must be specified by the ``clusterConfig`` value in **defaults.yaml**. - -You can change the default per-core memory allocation if needed here. Importantly, the ``snakemake_cluster_cmd`` -option must be changed to match your needs (see table below). Whatever command you specify must include -a ``{cluster.memory}`` option and a ``{threads}`` option. You can specify other required options here as well. -The ``snakemake_latency_wait`` value defines how long snakemake should wait for files to appear -before throwing an error. The default of 300 seconds is typically reasonable when a file system such as -`NFS `__ is in use. Please also note that there are additional memory -settings for each workflow in ``snakePipes/workflows/[workflow]/cluster.yaml`` that you might need to adjust. - -``snakePipes_cluster_logDir:`` can be used like a wildcard in `snakemake_cluster_cmd` to specify the directory -for the stdout and stderr files from a job that is running on the cluster. This is given separate to make sure -the directory exists before execution. A relative path is treated relative to the ouput directory of the workflow. -If you want, you can also give an absolute log directory starting with /. 
- -==================== ====================================================================================== - Scheduler/Queuing snakemake_cluster_cmd example -==================== ====================================================================================== - **slurm** .. code:: bash - - snakemake_cluster_cmd: module load slurm; sbatch --ntasks-per-node=1 - -c {threads} -J {rule}.snakemake --mem-per-cpu={cluster.memory} - -p MYQUEUE -o {snakePipes_cluster_logDir}/{rule}.%j.out - -e {snakePipes_cluster_logDir}/{rule}.%j.err - snakePipes_cluster_logDir: cluster_logs - - **PBS/Torque** .. code:: bash - - snakemake_cluster_cmd: qsub -N {rule}.snakemake - -q MYQUEUE -l pmem={cluster.memory} - -l walltime=20:00:00 -l nodes=1:ppn={cluster.threads} - -o {snakePipes_cluster_logDir}/{rule}.\$PBS_JOBID.out - -e {snakePipes_cluster_logDir}/{rule}.\$PBS_JOBID.err - snakePipes_cluster_logDir: cluster_logs - - **SGE** *Please send us a working example!* -==================== ====================================================================================== @@ -223,7 +202,7 @@ Configure default options for workflows The default options for all command-line arguments as well as for the cluster (memory) are stored in the workflow-specific folders. If you have cloned the repository locally, these files are located under ``snakePipes/workflows/`` folder. You can modify the values in these yamls to suite your needs. Most of the default values could also be replaced from the command line wrappers while executing a workflow. -Below are some of the workflow defaults from the DNA-mapping pipeline. Empty sections means no default is set: +Below are some of the workflow defaults from the DNAmapping pipeline. Empty sections means no default is set: .. parsed-literal:: ## key for the genome name (eg. 
dm3) @@ -257,11 +236,11 @@ Test data Test data for the various workflows is available at the following locations: - - `DNA mapping `__ - - `ChIP-seq `__ - - `ATAC-seq `__ - - `mRNA-seq `__ - - `noncoding-RNA-seq `__ + - `DNAmapping `__ + - `ChIPseq `__ + - `ATACseq `__ + - `mRNAseq `__ + - `ncRNAseq `__ - `HiC `__ - `WGBS `__ - - `scRNA-seq `__ + - `scRNAseq `__ diff --git a/docs/content/workflows/ATAC-seq.rst b/docs/content/workflows/ATACseq.rst similarity index 82% rename from docs/content/workflows/ATAC-seq.rst rename to docs/content/workflows/ATACseq.rst index 8b3f20b73..150dfc648 100644 --- a/docs/content/workflows/ATAC-seq.rst +++ b/docs/content/workflows/ATACseq.rst @@ -1,12 +1,12 @@ -.. _ATAC-seq: +.. _ATACseq: -ATAC-seq +ATACseq ======== What it does ------------ -The ATAC-seq pipeline takes one or more BAM files and attempts to find accessible regions. If multiple samples and a sample sheet are provided, then CSAW is additionally used to find differentially accessible regions. Prior to finding open/accessible regions, the BAM files are filtered to include only properly paired reads with appropriate fragment sizes (<150 bases by default). These filtered fragments are then used for the remainder of the pipeline. +The ATACseq pipeline takes one or more BAM files and attempts to find accessible regions. If multiple samples and a sample sheet are provided, then CSAW is additionally used to find differentially accessible regions. Prior to finding open/accessible regions, the BAM files are filtered to include only properly paired reads with appropriate fragment sizes (<150 bases by default). These filtered fragments are then used for the remainder of the pipeline. .. image:: ../images/ATACseq_pipeline.png @@ -15,8 +15,8 @@ The ATAC-seq pipeline takes one or more BAM files and attempts to find accessibl Input requirements ------------------ -The DNA mapping pipeline generates output that is fully compatible with the ATAC-seq pipeline input requirements! 
-When running the ATAC-seq pipeline, please specify the output directory of DNA-mapping pipeline as the working directory (``-d``). +The DNA mapping pipeline generates output that is fully compatible with the ATACseq pipeline input requirements! +When running the ATACseq pipeline, please specify the output directory of DNAmapping pipeline as the working directory (``-d``). * **filtered_bam** directory contains the input BAM files (either filtered or unfiltered, however you prefer). @@ -27,7 +27,7 @@ When running the ATAC-seq pipeline, please specify the output directory of DNA-m Differential open chromatin analysis ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Similar to differential binding analysis with the ChIP-Seq data. We can perform the differential open chromatin analysis, using the ``--sampleSheet`` option of the ATAC-seq workflow. This requires a sample sheet, which is identical to that required by the ChIP-seq and RNA-seq workflows (see :doc:`ChIP-seq` for details). +Similar to differential binding analysis with the ChIPseq data. We can perform the differential open chromatin analysis, using the ``--sampleSheet`` option of the ATACseq workflow. This requires a sample sheet, which is identical to that required by the ChIPseq and RNA-seq workflows (see :doc:`ChIPseq` for details). An example is below:: @@ -39,7 +39,7 @@ An example is below:: SRR7013049 OreR SRR7013050 OreR -.. note:: This sample sheet has the same requirements as the sample sheet in the ChIP-seq workflow, and also uses the same tool (CSAW) with a narrow default window size. +.. note:: This sample sheet has the same requirements as the sample sheet in the ChIPseq workflow, and also uses the same tool (CSAW) with a narrow default window size. For comparison between two conditions, the name you assign to "condition" is not relevant, but rather the order is. The group mentioned first (in the above case "wild-type") would be used as a "control" and the group mentioned later would be used as "test". 
@@ -68,12 +68,12 @@ Configuration file There is a configuration file in ``snakePipes/workflows/ATACseq/defaults.yaml``:: ## General/Snakemake parameters, only used/set by wrapper or in Snakemake cmdl, but not in Snakefile - pipeline: ATAC-seq + pipeline: ATACseq configFile: clusterConfigFile: local: false maxJobs: 5 - ## workingdir need to be required DNA-mapping output dir, 'outdir' is set to workingdir internally + ## workingdir need to be required DNAmapping output dir, 'outdir' is set to workingdir internally workingdir: ## preconfigured target genomes (mm9,mm10,dm3,...) , see /path/to/snakemake_workflows/shared/organisms/ ## Value can be also path to your own genome config file! @@ -109,7 +109,7 @@ There is a configuration file in ``snakePipes/workflows/ATACseq/defaults.yaml``: Useful parameters are ``maxFragmentSize``, ``minFragmentSize`` and ``windowSize``, also available from commandline. -* **windowSize**: is the size of windows to test differential binding using CSAW. The default small window size is sufficient for most analysis, since an ATAC-seq peak is sharp. +* **windowSize**: is the size of windows to test differential binding using CSAW. The default small window size is sufficient for most analysis, since an ATACseq peak is sharp. * **fragmentCountThreshold**: refers to the minimum number of counts a chromosome must have to be included in the MACS2 analysis. It is introduced to avoid errors in the peak calling step and should only be changed if MACS2 fails. @@ -161,7 +161,7 @@ Assuming a sample sheet is used, the following will be **added** to the working    ├── sample1.filtered.BAM_peaks.qc.txt    └── sample2.filtered.BAM_peaks.qc.txt -Currently the ATAC-seq workflow performs detection of open chromatin regions via `MACS2 `__ (or `HMMRATAC `__ or `Genrich `__, if specified with ``--peakCaller``), and if a sample sheet is provided, the detection of differential open chromatin sites via `CSAW `__. 
There are additionally log files in most of the directories. The various outputs are documented in the CSAW and MACS2 documentation. +Currently the ATACseq workflow performs detection of open chromatin regions via `MACS2 `__ (or `HMMRATAC `__ or `Genrich `__, if specified with ``--peakCaller``), and if a sample sheet is provided, the detection of differential open chromatin sites via `CSAW `__. There are additionally log files in most of the directories. The various outputs are documented in the CSAW and MACS2 documentation. For more information on the contents of the **CSAW_MACS2_sampleSheet** folder, see section :ref:`diffOpenChrom` . * **MACS2** / **HMMRATAC** / **Genrich**: Contains peaks found by the peak caller. The most useful files end in ``.narrowPeak`` or ``.gappedPeak`` and are appropriate for visualization in IGV. @@ -171,7 +171,7 @@ For more information on the contents of the **CSAW_MACS2_sampleSheet** folder, s * fraction of reads in peaks (FRiP) * percentage of the genome covered by peaks. -* **deepTools_ATAC**: contains the output of `plotFingerPrint `__, which is a useful QC plot to assess signal enrichment between the ATAC-seq samples. +* **deepTools_ATAC**: contains the output of `plotFingerPrint `__, which is a useful QC plot to assess signal enrichment between the ATACseq samples. .. note:: The ``_sampleSheet`` suffix for the ``CSAW_MACS2_sampleSheet`` is drawn from the name of the sample sheet you use. So if you instead named the sample sheet ``mySampleSheet.txt`` then the folder would be named ``CSAW_mySampleSheet``. This facilitates using multiple sample sheets. Similarly, ``_MACS2`` portion will be different if you use HMMRATAC or Genrich for peak calling. @@ -192,6 +192,6 @@ Command line options .. 
argparse:: :func: parse_args - :filename: ../snakePipes/workflows/ATAC-seq/ATAC-seq - :prog: ATAC-seq + :filename: ../snakePipes/workflows/ATACseq/ATACseq.py + :prog: ATACseq :nodefault: diff --git a/docs/content/workflows/ChIP-seq.rst b/docs/content/workflows/ChIPseq.rst similarity index 89% rename from docs/content/workflows/ChIP-seq.rst rename to docs/content/workflows/ChIPseq.rst index 707552fd2..4422aef9c 100644 --- a/docs/content/workflows/ChIP-seq.rst +++ b/docs/content/workflows/ChIPseq.rst @@ -1,12 +1,12 @@ -.. _ChIP-seq: +.. _ChIPseq: -ChIP-seq +ChIPseq ======== What it does ------------ -The `ChIP-seq `__ pipeline takes one or more BAM files and attempts to find peaks. If multiple samples and a sample sheet are provided, then CSAW is additionally used to call differential peaks. Both sharp and broad peak calling are supported. +The `ChIPseq `__ pipeline takes one or more BAM files and attempts to find peaks. If multiple samples and a sample sheet are provided, then CSAW is additionally used to call differential peaks. Both sharp and broad peak calling are supported. .. image:: ../images/ChIPseq_pipeline.png @@ -15,10 +15,10 @@ In addition to peaks, bigWig tracks are also generated. Input requirements ------------------ -The DNA mapping pipeline generates output that is fully compatible with the ChIP-seq pipeline input requirements! -When running the ChIP-seq pipeline, please specify the output directory of DNA-mapping pipeline as the working directory (``-w``). +The DNA mapping pipeline generates output that is fully compatible with the ChIPseq pipeline input requirements! +When running the ChIPseq pipeline, please specify the output directory of DNAmapping pipeline as the working directory (``-w``). 
-If you need to provides file **NOT** generated by the DNA-mapping pipeline, then you must provide a directory with the following structure:: +If you need to provides file **NOT** generated by the DNAmapping pipeline, then you must provide a directory with the following structure:: . ├── deepTools_qc @@ -48,7 +48,7 @@ If you need to provides file **NOT** generated by the DNA-mapping pipeline, then Sample configuration ~~~~~~~~~~~~~~~~~~~~ -The ChIP-seq sample configuration yaml file describes what type of peak calling to perform on each sample and which sample to use as the `input control `__ :: +The ChIPseq sample configuration yaml file describes what type of peak calling to perform on each sample and which sample to use as the `input control `__ :: chip_dict: SRR6761497: @@ -78,7 +78,7 @@ If chromatin from an external organism was spikein in, it is possible to obtain DESeq2-style scaling factors produced with deepTools multiBamSummary will then be used to create bam coverage tracks and passed to CSAW as size Factors if sample sheet is provided. -A hybrid genome can be obtained with createIndices workflow and can be passed to the DNA-mapping workflow without any particular arguments. +A hybrid genome can be obtained with createIndices workflow and can be passed to the DNAmapping workflow without any particular arguments. .. 
_diffBinding: @@ -123,14 +123,14 @@ Filtered results are also annotated with the distance to the closest gene using Configuration file ~~~~~~~~~~~~~~~~~~ -There is a configuration file in ``snakePipes/workflows/ChIP-seq/defaults.yaml``:: +There is a configuration file in ``snakePipes/workflows/ChIPseq/defaults.yaml``:: - pipeline: chip-seq + pipeline: ChIPseq configFile: clusterConfigFile: local: false maxJobs: 5 - ## workingdir need to be required DNA-mapping output dir, 'outdir' is set to workingdir internally + ## workingdir need to be required DNAmapping output dir, 'outdir' is set to workingdir internally workingdir: ## preconfigured target genomes (mm9,mm10,dm3,...) , see /path/to/snakemake_workflows/shared/organisms/ ## Value can be also path to your own genome config file! @@ -160,7 +160,7 @@ The only parameters that are useful to change are ``bwBinSize``, ``fragmentLengt Understanding the outputs --------------------------- -The ChIP-seq pipeline will generate additional output as follows:: +The ChIPseq pipeline will generate additional output as follows:: . ├── deepTools_ChIP @@ -200,7 +200,7 @@ The ChIP-seq pipeline will generate additional output as follows:: -Following up on the DNA-mapping module results (see :doc:`DNA-mapping`), the workflow produces the following output directories : +Following up on the DNAmapping module results (see :doc:`DNAmapping`), the workflow produces the following output directories : * **deepTools_ChIP**: Contains output from two of the deepTools modules. The `bamCompare `__ output contains the input-normalized coverage files for the samples, which is very useful for downstream analysis, such as visualization in IGV and plotting the heatmaps. The `plotFingerPrint `__ output is a useful QC plot to assess signal enrichment in the ChIP samples. @@ -230,6 +230,6 @@ Command line options .. 
argparse:: :func: parse_args - :filename: ../snakePipes/workflows/ChIP-seq/ChIP-seq - :prog: ChIP-seq + :filename: ../snakePipes/workflows/ChIPseq/ChIPseq.py + :prog: ChIPseq :nodefault: diff --git a/docs/content/workflows/DNA-mapping.rst b/docs/content/workflows/DNAmapping.rst similarity index 88% rename from docs/content/workflows/DNA-mapping.rst rename to docs/content/workflows/DNAmapping.rst index 03e916f41..a10956920 100644 --- a/docs/content/workflows/DNA-mapping.rst +++ b/docs/content/workflows/DNAmapping.rst @@ -1,12 +1,12 @@ -.. _DNA-mapping: +.. _DNAmapping: -DNA-mapping +DNAmapping =========== What it does ------------ -This is the primary DNA-mapping pipeline. It can be used both alone or upstream of the ATAC-seq and ChIP-seq pipelines. This has a wide array of options, including trimming and various QC steps (e.g., marking duplicates and plotting coverage and PCAs). In addition, basic coverage tracks are created to facilitate viewing the data in IGV. +This is the primary DNAmapping pipeline. It can be used both alone or upstream of the ATACseq and ChIPseq pipelines. This has a wide array of options, including trimming and various QC steps (e.g., marking duplicates and plotting coverage and PCAs). In addition, basic coverage tracks are created to facilitate viewing the data in IGV. .. image:: ../images/DNAmapping_pipeline.png @@ -18,10 +18,10 @@ The only requirement is a directory of gzipped fastq files. 
Files could be singl Configuration file ~~~~~~~~~~~~~~~~~~ -There is a configuration file in ``snakePipes/workflows/DNA-mapping/defaults.yaml``:: +There is a configuration file in ``snakePipes/workflows/DNAmapping/defaults.yaml``:: ## General/Snakemake parameters, only used/set by wrapper or in Snakemake cmdl, but not in Snakefile - pipeline: dna-mapping + pipeline: dnamapping outdir: configFile: clusterConfigFile: @@ -103,7 +103,7 @@ In addition to the FASTQ module results (see :ref:`running_snakePipes`), the wor * **Bowtie2** : Contains the BAM files after mapping with `Bowtie2 `__ and indexed by `Samtools `__. - * **filtered_bam** : Contains the BAM files filtered by the provided criteria, such as mapping quality (``--mapq``) or PCR duplicates (``--dedup``). This file is used for most downstream analysis in the DNA-mapping and ChIP-seq/ATAC-seq pipeline. + * **filtered_bam** : Contains the BAM files filtered by the provided criteria, such as mapping quality (``--mapq``) or PCR duplicates (``--dedup``). This file is used for most downstream analysis in the DNAmapping and ChIPseq/ATACseq pipeline. * **bamCoverage** : Contains the coverage files (`bigWig format `__) produced from the BAM files by `deepTools bamCoverage `__ . The files are either raw, or 1x normalized (by sequencing depth). They are useful for plotting and inspecting the data in IGV. @@ -126,6 +126,6 @@ Command line options .. argparse:: :func: parse_args - :filename: ../snakePipes/workflows/DNA-mapping/DNA-mapping - :prog: DNA-mapping + :filename: ../snakePipes/workflows/DNAmapping/DNAmapping.py + :prog: DNAmapping :nodefault: diff --git a/docs/content/workflows/HiC.rst b/docs/content/workflows/HiC.rst index 263e51a9f..74af12dff 100644 --- a/docs/content/workflows/HiC.rst +++ b/docs/content/workflows/HiC.rst @@ -153,6 +153,6 @@ Command line options .. 
argparse:: :func: parse_args - :filename: ../snakePipes/workflows/HiC/HiC + :filename: ../snakePipes/workflows/HiC/HiC.py :prog: HiC :nodefault: diff --git a/docs/content/workflows/WGBS.rst b/docs/content/workflows/WGBS.rst index cfcaa60fa..272dbd703 100644 --- a/docs/content/workflows/WGBS.rst +++ b/docs/content/workflows/WGBS.rst @@ -146,6 +146,6 @@ Command line options .. argparse:: :func: parse_args - :filename: ../snakePipes/workflows/WGBS/WGBS + :filename: ../snakePipes/workflows/WGBS/WGBS.py :prog: WGBS :nodefault: diff --git a/docs/content/workflows/createIndices.rst b/docs/content/workflows/createIndices.rst index 852eb3e05..30ac8f43c 100644 --- a/docs/content/workflows/createIndices.rst +++ b/docs/content/workflows/createIndices.rst @@ -41,7 +41,7 @@ There is a configuration file in ``snakePipes/workflows/createIndices/defaults.y spikeinGtfURL: ## The effective genome size effectiveGenomeSize: 0 - ## Regions to blacklist in the ChIP-seq and related workflows + ## Regions to blacklist in the ChIPseq and related workflows blacklist: spikeinBlacklist: ## Regions to ignore during normalization (e.g., with bamCompare) @@ -88,6 +88,6 @@ Command line options .. argparse:: :func: parse_args - :filename: ../snakePipes/workflows/createIndices/createIndices + :filename: ../snakePipes/workflows/createIndices/createIndices.py :prog: createIndices :nodefault: diff --git a/docs/content/workflows/mRNA-seq.rst b/docs/content/workflows/mRNAseq.rst similarity index 90% rename from docs/content/workflows/mRNA-seq.rst rename to docs/content/workflows/mRNAseq.rst index 4393ea282..549a929a3 100644 --- a/docs/content/workflows/mRNA-seq.rst +++ b/docs/content/workflows/mRNAseq.rst @@ -1,14 +1,14 @@ -.. _mRNA-seq: +.. _mRNAseq: -mRNA-seq +mRNAseq ======== What it does ------------ -The snakePipes mRNA-seq workflow allows users to process their single or paired-end -mRNA-seq fastq files upto the point of gene/transcript-counts and differential expression. 
-It also allows full allele-specific mRNA-seq analysis (up to allele-specific +The snakePipes mRNAseq workflow allows users to process their single or paired-end +mRNAseq fastq files upto the point of gene/transcript-counts and differential expression. +It also allows full allele-specific mRNAseq analysis (up to allele-specific differential expression) using the *allelic-mapping* mode. .. image:: ../images/RNAseq_pipeline.png @@ -23,7 +23,7 @@ The only requirement is a directory of gzipped fastq files. Files could be singl Configuration file ~~~~~~~~~~~~~~~~~~ -There is a configuration file in ``snakePipes/workflows/mRNA-seq/defaults.yaml``:: +There is a configuration file in ``snakePipes/workflows/mRNAseq/defaults.yaml``:: pipeline: rna-seq @@ -118,7 +118,7 @@ Apart from the common workflow options (see :ref:`running_snakePipes`), the foll * **salmonIndexOptions**: In the ``alignment-free`` mode (see below), this option allows you to change the type of index created by salmon. New users can leave it to default. -* **dnaContam**: Enable this to test for possible DNA contamination in your mRNA-seq samples. DNA contamination is quantified as the fraction of reads falling into intronic and intergenic regions, compared to those falling into exons. Enabling this option would produce a directory called ``GenomicContamination`` with ``.tsv`` files containing this information. +* **dnaContam**: Enable this to test for possible DNA contamination in your mRNAseq samples. DNA contamination is quantified as the fraction of reads falling into intronic and intergenic regions, compared to those falling into exons. Enabling this option would produce a directory called ``GenomicContamination`` with ``.tsv`` files containing this information. * **plotFormat**: You can switch the type of plot produced by all deeptools modules using this option. 
Possible choices : png, pdf, svg, eps, plotly @@ -177,7 +177,7 @@ sample6 Treatment Group2 Analysis modes -------------- -Following analysis (**modes**) are possible using the mRNA-seq workflow: +Following analysis (**modes**) are possible using the mRNAseq workflow: "alignment" ~~~~~~~~~~~ @@ -225,7 +225,7 @@ using the **deepTools_qc** mode. It's a very useful add-on with any of the other **threePrimeSeq** uses a pipeline developed by the Hilgers lab to annotate and count clusters of reads mapping to three prime ends of genes using -poly(T)VN-primed 3' sequencing kits such as Lexogen's 3' mRNA-seq kit. +poly(T)VN-primed 3' sequencing kits such as Lexogen's 3' mRNAseq kit. In this mode, **fastp** is used to pretrim with preset parameters, followed by **STAR** mapping. @@ -264,11 +264,11 @@ Assuming the pipline was run with ``--mode alignment-free,alignment,deepTools_qc ├── featureCounts ├── multiQC ├── QC_report - ├── mRNA-seq.cluster_config.yaml - ├── mRNA-seq.config.yaml - ├── mRNA-seq_organism.yaml - ├── mRNA-seq_pipeline.pdf - ├── mRNA-seq_run-1.log + ├── mRNAseq.cluster_config.yaml + ├── mRNAseq.config.yaml + ├── mRNAseq_organism.yaml + ├── mRNAseq_pipeline.pdf + ├── mRNAseq_run-1.log ├── Salmon ├── sleuth_Salmon_sampleSheet └── STAR @@ -285,7 +285,7 @@ Apart from the common module outputs (see :ref:`running_snakePipes`), the workfl * **bamCoverage**: (not produced in mode *alignment-free*) This would contain the bigWigs produced by deepTools `bamCoverage `__ . Files with suffix ``.coverage.bw`` are raw coverage files, while the files with suffix ``RPKM.bw`` are `RPKM-normalized `__ coverage files. -* **deepTools_QC**: (produced in the mode *deepTools_QC*) This contains the quality checks specific for mRNA-seq, performed via deepTools. The output folders are names after various deepTools functions and the outputs are explained under `deepTools documentation `__. 
In short, they show the insert size distribution(**bamPEFragmentSize**), mapping statistics (**estimateReadFiltering**), sample-to-sample correlations and PCA (**multiBigwigSummary, plotCorrelation, plotPCA**), and read enrichment on various genic features (**plotEnrichment**) +* **deepTools_QC**: (produced in the mode *deepTools_QC*) This contains the quality checks specific for mRNAseq, performed via deepTools. The output folders are names after various deepTools functions and the outputs are explained under `deepTools documentation `__. In short, they show the insert size distribution(**bamPEFragmentSize**), mapping statistics (**estimateReadFiltering**), sample-to-sample correlations and PCA (**multiBigwigSummary, plotCorrelation, plotPCA**), and read enrichment on various genic features (**plotEnrichment**) * **DESeq2_[sampleSheet]/DESeq2_Salmon_[sampleSheet]**: (produced in the modes *alignment* or *alignment-free*, only if a sample-sheet is provided.) The folder contains the HTML result report **DESeq2_report.html**, the annotated output file from DESeq2 (**DEseq_basic_DEresults.tsv**) and normalized counts for all samples, produced via DEseq2 (**DEseq_basic_counts_DESeq2.normalized.tsv**) as well as an Rdata file (**DEseq_basic_DESeq.Rdata**) with the R objects ``dds <- DESeq2::DESeq(dds)`` and ``ddr <- DDESeq2::results(dds,alpha = fdr)``. **DESeq2_[sampleSheet]** uses gene counts from ``featureCounts/counts.tsv``, whereas **DESeq2_Salmon_[sampleSheet]** uses transcript counts from ``Salmon/counts.tsv`` that are merged via tximport in R. Sample name to plotting shape mapping on the PCA plot is limited to 36 samples and skipped otherwise. @@ -299,6 +299,6 @@ Command line options .. 
argparse:: :func: parse_args - :filename: ../snakePipes/workflows/mRNA-seq/mRNA-seq - :prog: mRNA-seq + :filename: ../snakePipes/workflows/mRNAseq/mRNAseq.py + :prog: mRNAseq :nodefault: diff --git a/docs/content/workflows/makePairs.rst b/docs/content/workflows/makePairs.rst new file mode 100644 index 000000000..b1e6ccba4 --- /dev/null +++ b/docs/content/workflows/makePairs.rst @@ -0,0 +1,141 @@ +.. _makePairs: + +makePairs +========= + +What it does +------------ + +The snakePipes makePairs workflow allows users to process their HiC data from raw fastq files to HiC matrices in +an allele-specific manner. The workflow utilizes mapping by bwa, followed by analysis +using `pairtools `__ . The workflow follows the `example workflow described in the documentation of pairtools `__ , +which explains each step in detail and would be useful for new users to have a look at. +Currently the output matrices are produced in the `.pairs `__ format. + +.. image:: ../images/makePairs_pipeline.png + +Input requirements and outputs +------------------------------ + +This pipeline requires paired-end reads fastq files as input in order to build allele-specific contact matrices. +The input fastq files will be trimmed (with fastp) and be mapped against a diploid reference genome (with bwa). + +Prior to building the matrix, the pipeline generates two reference genomes (from a reference genome and a VCF file) that contain the information +on haplotypes. The haplotypes are set using the `--strains` flag. The two reference genomes are then merged to yield one reference genome +(genome/diploid_genome.fa) which is indexed with `bwa` as the basis for mapping of paired-end reads. (Notice that this is different from the mono-allelic HiC workflow +which maps reads individually in single-end mode and combines them into contact pairs afterwards.) 
+ +The output of the mapping step is used by `pairtools` to construct different contact matrices for each sample (in pairs format). + +Workflow configuration file +--------------------------- + +Default parameters from the provided config file can be altered by the user. Below is +the config file description for the makePairs workflow : + +.. parsed-literal:: + + pipeline: makePairs + outdir: + configFile: + clusterConfigFile: + local: False + maxJobs: 5 + ## directory with fastq files + indir: + ## preconfigured target genomes (mm9,mm10,dm3,...) , see /path/to/snakemake_workflows/shared/organisms/ + ## Value can be also path to your own genome config file! + genome: + ## FASTQ file extension (default: ".fastq.gz") + ext: '.fastq.gz' + ## paired-end read name extension (default: ["_R1", "_R2"]) + reads: ["_R1","_R2"] + ## assume paired end reads + pairedEnd: True + ## Number of reads to downsample from each FASTQ file + downsample: + ## Options for trimming + trim: True + trimmer: fastp + trimmerOptions: + + verbose: False + fastqc: True + UMIBarcode: False + bcPattern: "NNNNCCCCCCCCC" + UMIDedup: False + UMIDedupSep: "_" + UMIDedupOpts: "_" + plotFormat: png + bwBinSize: 1000 + aligner: 'bwa' + alignerOptions: '-SPu -T0' + alignerThreads: 30 + + fromBAM: False + sampleSheet: + + + ################################################################################ + # Call snakemake directly, i.e. without using the wrapper script: + # + # Please save a copy of this config yaml file and provide an adjusted config + # via '--configFile' parameter! 
+ # example call: + # + # snakemake --snakefile /path/to/snakemake_workflows/workflows/makePairs/Snakefile + # --configFile /path/to/snakemake_workflows/workflows/makePairs/defaults.yaml + # --directory /path/to/outputdir + # --VCFfile /path/to/vcf_file + # --strains strain1_name,strain2_name + # --cores 32 + ################################################################################ + + +Structure of output directory +----------------------------- + +In addition to the FASTQ module results (see :ref:`running_snakePipes`), the workflow produces the following outputs:: + + . + |-- bam + |-- FASTQ + |-- FastQC + |-- FastQC_trimmed + |-- FASTQ_fastp + |-- genome + |-- multiqc + |-- originalFASTQ + |-- pairs + |-- phase_stats + + +* **bam** folder contains the mapping results in BAM format. The files were obtained after running `bwa `__ in paired-end mode. + +* **originalFASTQ** includes softlinks to the original FASTQ data + +* **FASTQ** links to **originalFASTQ** if no further filters are specified + +* **FASTQ_fastp**: trimmed FASTQ files output by fastp + +* **FastQC** FASTQC report on FASTQ directory + +* **genome** folder contains the diploid_genome.fa.gz that was constructed from 2 strain-specific genomes with rule diploid_genome. Chromosome sizes and indices (bwa) can also be found in this directory + +* **multiqc** folder contains the final QC report generated with MultiQC (including fastqc, fastp, and pairtools modules) + +.. note:: For the pairtools modules to work we used `MultiQC from open2c `__ as specified for the makePairs environment + +* **pairs** folder contains the parsed, phased, sorted and deduplicated contact matrices generated by pairtools. + +* **phase_stats** contains the 4 subsetted pairs files for each sample (unphased pairs, 2 different strains, trans pairs). QC statistics are also calculated and will be processed by MultiQC + + +Command line options +-------------------- + +.. 
argparse:: + :func: parse_args + :filename: ../snakePipes/workflows/makePairs/makePairs.py + :prog: makePairs + :nodefault: diff --git a/docs/content/workflows/noncoding-RNA-seq.rst b/docs/content/workflows/ncRNAseq.rst similarity index 91% rename from docs/content/workflows/noncoding-RNA-seq.rst rename to docs/content/workflows/ncRNAseq.rst index 85dbe6604..4e215d1b7 100644 --- a/docs/content/workflows/noncoding-RNA-seq.rst +++ b/docs/content/workflows/ncRNAseq.rst @@ -1,12 +1,12 @@ -.. _noncoding-RNA-seq: +.. _ncRNAseq: -noncoding-RNA-seq +ncRNAseq ================= What it does ------------ -The snakePipes noncoding-RNA-seq workflow allows users to process their single or paired-end +The snakePipes ncRNAseq workflow allows users to process their single or paired-end ribosomal-depleted RNA-seq fastq files upto the point of gene/transcript/repeat-element counts and differential expression. Repeat elements are quantified and tested for differential expression at the name, family and class level. Since changes in repeat element expression tend to be unidirectional, size factors from gene expression are used when normalizing repeat element expression. @@ -19,15 +19,15 @@ Input requirements The only requirement is a directory of gzipped fastq files. Files could be single or paired end, and the read extensions could be modified using the keys in the ``defaults.yaml`` file below. -.. _noncodingRNAconfig: +.. 
_ncRNAconfig: Configuration file ~~~~~~~~~~~~~~~~~~ -There is a configuration file in ``snakePipes/workflows/noncoding-RNA-seq/defaults.yaml``:: +There is a configuration file in ``snakePipes/workflows/ncRNAseq/defaults.yaml``:: ## General/Snakemake parameters, only used/set by wrapper or in Snakemake cmdl, but not in Snakefile - pipeline: noncoding-rna-seq + pipeline: ncRNAseq outdir: configFile: clusterConfigFile: @@ -121,7 +121,7 @@ sample6 Treatment Group2 Analysis modes -------------- -Following analysis (**modes**) are possible using the noncoding-RNA-seq workflow: +Following analysis (**modes**) are possible using the ncRNAseq workflow: "alignment" ~~~~~~~~~~~ @@ -207,7 +207,7 @@ Apart from the common module outputs (see :ref:`running_snakePipes`), the workfl * **bamCoverage**: This would contain the bigWigs produced by deepTools `bamCoverage `__ . Files with suffix ``.coverage.bw`` are raw coverage files, while the files with suffix ``RPKM.bw`` are `RPKM-normalized `__ coverage files. -* **deepTools_QC**: (produced in the mode *deepTools_QC*) This contains the quality checks specific for mRNA-seq, performed via deepTools. The output folders are names after various deepTools functions and the outputs are explained under `deepTools documentation `__. In short, they show the insert size distribution(**bamPEFragmentSize**), mapping statistics (**estimateReadFiltering**), sample-to-sample correlations and PCA (**multiBigwigSummary, plotCorrelation, plotPCA**), and read enrichment on various genic features (**plotEnrichment**) +* **deepTools_QC**: (produced in the mode *deepTools_QC*) This contains the quality checks specific for mRNAseq, performed via deepTools. The output folders are names after various deepTools functions and the outputs are explained under `deepTools documentation `__. 
In short, they show the insert size distribution(**bamPEFragmentSize**), mapping statistics (**estimateReadFiltering**), sample-to-sample correlations and PCA (**multiBigwigSummary, plotCorrelation, plotPCA**), and read enrichment on various genic features (**plotEnrichment**) * **DESeq2_[sampleSheet]**: (produced only if a sample-sheet is provided.) The folder contains the HTML result reports **DESeq2_report_genes.html**, **DESeq2_report_repeat_name.html**, **DESeq2_report_repeat_class.html** and **DESeq2_report_repeat_family.html** as we as the annotated output file from DESeq2 (**genes_DEresults.tsv**, etc.) and normalized counts for all samples, produced via DEseq2 (**genes_counts_DESeq2.normalized.tsv**, etc.) as well as an Rdata file (**genes_DESeq.Rdata**, etc.) with the R objects ``dds <- DESeq2::DESeq(dds)`` and ``ddr <- DDESeq2::results(dds,alpha = fdr)``. Sample name to plotting shape mapping on the PCA plot is limited to 36 samples and skipped otherwise. @@ -225,6 +225,6 @@ Command line options .. argparse:: :func: parse_args - :filename: ../snakePipes/workflows/noncoding-RNA-seq/noncoding-RNA-seq - :prog: noncoding-RNA-seq + :filename: ../snakePipes/workflows/ncRNAseq/ncRNAseq.py + :prog: ncRNAseq :nodefault: diff --git a/docs/content/workflows/preprocessing.rst b/docs/content/workflows/preprocessing.rst index a35bbd37d..ed63481b4 100644 --- a/docs/content/workflows/preprocessing.rst +++ b/docs/content/workflows/preprocessing.rst @@ -65,6 +65,6 @@ Command line options .. 
argparse:: :func: parse_args - :filename: ../snakePipes/workflows/preprocessing/preprocessing + :filename: ../snakePipes/workflows/preprocessing/preprocessing.py :prog: Preprocessing :nodefault: diff --git a/docs/content/workflows/scRNA-seq.rst b/docs/content/workflows/scRNAseq.rst similarity index 96% rename from docs/content/workflows/scRNA-seq.rst rename to docs/content/workflows/scRNAseq.rst index 05acfff59..1c8aac538 100644 --- a/docs/content/workflows/scRNA-seq.rst +++ b/docs/content/workflows/scRNAseq.rst @@ -1,12 +1,12 @@ -.. _scRNA-seq: +.. _scRNAseq: -scRNA-seq +scRNAseq ========= What it does ------------ -The scRNA-seq pipeline is intended to process UMI-based data, expecting the cell barcode and umi in Read1, and the cDNA sequence in Read2. The workflow has predefined settings for CelSeq2 and 10x data, but can be extended to custom protocols. +The scRNAseq pipeline is intended to process UMI-based data, expecting the cell barcode and umi in Read1, and the cDNA sequence in Read2. The workflow has predefined settings for CelSeq2 and 10x data, but can be extended to custom protocols. There are currently two analysis modes available: - "STARsolo" which uses STAR solo for mapping and quantitation. @@ -67,7 +67,7 @@ Configuration file The default configuration file is listed below and can be found in ``snakePipes/workflows/scRNAseq/defaults.yaml``:: - pipeline: scrna-seq + pipeline: scrnaseq outdir: configFile: clusterConfigFile: @@ -229,6 +229,6 @@ Command line options .. 
argparse:: :func: parse_args - :filename: ../snakePipes/workflows/scRNAseq/scRNAseq + :filename: ../snakePipes/workflows/scRNAseq/scRNAseq.py :prog: scRNAseq :nodefault: diff --git a/docs/index.rst b/docs/index.rst index 9f34ecff5..32d6df5af 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,13 +16,14 @@ Below is the list of pipelines available in snakePipes Pipeline Description =============================== =============================================================================================================== :ref:`createIndices` Create indices for an organism for further use within snakePipes -:ref:`DNA-mapping` Basic DNA mapping using bowtie2, filter mapped files, QC and create coverage plots -:ref:`ChIP-Seq` Use the DNA mapping output and run ChIP/Input normalization and peak calling -:ref:`ATAC-seq` Use the DNA mapping output and detect open chromatin regions for ATAC-seq data -:ref:`HiC` Hi-C analysis workflow, from mapping to TAD calling -:ref:`noncoding-RNA-Seq` noncoding-RNA-Seq workflow : From mapping to differential expression of genes and repeat elements using DESeq2 -:ref:`mRNA-Seq` RNA-Seq workflow : From mapping to differential expression using DESeq2 -:ref:`scRNA-Seq` Single-cell RNA-Seq (CEL-Seq2) workflow : From mapping to differential expression +:ref:`DNAmapping` Basic DNA mapping using bowtie2, filter mapped files, QC and create coverage plots +:ref:`ChIPSeq` Use the DNA mapping output and run ChIP/Input normalization and peak calling +:ref:`ATACseq` Use the DNA mapping output and detect open chromatin regions for ATACseq data +:ref:`HiC` HiC analysis workflow, from mapping to TAD calling +:ref:`makePairs` pairtools workflow, from allele-specific mapping to HiC matrices +:ref:`ncRNAseq` ncRNAseq workflow : From mapping to differential expression of genes and repeat elements using DESeq2 +:ref:`mRNASeq` RNASeq workflow : From mapping to differential expression using DESeq2 +:ref:`scRNASeq` Single-cell RNA-Seq (CEL-Seq2) workflow : 
From mapping to differential expression :ref:`WGBS` Whole-genome Bisulfite-Seq analysis workflow, from mapping to DMR calling and differential methylation analysis :ref:`preprocessing` Merging technical replicates (e.g., across lanes), removing optical duplicates, running FastQC =============================== =============================================================================================================== @@ -34,15 +35,13 @@ Quick start .. code:: bash - conda install mamba -c conda-forge && mamba create -n snakePipes -c mpi-ie -c conda-forge -c bioconda snakePipes + conda create -n snakePipes -c mpi-ie -c conda-forge -c bioconda snakePipes * You can update snakePipes to the latest version available on conda with: .. code:: bash - mamba update -n snakePipes -c mpi-ie -c conda-forge -c bioconda --prune snakePipes - -snakePipes is going to move to mamba in the future. + conda update -n snakePipes -c mpi-ie -c conda-forge -c bioconda --prune snakePipes * Download genome fasta and annotations for an your organism, and build indexes, Check in :ref:`createIndices` @@ -52,11 +51,11 @@ snakePipes is going to move to mamba in the future. snakePipes config --help -.. note:: If you have a copy of a `shared/defaults.yaml` with the necessary paths configured (i.e. from a previous installation), you can pass it to snakePipes config with `--oldConfig` and `--configMode recycle` instead of providing all the paths manually again. Config keys have to match for this to work. In the same way, you can pass your external organism yaml folder with ``--organismsDir`` or cluster config with ``--clusterConfig``. +.. note:: If you have a copy of a `shared/defaults.yaml` with the necessary paths configured (i.e. from a previous installation), you can pass it to snakePipes config with `--oldConfig` and `--configMode recycle` instead of providing all the paths manually again. Config keys have to match for this to work. 
In the same way, you can pass your external organism yaml folder with ``--organismsDir``. * Download example fastq files for the human genome `here `_ -* Execute the DNA-mapping pipeline using the example **command.sh** in the test data directory. +* Execute the DNAmapping pipeline using the example **command.sh** in the test data directory. Running your own analysis @@ -112,14 +111,15 @@ Contents: content/running_snakePipes.rst content/advanced_usage.rst content/workflows/createIndices.rst - content/workflows/DNA-mapping.rst - content/workflows/ChIP-seq.rst - content/workflows/ATAC-seq.rst + content/workflows/DNAmapping.rst + content/workflows/ChIPseq.rst + content/workflows/ATACseq.rst content/workflows/HiC.rst + content/workflows/makePairs.rst content/workflows/preprocessing.rst - content/workflows/mRNA-seq.rst - content/workflows/noncoding-RNA-seq.rst - content/workflows/scRNA-seq.rst + content/workflows/mRNAseq.rst + content/workflows/ncRNAseq.rst + content/workflows/scRNAseq.rst content/workflows/WGBS.rst content/News.rst diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index baef793a0..000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -sphinx==7.1.2 -sphinx-rtd-theme==2.0.0 -readthedocs-sphinx-search==0.3.2 -sphinx-argparse==0.4.0 -mock==5.1.0 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..168ec77d0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,89 @@ +[build-system] +requires = ["setuptools >= 61.0", "setuptools_scm>=8"] +build-backend = "setuptools.build_meta" + +[project] +name = "snakePipes" +description = 'Snakemake workflows and wrappers for NGS data processing from the MPI-IE' +readme = "README.md" +version = "3.0.0" +keywords = [ + "DNAmapping", + "ChIPSeq", + "mRNAseq", + "ncRNAseq", + "ATACseq", + "scRNAseq", + "WGBS / Bisulfite seq", + "epigenetics workflows", + "Illumina" +] + +authors = [ + {name = "Bioinfo-Core MPI-IE"} +] + +classifiers = [ + "Intended 
Audience :: Bioinformaticians", + "Intended Audience :: Biologists", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", +] + +requires-python = ">= 3.11" + +dependencies = [ + "snakemake >= 8", + "pandas", + "thefuzz", + "pyyaml >= 5.1", + "snakemake-executor-plugin-cluster-generic >= 1.0.9", + "graphviz" +] + +[project.optional-dependencies] +actions = [ + "pytest", + "ruff", + "ruamel.yaml", + "pytest-xdist" +] + +docs = [ + "sphinx >= 7.1.2", + "sphinx-rtd-theme >= 2.0.0", + "readthedocs-sphinx-search >= 0.3.2", + "sphinx-argparse >= 0.4.0", + "mock==5.1.0" +] +build = [ + "build" +] + +[tool.setuptools.package-data] +snakePipes = ["**/*.yaml", "**/*.R", "**/*.Rmd", "**/*snakefile", "**/*Snakefile", "**/*sh"] + +[project.scripts] +ATACseq = "snakePipes.workflows.ATACseq.ATACseq:main" +ChIPseq = "snakePipes.workflows.ChIPseq.ChIPseq:main" +createIndices = "snakePipes.workflows.createIndices.createIndices:main" +DNAmapping = "snakePipes.workflows.DNAmapping.DNAmapping:main" +HiC = "snakePipes.workflows.HiC.HiC:main" +makePairs = "snakePipes.workflows.makePairs.makePairs:main" +mRNAseq = "snakePipes.workflows.mRNAseq.mRNAseq:main" +ncRNAseq = "snakePipes.workflows.ncRNAseq.ncRNAseq:main" +preprocessing = "snakePipes.workflows.preprocessing.preprocessing:main" +scRNAseq = "snakePipes.workflows.scRNAseq.scRNAseq:main" +WGBS = "snakePipes.workflows.WGBS.WGBS:main" +snakePipes = "snakePipes.snakePipes:main" + +[tool.ruff] +exclude = [ + "build" +] +lint.select = ["E", "F", "W", "B"] +lint.ignore = [ + "E722", # bare excepts + "E501", # line length + "B006", # Do not use mutable data structures for argument defaults +] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 2df31a410..000000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -snakemake >= 6.2.1, <7.19.0 -psutil -pandas -thefuzz -pyyaml >= 5.1 -tabulate < 0.9 diff --git a/setup.py b/setup.py deleted file mode 100755 index 9b83b8527..000000000 
--- a/setup.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python -from setuptools import setup, find_packages -from glob import glob -import os.path - -# Set __version__ -exec(open('snakePipes/__init__.py').read()) - -scripts = ['bin/snakePipes'] -for d in glob('snakePipes/workflows/*'): - scripts.append(os.path.join(d, os.path.split(d)[1])) - -requires = open("requirements.txt").read().strip().split("\n") - -setup( - name='snakePipes', - version=__version__, # noqa: F821 - scripts=scripts, - packages=find_packages(), - include_package_data=True, - python_requires='>=3.7', - install_requires=requires, - url='https://github.com/maxplanck-ie/snakepipes', - license='MIT', - description='Snakemake workflows and wrappers for NGS data processing from the MPI-IE', - zip_safe=False, - data_files=[("", ["LICENSE"])] -) diff --git a/snakePipes/__init__.py b/snakePipes/__init__.py index 387cfaccc..e69de29bb 100755 --- a/snakePipes/__init__.py +++ b/snakePipes/__init__.py @@ -1 +0,0 @@ -__version__ = '2.9.0' diff --git a/snakePipes/common_functions.py b/snakePipes/common_functions.py index 37e9ae4d8..1bae3bfb6 100644 --- a/snakePipes/common_functions.py +++ b/snakePipes/common_functions.py @@ -13,8 +13,7 @@ from thefuzz import fuzz import smtplib from email.message import EmailMessage -from snakePipes import __version__ - +from importlib.metadata import version def set_env_yamls(): """ @@ -35,6 +34,7 @@ def set_env_yamls(): 'CONDA_CHIPSEQ_ENV': 'envs/chip_seq.yaml', 'CONDA_ATAC_ENV': 'envs/atac_seq.yaml', 'CONDA_HIC_ENV': 'envs/hic.yaml', + 'CONDA_MAKEPAIRS_ENV': 'envs/makePairs.yaml', 'CONDA_WGBS_ENV': 'envs/wgbs.yaml', 'CONDA_DSS_ENV': 'envs/wgbs_dss.yaml', 'CONDA_RMD_ENV': 'envs/rmarkdown.yaml', @@ -117,7 +117,7 @@ def config_diff(dict1, dict2): def get_version(): # If this is sent to stdout it breaks making a DAG pdf - sys.stderr.write("\n---- This analysis has been done using snakePipes version {} ----\n".format(__version__)) + sys.stderr.write("\n---- This analysis 
has been done using snakePipes version {} ----\n".format(version("snakePipes"))) def load_organism_data(genome, maindir, verbose): @@ -471,24 +471,6 @@ def checkAlleleParams(args): return allele_mode -def cleanLogs(d, cluster_config): - """ - Remove all empty log files, both in cluster_logs/ and */logs/ - """ - if "snakePipes_cluster_logDir" in cluster_config: - path = os.path.join(d, cluster_config["snakePipes_cluster_logDir"], "*") - if re.search("^/", cluster_config["snakePipes_cluster_logDir"]): - path = os.path.join(cluster_config["snakePipes_cluster_logDir"], "*") - for f in glob.glob(path): - s = os.stat(f) - if s.st_size == 0: - os.remove(f) - for f in glob.glob(os.path.join(d, "*", "logs", "*")): - s = os.stat(f) - if s.st_size == 0: - os.remove(f) - - def check_sample_info_header(sampleSheet_file): """ return True in case sample info file contains column names 'name' and 'condition' @@ -510,7 +492,7 @@ def setDefaults(fileName): """ # Script-neutral paths baseDir = os.path.dirname(__file__) - workflowDir = os.path.join(baseDir, "workflows", fileName) + workflowDir = os.path.join(baseDir, "workflows", fileName.replace('.py', '')) # defaults defaults = load_configfile(os.path.join(workflowDir, "defaults.yaml"), False) @@ -613,12 +595,31 @@ def checkCommonArguments(args, baseDir, outDir=False, createIndices=False, prepr sys.exit("Sorry, there is no email sender specified in defaults.yaml. 
Please specify one with --emailSender") +def resolveSnakemakeProfile(profName, baseDir): + # Resolve profName to an existing profile directory: an absolute path is used as is, a relative path is first tried under baseDir and then under snakemake's site-wide (/etc/xdg/snakemake) and per-user (~/.config/snakemake) locations; exits if none of them exists + if Path(profName).is_absolute(): + # Absolute path to a profile + assert Path(profName).is_dir() + return(Path(profName)) + elif (Path(baseDir) / profName).resolve().is_dir(): + # Profile is shipped within the repo + return((Path(baseDir) / profName).resolve()) + else: + # relative path + not in repodir, assume it's under snakemake default locations: + _l = (Path('/etc', 'xdg', 'snakemake') / profName) + if _l.is_dir(): + return(_l) + _l = (Path('~/.config/snakemake') / profName).expanduser() + if _l.is_dir(): + return(_l) + sys.exit(f"No directory found for snakemake profile {profName}") + def commonYAMLandLogs(baseDir, workflowDir, defaults, args, callingScript): """ Merge dictionaries, write YAML files, construct the snakemake command and create the DAG """ - workflowName = os.path.basename(callingScript) + workflowName = os.path.basename(callingScript).replace('.py', '') os.makedirs(args.outdir, exist_ok=True) if isinstance(args.snakemakeOptions, list): @@ -631,26 +632,9 @@ def commonYAMLandLogs(baseDir, workflowDir, defaults, args, callingScript): # merge cluster config files: 1) global one, 2) workflow specific one, 3) user provided one cfg = load_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), False, "defaults") - if os.path.isfile(os.path.join(baseDir, cfg['clusterConfig'])): - cluster_config = load_configfile(os.path.join(baseDir, cfg['clusterConfig']), False) - else: - cluster_config = load_configfile(os.path.join(cfg['clusterConfig']), False) - cluster_config = merge_dicts(cluster_config, load_configfile(os.path.join(workflowDir, "cluster.yaml"), False), ) - - if args.clusterConfigFile: - user_cluster_config = load_configfile(args.clusterConfigFile, False) - cluster_config = merge_dicts(cluster_config, user_cluster_config) # merge/override variables from user_config.yaml - # Ensure the 
cluster log directory exists - if re.search("\\{snakePipes_cluster_logDir\\}", cluster_config["snakemake_cluster_cmd"]): - if "snakePipes_cluster_logDir" in cluster_config: - if re.search("^/", cluster_config["snakePipes_cluster_logDir"]): - os.makedirs(cluster_config["snakePipes_cluster_logDir"], exist_ok=True) - else: - os.makedirs(os.path.join(args.outdir, cluster_config["snakePipes_cluster_logDir"]), exist_ok=True) - cluster_config["snakemake_cluster_cmd"] = re.sub("\\{snakePipes_cluster_logDir\\}", cluster_config["snakePipes_cluster_logDir"], cluster_config["snakemake_cluster_cmd"]) - else: - sys.exit("\nPlease provide a key 'snakePipes_cluster_logDir' and value in the cluster configuration file!\n") - write_configfile(os.path.join(args.outdir, '{}.cluster_config.yaml'.format(workflowName)), cluster_config) + + # Properly resolve snakemakeprofile + cfg['snakemakeProfile'] = resolveSnakemakeProfile(cfg['snakemakeProfile'], baseDir) # Save the organism YAML file as {PIPELINE}_organism.yaml if workflowName != "preprocessing": @@ -667,47 +651,93 @@ def commonYAMLandLogs(baseDir, workflowDir, defaults, args, callingScript): if args.keepTemp: args.snakemakeOptions += " --notemp" - snakemake_cmd = """ - TMPDIR={tempDir} - UTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX); - XDG_CACHE_HOME=$UTEMP TMPDIR={tempDir} PYTHONNOUSERSITE=True snakemake {snakemakeOptions} --latency-wait {latency_wait} --snakefile {snakefile} --jobs {maxJobs} --directory {workingdir} --configfile {configFile} --keep-going --use-conda --conda-prefix {condaEnvDir} - """.format(latency_wait=cluster_config["snakemake_latency_wait"], - snakefile=os.path.join(workflowDir, "Snakefile"), - maxJobs=args.maxJobs, - workingdir=args.workingdir, - snakemakeOptions=str(args.snakemakeOptions or ''), - tempDir=cfg["tempDir"], - configFile=os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), - condaEnvDir=cfg["condaEnvDir"]).split() + snakemake_cmd = f"TMPDIR={cfg['tempDir']}; \ + 
UTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX); \ + PYTHONNOUSERSITE=True snakemake \ + {str(args.snakemakeOptions or '')} \ + --snakefile {Path(workflowDir) / 'Snakefile'} \ + --directory {args.workingdir} \ + --configfile {os.path.join(args.outdir, '{}.config.yaml'.format(workflowName))} \ + --profile {cfg['snakemakeProfile']}".split(' ') if args.verbose: snakemake_cmd.append("--printshellcmds") - if not args.local: - snakemake_cmd += ["--cluster-config", - os.path.join(args.outdir, '{}.cluster_config.yaml'.format(workflowName)), - "--cluster", "'" + cluster_config["snakemake_cluster_cmd"], "'"] return " ".join(snakemake_cmd) +def plot_DAG(args, snakemake_cmd, calling_script, defaults): + + if not args.createDAG: + return + + workflow_name = os.path.splitext(os.path.basename(calling_script))[0] + + # dryrun snakemake quietly: only generate the DAG + dag_cmd = f"{snakemake_cmd} --rulegraph --dryrun --quiet --config verbose=False" + + DAGproc = subprocess.Popen( + dag_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + shell=True) + + # Read DOT data from stdout; communicate() also drains the piped stderr so the child cannot block on a full pipe + dot, _ = DAGproc.communicate() + + # Use graphviz to render DAG, if it is available + # conda graphviz doesn't provide the python bindings, the pip graphviz does, but has no executable. + # If graphviz is not available, write out the ASCII as file. 
+ output_file = os.path.join(args.outdir, f"{workflow_name}_pipeline") + try: + import graphviz + if shutil.which('dot'): + graph = graphviz.Source(dot) + graph.render(output_file, format='png') + return + else: + with open(output_file + 'DAG.txt', 'w') as f: + f.write(dot) + return + except ModuleNotFoundError: + with open(output_file + 'DAG.txt', 'w') as f: + f.write(dot) + return + + def print_DAG(args, snakemake_cmd, callingScript, defaults): if args.createDAG: config = defaults config.update(vars(args)) - workflowName = os.path.basename(callingScript) + workflowName = os.path.basename(callingScript).replace('.py', '') oldVerbose = config['verbose'] config['verbose'] = False - write_configfile(os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), config) - DAGproc = subprocess.Popen(snakemake_cmd + " --rulegraph ", stdout=subprocess.PIPE, shell=True) - subprocess.check_call("dot -Tpdf -o{}/{}_pipeline.pdf".format(args.outdir, workflowName), stdin=DAGproc.stdout, shell=True) + write_configfile( + os.path.join(args.outdir, + '{}.config.yaml'.format(workflowName)), config) + + DAGproc = subprocess.Popen( + snakemake_cmd + " --rulegraph -q ", + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + shell=True) + + subprocess.check_call( + "dot -Tpdf -o{}/{}_pipeline.pdf".format(args.outdir, workflowName), + stdin=DAGproc.stdout, shell=True) config['verbose'] = oldVerbose - write_configfile(os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), config) + write_configfile( + os.path.join(args.outdir, '{}.config.yaml'.format(workflowName)), + config) def logAndExport(args, workflowName): """ Set up logging """ + workflowName = workflowName.replace('.py', '') # Write snakemake_cmd to log file fnames = glob.glob(os.path.join(args.outdir, '{}_run-[0-9]*.log'.format(workflowName))) if len(fnames) == 0: @@ -727,48 +757,38 @@ def runAndCleanup(args, cmd, logfile_name): Also clean up when finished. 
""" if args.verbose: - print("\n{}\n".format(cmd)) + print("\n{}\n".format(cmd)) # write log file + f = open(os.path.join(args.outdir, logfile_name), "w") f.write(" ".join(sys.argv) + "\n\n") f.write(cmd + "\n\n") # Run snakemake, stderr -> stdout is needed so readline() doesn't block - p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - if args.verbose: - print("PID:", p.pid, "\n") - - while p.poll() is None: - stdout = p.stdout.readline(1024) - if stdout: - sys.stdout.write(stdout.decode('utf-8')) - f.write(stdout.decode('utf-8')) - sys.stdout.flush() - f.flush() - # This avoids the race condition of p.poll() exiting before we get all the output - stdout = p.stdout.read() - if stdout: - sys.stdout.write(stdout.decode('utf-8')) - f.write(stdout.decode('utf-8')) - f.close() + p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) + for _l in p.stdout: + sys.stdout.write(_l.strip() + '\n') + f.write(_l.strip() + '\n') + p.wait() # Exit with an error if snakemake encountered an error if p.returncode != 0: - sys.stderr.write("Error: snakemake returned an error code of {}, so processing is incomplete!\n".format(p.returncode)) - if args.emailAddress: - sendEmail(args, p.returncode) - sys.exit(p.returncode) + if args.emailAddress: + sendEmail(args, p.returncode) + f.close() + sys.exit(p.returncode) else: - Path( - os.path.join(args.outdir, "{}_snakePipes.done".format(logfile_name.split('_')[0])) - ).touch() - if os.path.exists(os.path.join(args.outdir, ".snakemake")): - shutil.rmtree(os.path.join(args.outdir, ".snakemake"), ignore_errors=True) + Path( + os.path.join(args.outdir, "{}_snakePipes.done".format(logfile_name.split('_')[0])) + ).touch() + if os.path.exists(os.path.join(args.outdir, ".snakemake")): + shutil.rmtree(os.path.join(args.outdir, ".snakemake"), ignore_errors=True) + f.close() # Send email if desired if args.emailAddress: - sendEmail(args, 0) + sendEmail(args, 0) def 
predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None): @@ -793,7 +813,7 @@ def predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None): chip_dict_pred = {} chip_dict_pred["chip_dict"] = {} print("---------------------------------------------------------------------------------------") - print("Predict Chip-seq sample configuration") + print("Predict ChIPseq sample configuration") print("---------------------------------------------------------------------------------------") print("\nSearch for Input/control samples...") @@ -841,7 +861,7 @@ def predict_chip_dict(wdir, input_pattern_str, bamExt, fromBAM=None): outfile = os.path.join(wdir, "chip_seq_sample_config.PREDICTED.yaml") write_configfile(outfile, chip_dict_pred) print("---------------------------------------------------------------------------------------") - print("Chip-seq sample configuration is written to file ", outfile) + print("ChIPseq sample configuration is written to file ", outfile) print("Please check and modify this file - this is just a guess! 
Then run the workflow with it.") print("---------------------------------------------------------------------------------------") diff --git a/snakePipes/parserCommon.py b/snakePipes/parserCommon.py index cf0280d43..3508c8ce0 100644 --- a/snakePipes/parserCommon.py +++ b/snakePipes/parserCommon.py @@ -1,7 +1,7 @@ import argparse import os.path import glob -from snakePipes import __version__ +from importlib.metadata import version def ListGenomes(): @@ -43,7 +43,7 @@ def mainArguments(defaults, workingDir=False, createIndices=False, preprocessing if workingDir: required.add_argument("-d", "--working-dir", dest="workingdir", - help="working directory is output directory and must contain DNA-mapping pipeline output files", + help="working directory is output directory and must contain DNAmapping pipeline output files", required=True) else: if not createIndices: @@ -82,23 +82,6 @@ def mainArguments(defaults, workingDir=False, createIndices=False, preprocessing help="configuration file: config.yaml (default: '%(default)s')", default=defaults["configFile"]) - general.add_argument("--clusterConfigFile", - help="configuration file for cluster usage. In absence, the default options " - "specified in defaults.yaml and workflows/[workflow]/cluster.yaml would be selected (default: '%(default)s')", - default=defaults["clusterConfigFile"]) - - general.add_argument("-j", "--jobs", - dest="maxJobs", - metavar="INT", - help="maximum number of concurrently submitted Slurm jobs / cores if workflow is run locally (default: '%(default)s')", - type=int, default=defaults["maxJobs"]) - - general.add_argument("--local", - dest="local", - action="store_true", - default=False, - help="run workflow locally; default: jobs are submitted to Slurm queue (default: '%(default)s')") - general.add_argument("--keepTemp", action="store_true", help="Prevent snakemake from removing files marked as being temporary (typically intermediate files that are rarely needed by end users). 
This is mostly useful for debugging problems.") @@ -115,7 +98,7 @@ def mainArguments(defaults, workingDir=False, createIndices=False, preprocessing general.add_argument("--version", action="version", - version="%(prog)s {}".format(__version__)) + version="%(prog)s {}".format(version("snakePipes"))) emailArgs = parser.add_argument_group('Email Arguments') emailArgs.add_argument("--emailAddress", @@ -174,7 +157,7 @@ def snpArguments(defaults): return parser -# DNA-mapping options added +# DNAmapping options added def commonOptions(grp, defaults, bw=True, plots=True, preprocessing=False): """ Common options found in many workflows diff --git a/snakePipes/shared/cluster.yaml b/snakePipes/shared/cluster.yaml deleted file mode 100755 index 583807968..000000000 --- a/snakePipes/shared/cluster.yaml +++ /dev/null @@ -1,47 +0,0 @@ -snakemake_latency_wait: 300 -snakemake_cluster_cmd: module load slurm; sbatch --ntasks-per-node 1 -p bioinfo --mem-per-cpu {cluster.memory} -c {threads} -e {snakePipes_cluster_logDir}/{rule}.%j.err -o {snakePipes_cluster_logDir}/{rule}.%j.out -J {rule}.snakemake -snakePipes_cluster_logDir: cluster_logs -__default__: - memory: 1G -bamCoverage: - memory: 5G -bamCoverage_unique_mappings: - memory: 5G -bamCoverage_RPKM: - memory: 5G -bamCoverage_coverage: - memory: 5G -bamCoverage_raw: - memory: 5G -create_snpgenome: - memory: 30G -FASTQdownsample: - memory: 4G -filter_reads_umi: - memory: 10G -plotCorrelation_pearson: - memory: 3G -plotCorrelation_pearson_allelic: - memory: 5G -plotCorrelation_spearman: - memory: 3G -plotCorrelation_spearman_allelic: - memory: 2G -plotCoverage: - memory: 1G -plotEnrichment: - memory: 1G -plotFingerprint: - memory: 1G -plotPCA: - memory: 4G -plotPCA_allelic: - memory: 4G -plot_heatmap_CSAW_up: - memory: 10G -snp_split: - memory: 25G -sambamba_markdup: - memory: 2G -BAMsort_allelic: - memory: 3G diff --git a/snakePipes/shared/defaults.yaml b/snakePipes/shared/defaults.yaml index 6fc24de4d..e6970966d 100755 --- 
a/snakePipes/shared/defaults.yaml +++ b/snakePipes/shared/defaults.yaml @@ -1,17 +1,12 @@ ################################################################################ # This file should hold values that should be present in ALL workflows -# -# Note that tempDir MUST end with a "/"! -# # Note that due to limitations in yaml.dump, only very basic structures are # permitted here. ################################################################################ -# -condaEnvDir: '/package/mamba/envs/snakepipesenvs/2.9.0' snakemakeOptions: '' organismsDir: 'shared/organisms' -clusterConfig: 'shared/cluster.yaml' -tempDir: /data/extended/ +snakemakeProfile: 'shared/profiles/local' +tempDir: /scratch/local # The following are only needed if you use the --emailAddress option smtpServer: smtpPort: 0 @@ -24,4 +19,4 @@ max_thread: 25 #print tools versions toolsVersion: True oldConfig: -configMode: manual +configMode: manual \ No newline at end of file diff --git a/snakePipes/shared/profiles/local/config.yaml b/snakePipes/shared/profiles/local/config.yaml new file mode 100644 index 000000000..196c513b2 --- /dev/null +++ b/snakePipes/shared/profiles/local/config.yaml @@ -0,0 +1,208 @@ +cores: 2 +use-conda: True +conda-prefix: /tmp +conda-frontend: conda +printshellcmds: False +default-resources: + mem: 1G + time: 300 + partition: bioinfo +set-resources: + annotation_bed2fasta: + mem: 4G + AlevinForVelocity: + mem: 10G + ATAC_qc: + mem: 2G + bamCoverage: + mem: 5G + bamCoverage_coverage: + mem: 5G + bamCoverage_filtered: + mem: 5G + bamCoverage_raw: + mem: 5G + bamCoverage_RPKM: + mem: 5G + bamCoverage_unique_mappings: + mem: 5G + bamPE_fragment_size: + mem: 10G + BAMsort_allelic: + mem: 3G + Bowtie2: + mem: 4G + bowtie2Index: + mem: 10G + bwa: + mem: 4G + bwaIndex: + mem: 8G + bwamem2: + mem: 6G + bwamem2Index: + mem: 90G + bwameth: + mem: 3G + bwamethIndex: + mem: 20G + bwameth2Index: + mem: 120G + build_matrix: + mem: 9G + call_tads: + mem: 3G + cellsort_bam: + mem: 10G + 
clumpify: + mem: 2G + CollectAlignmentSummaryMetrics: + mem: 2G + CollectInsertSizeMetrics: + mem: 1G + correct_matrix: + mem: 7G + CpG_report: + mem: 6G + CpG_stats: + mem: 30G + create_annotation_bed: + mem: 4G + createGenomeFasta: + mem: 20G + CSAW: + mem: 30G + create_snpgenome: + mem: 30G + DepthOfCov: + mem: 3G + DepthOfCovGenome: + mem: 3G + DESeq2: + mem: 5G + DESeq2_Salmon_basic: + mem: 3G + DESeq2_Salmon_allelic: + mem: 3G + diagnostic_plot: + mem: 2G + dmrseq: + mem: 10G + downloadGTF: + mem: 20G + DSS: + mem: 10G + fastaDict: + mem: 4G + FASTQdownsample: + mem: 4G + filterBW: + mem: 6G + filter_reads: + mem: 3G + filter_reads_umi: + mem: 10G + filterCoveragePerScaffolds: + mem: 6G + Genrich_peaks: + mem: 20G + HISAT2: + mem: 6G + hisat2Index: + mem: 2G + histoneHMM: + mem: 4G + MACS2: + mem: 8G + make2bit: + mem: 8G + map_fastq_single_end: + mem: 10G + markDupes: + mem: 3G + merge_bins: + mem: 7G + merge_matrices: + mem: 3G + metileneReport: + mem: 6G + namesort_bams: + mem: 6G + plot_heatmap_cov_CSAW: + mem: 5G + plotCorrelation_pearson: + mem: 3G + plotCorrelation_pearson_allelic: + mem: 5G + plotCorrelation_spearman: + mem: 3G + plotCorrelation_spearman_allelic: + mem: 2G + plotCoverage: + mem: 1G + plotEnrichment: + mem: 1G + plotFingerprint: + mem: 1G + plotPCA: + mem: 4G + plotPCA_allelic: + mem: 4G + plot_heatmap_CSAW_up: + mem: 10G + prepForMetilene: + mem: 3G + produceReport: + mem: 20G + run_eisaR: + mem: 30G + Salmon_counts: + mem: 5G + Salmon_index_joint_fa: + mem: 6G + SalmonAlevin: + mem: 10G + SalmonIndex: + mem: 3G + SalmonQuant: + mem: 2G + Salmon_TPM: + mem: 5G + sambamba_flagstat: + mem: 3G + sambamba_flagstat_sorted: + mem: 3G + sambamba_markdup: + mem: 4G + sambamaba_sort_hic_r1: + mem: 4G + sambamaba_sort_hic_r2: + mem: 4G + SEACR_peaks_lenient: + mem: 20G + SEACR_peaks_stringent: + mem: 10G + sleuth_Salmon: + mem: 4G + sleuth_SalmonAllelic: + mem: 10G + snp_split: + mem: 25G + STAR: + mem: 6G + STAR_allele: + mem: 30G + 
star_index: + mem: 15G + starIndex: + mem: 8G + STARsolo: + mem: 6G + STARsolo_raw_to_seurat: + mem: 10G + TEcounts: + mem: 16G + velo_to_sce: + mem: 30G + velocyto: + mem: 20G \ No newline at end of file diff --git a/snakePipes/shared/profiles/snakepipes_genericprofile/ccancel.sh b/snakePipes/shared/profiles/snakepipes_genericprofile/ccancel.sh new file mode 100755 index 000000000..5801f83a4 --- /dev/null +++ b/snakePipes/shared/profiles/snakepipes_genericprofile/ccancel.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -eu +module load slurm +scancel "$@" diff --git a/snakePipes/shared/profiles/snakepipes_genericprofile/config.yaml b/snakePipes/shared/profiles/snakepipes_genericprofile/config.yaml new file mode 100644 index 000000000..c34eb64cd --- /dev/null +++ b/snakePipes/shared/profiles/snakepipes_genericprofile/config.yaml @@ -0,0 +1,220 @@ +executor: cluster-generic +cluster-generic-submit-cmd: + "mkdir -p logs/{rule};\ + module load slurm && sbatch \ + --partition={resources.partition} \ + --cpus-per-task={threads} \ + --mem-per-cpu={resources.mem} \ + --job-name=snakePipes_{rule}-{wildcards} \ + --output=logs/{rule}/{rule}-{wildcards}-%j.out \ + --time={resources.time} \ + --parsable" +jobs: 20 +conda-prefix: /localenv/$USER/anaconda/snakepipes-envs +use-conda: True +conda-frontend: conda +cluster-generic-cancel-cmd: "ccancel.sh" +latency-wait: 300 +default-resources: + mem: 10G + time: 1440 + partition: bioinfo +set-resources: + annotation_bed2fasta: + mem: 4G + AlevinForVelocity: + mem: 10G + ATAC_qc: + mem: 2G + bamCoverage: + mem: 5G + bamCoverage_coverage: + mem: 5G + bamCoverage_filtered: + mem: 5G + bamCoverage_raw: + mem: 5G + bamCoverage_RPKM: + mem: 5G + bamCoverage_unique_mappings: + mem: 5G + bamPE_fragment_size: + mem: 10G + BAMsort_allelic: + mem: 3G + Bowtie2: + mem: 4G + bowtie2Index: + mem: 10G + bwa: + mem: 4G + bwaIndex: + mem: 8G + bwamem2: + mem: 6G + bwamem2Index: + mem: 90G + bwameth: + mem: 3G + bwamethIndex: + mem: 20G + bwameth2Index: 
+ mem: 120G + build_matrix: + mem: 9G + call_tads: + mem: 3G + cellsort_bam: + mem: 10G + clumpify: + mem: 2G + CollectAlignmentSummaryMetrics: + mem: 2G + CollectInsertSizeMetrics: + mem: 1G + correct_matrix: + mem: 7G + CpG_report: + mem: 6G + CpG_stats: + mem: 30G + create_annotation_bed: + mem: 4G + createGenomeFasta: + mem: 20G + CSAW: + mem: 30G + create_snpgenome: + mem: 30G + DepthOfCov: + mem: 3G + DepthOfCovGenome: + mem: 3G + DESeq2: + mem: 5G + DESeq2_Salmon_basic: + mem: 3G + DESeq2_Salmon_allelic: + mem: 3G + diagnostic_plot: + mem: 2G + dmrseq: + mem: 10G + downloadGTF: + mem: 20G + DSS: + mem: 10G + fastaDict: + mem: 4G + FASTQdownsample: + mem: 4G + filterBW: + mem: 6G + filter_reads: + mem: 3G + filter_reads_umi: + mem: 10G + filterCoveragePerScaffolds: + mem: 6G + Genrich_peaks: + mem: 20G + HISAT2: + mem: 6G + hisat2Index: + mem: 2G + histoneHMM: + mem: 4G + MACS2: + mem: 8G + make2bit: + mem: 8G + map_fastq_single_end: + mem: 10G + markDupes: + mem: 3G + merge_bins: + mem: 7G + merge_matrices: + mem: 3G + metileneReport: + mem: 6G + namesort_bams: + mem: 6G + plot_heatmap_cov_CSAW: + mem: 5G + plotCorrelation_pearson: + mem: 3G + plotCorrelation_pearson_allelic: + mem: 5G + plotCorrelation_spearman: + mem: 3G + plotCorrelation_spearman_allelic: + mem: 2G + plotCoverage: + mem: 1G + plotEnrichment: + mem: 1G + plotFingerprint: + mem: 1G + plotPCA: + mem: 4G + plotPCA_allelic: + mem: 4G + plot_heatmap_CSAW_up: + mem: 10G + prepForMetilene: + mem: 3G + produceReport: + mem: 20G + run_eisaR: + mem: 30G + Salmon_counts: + mem: 5G + Salmon_index_joint_fa: + mem: 6G + SalmonAlevin: + mem: 10G + SalmonIndex: + mem: 3G + SalmonQuant: + mem: 2G + Salmon_TPM: + mem: 5G + sambamba_flagstat: + mem: 3G + sambamba_flagstat_sorted: + mem: 3G + sambamba_markdup: + mem: 4G + sambamaba_sort_hic_r1: + mem: 4G + sambamaba_sort_hic_r2: + mem: 4G + SEACR_peaks_lenient: + mem: 20G + SEACR_peaks_stringent: + mem: 10G + sleuth_Salmon: + mem: 4G + sleuth_SalmonAllelic: + 
mem: 10G + snp_split: + mem: 25G + STAR: + mem: 6G + STAR_allele: + mem: 30G + star_index: + mem: 15G + starIndex: + mem: 8G + STARsolo: + mem: 6G + STARsolo_raw_to_seurat: + mem: 10G + TEcounts: + mem: 16G + velo_to_sce: + mem: 30G + velocyto: + mem: 20G diff --git a/snakePipes/shared/rscripts/ATACseq_QC_report_template.Rmd b/snakePipes/shared/rscripts/ATACseq_QC_report_template.Rmd index 5ee508211..0a27914dc 100644 --- a/snakePipes/shared/rscripts/ATACseq_QC_report_template.Rmd +++ b/snakePipes/shared/rscripts/ATACseq_QC_report_template.Rmd @@ -1,5 +1,5 @@ --- -title: "ATAC-seq filtering report" +title: "ATACseq filtering report" author: "`r Sys.info()[length(Sys.info())-1]`" date: "`r format(Sys.time(), '%d %B, %Y')`" output: html_document diff --git a/snakePipes/shared/rscripts/CSAW.R b/snakePipes/shared/rscripts/CSAW.R index 5f25c8921..39e685a6a 100644 --- a/snakePipes/shared/rscripts/CSAW.R +++ b/snakePipes/shared/rscripts/CSAW.R @@ -22,15 +22,9 @@ external_bed<-as.logical(snakemake@params[["externalBed"]]) bam_pfx<-ifelse(useSpikeInForNorm,"_host",".filtered") bam_folder<-ifelse(useSpikeInForNorm,"split_bam","filtered_bam") -##set up a primitive log -logfile <- file(snakemake@log[["err"]], open="w+") -sink(logfile, type="message") - - ## create output directory # include functions -#sink("CSAW/CSAW.log", append=TRUE) source(paste(snakemake@config[["baseDir"]], snakemake@params[["importfunc"]], sep="/")) suppressPackageStartupMessages(library(GenomicRanges)) ## fix default FDR significance threshold @@ -106,7 +100,7 @@ if (! 
external_bed) { if(snakemake@params[['peakCaller']] == "MACS2") { allpeaks <- lapply(fnames, function(x) { narrow <- paste0("../MACS2/",x,bam_pfx,".BAM_peaks.narrowPeak") #bam_pfx - if(snakemake@params[["pipeline"]] %in% "ATAC-seq"){ + if(snakemake@params[["pipeline"]] %in% "ATACseq"){ narrow <- paste0("../MACS2/",x,".filtered.short.BAM_peaks.narrowPeak") } broad <- paste0("../MACS2/",x,bam_pfx,".BAM_peaks.broadPeak") #bam_pfx @@ -167,11 +161,8 @@ writeOutput_chip(chip_results, outfile_prefix = "DiffBinding", fdrcutoff = fdr,l ## save data message("Saving data") -#sink() save(chip_object, chip_results, file = "DiffBinding_analysis.Rdata") -sink(type="message") -close(logfile) #### SESSION INFO sink("CSAW.session_info.txt") diff --git a/snakePipes/shared/rscripts/CSAW_report.Rmd b/snakePipes/shared/rscripts/CSAW_report.Rmd index 4bcb2be36..39d2c105d 100644 --- a/snakePipes/shared/rscripts/CSAW_report.Rmd +++ b/snakePipes/shared/rscripts/CSAW_report.Rmd @@ -75,7 +75,7 @@ Heatmaps obtained using log2 ratio of ChIP signal over input were as follows: ```{r plot_l2r,fig.show='hold',fig.align='center', out.width ='40%',out.height='30%',fig.cap="UP and DOWN regions using log2 ratio."} if(sum(grepl("log2r.heatmap.png",snakemake@input[["heatmap_in"]]))<1){ - message("ChIP signal enrichment over input is only available for ChIP-seq workflow!") + message("ChIP signal enrichment over input is only available for ChIPseq workflow!") }else{ knitr::include_graphics(c("CSAW.UP.log2r.heatmap.png", "CSAW.DOWN.log2r.heatmap.png")) diff --git a/snakePipes/shared/rscripts/DB_functions.R b/snakePipes/shared/rscripts/DB_functions.R index 1ebfa8e96..7cf6926cd 100644 --- a/snakePipes/shared/rscripts/DB_functions.R +++ b/snakePipes/shared/rscripts/DB_functions.R @@ -1,7 +1,7 @@ #### ~~~~ Functions to Run CSAW as part of SNAKEMAKE pipeline ~~~~ #### ### (c) Vivek Bhardwaj (bhardwaj@ie-freiburg.mpg.de) -#' Read the Files and Count windows for ChIP-Seq Samples +#' Read the Files and Count 
windows for ChIPseq Samples #' #' @param sampleSheet tsvfile with sample information #' @param fragmentLength fragment length of sequencing diff --git a/snakePipes/shared/rscripts/WGBS_mergeStats.R b/snakePipes/shared/rscripts/WGBS_mergeStats.R index b921cb74f..fc6c3b302 100644 --- a/snakePipes/shared/rscripts/WGBS_mergeStats.R +++ b/snakePipes/shared/rscripts/WGBS_mergeStats.R @@ -1,7 +1,4 @@ .libPaths(R.home("library")) - -logfile = file(snakemake@log[["err"]], open="w") -sink(logfile) sampleSheet = snakemake@params[['sampleSheet']] groups = snakemake@params[['groups']] blacklist = snakemake@params[['blacklist']] @@ -36,7 +33,4 @@ m = 100 * getMeth(bs, type="raw") d = cbind(d, m) colnames(d)[3:ncol(d)] = sprintf("%s_%s", ss$condition, ss$name) -write.table(d, file=snakemake@output[["MetileneIN"]], sep="\t", row.names=FALSE, quote=FALSE) - -sink() -close(logfile) +write.table(d, file=snakemake@output[["MetileneIN"]], sep="\t", row.names=FALSE, quote=FALSE) \ No newline at end of file diff --git a/snakePipes/shared/rscripts/nearestGene.R b/snakePipes/shared/rscripts/nearestGene.R index 12fdac694..8484bc2d5 100644 --- a/snakePipes/shared/rscripts/nearestGene.R +++ b/snakePipes/shared/rscripts/nearestGene.R @@ -11,11 +11,6 @@ pipeline<-snakemake@params[["pipeline"]] output_bed<-snakemake@output[["annotated_bed"]] -##set up a primitive log -logfile <- file(snakemake@log[["err"]], open="wt") -sink(logfile, type="message") -#---------------------------- - flist<-list(input_bed,t2g,gene_symbol) size_v<-unlist(lapply(flist,function(X)file.info(X)$size)) @@ -29,7 +24,7 @@ if(any(is.na(size_v),sum(size_v==0)>0)){message('Some of the input files are non t2g_tab<-data.table::fread(t2g,header=FALSE) gs_tab<-data.table::fread(gene_symbol,header=FALSE) - if(pipeline %in% c("chip-seq","ATAC-seq")){ + if(pipeline %in% c("chipseq","ATACseq")){ ibed_tab$GeneID<-t2g_tab$V2[match(ibed_tab$V22,t2g_tab$V1)] ibed_tab$GeneSymbol<-gs_tab$V2[match(ibed_tab$GeneID,gs_tab$V1)] 
obed_tab<-unique(subset(ibed_tab,select=c(paste0("V",c(1:18,23,24)),"GeneID","GeneSymbol"))) @@ -40,10 +35,6 @@ if(any(is.na(size_v),sum(size_v==0)>0)){message('Some of the input files are non write.table(obed_tab,file=output_bed,row.names=FALSE,sep="\t",quote=FALSE) } -#---------------------------- -sink(type="message") -close(logfile) - sink(file.path(snakemake@params[["wdir"]],"nearestGene.session_info.txt")) sessionInfo() sink() diff --git a/snakePipes/shared/rscripts/scRNAseq_EmptyDrops.R b/snakePipes/shared/rscripts/scRNAseq_EmptyDrops.R index 3f3a2a0dc..e177e1e6c 100644 --- a/snakePipes/shared/rscripts/scRNAseq_EmptyDrops.R +++ b/snakePipes/shared/rscripts/scRNAseq_EmptyDrops.R @@ -1,4 +1,3 @@ -sink(snakemake@log[["out"]]) .libPaths(R.home("library")) #system(paste0('mkdir -p ',wdir)) #for debugging @@ -89,9 +88,6 @@ if(length(in_dirs)>1){ outfile<-file.path(wdir,basename(snakemake@output[["seurat"]])) saveRDS(s,file=outfile) -message('done all') -sink() - sink("sessionInfo.txt") sessionInfo() sink() diff --git a/snakePipes/shared/rscripts/scRNAseq_Seurat3.R b/snakePipes/shared/rscripts/scRNAseq_Seurat3.R index 9203a8f06..6e34e76bf 100644 --- a/snakePipes/shared/rscripts/scRNAseq_Seurat3.R +++ b/snakePipes/shared/rscripts/scRNAseq_Seurat3.R @@ -1,4 +1,3 @@ -sink(snakemake@log[["out"]]) .libPaths(R.home("library")) #system(paste0('mkdir -p ',wdir)) #for debugging @@ -28,7 +27,4 @@ print(datav) expression_matrix <- Read10X(data.dir = datav) s = CreateSeuratObject(counts = expression_matrix) outfile<-file.path(wdir,basename(snakemake@output[["seurat"]])) -saveRDS(s,file=outfile) - -message('done all') -sink() +saveRDS(s,file=outfile) \ No newline at end of file diff --git a/snakePipes/shared/rscripts/scRNAseq_eisaR.R b/snakePipes/shared/rscripts/scRNAseq_eisaR.R index 3b879bb6b..76f93a5b2 100644 --- a/snakePipes/shared/rscripts/scRNAseq_eisaR.R +++ b/snakePipes/shared/rscripts/scRNAseq_eisaR.R @@ -1,5 +1,4 @@ #this is a modification of 
https://github.com/csoneson/rna_velocity_quant/blob/master/scripts/generate_cdna_intron_fa_prepref.R , authored by C.Soneson -sink(snakemake@log[["out"]]) .libPaths(R.home("library")) wdir<-snakemake@params[["wdir"]] @@ -89,7 +88,6 @@ write.table(t2g, file = joint_t2g, message('done all') -sink() sink("sessionInfo.txt") sessionInfo() diff --git a/snakePipes/shared/rscripts/scRNAseq_merge_loom.R b/snakePipes/shared/rscripts/scRNAseq_merge_loom.R index ac3f5ec18..6e2b7b391 100644 --- a/snakePipes/shared/rscripts/scRNAseq_merge_loom.R +++ b/snakePipes/shared/rscripts/scRNAseq_merge_loom.R @@ -1,4 +1,3 @@ -sink(snakemake@log[["out"]]) .libPaths(R.home("library")) #system(paste0('mkdir -p ',wdir)) #for debugging @@ -26,7 +25,4 @@ l<-lapply(in_files,function(X)as.Seurat(ReadVelocity(X))) names(l)<-samples s<-MergeSeurat(x=l[[1]],y=unlist(l[[2:length(l)]]),add.cell.ids=names(l)) outfile<-file.path(wdir,basename(snakemake@output[["seurat"]])) -saveRDS(s,file=outfile) - -message('done all') -sink() +saveRDS(s,file=outfile) \ No newline at end of file diff --git a/snakePipes/shared/rscripts/scRNAseq_report.R b/snakePipes/shared/rscripts/scRNAseq_report.R index 68c818eac..05f0c2bf1 100644 --- a/snakePipes/shared/rscripts/scRNAseq_report.R +++ b/snakePipes/shared/rscripts/scRNAseq_report.R @@ -1,4 +1,3 @@ -sink(snakemake@log[["out"]]) .libPaths(R.home("library")) wdir<-snakemake@params[["wdir"]] @@ -21,9 +20,6 @@ rdf <- Reduce(function(x, y, ...) 
merge(x, y, all = TRUE, by="Metric", sort=FALS outf<-file.path(wdir,basename(snakemake@output[["report"]])) write.table(rdf,outf,row.names=FALSE,quote=FALSE,sep="\t") -message('done all') -sink() - sink("sessionInfo.txt") sessionInfo() sink() diff --git a/snakePipes/shared/rscripts/scRNAseq_splitAlevinVelocityMatrices.R b/snakePipes/shared/rscripts/scRNAseq_splitAlevinVelocityMatrices.R index ae801d1a4..126e7b61d 100644 --- a/snakePipes/shared/rscripts/scRNAseq_splitAlevinVelocityMatrices.R +++ b/snakePipes/shared/rscripts/scRNAseq_splitAlevinVelocityMatrices.R @@ -1,6 +1,4 @@ #this is a modified copy of https://github.com/csoneson/rna_velocity_quant/blob/master/scripts/sce_helpers.R and https://github.com/csoneson/rna_velocity_quant/blob/master/scripts/summarize_spliced_unspliced_dentate_gyrus.R - -sink(snakemake@log[["out"]]) .libPaths(R.home("library")) wdir<-snakemake@params[["wdir"]] @@ -96,9 +94,6 @@ sce<- do.call(cbind, lapply(samplenames, function(s) { saveRDS(sce,outfile) -message('done all') -sink() - sink("sessionInfo.txt") sessionInfo() sink() diff --git a/snakePipes/shared/rules/ATAC.snakefile b/snakePipes/shared/rules/ATAC.snakefile index e72d2f714..76adf42ad 100755 --- a/snakePipes/shared/rules/ATAC.snakefile +++ b/snakePipes/shared/rules/ATAC.snakefile @@ -4,7 +4,6 @@ # output: # shortBAM = temp(os.path.join(short_bams, "{sample}.short.bam")), # metrics = os.path.join(short_bams, "{sample}.short.metrics") -# log: os.path.join(short_bams, "logs/{sample}.filterFragments.log") # params: # maxFragmentSize=maxFragmentSize, # minFragmentSize=minFragmentSize @@ -16,7 +15,6 @@ # --filterMetrics {output.metrics} \ # --maxFragmentLength {params.maxFragmentSize} \ # --minFragmentLength {params.minFragmentSize} \ -# 2> {log} # """ @@ -25,9 +23,6 @@ # expand(os.path.join(short_bams, "{sample}.short.metrics"), sample=samples) # output: # QCrep='Filtering_metrics/Filtering_report.html' -# log: -# err="Filtering_metrics/logs/produce_report.err", -# 
out="Filtering_metrics/logs/produce_report.out" # conda: CONDA_RMD_ENV # threads: 1 # script: "../rscripts/ATACseq_QC_report_template.Rmd" @@ -37,7 +32,6 @@ rule filterFragments: "filtered_bam/{sample}.filtered.bam" output: shortBAM = temp(os.path.join(short_bams, "{sample}.short.bam")) - log: os.path.join(short_bams, "logs/{sample}.filterFragments.log") params: maxFragmentSize=maxFragmentSize, minFragmentSize=minFragmentSize @@ -45,7 +39,6 @@ rule filterFragments: conda: CONDA_SAMBAMBA_ENV shell: """ sambamba view -f bam -F "template_length >= {params.minFragmentSize} and template_length <= {params.maxFragmentSize} or template_length >= -{params.maxFragmentSize} and template_length <= {params.minFragmentSize}" -t {threads} -o {output.shortBAM} {input} - 2> {log} """ @@ -59,17 +52,15 @@ rule filterCoveragePerScaffolds: shortbai = temp(os.path.join(short_bams, "{sample}.short.bam.bai")), bam = os.path.join(short_bams, "{sample}.short.cleaned.bam"), bai = os.path.join(short_bams, "{sample}.short.cleaned.bam.bai") - log: os.path.join(short_bams, "logs/{sample}.filterCoveragePerScaffolds.log") params: count_cutoff = int(fragmentCountThreshold) * 2 # must contain more than 2 reads, i.e. 
1 fragment threads: 6 conda: CONDA_SHARED_ENV shell: """ - samtools index -@ {threads} {input.bam} 2> {log} - samtools idxstats {input.bam} | awk -v cutoff={params.count_cutoff} \'$3 > cutoff\' | cut -f 1 > {output.whitelist} 2>> {log} - samtools view -@ {threads} -bo {output.bam} {input.bam} $(cat {output.whitelist} | paste -sd\' \') 2>> {log} - samtools index -@ {threads} {output.bam} 2>> {log} - + samtools index -@ {threads} {input.bam} + samtools idxstats {input.bam} | awk -v cutoff={params.count_cutoff} \'$3 > cutoff\' | cut -f 1 > {output.whitelist} + samtools view -@ {threads} -bo {output.bam} {input.bam} $(cat {output.whitelist} | paste -sd\' \') + samtools index -@ {threads} {output.bam} """ @@ -89,9 +80,6 @@ rule callOpenChromatin: write_bdg='--bdg', fileformat='--format BAMPE' threads: 6 - log: - out = os.path.join(outdir_MACS2, "logs", "callOpenChromatin", "{sample}_macs2.out"), - err = os.path.join(outdir_MACS2, "logs", "callOpenChromatin", "{sample}_macs2.err") conda: CONDA_ATAC_ENV shell: """ macs2 callpeak --treatment {input} \ @@ -101,16 +89,15 @@ rule callOpenChromatin: {params.fileformat} \ --qvalue {params.qval_cutoff} \ {params.nomodel} \ - {params.write_bdg} > {log.out} 2> {log.err} + {params.write_bdg} """ rule tempChromSizes: input: genome_index output: temp("HMMRATAC/chrom_sizes") - log: "HMMRATAC/logs/tempChromSizes.log" shell: """ - cut -f 1,2 {input} > {output} 2> {log} + cut -f 1,2 {input} > {output} """ @@ -128,13 +115,12 @@ rule HMMRATAC_peaks: "HMMRATAC/{sample}_peaks.gappedPeak", "HMMRATAC/{sample}_summits.bed", "HMMRATAC/{sample}_training.bed" - log: "HMMRATAC/logs/{sample}.HMMRATAC_peaks.log" params: blacklist = "-e {}".format(blacklist_bed) if blacklist_bed else "" conda: CONDA_ATAC_ENV threads: 4 shell: """ - HMMRATAC -Xmx10G -b {input[0]} -i {input[1]} -g {input[2]} {params.blacklist} -o HMMRATAC/{wildcards.sample} 2> {log} + HMMRATAC -Xmx10G -b {input[0]} -i {input[1]} -g {input[2]} {params.blacklist} -o 
HMMRATAC/{wildcards.sample} """ #Genrich requires namesorted bams @@ -143,8 +129,6 @@ rule namesort_bams: bam = short_bams + "{sample}.short.cleaned.bam" output: bam = temp(short_bams + "{sample}.short.namesorted.bam") - log: - short_bams + "logs/{sample}.namesort.err" params: tempDir = tempDir threads: 4 @@ -152,7 +136,7 @@ rule namesort_bams: shell: """ TMPDIR={params.tempDir} MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX) - sambamba sort -t {threads} -o {output.bam} --tmpdir=$MYTEMP -n {input.bam} 2> {log} + sambamba sort -t {threads} -o {output.bam} --tmpdir=$MYTEMP -n {input.bam} rm -rf $MYTEMP """ @@ -165,13 +149,12 @@ if not isMultipleComparison: bams=lambda wildcards: expand(short_bams + "{sample}.short.namesorted.bam", sample=genrichDict[wildcards.group]) output: "Genrich/{group}.narrowPeak" - log: "Genrich/logs/{group}.Genrich_peaks.log" params: bams = lambda wildcards: ",".join(expand(short_bams + "{sample}.short.namesorted.bam", sample=genrichDict[wildcards.group])), blacklist = "-E {}".format(blacklist_bed) if blacklist_bed else "" conda: CONDA_ATAC_ENV shell: """ - Genrich -t {params.bams} -o {output} -r {params.blacklist} -j -y 2> {log} + Genrich -t {params.bams} -o {output} -r {params.blacklist} -j -y """ else: @@ -180,11 +163,10 @@ else: bams=lambda wildcards: expand(short_bams + "{sample}.short.namesorted.bam", sample=genrichDict[wildcards.compGroup][wildcards.group]), output: "Genrich/{group}.{compGroup}.narrowPeak" - log: "Genrich/logs/{group}.{compGroup}.log" params: bams = lambda wildcards: ",".join(expand(os.path.join(short_bams, "{sample}.short.namesorted.bam"), sample=genrichDict[wildcards.compGroup][wildcards.group])), blacklist = "-E {}".format(blacklist_bed) if blacklist_bed else "", conda: CONDA_ATAC_ENV shell: """ - Genrich -t {params.bams} -o {output} -r {params.blacklist} -j -y 2> {log} + Genrich -t {params.bams} -o {output} -r {params.blacklist} -j -y """ diff --git a/snakePipes/shared/rules/ATAC_qc.snakefile 
b/snakePipes/shared/rules/ATAC_qc.snakefile index 97913dc89..30cc9439b 100755 --- a/snakePipes/shared/rules/ATAC_qc.snakefile +++ b/snakePipes/shared/rules/ATAC_qc.snakefile @@ -1,5 +1,3 @@ - - rule plotFingerprint: input: bams = expand("filtered_bam/{sample}.filtered.bam", sample = samples), @@ -15,9 +13,6 @@ rule plotFingerprint: else "", jsd = "--JSDsample filtered_bam/{}.filtered.bam".format(samples[0]) if (len(samples)>0) else "" - log: - out = os.path.join(deeptools_ATAC, "logs/plotFingerprint.out"), - err = os.path.join(deeptools_ATAC, "logs/plotFingerprint.err") benchmark: os.path.join(deeptools_ATAC, ".benchmark/plotFingerprint.benchmark") threads: lambda wildcards: 24 if 24 {output.qc} 2> {log} + printf "peak_count\tFRiP\tpeak_genome_coverage\n%d\t%5.3f\t%6.4f\n" $peak_count $frip $genomecov > {output.qc} """ diff --git a/snakePipes/shared/rules/Bowtie2.snakefile b/snakePipes/shared/rules/Bowtie2.snakefile index 3b2695e2e..e037a2f66 100755 --- a/snakePipes/shared/rules/Bowtie2.snakefile +++ b/snakePipes/shared/rules/Bowtie2.snakefile @@ -7,7 +7,6 @@ if pairedEnd: output: align_summary = "Bowtie2/{sample}.Bowtie2_summary.txt", bam = temp("Bowtie2/{sample}.sorted.bam")# removing since we keep the sambamba output (dupmarked) - log: "Bowtie2/logs/{sample}.sort.log" params: bowtie2_index=bowtie2_index, alignerOpts = str(alignerOpts or ' ') if not cutntag else " --local --very-sensitive-local "\ @@ -31,7 +30,7 @@ if pairedEnd: -p {threads} \ 2> {output.align_summary} | \ samtools view -Sb - | \ - samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam} 2> {log}; + samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam}; rm -rf $MYTEMP """ else: @@ -41,7 +40,6 @@ else: output: align_summary = "Bowtie2/{sample}.Bowtie2_summary.txt", bam = temp("Bowtie2/{sample}.sorted.bam") - log: "Bowtie2/logs/{sample}.sort.log" params: bowtie2_index=bowtie2_index, alignerOpts = str(alignerOpts or ''), @@ -61,6 +59,6 @@ else: -p 
{threads} \ 2> {output.align_summary} | \ samtools view -Sbu - | \ - samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam} 2> {log}; + samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam}; rm -rf $MYTEMP """ diff --git a/snakePipes/shared/rules/Bowtie2_allelic.snakefile b/snakePipes/shared/rules/Bowtie2_allelic.snakefile index 97756d09b..38e0582d9 100755 --- a/snakePipes/shared/rules/Bowtie2_allelic.snakefile +++ b/snakePipes/shared/rules/Bowtie2_allelic.snakefile @@ -18,7 +18,6 @@ if aligner == "Bowtie2": output: align_summary = aligner+"/{sample}.Bowtie2_summary.txt", bam = temp(aligner+"/{sample}.sorted.bam") - log: "Bowtie2/logs/{sample}.sort.log" params: alignerOpts = str(alignerOpts or ''), mateOrientation = mateOrientation, @@ -41,7 +40,7 @@ if aligner == "Bowtie2": -p {threads} \ 2> {output.align_summary} | \ samtools view -Sb - | \ - samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam} 2> {log}; + samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam}; rm -rf $MYTEMP """ else: @@ -52,7 +51,6 @@ if aligner == "Bowtie2": output: align_summary = aligner+"/{sample}.Bowtie2_summary.txt", bam = temp(aligner+"/{sample}.sorted.bam") - log: "Bowtie2/logs/{sample}.sort.log" params: alignerOpts = str(alignerOpts or ''), idxbase = getbw_idxbase(bowtie2_index_allelic), @@ -73,7 +71,7 @@ if aligner == "Bowtie2": -p {threads} \ 2> {output.align_summary} | \ samtools view -Sbu - | \ - samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam} 2> {log}; + samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam}; rm -rf $MYTEMP """ else: diff --git a/snakePipes/shared/rules/CSAW.multiComp.snakefile b/snakePipes/shared/rules/CSAW.multiComp.snakefile index 3546150b8..1d9945a9a 100644 --- a/snakePipes/shared/rules/CSAW.multiComp.snakefile +++ b/snakePipes/shared/rules/CSAW.multiComp.snakefile @@ -9,18 +9,18 @@ def 
get_outdir(peak_caller,sampleSheet): def getInputPeaks(peakCaller, chip_samples, genrichDict,comp_group): if peakCaller == "MACS2": - if pipeline in 'ATAC-seq': + if pipeline in 'ATACseq': return expand("MACS2/{chip_sample}.filtered.short.BAM_peaks.xls", chip_sample = chip_samples) - elif pipeline == "chip-seq" and useSpikeInForNorm: + elif pipeline == "chipseq" and useSpikeInForNorm: return expand("MACS2/{chip_sample}_host.BAM_peaks.xls", chip_sample = chip_samples) else: return expand("MACS2/{chip_sample}.filtered.BAM_peaks.xls", chip_sample = chip_samples) elif peakCaller == "HMMRATAC": return expand("HMMRATAC/{chip_sample}_peaks.gappedPeak", chip_sample = chip_samples) elif peakCaller == "SEACR": - if pipeline == "chip-seq" and useSpikeInForNorm: + if pipeline == "chipseq" and useSpikeInForNorm: return expand("SEACR/{chip_sample}_host.stringent.bed",chip_sample=chip_samples) - elif pipeline == "chip-seq" and not useSpikeInForNorm: + elif pipeline == "chipseq" and not useSpikeInForNorm: return expand("SEACR/{chip_sample}.filtered.stringent.bed",chip_sample=chip_samples) elif peakCaller == "Genrich": return expand("Genrich/{genrichGroup}.{{compGroup}}.narrowPeak", genrichGroup = genrichDict[comp_group].keys()) @@ -58,9 +58,9 @@ def getBamCoverage(comp_group): return [] def getHeatmapInput(): - if pipeline in 'ATAC-seq': + if pipeline in 'ATACseq': return(expand("CSAW_{}_{}".format(peakCaller, sample_name + ".{{compGroup}}") + "/CSAW.{change_dir}.cov.heatmap.png", change_dir=['UP','DOWN'])) - elif pipeline in 'chip-seq': + elif pipeline in 'chipseq': if chip_samples_w_ctrl: return(expand("CSAW_{}_{}".format(peakCaller, sample_name + ".{{compGroup}}") + "/CSAW.{change_dir}.cov.heatmap.png", change_dir=['UP','DOWN']) + expand("CSAW_{}_{}".format(peakCaller, sample_name + ".{{compGroup}}") + "/CSAW.{change_dir}.log2r.heatmap.png", change_dir=['UP', 'DOWN'])) else: @@ -101,15 +101,12 @@ rule CSAW: windowSize = windowSize, importfunc = os.path.join("shared", 
"rscripts", "DB_functions.R"), allele_info = allele_info, - yaml_path=lambda wildcards: samples_config if pipeline in 'chip-seq' else "", + yaml_path=lambda wildcards: samples_config if pipeline in 'chipseq' else "", insert_size_metrics = lambda wildcards,input: os.path.join(outdir, input.insert_size_metrics) if pairedEnd else [], pipeline = pipeline, useSpikeInForNorm = useSpikeInForNorm, scale_factors = lambda wildcards, input: os.path.join(outdir, input.scale_factors) if input.scale_factors else "", externalBed = True if externalBed else False - log: - out = os.path.join(outdir, "{}/logs/CSAW.out".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))), - err = os.path.join(outdir, "{}/logs/CSAW.err".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))) conda: CONDA_ATAC_ENV script: "../rscripts/CSAW.R" @@ -123,16 +120,11 @@ rule calc_matrix_log2r_CSAW: matrix = touch("{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))+"/CSAW.{change_dir}.log2r.matrix") params: bed_in = "{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))+"/Filtered.results.{change_dir}.bed" - log: - out = os.path.join(outdir, "{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/logs/deeptools_matrix.log2r.{change_dir}.out"), - err = os.path.join(outdir, "{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/logs/deeptools_matrix.log2r.{change_dir}.err") threads: 8 conda: CONDA_SHARED_ENV shell: """ - touch {log.out} - touch {log.err} if [[ -s {params.bed_in} ]]; then - computeMatrix scale-regions -S {input.bigwigs} -R {params.bed_in} -m 1000 -b 200 -a 200 -o {output.matrix} -p {threads} > {log.out} 2> {log.err} + computeMatrix scale-regions -S 
{input.bigwigs} -R {params.bed_in} -m 1000 -b 200 -a 200 -o {output.matrix} -p {threads} fi """ @@ -145,13 +137,8 @@ rule plot_heatmap_log2r_CSAW: sorted_regions = touch("{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/CSAW.{change_dir}.log2r.sortedRegions.bed") params: smpl_label = lambda wildcards: ' '.join(reordered_dict[wildcards.compGroup].keys()) - log: - out = os.path.join(outdir, "{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/logs/deeptools_heatmap.log2r.{change_dir}.out"), - err = os.path.join(outdir, "{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/logs/deeptools_heatmap.log2r.{change_dir}.err") conda: CONDA_SHARED_ENV shell: """ - touch {log.out} - touch {log.err} if [[ -s {input.matrix} ]]; then plotHeatmap --matrixFile {input.matrix} \ --outFileSortedRegions {output.sorted_regions} \ @@ -159,7 +146,7 @@ rule plot_heatmap_log2r_CSAW: --startLabel Start --endLabel End \ --legendLocation lower-center \ -x 'Scaled peak length' --labelRotation 90 \ - --samplesLabel {params.smpl_label} --colorMap "coolwarm" > {log.out} 2> {log.err} + --samplesLabel {params.smpl_label} --colorMap "coolwarm" fi """ @@ -173,17 +160,12 @@ rule calc_matrix_cov_CSAW: matrix = touch("{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/CSAW.{change_dir}.cov.matrix") params: bed_in = "{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/Filtered.results.{change_dir}.bed" - log: - out = os.path.join(outdir, "{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/logs/deeptools_matrix.cov.{change_dir}.out"), - err = os.path.join(outdir, 
"{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/logs/deeptools_matrix.cov.{change_dir}.err") threads: 8 conda: CONDA_SHARED_ENV shell: """ - touch {log.out} - touch {log.err} if [[ -s {params.bed_in} ]]; then computeMatrix scale-regions -S {input.bigwigs} -R {params.bed_in} \ - -m 1000 -b 200 -a 200 -o {output.matrix} -p {threads} > {log.out} 2> {log.err} + -m 1000 -b 200 -a 200 -o {output.matrix} -p {threads} fi """ @@ -196,20 +178,15 @@ rule plot_heatmap_cov_CSAW: sorted_regions = touch("{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/CSAW.{change_dir}.cov.sortedRegions.bed") params: smpl_label = lambda wildcards: ' '.join(reordered_dict[wildcards.compGroup].keys()) - log: - out = os.path.join(outdir,"{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/logs/deeptools_heatmap.cov.{change_dir}.out"), - err = os.path.join(outdir,"{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) + "/logs/deeptools_heatmap.cov.{change_dir}.err") conda: CONDA_SHARED_ENV shell: """ - touch {log.out} - touch {log.err} if [[ -s {input.matrix} ]]; then plotHeatmap --matrixFile {input.matrix} \ --outFileSortedRegions {output.sorted_regions} \ --outFileName {output.image} --startLabel Start \ --endLabel End --legendLocation lower-center \ -x 'Scaled peak length' --labelRotation 90 \ - --samplesLabel {params.smpl_label} --colorMap "coolwarm" >{log.out} 2>{log.err} + --samplesLabel {params.smpl_label} --colorMap "coolwarm" fi """ @@ -226,8 +203,5 @@ rule CSAW_report: outdir=os.path.join(outdir, "{}".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))), sampleSheet=sampleSheet, useSpikeInForNorm = useSpikeInForNorm - log: - out = os.path.join(outdir, 
"{}/logs/report.out".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))), - err = os.path.join(outdir, "{}/logs/report.err".format(get_outdir(peakCaller,os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv"))) conda: CONDA_ATAC_ENV script: "../rscripts/CSAW_report.Rmd" diff --git a/snakePipes/shared/rules/CSAW.singleComp.snakefile b/snakePipes/shared/rules/CSAW.singleComp.snakefile index 013732eee..52f38656c 100644 --- a/snakePipes/shared/rules/CSAW.singleComp.snakefile +++ b/snakePipes/shared/rules/CSAW.singleComp.snakefile @@ -3,18 +3,18 @@ change_direction = ["UP", "DOWN", "MIXED"] def getInputPeaks(peakCaller, chip_samples, genrichDict): if peakCaller == "MACS2": - if pipeline in 'ATAC-seq': + if pipeline in 'ATACseq': return expand("MACS2/{chip_sample}.filtered.short.BAM_peaks.xls", chip_sample = chip_samples) - elif pipeline == "chip-seq" and useSpikeInForNorm: + elif pipeline == "chipseq" and useSpikeInForNorm: return expand("MACS2/{chip_sample}_host.BAM_peaks.xls", chip_sample = chip_samples) else: return expand("MACS2/{chip_sample}.filtered.BAM_peaks.xls", chip_sample = chip_samples) elif peakCaller == "HMMRATAC": return expand("HMMRATAC/{chip_sample}_peaks.gappedPeak", chip_sample = chip_samples) elif peakCaller == "SEACR": - if pipeline == "chip-seq" and useSpikeInForNorm: + if pipeline == "chipseq" and useSpikeInForNorm: return expand("SEACR/{chip_sample}_host.stringent.bed",chip_sample=chip_samples) - elif pipeline == "chip-seq" and not useSpikeInForNorm: + elif pipeline == "chipseq" and not useSpikeInForNorm: return expand("SEACR/{chip_sample}.filtered.stringent.bed",chip_sample=chip_samples) elif peakCaller == "Genrich": return expand("Genrich/{genrichGroup}.narrowPeak", genrichGroup = genrichDict.keys()) @@ -52,9 +52,9 @@ def getBamCoverage(): return [] def getHeatmapInput(): - if pipeline in 'ATAC-seq': + if pipeline in 'ATACseq': 
return(expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/CSAW.{change_dir}.cov.heatmap.png", change_dir=['UP','DOWN'])) - elif pipeline in 'chip-seq': + elif pipeline in 'chipseq': if chip_samples_w_ctrl: return(expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/CSAW.{change_dir}.cov.heatmap.png", change_dir=['UP','DOWN']) + expand("CSAW_{}_{}".format(peakCaller, sample_name) + "/CSAW.{change_dir}.log2r.heatmap.png", change_dir=['UP', 'DOWN'])) else: @@ -85,15 +85,12 @@ rule CSAW: windowSize = windowSize, importfunc = os.path.join("shared", "rscripts", "DB_functions.R"), allele_info = allele_info, - yaml_path=lambda wildcards: samples_config if pipeline in 'chip-seq' else "", + yaml_path=lambda wildcards: samples_config if pipeline in 'chipseq' else "", insert_size_metrics = lambda wildcards,input: os.path.join(outdir, input.insert_size_metrics) if pairedEnd else [], pipeline = pipeline, useSpikeInForNorm = useSpikeInForNorm, scale_factors = lambda wildcards, input: os.path.join(outdir, input.scale_factors) if input.scale_factors else "", externalBed = True if externalBed else False - log: - out = os.path.join(outdir, "CSAW_{}_{}/logs/CSAW.out".format(peakCaller, sample_name)), - err = os.path.join(outdir, "CSAW_{}_{}/logs/CSAW.err".format(peakCaller, sample_name)) conda: CONDA_ATAC_ENV script: "../rscripts/CSAW.R" @@ -108,16 +105,11 @@ if chip_samples_w_ctrl: matrix = touch("CSAW_{}_{}".format(peakCaller, sample_name)+"/CSAW.{change_dir}.log2r.matrix") params: bed_in = "CSAW_{}_{}".format(peakCaller, sample_name)+"/Filtered.results.{change_dir}.bed" - log: - out = os.path.join(outdir, "CSAW_{}_{}".format(peakCaller, sample_name) + "/logs/deeptools_matrix.log2r.{change_dir}.out"), - err = os.path.join(outdir, "CSAW_{}_{}".format(peakCaller, sample_name) + "/logs/deeptools_matrix.log2r.{change_dir}.err") threads: 8 conda: CONDA_SHARED_ENV shell: """ - touch {log.out} - touch {log.err} if [[ -s {params.bed_in} ]]; then - computeMatrix scale-regions -S 
{input.bigwigs} -R {params.bed_in} -m 1000 -b 200 -a 200 -o {output.matrix} -p {threads} > {log.out} 2> {log.err} + computeMatrix scale-regions -S {input.bigwigs} -R {params.bed_in} -m 1000 -b 200 -a 200 -o {output.matrix} -p {threads} fi """ @@ -130,13 +122,8 @@ if chip_samples_w_ctrl: sorted_regions = touch("CSAW_{}_{}".format(peakCaller, sample_name) + "/CSAW.{change_dir}.log2r.sortedRegions.bed") params: smpl_label=' '.join(reordered_dict.keys()) - log: - out = os.path.join(outdir, "CSAW_{}_{}".format(peakCaller, sample_name) + "/logs/deeptools_heatmap.log2r.{change_dir}.out"), - err = os.path.join(outdir, "CSAW_{}_{}".format(peakCaller, sample_name) + "/logs/deeptools_heatmap.log2r.{change_dir}.err") conda: CONDA_SHARED_ENV shell: """ - touch {log.out} - touch {log.err} if [[ -s {input.matrix} ]]; then plotHeatmap --matrixFile {input.matrix} \ --outFileSortedRegions {output.sorted_regions} \ @@ -144,7 +131,7 @@ if chip_samples_w_ctrl: --startLabel Start --endLabel End \ --legendLocation lower-center \ -x 'Scaled peak length' --labelRotation 90 \ - --samplesLabel {params.smpl_label} --colorMap "coolwarm" > {log.out} 2> {log.err} + --samplesLabel {params.smpl_label} --colorMap "coolwarm" fi """ @@ -158,17 +145,12 @@ rule calc_matrix_cov_CSAW: matrix = touch("CSAW_{}_{}".format(peakCaller, sample_name) + "/CSAW.{change_dir}.cov.matrix") params: bed_in = "CSAW_{}_{}".format(peakCaller, sample_name) + "/Filtered.results.{change_dir}.bed" - log: - out = os.path.join(outdir, "CSAW_{}_{}".format(peakCaller, sample_name) + "/logs/deeptools_matrix.cov.{change_dir}.out"), - err = os.path.join(outdir, "CSAW_{}_{}".format(peakCaller, sample_name) + "/logs/deeptools_matrix.cov.{change_dir}.err") threads: 8 conda: CONDA_SHARED_ENV shell: """ - touch {log.out} - touch {log.err} if [[ -s {params.bed_in} ]]; then computeMatrix scale-regions -S {input.bigwigs} -R {params.bed_in} \ - -m 1000 -b 200 -a 200 -o {output.matrix} -p {threads} > {log.out} 2> {log.err} + -m 1000 -b 200 
-a 200 -o {output.matrix} -p {threads} fi """ @@ -181,20 +163,15 @@ rule plot_heatmap_cov_CSAW: sorted_regions = touch("CSAW_{}_{}".format(peakCaller, sample_name) + "/CSAW.{change_dir}.cov.sortedRegions.bed") params: smpl_label=' '.join(reordered_dict.keys()) - log: - out = os.path.join(outdir,"CSAW_{}_{}".format(peakCaller, sample_name) + "/logs/deeptools_heatmap.cov.{change_dir}.out"), - err = os.path.join(outdir,"CSAW_{}_{}".format(peakCaller, sample_name) + "/logs/deeptools_heatmap.cov.{change_dir}.err") conda: CONDA_SHARED_ENV shell: """ - touch {log.out} - touch {log.err} if [[ -s {input.matrix} ]]; then plotHeatmap --matrixFile {input.matrix} \ --outFileSortedRegions {output.sorted_regions} \ --outFileName {output.image} --startLabel Start \ --endLabel End --legendLocation lower-center \ -x 'Scaled peak length' --labelRotation 90 \ - --samplesLabel {params.smpl_label} --colorMap "coolwarm" >{log.out} 2>{log.err} + --samplesLabel {params.smpl_label} --colorMap "coolwarm" fi """ @@ -211,8 +188,5 @@ rule CSAW_report: outdir=os.path.join(outdir, "CSAW_{}_{}".format(peakCaller, sample_name)), sampleSheet=sampleSheet, useSpikeInForNorm = useSpikeInForNorm - log: - out = os.path.join(outdir, "CSAW_{}_{}/logs/report.out".format(peakCaller, sample_name)), - err = os.path.join(outdir, "CSAW_{}_{}/logs/report.err".format(peakCaller, sample_name)) conda: CONDA_ATAC_ENV script: "../rscripts/CSAW_report.Rmd" diff --git a/snakePipes/shared/rules/ChIP_peak_calling.snakefile b/snakePipes/shared/rules/ChIP_peak_calling.snakefile index 6d81f1bbb..2c1200e7f 100755 --- a/snakePipes/shared/rules/ChIP_peak_calling.snakefile +++ b/snakePipes/shared/rules/ChIP_peak_calling.snakefile @@ -34,9 +34,6 @@ if pairedEnd: peakCaller_options = lambda wildcards: str(peakCallerOptions or '') if not cutntag else " -p 1e-5 ", bampe_options = lambda wildcards: str(BAMPEPeaks or '')if not cutntag else " ", bam_options = lambda wildcards: str(BAMPeaks or '') if not cutntag else " " - log: - out = 
"MACS2/logs/MACS2.{chip_sample}.filtered.out", - err = "MACS2/logs/MACS2.{chip_sample}.filtered.err" benchmark: "MACS2/.benchmark/MACS2.{chip_sample}.filtered.benchmark" conda: CONDA_CHIPSEQ_ENV @@ -50,7 +47,7 @@ if pairedEnd: --outdir MACS2 \ --name {wildcards.chip_sample}.filtered.BAM \ {params.peakCaller_options} \ - {params.broad_calling} > {log.out} 2> {log.err} + {params.broad_calling} # also run MACS2 in paired-end mode BAMPE for comparison with single-end mode macs2 callpeak -t {input.chip} \ @@ -59,7 +56,7 @@ if pairedEnd: {params.peakCaller_options} \ -g {params.genome_size} --keep-dup all \ --outdir MACS2 --name {wildcards.chip_sample}.filtered.BAMPE \ - {params.broad_calling} > {log.out}.BAMPE 2> {log.err}.BAMPE + {params.broad_calling} """ else: rule MACS2: @@ -81,9 +78,6 @@ else: frag_size=fragmentLength, peakCaller_options = str(peakCallerOptions or ''), bam_options = str(BAMPeaks or '') - log: - out = "MACS2/logs/MACS2.{chip_sample}.filtered.out", - err = "MACS2/logs/MACS2.{chip_sample}.filtered.err" benchmark: "MACS2/.benchmark/MACS2.{chip_sample}.filtered.benchmark" conda: CONDA_CHIPSEQ_ENV @@ -91,7 +85,7 @@ else: macs2 callpeak -t {input.chip} {params.control_param} -f BAM -g {params.genome_size} \ {params.peakCaller_options} --keep-dup all --outdir MACS2 \ --name {wildcards.chip_sample}.filtered.BAM {params.bam_options} --extsize {params.frag_size} \ - {params.broad_calling} > {log.out} 2> {log.err} + {params.broad_calling} """ @@ -142,8 +136,6 @@ rule namesort_bams: bam = "filtered_bam/{sample}.filtered.bam" output: bam = temp("filtered_bam/{sample}.namesorted.bam") - log: - "filtered_bam/logs/{sample}.namesort.err" params: tempDir = tempDir threads: 4 @@ -151,7 +143,7 @@ rule namesort_bams: shell: """ TMPDIR={params.tempDir} MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX) - sambamba sort -t {threads} -o {output.bam} --tmpdir=$MYTEMP -n {input.bam} 2> {log} + sambamba sort -t {threads} -o {output.bam} --tmpdir=$MYTEMP -n {input.bam} 
rm -rf $MYTEMP """ @@ -165,7 +157,6 @@ if not isMultipleComparison: control = lambda wildcards: ["filtered_bam/"+get_control(x)+".namesorted.bam" for x in genrichDict[wildcards.group]] if chip_samples_w_ctrl else [] output: "Genrich/{group}.narrowPeak" - log: "Genrich/logs/{group}.log" params: bams = lambda wildcards: ",".join(expand(os.path.join("filtered_bam", "{sample}.namesorted.bam"), sample=genrichDict[wildcards.group])), blacklist = "-E {}".format(blacklist_bed) if blacklist_bed else "", @@ -174,7 +165,7 @@ if not isMultipleComparison: ignoreForNorm = "-e " + ','.join(ignoreForNormalization) if ignoreForNormalization else "" conda: CONDA_CHIPSEQ_ENV shell: """ - Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -y 2> {log} + Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -y """ else: rule Genrich_peaks: @@ -183,7 +174,6 @@ if not isMultipleComparison: control = lambda wildcards: ["filtered_bam/"+get_control(x)+".namesorted.bam" for x in genrichDict[wildcards.group] ] if chip_samples_w_ctrl else [] output: "Genrich/{group}.narrowPeak" - log: "Genrich/logs/{group}.log" params: bams = lambda wildcards: ",".join(expand(os.path.join("filtered_bam", "{sample}.namesorted.bam"), sample=genrichDict[wildcards.group])), blacklist = "-E {}".format(blacklist_bed) if blacklist_bed else "", @@ -193,7 +183,7 @@ if not isMultipleComparison: ignoreForNorm = "-e " + ','.join(ignoreForNormalization) if ignoreForNormalization else "" conda: CONDA_CHIPSEQ_ENV shell: """ - Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -w {params.frag_size} 2> {log} + Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -w {params.frag_size} """ else: if pairedEnd: @@ -203,7 +193,6 @@ else: control = lambda wildcards: 
["filtered_bam/"+get_control(x)+".namesorted.bam" for x in genrichDict[wildcards.compGroup][wildcards.group]] if chip_samples_w_ctrl else [] output: "Genrich/{group}.{compGroup}.narrowPeak" - log: "Genrich/logs/{group}.{compGroup}.log" params: bams = lambda wildcards: ",".join(expand(os.path.join("filtered_bam", "{sample}.namesorted.bam"), sample=genrichDict[wildcards.compGroup][wildcards.group])), blacklist = "-E {}".format(blacklist_bed) if blacklist_bed else "", @@ -212,7 +201,7 @@ else: ignoreForNorm = "-e " + ','.join(ignoreForNormalization) if ignoreForNormalization else "" conda: CONDA_CHIPSEQ_ENV shell: """ - Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -y 2> {log} + Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -y """ else: rule Genrich_peaks: @@ -221,7 +210,6 @@ else: control = lambda wildcards: ["filtered_bam/"+get_control(x)+".namesorted.bam" for x in genrichDict[wildcards.compGroup][wildcards.group] ] if chip_samples_w_ctrl else [] output: "Genrich/{group}.{compGroup}.narrowPeak" - log: "Genrich/logs/{group}.{compGroup}.log" params: bams = lambda wildcards: ",".join(expand(os.path.join("filtered_bam", "{sample}.namesorted.bam"), sample=genrichDict[wildcards.compGroup][wildcards.group])), blacklist = "-E {}".format(blacklist_bed) if blacklist_bed else "", @@ -231,14 +219,13 @@ else: ignoreForNorm = "-e " + ','.join(ignoreForNormalization) if ignoreForNormalization else "" conda: CONDA_CHIPSEQ_ENV shell: """ - Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -w {params.frag_size} 2> {log} + Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -w {params.frag_size} """ rule prep_bedgraph: input: "filtered_bam/{sample}.namesorted.bam" output: 
temp("filtered_bedgraph/{sample}.fragments.bedgraph") - log: "filtered_bedgraph/log/{sample}.log" params: sample = lambda wildcards: wildcards.sample, genome = genome_index @@ -255,14 +242,13 @@ rule SEACR_peaks_stringent: else [] output: "SEACR/{chip_sample}.filtered.stringent.bed" - log: "SEACR/logs/{chip_sample}_stringent.log" params: fdr = lambda wildcards,input: fdr if not input.control else "", prefix = os.path.join(outdir,"SEACR/{chip_sample}.filtered"), script=os.path.join(maindir, "shared","tools/SEACR-1.3/SEACR_1.3.sh") conda: CONDA_SEACR_ENV shell: """ - bash {params.script} {input.chip} {input.control} {params.fdr} "norm" "stringent" {params.prefix} 2>{log} + bash {params.script} {input.chip} {input.control} {params.fdr} "norm" "stringent" {params.prefix} """ rule SEACR_peaks_relaxed: @@ -272,14 +258,13 @@ rule SEACR_peaks_relaxed: else [] output: "SEACR/{chip_sample}.filtered.relaxed.bed" - log: "SEACR/logs/{chip_sample}_relaxed.log" params: fdr = lambda wildcards,input: fdr if not input.control else "", prefix = os.path.join(outdir,"SEACR/{chip_sample}.filtered"), script=os.path.join(maindir, "shared","tools/SEACR-1.3/SEACR_1.3.sh") conda: CONDA_SEACR_ENV shell: """ - bash {params.script} {input.chip} {input.control} {params.fdr} "norm" "relaxed" {params.prefix} 2>{log} + bash {params.script} {input.chip} {input.control} {params.fdr} "norm" "relaxed" {params.prefix} """ rule SEACR_peak_stringent_qc: diff --git a/snakePipes/shared/rules/ChIP_peak_calling_spikein.snakefile b/snakePipes/shared/rules/ChIP_peak_calling_spikein.snakefile index dc805a7eb..402454009 100755 --- a/snakePipes/shared/rules/ChIP_peak_calling_spikein.snakefile +++ b/snakePipes/shared/rules/ChIP_peak_calling_spikein.snakefile @@ -36,9 +36,6 @@ if pairedEnd: peakCaller_options = lambda wildcards: str(peakCallerOptions or '') if not cutntag else " -p 1e-5 ", bampe_options = lambda wildcards: str(BAMPEPeaks or '')if not cutntag else " ", bam_options = lambda wildcards: str(BAMPeaks or 
'') if not cutntag else " " - log: - out = "MACS2/logs/MACS2.{chip_sample}_host.filtered.out", - err = "MACS2/logs/MACS2.{chip_sample}_host.filtered.err" benchmark: "MACS2/.benchmark/MACS2.{chip_sample}_host.filtered.benchmark" conda: CONDA_CHIPSEQ_ENV @@ -52,7 +49,7 @@ if pairedEnd: --outdir MACS2 \ --name {wildcards.chip_sample}_host.BAM \ {params.peakCaller_options} \ - {params.broad_calling} > {log.out} 2> {log.err} + {params.broad_calling} # also run MACS2 in paired-end mode BAMPE for comparison with single-end mode macs2 callpeak -t {input.chip} \ @@ -61,7 +58,7 @@ if pairedEnd: {params.peakCaller_options} \ -g {params.genome_size} --keep-dup all \ --outdir MACS2 --name {wildcards.chip_sample}_host.BAMPE \ - {params.broad_calling} > {log.out}.BAMPE 2> {log.err}.BAMPE + {params.broad_calling} """ else: rule MACS2: @@ -83,9 +80,6 @@ else: frag_size=fragmentLength, peakCaller_options = str(peakCallerOptions or ''), bam_options = str(BAMPeaks or '') - log: - out = "MACS2/logs/MACS2.{chip_sample}_host.filtered.out", - err = "MACS2/logs/MACS2.{chip_sample}_host.filtered.err" benchmark: "MACS2/.benchmark/MACS2.{chip_sample}_host.filtered.benchmark" conda: CONDA_CHIPSEQ_ENV @@ -93,7 +87,7 @@ else: macs2 callpeak -t {input.chip} {params.control_param} -f BAM -g {params.genome_size} \ {params.peakCaller_options} --keep-dup all --outdir MACS2 \ --name {wildcards.chip_sample}_host.BAM {params.bam_options} --extsize {params.frag_size} \ - {params.broad_calling} > {log.out} 2> {log.err} + {params.broad_calling} """ @@ -145,8 +139,6 @@ rule namesort_bams: bam = "split_bam/{sample}_host.bam" output: bam = temp("namesorted_bam/{sample}_host_namesorted.bam") - log: - "namesorted_bam/logs/{sample}_host_namesort.err" params: tempDir = tempDir threads: 4 @@ -154,7 +146,7 @@ rule namesort_bams: shell: """ TMPDIR={params.tempDir} MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX) - sambamba sort -t {threads} -o {output.bam} --tmpdir=$MYTEMP -n {input.bam} 2> {log} + 
sambamba sort -t {threads} -o {output.bam} --tmpdir=$MYTEMP -n {input.bam} rm -rf $MYTEMP """ @@ -169,7 +161,6 @@ if not isMultipleComparison: control = lambda wildcards: ["namesorted_bam/"+get_control(x)+"_host_namesorted.bam" for x in genrichDict[wildcards.group]] if chip_samples_w_ctrl else [] output: "Genrich/{group}.narrowPeak" - log: "Genrich/logs/{group}.log" params: bams = lambda wildcards: ",".join(expand(os.path.join("namesorted_bam", "{sample}_host_namesorted.bam"), sample=genrichDict[wildcards.group])), blacklist = "-E {}".format(blacklist_bed) if blacklist_bed else "", @@ -178,7 +169,7 @@ if not isMultipleComparison: ignoreForNorm = '-e ' + ','.join(ignoreForNormalization) if ignoreForNormalization else "" conda: CONDA_CHIPSEQ_ENV shell: """ - Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -y 2> {log} + Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -y """ else: rule Genrich_peaks: @@ -187,7 +178,6 @@ if not isMultipleComparison: control = lambda wildcards: ["namesorted_bam/"+get_control(x)+"_host_namesorted.bam" for x in genrichDict[wildcards.group]] if chip_samples_w_ctrl else [] output: "Genrich/{group}.narrowPeak" - log: "Genrich/logs/{group}.log" params: bams = lambda wildcards: ",".join(expand(os.path.join("namesorted_bam", "{sample}_host_namesorted.bam"), sample=genrichDict[wildcards.group])), blacklist = "-E {}".format(blacklist_bed) if blacklist_bed else "", @@ -197,7 +187,7 @@ if not isMultipleComparison: ignoreForNorm = "-e " + ','.join(ignoreForNormalization) if ignoreForNormalization else "" conda: CONDA_CHIPSEQ_ENV shell: """ - Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} -e {params.ignoreForNorm} -w {params.frag_size} 2> {log} + Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} -e 
{params.ignoreForNorm} -w {params.frag_size} """ else: if pairedEnd: @@ -207,7 +197,6 @@ else: control = lambda wildcards: ["namesorted_bam/"+get_control(x)+"_host_namesorted.bam" for x in genrichDict[wildcards.compGroup][wildcards.group]] if chip_samples_w_ctrl else [] output: "Genrich/{group}.{compGroup}.narrowPeak" - log: "Genrich/logs/{group}.{compGroup}.log" params: bams = lambda wildcards: ",".join(expand(os.path.join("namesorted_bam", "{sample}_host_namesorted.bam"), sample=genrichDict[wildcards.compGroup][wildcards.group])), blacklist = "-E {}".format(blacklist_bed) if blacklist_bed else "", @@ -216,7 +205,7 @@ else: ignoreForNorm = "-e " + ','.join(ignoreForNormalization) if ignoreForNormalization else "" conda: CONDA_CHIPSEQ_ENV shell: """ - Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -y 2> {log} + Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} {params.ignoreForNorm} -y """ else: rule Genrich_peaks: @@ -225,7 +214,6 @@ else: control = lambda wildcards: ["namesorted_bam/"+get_control(x)+"_host_namesorted.bam" for x in genrichDict[wildcards.compGroup][wildcards.group] ] if chip_samples_w_ctrl else [] output: "Genrich/{group}.{compGroup}.narrowPeak" - log: "Genrich/logs/{group}.{compGroup}.log" params: bams = lambda wildcards: ",".join(expand(os.path.join("namesorted_bam", "{sample}_host_namesorted.bam"), sample=genrichDict[wildcards.compGroup][wildcards.group])), blacklist = "-E {}".format(blacklist_bed) if blacklist_bed else "", @@ -235,14 +223,13 @@ else: ignoreForNorm = "-e " + ','.join(ignoreForNormalization) if ignoreForNormalization else "" conda: CONDA_CHIPSEQ_ENV shell: """ - Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} -e {params.ignoreForNorm} -w {params.frag_size} 2> {log} + Genrich -t {params.bams} {params.control_pfx} {params.control} -o {output} -r {params.blacklist} 
-e {params.ignoreForNorm} -w {params.frag_size} """ rule prep_bedgraph: input: "bamCoverage/{sample}.host_scaled.BYhost.bw" output: temp("filtered_bedgraph/{sample}_host.fragments.bedgraph") - log: "filtered_bedgraph/log/{sample}.log" conda: CONDA_SEACR_ENV shell: """ bigWigToBedGraph {input} {output} @@ -255,14 +242,13 @@ rule SEACR_peaks_stringent: else [] output: "SEACR/{chip_sample}_host.stringent.bed" - log: "SEACR/logs/{chip_sample}_stringent.log" params: fdr = lambda wildcards,input: fdr if not input.control else "", prefix = os.path.join(outdir,"SEACR/{chip_sample}_host"), script=os.path.join(maindir, "shared","tools/SEACR-1.3/SEACR_1.3.sh") conda: CONDA_SEACR_ENV shell: """ - bash {params.script} {input.chip} {input.control} {params.fdr} "non" "stringent" {params.prefix} 2>{log} + bash {params.script} {input.chip} {input.control} {params.fdr} "non" "stringent" {params.prefix} """ rule SEACR_peaks_relaxed: @@ -272,14 +258,13 @@ rule SEACR_peaks_relaxed: else [] output: "SEACR/{chip_sample}_host.relaxed.bed" - log: "SEACR/logs/{chip_sample}_relaxed.log" params: fdr = lambda wildcards,input: fdr if not input.control else "", prefix = os.path.join(outdir,"SEACR/{chip_sample}_host"), script=os.path.join(maindir, "shared","tools/SEACR-1.3/SEACR_1.3.sh") conda: CONDA_SEACR_ENV shell: """ - bash {params.script} {input.chip} {input.control} {params.fdr} "non" "relaxed" {params.prefix} 2>{log} + bash {params.script} {input.chip} {input.control} {params.fdr} "non" "relaxed" {params.prefix} """ diff --git a/snakePipes/shared/rules/ChiP-seq_qc_report.snakefile b/snakePipes/shared/rules/ChiP-seq_qc_report.snakefile index 6834c8280..6d9936d44 100644 --- a/snakePipes/shared/rules/ChiP-seq_qc_report.snakefile +++ b/snakePipes/shared/rules/ChiP-seq_qc_report.snakefile @@ -4,9 +4,8 @@ rule convert_flagstat_output: "Sambamba/{sample}.markdup.txt" output: temp("Sambamba/{sample}.dup.converted.tsv") - log: "Sambamba/logs/{sample}.convert_flagstat_output.log" shell: """ - sed -n 
'1p;4p;5p' {input} | cut -d' ' -f1 | tr '\n' '\t' | sed 's/^/{wildcards.sample}\t/' | sed -e '$a\\' > {output} 2> {log} + sed -n '1p;4p;5p' {input} | cut -d' ' -f1 | tr '\n' '\t' | sed 's/^/{wildcards.sample}\t/' | sed -e '$a\\' > {output} """ #######merge converted sambamba reports###### @@ -15,11 +14,9 @@ rule report_flagstat_all_data: expand("Sambamba/{sample}.dup.converted.tsv",sample=all_samples) output: "Sambamba/flagstat_report_all.tsv" - log: "Sambamba/logs/report_flagstat_all_data.log" shell: """ echo -e 'sample\ttotal\tdup\tmapped' > {output} sort -k1,1V {input} >> {output} - 2> {log} """ ##########QC report for all the samples######### @@ -29,7 +26,6 @@ rule qc_report_all: metrics = "deepTools_ChIP/plotFingerprint/plotFingerprint.metrics.txt" if not useSpikeInForNorm else "split_deepTools_ChIP/plotFingerprint/plotFingerprint.metrics.txt" output: "QC_report/QC_report_all.tsv" - log: "QC_report/logs/qc_report_all.log" shell: """ - awk 'NR == 1; NR > 1 {{print $0 | \"sort -k1,1V\"}}' {input.metrics} | cut -f4,8,10,12 | paste {input.flagstat} - > {output} 2> {log} + awk 'NR == 1; NR > 1 {{print $0 | \"sort -k1,1V\"}}' {input.metrics} | cut -f4,8,10,12 | paste {input.flagstat} - > {output} """ diff --git a/snakePipes/shared/rules/DESeq2.multipleComp.snakefile b/snakePipes/shared/rules/DESeq2.multipleComp.snakefile index 74edc2311..82573ee38 100644 --- a/snakePipes/shared/rules/DESeq2.multipleComp.snakefile +++ b/snakePipes/shared/rules/DESeq2.multipleComp.snakefile @@ -38,9 +38,6 @@ rule DESeq2: formula = config["formula"], counts_table = lambda wildcards,input: os.path.join(outdir,input.counts_table), symbol_file = lambda wildcards,input: os.path.join(outdir,input.symbol_file) - log: - out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")), - err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) conda: 
CONDA_RNASEQ_ENV script: "{params.script}" @@ -53,9 +50,6 @@ rule DESeq2_Salmon_basic: symbol_file = "Annotation/genes.filtered.symbol" #get_symbol_file output: "{}/DESeq2.session_info.txt".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) - log: - out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")), - err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) benchmark: "{}/.benchmark/DESeq2.Salmon.benchmark".format(get_outdir("DESeq2_Salmon",os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}.tsv")) params: diff --git a/snakePipes/shared/rules/DESeq2.singleComp.snakefile b/snakePipes/shared/rules/DESeq2.singleComp.snakefile index 2e506fd5b..93090fccb 100644 --- a/snakePipes/shared/rules/DESeq2.singleComp.snakefile +++ b/snakePipes/shared/rules/DESeq2.singleComp.snakefile @@ -26,9 +26,6 @@ rule DESeq2: formula = config["formula"], counts_table = lambda wildcards,input: os.path.join(outdir,input.counts_table), symbol_file = lambda wildcards,input: os.path.join(outdir,input.symbol_file) - log: - out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2",sampleSheet)), - err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2",sampleSheet)) conda: CONDA_RNASEQ_ENV script: "{params.script}" @@ -41,9 +38,6 @@ rule DESeq2_Salmon_basic: symbol_file = "Annotation/genes.filtered.symbol" #get_symbol_file output: "{}/DESeq2.session_info.txt".format(get_outdir("DESeq2_Salmon",sampleSheet)) - log: - out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2_Salmon",sampleSheet)), - err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2_Salmon",sampleSheet)) benchmark: "{}/.benchmark/DESeq2.Salmon.benchmark".format(get_outdir("DESeq2_Salmon",sampleSheet)) params: @@ -69,9 +63,6 @@ rule DESeq2_Salmon_allelic: symbol_file = 
"Annotation/genes.filtered.symbol" #get_symbol_file output: "{}/DESeq2.session_info.txt".format(get_outdir("DESeq2_SalmonAllelic",sampleSheet)) - log: - out = "{}/logs/DESeq2.out".format(get_outdir("DESeq2_SalmonAllelic",sampleSheet)), - err = "{}/logs/DESeq2.err".format(get_outdir("DESeq2_SalmonAllelic",sampleSheet)) benchmark: "{}/.benchmark/DESeq2.SalmonAllelic.benchmark".format(get_outdir("DESeq2_SalmonAllelic",sampleSheet)) params: diff --git a/snakePipes/shared/rules/FASTQ.snakefile b/snakePipes/shared/rules/FASTQ.snakefile index 57d6bbfb7..b40904845 100755 --- a/snakePipes/shared/rules/FASTQ.snakefile +++ b/snakePipes/shared/rules/FASTQ.snakefile @@ -1,4 +1,4 @@ -if pairedEnd or pipeline=="scrna-seq": +if pairedEnd or pipeline=="scrnaseq": rule validateFQ: input: r1 = indir+"/{sample}"+reads[0]+ext, @@ -34,7 +34,7 @@ rule origFASTQ1: {params.cmd} """ -if pairedEnd or pipeline=="scrna-seq": +if pairedEnd or pipeline=="scrnaseq": rule origFASTQ2: input: r2 = indir+"/{sample}"+reads[1]+ext, @@ -57,7 +57,6 @@ if downsample: output: r1 = "originalFASTQ/downsample_{sample}"+reads[0]+".fastq.gz", r2 = "originalFASTQ/downsample_{sample}"+reads[1]+".fastq.gz" - log: "originalFASTQ/logs/{sample}.FASTQdownsample.log" params: num_reads = downsample benchmark: @@ -65,9 +64,8 @@ if downsample: threads: lambda wildcards: 10 if 10 {output.r1} 2> {log} - seqtk sample -s 100 {input.r2} {params.num_reads} | pigz -p {threads} -9 > {output.r2} 2>> {log} - + seqtk sample -s 100 {input.r1} {params.num_reads} | pigz -p {threads} -9 > {output.r1} + seqtk sample -s 100 {input.r2} {params.num_reads} | pigz -p {threads} -9 > {output.r2} """ else: rule FASTQdownsample: @@ -75,11 +73,10 @@ if downsample: "originalFASTQ/{sample}.fastq.gz" output: fq = "originalFASTQ/downsample_{sample}.fastq.gz" - log: "originalFASTQ/logs/{sample}.FASTQdownsample.log" threads: lambda wildcards: 12 if 12 {output} 2> {log} + seqtk sample -s 100 {input} {params.num_reads} | pigz -p {threads} -9 > {output} 
""" diff --git a/snakePipes/shared/rules/FastQC.snakefile b/snakePipes/shared/rules/FastQC.snakefile index 2d763916d..f14d82e09 100644 --- a/snakePipes/shared/rules/FastQC.snakefile +++ b/snakePipes/shared/rules/FastQC.snakefile @@ -1,17 +1,14 @@ -if pipeline == "scrna-seq" and mode == "STARsolo" or pipeline=="scrna-seq" and mode == "Alevin": +if pipeline == "scrnaseq" and mode == "STARsolo" or pipeline=="scrnaseq" and mode == "Alevin": rule FastQC: input: "originalFASTQ/{sample}{read}.fastq.gz" output: "FastQC/{sample}{read}_fastqc.html" - log: - out = "FastQC/logs/FastQC.{sample}{read}.out", - err = "FastQC/logs/FastQC.{sample}{read}.err" benchmark: "FastQC/.benchmark/FastQC.{sample}{read}.benchmark" threads: 2 conda: CONDA_SHARED_ENV - shell: "fastqc -o FastQC {input} > {log.out} 2> {log.err}" + shell: "fastqc -o FastQC {input}" else: if pairedEnd: @@ -20,14 +17,11 @@ else: "EXTERNAL_BAM/{sample}."+bamExt if fromBAM else "FASTQ/{sample}{read}.fastq.gz" output: "FastQC/{sample}_fastqc.html" if fromBAM else "FastQC/{sample}{read}_fastqc.html" - log: - out = "FastQC/logs/FastQC.{sample}{read}.out", - err = "FastQC/logs/FastQC.{sample}{read}.err" benchmark: "FastQC/.benchmark/FastQC.{sample}{read}.benchmark" threads: 2 conda: CONDA_SHARED_ENV - shell: "fastqc -o FastQC {input} > {log.out} 2> {log.err}" + shell: "fastqc -o FastQC {input}" else: rule FastQC_singleEnd: @@ -37,13 +31,10 @@ else: "FastQC/{sample}_fastqc.html" if fromBAM else "FastQC/{sample}"+reads[0]+"_fastqc.html" params: reads=reads[0] - log: - out = "FastQC/logs/FastQC.{sample}"+reads[0]+".out", - err = "FastQC/logs/FastQC.{sample}"+reads[0]+".err" benchmark: "FastQC/.benchmark/FastQC.{sample}"+reads[0]+".benchmark" threads: 2 conda: CONDA_SHARED_ENV shell: """ - fastqc -o FastQC {input} > {log.out} 2> {log.err} + fastqc -o FastQC {input} """ diff --git a/snakePipes/shared/rules/GenomicContamination.snakefile b/snakePipes/shared/rules/GenomicContamination.snakefile index 98d2a4f1d..78e49c110 100644 
--- a/snakePipes/shared/rules/GenomicContamination.snakefile +++ b/snakePipes/shared/rules/GenomicContamination.snakefile @@ -8,13 +8,10 @@ rule GContamination_featureCounts: output: txt = temp("GenomicContamination/{sample}.featurecounts.txt"), summary = "GenomicContamination/{sample}.featurecounts.txt.summary" - log: - out = "GenomicContamination/{sample}.featurecounts.out", - err = "GenomicContamination/{sample}.featurecounts.err" threads: 8 conda: CONDA_RNASEQ_ENV shell: - "featureCounts -T {threads} -a {input.gtf} -t transcript -o {output.txt} {input.bams} > {log.out} 2> {log.err}" + "featureCounts -T {threads} -a {input.gtf} -t transcript -o {output.txt} {input.bams}" rule GContamination_featurecount_report: input: @@ -23,9 +20,8 @@ rule GContamination_featurecount_report: finaloutput = "GenomicContamination/{sample}.Gcontamination_report.tsv", temp = temp("GenomicContamination/{sample}.temp"), temp1 = temp("GenomicContamination/{sample}.temp1") - log: "GenomicContamination/logs/{sample}.report.log" shell: - "SUM=$(cut -f2 {input} | tr '\n' '\t'| cut -f2,4,5 | awk '{{num = $1 + $2 + $3}} END {{print num}}');NUM=$(cut -f2 {input} | tr '\n' '\t'| cut -f5 | awk '{{num = $1}} END {{print num}}'); bc -l <<< $NUM/$SUM > {output.temp}; cut -f2 {input} | tr '\n' '\t'|cut -f2 | sed 's/^/{wildcards.sample}\t/' > {output.temp1}; paste -d'\t' {output.temp1} {output.temp} > {output.finaloutput} 2> {log}" + "SUM=$(cut -f2 {input} | tr '\n' '\t'| cut -f2,4,5 | awk '{{num = $1 + $2 + $3}} END {{print num}}');NUM=$(cut -f2 {input} | tr '\n' '\t'| cut -f5 | awk '{{num = $1}} END {{print num}}'); bc -l <<< $NUM/$SUM > {output.temp}; cut -f2 {input} | tr '\n' '\t'|cut -f2 | sed 's/^/{wildcards.sample}\t/' > {output.temp1}; paste -d'\t' {output.temp1} {output.temp} > {output.finaloutput}" ## rule GContamination_featurecount_all_report: diff --git a/snakePipes/shared/rules/LinkBam.snakefile b/snakePipes/shared/rules/LinkBam.snakefile index ea498b0f4..fa3041efe 100644 --- 
a/snakePipes/shared/rules/LinkBam.snakefile +++ b/snakePipes/shared/rules/LinkBam.snakefile @@ -1,6 +1,6 @@ import os -if pipeline=="rna-seq" and "allelic-counting" in mode: +if pipeline=="rnaseq" and "allelic-counting" in mode: rule link_bam: input: indir + "/{sample}.{suffix}" + bamExt @@ -29,17 +29,17 @@ else: input: indir + "/{sample}" + bamExt output: - aligner + "/{sample}.unsorted.bam" if pipeline=="noncoding-rna-seq" else aligner + "/{sample}.bam" + aligner + "/{sample}.unsorted.bam" if pipeline=="ncRNAseq" else aligner + "/{sample}.bam" params: input_bai = indir + "/{sample}" + bamExt + ".bai", - output_bai = aligner + "/{sample}.unsorted.bam.bai" if pipeline=="noncoding-rna-seq" else aligner + "/{sample}.bam.bai" + output_bai = aligner + "/{sample}.unsorted.bam.bai" if pipeline=="ncRNAseq" else aligner + "/{sample}.bam.bai" run: if os.path.exists(params.input_bai) and not os.path.exists(os.path.join(outdir,params.output_bai)): os.symlink(params.input_bai,os.path.join(outdir,params.output_bai)) if not os.path.exists(os.path.join(outdir,output[0])): os.symlink(os.path.join(outdir,input[0]),os.path.join(outdir,output[0])) - if not pipeline=="noncoding-rna-seq": + if not pipeline=="ncRNAseq": rule samtools_index_external: input: aligner + "/{sample}.bam" @@ -67,8 +67,7 @@ else: aligner + "/{sample}.bam" output: "Sambamba/{sample}.markdup.txt" - log: "Sambamba/logs/{sample}.flagstat.log" conda: CONDA_SAMBAMBA_ENV shell: """ - sambamba flagstat -p {input} > {output} 2> {log} + sambamba flagstat -p {input} > {output} """ diff --git a/snakePipes/shared/rules/Qualimap_bamqc.snakefile b/snakePipes/shared/rules/Qualimap_bamqc.snakefile index f3d26f2e4..79f18b046 100755 --- a/snakePipes/shared/rules/Qualimap_bamqc.snakefile +++ b/snakePipes/shared/rules/Qualimap_bamqc.snakefile @@ -9,9 +9,6 @@ rule Qualimap_bamqc: params: outdir = "Qualimap_qc/{sample}.filtered", collect_overlap_pairs = "--collect-overlap-pairs" if pairedEnd else "" - log: - out = 
"Qualimap_qc/logs/Qualimap_bamqc.{sample}.filtered.out", - err = "Qualimap_qc/logs/Qualimap_bamqc.{sample}.filtered.err" benchmark: "Qualimap_qc/.benchmark/Qualimap_bamqc.{sample}.filtered.benchmark" threads: lambda wildcards: 16 if 16 {log.out} 2> {log.err}" rule Qualimap_bamqc_symlink_txt: diff --git a/snakePipes/shared/rules/RNA-seq_qc_report.snakefile b/snakePipes/shared/rules/RNA-seq_qc_report.snakefile index 3730f454b..855b1c5af 100644 --- a/snakePipes/shared/rules/RNA-seq_qc_report.snakefile +++ b/snakePipes/shared/rules/RNA-seq_qc_report.snakefile @@ -4,9 +4,8 @@ rule convert_flagstat_output: "Sambamba/{sample}.markdup.txt" output: temp("Sambamba/{sample}.dup.converted.tsv") - log: "Sambamba/logs/{sample}.convert_flagstat_output.log" shell: - "sed -n '1p;4p;5p' {input} | cut -d' ' -f1 | tr '\n' '\t' | sed 's/^/{wildcards.sample}\t/' | sed -e '$a\\' > {output} 2> {log}" + "sed -n '1p;4p;5p' {input} | cut -d' ' -f1 | tr '\n' '\t' | sed 's/^/{wildcards.sample}\t/' | sed -e '$a\\' > {output}" #######merge converted sambamba reports###### rule report_flagstat_all_data: @@ -14,9 +13,8 @@ rule report_flagstat_all_data: expand("Sambamba/{sample}.dup.converted.tsv",sample=samples) output: "Sambamba/flagstat_report_all.tsv" - log: "Sambamba/logs/report_flagstat_all_data.log" shell: - "sort -k1,1V {input} | cat <( echo -e 'sample\ttotal\tdup\tmapped') - > {output} 2> {log}" + "sort -k1,1V {input} | cat <( echo -e 'sample\ttotal\tdup\tmapped') - > {output}" ##########QC report for all the samples######### if dnaContam: @@ -26,15 +24,13 @@ if dnaContam: IHECmetrics = "GenomicContamination/genomic_contamination_featurecount_report.tsv" output: "QC_report/QC_report_all.tsv" - log: "QC_report/logs/qc_report_all.log" shell: - "cut -f2,3 {input.IHECmetrics} | paste {input.flagstat} - > {output} 2> {log}" + "cut -f2,3 {input.IHECmetrics} | paste {input.flagstat} - > {output}" else: rule qc_report_all: input: flagstat = "Sambamba/flagstat_report_all.tsv" output: 
"QC_report/QC_report_all.tsv" - log: "QC_report/logs/qc_report_all.log" shell: - "cp {input.flagstat} {output} 2> {log}" + "cp {input.flagstat} {output}" diff --git a/snakePipes/shared/rules/RNA_mapping.snakefile b/snakePipes/shared/rules/RNA_mapping.snakefile index 50d98ff1b..d02d4ecf2 100755 --- a/snakePipes/shared/rules/RNA_mapping.snakefile +++ b/snakePipes/shared/rules/RNA_mapping.snakefile @@ -32,7 +32,6 @@ if aligner.upper().find("HISAT2") >=0: bam = temp(aligner+"/{sample}.sorted.bam"), splice = aligner+"/{sample}/splice_sites.txt", met = aligner+"/{sample}/metrics.txt" - log: aligner+"/logs/{sample}.sort.log" params: lib_type = getHISAT_libtype(pairedEnd, libraryType), input_splice = known_splicesites, @@ -53,7 +52,7 @@ if aligner.upper().find("HISAT2") >=0: -1 {input.r1} -2 {input.r2} \ --novel-splicesite-outfile {output.splice} \ --met-file {output.met} 2> {output.align_summary} \ - | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {threads} -O bam -o {output.bam} - 2> {log}; + | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {threads} -O bam -o {output.bam} -; rm -rf $MYTEMP """ else: @@ -65,7 +64,6 @@ if aligner.upper().find("HISAT2") >=0: bam = temp(aligner+"/{sample}.sorted.bam"), splice = aligner+"/{sample}/splice_sites.txt", met = aligner+"/{sample}/metrics.txt" - log: aligner+"/logs/{sample}.sort.log" params: lib_type = getHISAT_libtype(pairedEnd, libraryType), input_splice = known_splicesites, @@ -86,7 +84,7 @@ if aligner.upper().find("HISAT2") >=0: -U {input[0]} \ --novel-splicesite-outfile {output.splice} \ --met-file {output.met} 2> {output.align_summary} \ - | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {threads} -O bam -o {output.bam} - 2> {log} + | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {threads} -O bam -o {output.bam} - rm -rf $MYTEMP """ elif aligner.upper().find("STAR") >=0: @@ -97,7 +95,6 @@ elif 
aligner.upper().find("STAR") >=0: r2 = fastq_dir+"/{sample}"+reads[1]+".fastq.gz" output: bam = temp(aligner+"/{sample}.sorted.bam") - log: aligner+"/logs/{sample}.sort.log" params: alignerOptions = str(alignerOptions or ''), gtf = genes_gtf, @@ -126,7 +123,7 @@ elif aligner.upper().find("STAR") >=0: --readFilesIn {input.r1} {input.r2} \ --readFilesCommand 'gunzip -c' \ --outFileNamePrefix {params.prefix} \ - | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {params.samtools_threads} -O bam -o {output.bam} - 2> {log} + | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {params.samtools_threads} -O bam -o {output.bam} - rm -rf $MYTEMP """ else: @@ -135,7 +132,6 @@ elif aligner.upper().find("STAR") >=0: fastq_dir+"/{sample}"+reads[0]+".fastq.gz" output: bam = temp(aligner+"/{sample}.sorted.bam") - log: aligner+"/logs/{sample}.sort.log" params: alignerOptions = str(alignerOptions or ''), gtf = genes_gtf, @@ -164,6 +160,6 @@ elif aligner.upper().find("STAR") >=0: --readFilesIn {input} \ --readFilesCommand 'gunzip -c' \ --outFileNamePrefix {params.prefix} \ - | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {params.samtools_threads} -O bam -o {output.bam} - 2> {log} + | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {params.samtools_threads} -O bam -o {output.bam} - rm -rf $MYTEMP """ diff --git a/snakePipes/shared/rules/RNA_mapping_allelic.snakefile b/snakePipes/shared/rules/RNA_mapping_allelic.snakefile index fda578799..62a1058be 100644 --- a/snakePipes/shared/rules/RNA_mapping_allelic.snakefile +++ b/snakePipes/shared/rules/RNA_mapping_allelic.snakefile @@ -8,7 +8,6 @@ if aligner == "STAR": index = star_index_allelic output: temp(aligner+"/{sample}.sorted.bam") - log: aligner+"/logs/{sample}.sort.log" params: alignerOptions = str(alignerOptions or ''), gtf = genes_gtf, @@ -44,7 +43,7 @@ if aligner == "STAR": --alignIntronMin 1 \ --alignIntronMax 1000000 \ 
--alignMatesGapMax 1000000 \ - | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {threads} -O bam -o {output} - 2> {log}; + | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {threads} -O bam -o {output} -; rm -rf $MYTEMP """ else: @@ -54,7 +53,6 @@ if aligner == "STAR": index = star_index_allelic output: temp(aligner+"/{sample}.sorted.bam") - log: aligner+"/logs/{sample}.sort.log" params: alignerOptions = str(alignerOptions or ''), gtf = genes_gtf, @@ -90,7 +88,7 @@ if aligner == "STAR": --alignIntronMin 1 \ --alignIntronMax 1000000 \ --alignMatesGapMax 1000000 \ - | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {threads} -O bam -o {output} - 2> {log}; + | samtools sort -m {params.samsort_memory} -T $MYTEMP/{wildcards.sample} -@ {threads} -O bam -o {output} -; rm -rf $MYTEMP """ else: diff --git a/snakePipes/shared/rules/SNPsplit.snakefile b/snakePipes/shared/rules/SNPsplit.snakefile index ea06f0f15..13acbb8a8 100644 --- a/snakePipes/shared/rules/SNPsplit.snakefile +++ b/snakePipes/shared/rules/SNPsplit.snakefile @@ -1,5 +1,5 @@ -## get input bam depending on the mapping prog (use filtered bam in case of chip-seq data) +## get input bam depending on the mapping prog (use filtered bam in case of chipseq data) if aligner == "Bowtie2": rule snp_split: input: @@ -7,17 +7,16 @@ if aligner == "Bowtie2": bam = "filtered_bam/{sample}.filtered.bam" output: targetbam = expand("allelic_bams/{{sample}}.filtered.{suffix}.bam", suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']), - tempbam = temp("filtered_bam/{sample}.filtered.sortedByName.bam"), + #tempbam = temp("filtered_bam/{sample}.filtered.sortedByName.bam"), rep1 = "allelic_bams/{sample}.filtered.SNPsplit_report.yaml", rep2 = "allelic_bams/{sample}.filtered.SNPsplit_sort.yaml" - log: "allelic_bams/logs/{sample}.snp_split.log" params: pairedEnd = '--paired' if pairedEnd else '', outdir = "allelic_bams" conda: CONDA_SHARED_ENV 
shell: "SNPsplit {params.pairedEnd}" - " -o {params.outdir} --snp_file {input.snp} {input.bam} 2> {log}" + " -o {params.outdir} --snp_file {input.snp} {input.bam}" elif aligner == "STAR" or aligner == "EXTERNAL_BAM": rule snp_split: @@ -26,17 +25,16 @@ elif aligner == "STAR" or aligner == "EXTERNAL_BAM": bam = aligner+"/{sample}.bam" output: targetbam = expand("allelic_bams/{{sample}}.{suffix}.bam", suffix = ['allele_flagged', 'genome1', 'genome2', 'unassigned']), - tempbam = temp(aligner+"/{sample}.sortedByName.bam"), + #tempbam = temp(aligner+"/{sample}.sortedByName.bam"), rep1 = "allelic_bams/{sample}.SNPsplit_report.yaml", rep2 = "allelic_bams/{sample}.SNPsplit_sort.yaml" - log: "allelic_bams/logs/{sample}.snp_split.log" params: pairedEnd = '--paired' if pairedEnd else '', outdir = "allelic_bams" conda: CONDA_SHARED_ENV shell: "SNPsplit {params.pairedEnd}" - " -o {params.outdir} --snp_file {input.snp} {input.bam} 2> {log}" + " -o {params.outdir} --snp_file {input.snp} {input.bam}" # move the allele-specific bams to another folder #if aligner == "Bowtie2": @@ -67,7 +65,6 @@ rule BAMsort_allelic: input: "allelic_bams/{sample}.filtered.{suffix}.bam" if aligner == "Bowtie2" else "allelic_bams/{sample}.{suffix}.bam" output: "allelic_bams/{sample}.{suffix}.sorted.bam" - log: "allelic_bams/logs/{sample}.{suffix}.sort.log" threads: 12 params: @@ -76,7 +73,7 @@ rule BAMsort_allelic: shell: """ TMPDIR={params.tempDir} MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX); - samtools sort -@ {threads} -T $MYTEMP -O bam -o {output} {input} 2> {log}; + samtools sort -@ {threads} -T $MYTEMP -O bam -o {output} {input}; rm -rf $MYTEMP """ @@ -86,6 +83,5 @@ rule BAMindex_allelic: "allelic_bams/{sample}.{suffix}.sorted.bam" output: "allelic_bams/{sample}.{suffix}.sorted.bam.bai" - log: "allelic_bams/logs/{sample}.{suffix}.index.log" conda: CONDA_SHARED_ENV - shell: "samtools index {input} 2> {log}" + shell: "samtools index {input}" diff --git 
a/snakePipes/shared/rules/Salmon.snakefile b/snakePipes/shared/rules/Salmon.snakefile index d90afb009..c64f1d307 100755 --- a/snakePipes/shared/rules/Salmon.snakefile +++ b/snakePipes/shared/rules/Salmon.snakefile @@ -12,15 +12,12 @@ # "Salmon/.benchmark/Salmon.index.benchmark" # params: # salmonIndexOptions = salmonIndexOptions -# log: -# out = "Salmon/SalmonIndex/SalmonIndex.out", -# err = "Salmon/SalmonIndex/SalmonIndex.err", # threads: lambda wildcards: 16 if 16" {input[1]} | cut -d " " -f 1 | tr -d ">" > {output[0]} # cat {input[0]} {input[1]} > {output[1]} -# salmon index -p {threads} -t {output[1]} -d {output[0]} -i Salmon/SalmonIndex {params.salmonIndexOptions} > {log.out} 2> {log.err} +# salmon index -p {threads} -t {output[1]} -d {output[0]} -i Salmon/SalmonIndex {params.salmonIndexOptions} # """ @@ -52,7 +49,6 @@ if pairedEnd: r2 = fastq_dir+"/{sample}"+reads[1]+".fastq.gz" output: quant = "Salmon/{sample}/quant.sf" - log: "Salmon/logs/{sample}.quant.log" benchmark: "Salmon/.benchmark/SalmonQuant.{sample}.benchmark" params: @@ -63,7 +59,7 @@ if pairedEnd: threads: 8 conda: CONDA_RNASEQ_ENV shell: """ - salmon quant -p {threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i {params.index} -l {params.lib_type} -1 {input.r1} -2 {input.r2} -o {params.outdir} 2> {log} + salmon quant -p {threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i {params.index} -l {params.lib_type} -1 {input.r1} -2 {input.r2} -o {params.outdir} """ else: rule SalmonQuant: @@ -71,7 +67,6 @@ else: fastq = fastq_dir+"/{sample}"+reads[0]+".fastq.gz" output: quant = "Salmon/{sample}/quant.sf" - log: "Salmon/logs/{sample}.quant.log" benchmark: "Salmon/.benchmark/SalmonQuant.{sample}.benchmark" params: @@ -82,7 +77,7 @@ else: threads: 8 conda: CONDA_SALMON_ENV shell: """ - salmon quant -p {threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i {params.index} -l {params.lib_type} -r {input.fastq} -o {params.outdir} 2> {log} + salmon quant -p 
{threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i {params.index} -l {params.lib_type} -r {input.fastq} -o {params.outdir} """ @@ -103,8 +98,6 @@ rule Salmon_TPM: "Salmon/TPM.transcripts.tsv" benchmark: "Salmon/.benchmark/Salmon_TPM.benchmark" - log: - "Salmon/logs/Salmon_TPM.log" conda: CONDA_RNASEQ_ENV shell: "Rscript "+os.path.join(maindir, "shared", "rscripts", "merge_count_tables.R")+" Name TPM {output} {input} " @@ -117,8 +110,6 @@ rule Salmon_counts: "Salmon/counts.transcripts.tsv" benchmark: "Salmon/.benchmark/Salmon_counts.benchmark" - log: - "Salmon/logs/Salmon_counts.log" conda: CONDA_RNASEQ_ENV shell: "Rscript "+os.path.join(maindir, "shared", "rscripts", "merge_count_tables.R")+" Name NumReads {output} {input} " @@ -130,9 +121,8 @@ rule Salmon_wasabi: "Salmon/{sample}.quant.sf" output: "Salmon/{sample}/abundance.h5" - log: "Salmon/logs/{sample}.wasabi.log" params: "Salmon/{sample}/" conda: CONDA_RNASEQ_ENV shell: - "Rscript "+os.path.join(maindir, "shared", "rscripts", "wasabi.R")+" {params} 2> {log}" + "Rscript "+os.path.join(maindir, "shared", "rscripts", "wasabi.R")+" {params}" diff --git a/snakePipes/shared/rules/Salmon_allelic.snakefile b/snakePipes/shared/rules/Salmon_allelic.snakefile index c43509c0f..4b7f71107 100755 --- a/snakePipes/shared/rules/Salmon_allelic.snakefile +++ b/snakePipes/shared/rules/Salmon_allelic.snakefile @@ -14,15 +14,12 @@ allelic_suffix = ['genome1', 'genome2'] # "Salmon/.benchmark/Salmon.index.benchmark" # params: # salmonIndexOptions = salmonIndexOptions -# log: -# out = "Salmon/SalmonIndex/SalmonIndex.out", -# err = "Salmon/SalmonIndex/SalmonIndex.err", # threads: lambda wildcards: 16 if 16" {input[1]} | cut -d " " -f 1 | tr -d ">" > {output[0]} # cat {input[0]} {input[1]} > {output[1]} -# salmon index -p {threads} -t {output[1]} -d {output[0]} -i Salmon/SalmonIndex {params.salmonIndexOptions} > {log.out} 2> {log.err} +# salmon index -p {threads} -t {output[1]} -d {output[0]} -i Salmon/SalmonIndex 
{params.salmonIndexOptions} # """ @@ -54,14 +51,12 @@ if pairedEnd: output: r1="allelicFASTQ/{sample}.{allelic_suffix}_R1.fastq.gz", r2="allelicFASTQ/{sample}.{allelic_suffix}_R2.fastq.gz" - log: "allelicFASTQ/logs/bam2fq.{sample}.{allelic_suffix}.log" benchmark: "allelicFASTQ/.benchmark/bam2fq.{sample}.{allelic_suffix}.benchmark" threads: 4 conda: CONDA_SHARED_ENV shell: """ samtools collate -@ {threads} -u -O {input.allelic_bam} | \\ -samtools fastq -1 {output.r1} -2 {output.r2} -0 /dev/null -s /dev/null -n \\ - 2> {log} +samtools fastq -1 {output.r1} -2 {output.r2} -0 /dev/null -s /dev/null -n """ rule SalmonQuant: @@ -70,7 +65,6 @@ samtools fastq -1 {output.r1} -2 {output.r2} -0 /dev/null -s /dev/null -n \\ r2 = "allelicFASTQ/{sample}.{allelic_suffix}_R2.fastq.gz" output: quant = "SalmonAllelic/{sample}.{allelic_suffix}/quant.sf" - log: "SalmonAllelic/logs/{sample}.{allelic_suffix}.quant.log" benchmark: "SalmonAllelic/.benchmark/SalmonQuant.{sample}.{allelic_suffix}.benchmark" params: @@ -81,7 +75,7 @@ samtools fastq -1 {output.r1} -2 {output.r2} -0 /dev/null -s /dev/null -n \\ threads: 8 conda: CONDA_SALMON_ENV shell: """ - salmon quant -p {threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i {params.index} -l {params.lib_type} -1 {input.r1} -2 {input.r2} -o {params.outdir} 2> {log} + salmon quant -p {threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i {params.index} -l {params.lib_type} -1 {input.r1} -2 {input.r2} -o {params.outdir} """ else: rule getAllelicFQ: @@ -90,14 +84,12 @@ else: allelic_bai="allelic_bams/{sample}.{allelic_suffix}.sorted.bam.bai" output: r1="allelicFASTQ/{sample}.{allelic_suffix}_R1.fastq.gz" - log: "allelicFASTQ/logs/bam2fq.{sample}.{allelic_suffix}.log" benchmark: "allelicFASTQ/.benchmark/bam2fq.{sample}.{allelic_suffix}.benchmark" threads: 4 conda: CONDA_SHARED_ENV shell: """ samtools collate -@ {threads} -u -O {input.allelic_bam} | \\ -samtools fastq -1 /dev/null -2 /dev/null -0 /dev/null -s 
{output.r1} -n \\ - 2> {log} +samtools fastq -1 /dev/null -2 /dev/null -0 /dev/null -s {output.r1} -n """ rule SalmonQuant: @@ -105,7 +97,6 @@ samtools fastq -1 /dev/null -2 /dev/null -0 /dev/null -s {output.r1} -n \\ fastq = "allelicFASTQ/{sample}.{allelic_suffix}_R1.fastq.gz" output: quant = "SalmonAllelic/{sample}.{allelic_suffix}/quant.sf" - log: "SalmonAllelic/logs/{sample}.{allelic_suffix}.quant.log" benchmark: "SalmonAllelic/.benchmark/SalmonQuant.{sample}.{allelic_suffix}.benchmark" params: @@ -116,7 +107,7 @@ samtools fastq -1 /dev/null -2 /dev/null -0 /dev/null -s {output.r1} -n \\ threads: 8 conda: CONDA_SALMON_ENV shell: """ - salmon quant -p {threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i {params.index} -l {params.lib_type} -r {input.fastq} -o {params.outdir} 2> {log} + salmon quant -p {threads} --softclipOverhangs --validateMappings --numBootstraps 50 -i {params.index} -l {params.lib_type} -r {input.fastq} -o {params.outdir} """ @@ -137,8 +128,6 @@ rule Salmon_TPM: "SalmonAllelic/TPM.transcripts.tsv" benchmark: "SalmonAllelic/.benchmark/Salmon_TPM.benchmark" - log: - "SalmonAllelic/logs/Salmon_TPM.log" conda: CONDA_RNASEQ_ENV shell: "Rscript "+os.path.join(maindir, "shared", "rscripts", "merge_count_tables.R")+" Name TPM {output} {input} " @@ -151,8 +140,6 @@ rule Salmon_counts: "SalmonAllelic/counts.transcripts.tsv" benchmark: "SalmonAllelic/.benchmark/Salmon_counts.benchmark" - log: - "SalmonAllelic/logs/Salmon_counts.log" conda: CONDA_RNASEQ_ENV shell: "Rscript "+os.path.join(maindir, "shared", "rscripts", "merge_count_tables.R")+" Name NumReads {output} {input} " @@ -164,9 +151,8 @@ rule Salmon_wasabi: "SalmonAllelic/{sample}.{allelic_suffix}.quant.sf" output: "SalmonAllelic/{sample}.{allelic_suffix}/abundance.h5" - log: "SalmonAllelic/logs/{sample}.{allelic_suffix}.wasabi.log" params: "SalmonAllelic/{sample}.{allelic_suffix}/" conda: CONDA_RNASEQ_ENV shell: - "Rscript "+os.path.join(maindir, "shared", "rscripts", 
"wasabi.R")+" {params} 2> {log}" + "Rscript "+os.path.join(maindir, "shared", "rscripts", "wasabi.R")+" {params}" diff --git a/snakePipes/shared/rules/WGBS.snakefile b/snakePipes/shared/rules/WGBS.snakefile index 87f2d7202..61ad3d24b 100755 --- a/snakePipes/shared/rules/WGBS.snakefile +++ b/snakePipes/shared/rules/WGBS.snakefile @@ -11,10 +11,9 @@ rule conversionRate: "QC_metrics/{sample}.CHH.Mbias.txt" output: "QC_metrics/{sample}.conv.rate.txt" - log: "QC_metrics/logs/{sample}.conversionRate.log" threads: 1 shell: """ - awk '{{if(NR>1) {{M+=$4; UM+=$5}}}}END{{printf("{wildcards.sample}\\t%f\\n", 100*(1.0-M/(M+UM)))}}' {input} > {output} 2> {log} + awk '{{if(NR>1) {{M+=$4; UM+=$5}}}}END{{printf("{wildcards.sample}\\t%f\\n", 100*(1.0-M/(M+UM)))}}' {input} > {output} """ @@ -26,9 +25,6 @@ if pairedEnd and not fromBAM: r2=fastq_dir + "/{sample}" + reads[1] + ".fastq.gz" output: sbam=temp(aligner+"/{sample}.bam") - log: - err=aligner+"/logs/{sample}.map_reads.err", - out=aligner+"/logs/{sample}.map_reads.out" params: bwameth_index=bwameth_index if aligner=="bwameth" else bwameth2_index, tempDir = tempDir @@ -37,8 +33,8 @@ if pairedEnd and not fromBAM: shell: """ TMPDIR={params.tempDir} MYTEMP=$(mktemp -d "${{TMPDIR:-/tmp}}"/snakepipes.XXXXXXXXXX) - bwameth.py --threads {threads} --reference "{params.bwameth_index}" "{input.r1}" "{input.r2}" 2> {log.err} | \ - samtools sort -T "$MYTEMP"/{wildcards.sample} -m 3G -@ 4 -o "{output.sbam}" 2>> {log.err} + bwameth.py --threads {threads} --reference "{params.bwameth_index}" "{input.r1}" "{input.r2}" | \ + samtools sort -T "$MYTEMP"/{wildcards.sample} -m 3G -@ 4 -o "{output.sbam}" rm -rf "$MYTEMP" """ @@ -48,9 +44,6 @@ elif not pairedEnd and not fromBAM: r1=fastq_dir + "/{sample}" + reads[0] + ".fastq.gz", output: sbam=temp(aligner+"/{sample}.bam") - log: - err=aligner+"/logs/{sample}.map_reads.err", - out=aligner+"/logs/{sample}.map_reads.out" params: bwameth_index=bwameth_index if aligner=="bwameth" else bwameth2_index, 
tempDir = tempDir @@ -59,8 +52,8 @@ elif not pairedEnd and not fromBAM: shell: """ TMPDIR={params.tempDir} MYTEMP=$(mktemp -d "${{TMPDIR:-/tmp}}"/snakepipes.XXXXXXXXXX) - bwameth.py --threads {threads} --reference "{params.bwameth_index}" "{input.r1}" 2> {log.err} | \ - samtools sort -T "$MYTEMP/{wildcards.sample}" -m 3G -@ 4 -o "{output.sbam}" 2>> {log.err} + bwameth.py --threads {threads} --reference "{params.bwameth_index}" "{input.r1}" | \ + samtools sort -T "$MYTEMP/{wildcards.sample}" -m 3G -@ 4 -o "{output.sbam}" rm -rf "$MYTEMP" """ @@ -70,12 +63,9 @@ if not fromBAM: aligner+"/{sample}.bam" output: temp(aligner+"/{sample}.bam.bai") - log: - err=aligner+"/logs/{sample}.index_bam.err", - out=aligner+"/logs/{sample}.index_bam.out" conda: CONDA_SHARED_ENV shell: """ - samtools index "{input}" > {log.out} 2> {log.err} + samtools index "{input}" """ if not skipBamQC: @@ -85,9 +75,6 @@ if not skipBamQC: aligner+"/{sample}.bam.bai" output: "Sambamba/{sample}.markdup.bam" - log: - err="Sambamba/logs/{sample}.rm_dupes.err", - out="Sambamba/logs/{sample}.rm_dupes.out" threads: lambda wildcards: 10 if 10> {log.out} 2> {log.err} + sambamba markdup --overflow-list-size 600000 -t {threads} --tmpdir "$MYTEMP/{wildcards.sample}" "{input[0]}" "{output}" rm -rf "$MYTEMP" """ @@ -106,13 +93,10 @@ if not skipBamQC: output: "Sambamba/{sample}.markdup.bam.bai" params: - log: - err="Sambamba/logs/{sample}.indexMarkDupes.err", - out="Sambamba/logs/{sample}.indexMarkDupes.out" threads: 1 conda: CONDA_SHARED_ENV shell: """ - samtools index "{input}" 1> {log.out} 2> {log.err} + samtools index "{input}" """ rule link_deduped_bam: @@ -187,12 +171,10 @@ rule calc_Mbias: "QC_metrics/{sample}.Mbias.txt" params: genome=genome_fasta - log: - out="QC_metrics/logs/{sample}.calc_Mbias.out" threads: lambda wildcards: 10 if 10 {output} > {log.out} + MethylDackel mbias -@ {threads} {params.genome} {input[0]} QC_metrics/{wildcards.sample} """ @@ -204,12 +186,10 @@ rule calcCHHbias: 
temp("QC_metrics/{sample}.CHH.Mbias.txt") params: genome=genome_fasta - log: - err="QC_metrics/logs/{sample}.calcCHHbias.err" threads: lambda wildcards: 10 if 10 {output} 2> {log.err} + MethylDackel mbias -@ {threads} --CHH --noCpG --noSVG {params.genome} {input[0]} QC_metrics/{wildcards.sample} """ @@ -223,8 +203,6 @@ rule calc_GCbias: params: genomeSize=genome_size, twobitpath=genome_2bit - log: - out="QC_metrics/logs/calc_GCbias.out" threads: lambda wildcards: 20 if 20 {output[0]} 2> {log.err} + {params.thresholds} {params.options} -o {output[1]} > {output[0]} """ @@ -266,11 +242,9 @@ rule DepthOfCovGenome: options="--minMappingQuality 10 --smartLabels --samFlagExclude 256", thresholds="-ct 0 -ct 1 -ct 2 -ct 5 -ct 10 -ct 15 -ct 20 -ct 30 -ct 50" threads: lambda wildcards: 20 if 20 {output[0]} 2> {log.err} + plotCoverage -b {input.BAMS} -p {threads} {params.thresholds} {params.options} --outCoverageMetrics {output[2]} -o {output[1]} > {output[0]} """ @@ -279,11 +253,9 @@ rule get_flagstat: "filtered_bam/{sample}.filtered.bam" output: "QC_metrics/{sample}.flagstat" - log: - err="QC_metrics/logs/{sample}.get_flagstat.err" threads: 1 conda: CONDA_SHARED_ENV - shell: "samtools flagstat {input} > {output} 2> {log.err}" + shell: "samtools flagstat {input} > {output}" rule produceReport: @@ -313,14 +285,11 @@ if not noAutoMethylationBias: params: genome=genome_fasta, MethylDackelOptions=MethylDackelOptions - log: - err="MethylDackel/logs/{sample}.methyl_extract.err", - out="MethylDackel/logs/{sample}.methyl_extract.out" threads: lambda wildcards: 10 if 10 {log.out} 2> {log.err} + MethylDackel extract -o MethylDackel/{wildcards.sample} {params.MethylDackelOptions} $mi -@ {threads} {params.genome} {input[0]} """ else: rule methyl_extract: @@ -332,13 +301,10 @@ else: params: genome=genome_fasta, MethylDackelOptions=MethylDackelOptions - log: - err="MethylDackel/logs/{sample}.methyl_extract.err", - out="MethylDackel/logs/{sample}.methyl_extract.out" threads: lambda 
wildcards: 10 if 10 {log.out} 2> {log.err} + MethylDackel extract -o MethylDackel/{wildcards.sample} {params.MethylDackelOptions} -@ {threads} {params.genome} {input[0]} """ @@ -352,8 +318,6 @@ rule prepForMetilene: groups=metileneGroups, minCoverage=minCoverage, blacklist=blacklist - log: - err='{}/logs/prep_for_stats.err'.format(get_outdir("metilene", targetRegions, minCoverage)), threads: lambda wildcards: 10 if 10{log.err} \ + {input.MetIN} \ | sort -k 1,1 -k2,2n >> {output.MetBed} """ @@ -462,8 +424,6 @@ rule bedGraphToBigWig: output: "MethylDackel/{sample}_CpG.methylation.bw", "MethylDackel/{sample}_CpG.coverage.bw" - log: - err='MethylDackel/logs/{sample}_bedGraphToBigWig.stderr' threads: 1 conda: CONDA_SHARED_ENV - shell: os.path.join(workflow_tools, "bedGraphToBigwig") + " {input[0]} {input[1]} {output[0]} {output[1]} 2> {log.err}" + shell: os.path.join(workflow_tools, "bedGraphToBigwig") + " {input[0]} {input[1]} {output[0]} {output[1]}" diff --git a/snakePipes/shared/rules/bam_filtering.snakefile b/snakePipes/shared/rules/bam_filtering.snakefile index 80c3fc6c5..b6a0616df 100755 --- a/snakePipes/shared/rules/bam_filtering.snakefile +++ b/snakePipes/shared/rules/bam_filtering.snakefile @@ -12,15 +12,12 @@ rule samtools_filter: bam = temp("filtered_bam/{sample}.filtered.tmp.bam") params: shell = lambda wildcards,input,output: "samtools view -@ {} -b {} -o {} {} ".format(str(8 if not local else 2), bam_filter_string,output.bam,input[0]) if bam_filter_string.strip() !="" else "ln -s ../{} {}".format(input[0],output.bam) - log: - out = "filtered_bam/logs/samtools_filter.{sample}.out", - err = "filtered_bam/logs/samtools_filter.{sample}.err" benchmark: "filtered_bam/.benchmark/samtools_filter.{sample}.benchmark" threads: lambda wildcards: 8 if 8 {log.err} + {params.shell} """ @@ -30,6 +27,5 @@ rule samtools_index_tmp_filtered: "filtered_bam/{sample}.filtered.tmp.bam" output: temp("filtered_bam/{sample}.filtered.tmp.bam.bai") - log: 
"filtered_bam/logs/{sample}.samtools_index_tmp_filtered.log" conda: CONDA_SHARED_ENV - shell: "samtools index {input} 2> {log}" + shell: "samtools index {input}" diff --git a/snakePipes/shared/rules/bwa-mem2.snakefile b/snakePipes/shared/rules/bwa-mem2.snakefile index 9b155dbc3..55b7a9a49 100755 --- a/snakePipes/shared/rules/bwa-mem2.snakefile +++ b/snakePipes/shared/rules/bwa-mem2.snakefile @@ -6,7 +6,6 @@ if pairedEnd: output: align_summary = "bwa-mem2/{sample}.bwa-mem2_summary.txt", #samtools flagstat bam = temp("bwa-mem2/{sample}.sorted.bam") - log: "bwa-mem2/logs/{sample}.sort.log" params: bwa_index = bwa_mem2_index, alignerOpts = str(alignerOpts or ''), @@ -21,7 +20,7 @@ if pairedEnd: -R '@RG\\tID:{wildcards.sample}\\tDS:{wildcards.sample}\\tPL:ILLUMINA\\tSM:{wildcards.sample}' {params.alignerOpts} \ {params.bwa_index} {input.r1} {input.r2} | \ samtools view -Sb - | \ - samtools sort -m 2G -@ 2 -O bam - > {output.bam} 2> {log}; + samtools sort -m 2G -@ 2 -O bam - > {output.bam}; rm -rf $MYTEMP samtools flagstat {output.bam} > {output.align_summary} """ @@ -32,7 +31,6 @@ else: output: align_summary = "bwa-mem2/{sample}.bwa-mem2_summary.txt", #samtools flagstat bam = temp("bwa-mem2/{sample}.sorted.bam") - log: "bwa-mem2/logs/{sample}.sort.log" params: bwa_index = bwa_mem2_index, alignerOpts = str(alignerOpts or ''), @@ -47,7 +45,7 @@ else: -R '@RG\\tID:{wildcards.sample}\\tDS:{wildcards.sample}\\tPL:ILLUMINA\\tSM:{wildcards.sample}' {params.alignerOpts}\ {params.bwa_index} {input} | \ samtools view -Sbu - | \ - samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam} 2> {log}; + samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam}; rm -rf $MYTEMP samtools flagstat {output.bam} > {output.align_summary} """ diff --git a/snakePipes/shared/rules/bwa.snakefile b/snakePipes/shared/rules/bwa.snakefile index 1a9dbdde3..4190df932 100755 --- a/snakePipes/shared/rules/bwa.snakefile +++ b/snakePipes/shared/rules/bwa.snakefile 
@@ -6,7 +6,6 @@ if pairedEnd: output: align_summary = "bwa/{sample}.bwa_summary.txt", #samtools flagstat bam = temp("bwa/{sample}.sorted.bam") - log: "bwa/logs/{sample}.sort.log" params: bwa_index = bwa_index, alignerOpts = str(alignerOpts or ''), @@ -21,7 +20,7 @@ if pairedEnd: -R '@RG\\tID:{wildcards.sample}\\tDS:{wildcards.sample}\\tPL:ILLUMINA\\tSM:{wildcards.sample}' {params.alignerOpts} \ {params.bwa_index} {input.r1} {input.r2} | \ samtools view -Sb - | \ - samtools sort -m 2G -@ 2 -O bam - > {output.bam} 2> {log}; + samtools sort -m 2G -@ 2 -O bam - > {output.bam}; rm -rf $MYTEMP samtools flagstat {output.bam} > {output.align_summary} """ @@ -32,7 +31,6 @@ else: output: align_summary = "bwa/{sample}.bwa_summary.txt", #samtools flagstat bam = temp("bwa/{sample}.sorted.bam") - log: "bwa/logs/{sample}.sort.log" params: bwa_index = bwa_index, alignerOpts = str(alignerOpts or ''), @@ -47,7 +45,7 @@ else: -R '@RG\\tID:{wildcards.sample}\\tDS:{wildcards.sample}\\tPL:ILLUMINA\\tSM:{wildcards.sample}' {params.alignerOpts}\ {params.bwa_index} {input} | \ samtools view -Sbu - | \ - samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam} 2> {log}; + samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam}; rm -rf $MYTEMP samtools flagstat {output.bam} > {output.align_summary} """ diff --git a/snakePipes/shared/rules/createIndices.snakefile b/snakePipes/shared/rules/createIndices.snakefile index d06a12853..365149c65 100755 --- a/snakePipes/shared/rules/createIndices.snakefile +++ b/snakePipes/shared/rules/createIndices.snakefile @@ -54,7 +54,7 @@ else: params: spikeinExt = spikeinExt shell: """ - sed -r 's/\s+/{spikeinExt} /' {input} > {output} + sed '/^>/ s/$/{spikeinExt}/' {input} > {output} """ rule createGenomeFasta: @@ -71,20 +71,18 @@ else: rule fastaIndex: input: genome_fasta output: genome_index - log: "logs/fastaIndex.log" conda: CONDA_SHARED_ENV shell: """ - samtools faidx {input} 2> {log} + samtools faidx {input} 
""" # Default memory allocation: 4G rule fastaDict: input: genome_fasta output: genome_dict - log: "logs/fastaDict.log" conda: CONDA_SHARED_ENV shell: """ - samtools dict -o {output} {input} 2> {log} + samtools dict -o {output} {input} """ if rmsk_file: @@ -99,10 +97,9 @@ if rmsk_file: rule make2bit: input: genome_fasta output: genome_2bit - log: "logs/make2bit.log" conda: CONDA_CREATE_INDEX_ENV shell: """ - faToTwoBit {input} {output} 2> {log} + faToTwoBit {input} {output} """ @@ -136,13 +133,12 @@ rule renameSpikeinChromsGTF: #rule gtf2BED: # input: genes_gtf # output: genes_bed -# log: "logs/gtf2BED.log" # conda: CONDA_CREATE_INDEX_ENV # shell: """ # awk '{{if ($3 != "gene") print $0;}}' {input} \ # | grep -v "^#" \ # | gtfToGenePred /dev/stdin /dev/stdout \ -# | genePredToBed stdin {output} 2> {log} +# | genePredToBed stdin {output} # """ @@ -280,7 +276,6 @@ rule extendGenicRegions: rule bowtie2Index: input: genome_fasta output: os.path.join(outdir, "BowtieIndex/genome.rev.2.bt2") - log: "logs/bowtie2Index.log" params: basedir = os.path.join(outdir, "BowtieIndex") conda: CONDA_CREATE_INDEX_ENV @@ -289,14 +284,12 @@ rule bowtie2Index: ln -s {input} {params.basedir}/genome.fa bowtie2-build -t {threads} {params.basedir}/genome.fa {params.basedir}/genome if [[ -f BowtieIndex/genome.rev.2.bt2l ]]; then ln -s genome.rev.2.bt2l {output} ; fi - 2> {log} """ # Default memory allocation: 20G rule hisat2Index: input: genome_fasta output: os.path.join(outdir, "HISAT2Index/genome.6.ht2") - log: "logs/hisat2Index.log" params: basedir = os.path.join(outdir, "HISAT2Index") threads: lambda wildcards: 10 if 10 {log} """ @@ -312,11 +304,10 @@ rule hisat2Index: rule makeKnownSpliceSites: input: genes_gtf output: known_splicesites - log: "logs/makeKnownSpliceSites.log" conda: CONDA_CREATE_INDEX_ENV threads: lambda wildcards: 10 if 10 {output} 2> {log} + hisat2_extract_splice_sites.py {input} > {output} """ @@ -324,13 +315,12 @@ rule makeKnownSpliceSites: rule starIndex: input: 
genome_fasta output: os.path.join(outdir, "STARIndex/SAindex") - log: "logs/starIndex.log" params: basedir = os.path.join(outdir, "STARIndex") conda: CONDA_CREATE_INDEX_ENV threads: lambda wildcards: 10 if 10 {log} + STAR --runThreadN {threads} --runMode genomeGenerate --genomeDir {params.basedir} --genomeFastaFiles {input} if [[ -w Log.out ]]; then rm -v Log.out; elif [[ -w {params.basedir}/Log.out ]]; then rm -v {params.basedir}/Log.out; fi """ @@ -340,13 +330,12 @@ rule genes_bed2fasta: genome_fasta = genome_fasta output: "annotation/genes.fa" - log: "annotation/logs/bed2fasta.log" benchmark: "annotation/.benchmark/annotation_bed2fasta.benchmark" threads: 1 conda: CONDA_CREATE_INDEX_ENV shell: - "bedtools getfasta -name -s -split -fi {input.genome_fasta} -bed <(cat {input.bed} | cut -f1-12) | sed 's/(.*)//g' | sed 's/:.*//g' > {output} 2> {log}" + "bedtools getfasta -name -s -split -fi {input.genome_fasta} -bed <(cat {input.bed} | cut -f1-12) | sed 's/(.*)//g' | sed 's/:.*//g' > {output}" rule salmonIndex: @@ -359,15 +348,12 @@ rule salmonIndex: os.path.join(outdir, "SalmonIndex/seq.bin") params: salmonIndexOptions = salmonIndexOptions if salmonIndexOptions else "" - log: - out = "logs/SalmonIndex.out", - err = "logs/SalmonIndex.err", threads: lambda wildcards: 16 if 16" {input[1]} | cut -d " " -f 1 | tr -d ">" > {output[0]} cat {input[0]} {input[1]} > {output[1]} - salmon index -p {threads} -t {output[1]} -d {output[0]} -i SalmonIndex {params.salmonIndexOptions} > {log.out} 2> {log.err} + salmon index -p {threads} -t {output[1]} -d {output[0]} -i SalmonIndex {params.salmonIndexOptions} """ @@ -388,8 +374,6 @@ rule run_eisaR: gtf = lambda wildcards,input: os.path.join(outdir, input.gtf), joint_fasta = lambda wildcards,output: output.joint_fasta, joint_t2g = lambda wildcards,output: output.joint_t2g - log: - out = "logs/eisaR.out" conda: CONDA_eisaR_ENV script: "../rscripts/scRNAseq_eisaR.R" @@ -407,14 +391,11 @@ rule Salmon_index_joint_fa: velo_index = 
os.path.join(outdir, "SalmonIndex_RNAVelocity/seq.bin") params: salmonIndexOptions = salmonIndexOptions - log: - err = "SalmonIndex_RNAVelocity/logs/SalmonIndex.err", - out = "SalmonIndex_RNAVelocity/logs/SalmonIndex.out" threads: lambda wildcards: 16 if 16 {output.seq_fa} - salmon index -p {threads} -t {output.seq_fa} -d {input.decoys} -i SalmonIndex_RNAVelocity {params.salmonIndexOptions} > {log.out} 2> {log.err} + salmon index -p {threads} -t {output.seq_fa} -d {input.decoys} -i SalmonIndex_RNAVelocity {params.salmonIndexOptions} """ @@ -423,26 +404,24 @@ rule Salmon_index_joint_fa: rule bwaIndex: input: genome_fasta output: os.path.join(outdir, "BWAIndex/genome.fa.sa") - log: "logs/bwaIndex.log" params: genome = os.path.join(outdir, "BWAIndex", "genome.fa") conda: CONDA_CREATE_INDEX_ENV shell: """ ln -s {input} {params.genome} - bwa index {params.genome} 2> {log} + bwa index {params.genome} """ # Default memory allocation: 8G rule bwamem2Index: input: genome_fasta output: os.path.join(outdir, "BWA-MEM2Index/genome.fa.bwt.2bit.64") - log: "logs/bwaIndex.log" params: genome = os.path.join(outdir, "BWA-MEM2Index", "genome.fa") conda: CONDA_CREATE_INDEX_ENV shell: """ ln -s {input} {params.genome} - bwa-mem2 index {params.genome} 2> {log} + bwa-mem2 index {params.genome} """ @@ -450,26 +429,24 @@ rule bwamem2Index: rule bwamethIndex: input: genome_fasta output: os.path.join(outdir, "BWAmethIndex/genome.fa.bwameth.c2t.sa") - log: "logs/bwamethIndex.log" params: genome = os.path.join(outdir, "BWAmethIndex", "genome.fa") conda: CONDA_CREATE_INDEX_ENV shell: """ ln -s {input[0]} {params.genome} - bwameth.py index {params.genome} 2> {log} + bwameth.py index {params.genome} """ # Default memory allocation: 8G rule bwameth2Index: input: genome_fasta output: os.path.join(outdir, "BWAmeth2Index/genome.fa.bwameth.c2t.bwt.2bit.64") - log: "logs/bwameth2Index.log" params: genome = os.path.join(outdir, "BWAmeth2Index", "genome.fa") conda: CONDA_CREATE_INDEX_ENV shell: """ ln -s 
{input[0]} {params.genome} - bwameth.py index-mem2 {params.genome} 2> {log} + bwameth.py index-mem2 {params.genome} """ # Default memory allocation: 1G @@ -501,8 +478,7 @@ rule renameSpikeinChromsBlacklist: rule computeEffectiveGenomeSize: input: genome_fasta if not spikeinGenomeURL else os.path.join(outdir,"genome_fasta/host.genome.fa") output: os.path.join(outdir, "genome_fasta", "effectiveSize") - log: "logs/computeEffectiveGenomeSize.log" conda: CONDA_SHARED_ENV shell: """ - seqtk comp {input} | awk '{{tot += $3 + $4 + $5 + $6}}END{{print tot}}' > {output} 2> {log} + seqtk comp {input} | awk '{{tot += $3 + $4 + $5 + $6}}END{{print tot}}' > {output} """ diff --git a/snakePipes/shared/rules/deepTools_ATAC.snakefile b/snakePipes/shared/rules/deepTools_ATAC.snakefile index ccfc91603..28a10eef3 100755 --- a/snakePipes/shared/rules/deepTools_ATAC.snakefile +++ b/snakePipes/shared/rules/deepTools_ATAC.snakefile @@ -12,9 +12,6 @@ rule bamcoverage_short_cleaned: ignoreForNorm = "--ignoreForNormalization {}".format(ignoreForNormalization) if ignoreForNormalization else "", read_extension = "--extendReads" if pairedEnd else "--extendReads {}".format(fragmentLength), blacklist = "--blackListFileName {}".format(blacklist_bed) if blacklist_bed else "" - log: - out = "deepTools_ATAC/logs/bamCompare.{sample}.filtered.out", - err = "deepTools_ATAC/logs/bamCompare.{sample}.filtered.out" benchmark: "deepTools_ATAC/.benchmark/deepTools_ATAC/logs/bamCompare.{sample}.filtered.benchmark" threads: lambda wildcards: 16 if 160) else "" - log: - out = "deepTools_ChIP/logs/plotFingerprint.out", - err = "deepTools_ChIP/logs/plotFingerprint.err" benchmark: "deepTools_ChIP/.benchmark/plotFingerprint.benchmark" threads: lambda wildcards: 24 if 240) else "" - log: - out = "split_deepTools_ChIP/logs/plotFingerprint.out", - err = "split_deepTools_ChIP/logs/plotFingerprint.err" benchmark: "split_deepTools_ChIP/.benchmark/plotFingerprint.benchmark" threads: lambda wildcards: 24 if 
24[^[:space:]]+).*/\\1_{params.hap1}/g' \ + | bgzip -c > genome_{params.hap1}.fa.gz + + bcftools consensus \ + --fasta-ref {input.genome} \ + --haplotype 1 {input.vcf} \ + --sample {params.hap2} \ + | sed -E 's/(>[^[:space:]]+).*/\\1_{params.hap2}/g' \ + | bgzip -c > genome_{params.hap2}.fa.gz + + cat genome_{params.hap1}.fa.gz genome_{params.hap2}.fa.gz \ + > {output.genome} + + rm genome_{params.hap1}.fa.gz genome_{params.hap2}.fa.gz + """ + + +rule bwa_index_diploid_genome: + # index concatenated genome generated by rule diploid_genome + input: + genome="genome/diploid_genome.fa.gz", + output: + index="genome/diploid_genome.fa.gz.bwt", + threads: 2 + resources: + mem_mb=50000, + conda: + CONDA_MAKEPAIRS_ENV + shell: + """ + bwa index {input.genome} + """ + + +rule chr_sizes: + # obtain chromosome sizes from bwa index + input: + bwaix="genome/{ref}.fa.gz.bwt", + output: + chromsize="genome/{ref}.chromsizes", + params: + fnagz=lambda wildcards, input: Path(input.bwaix).with_suffix(""), + threads: 1 + conda: + CONDA_MAKEPAIRS_ENV + shell: + """ + samtools faidx {params.fnagz} + cut -f1,2 {params.fnagz}.fai > {output.chromsize} + """ diff --git a/snakePipes/shared/rules/envs/makePairs.yaml b/snakePipes/shared/rules/envs/makePairs.yaml new file mode 100644 index 000000000..c470504fe --- /dev/null +++ b/snakePipes/shared/rules/envs/makePairs.yaml @@ -0,0 +1,14 @@ +name: pairtools_phased +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bwa + - samtools + - tabix + - bcftools + - pairtools==1.1 + - pip + - pip: + - git+https://github.com/open2c/MultiQC.git diff --git a/snakePipes/shared/rules/featureCounts.snakefile b/snakePipes/shared/rules/featureCounts.snakefile index 0ac56e0bf..d7cfccaf5 100755 --- a/snakePipes/shared/rules/featureCounts.snakefile +++ b/snakePipes/shared/rules/featureCounts.snakefile @@ -12,9 +12,6 @@ rule featureCounts: paired_opt = lambda wildcards: "-p -B " if pairedEnd else "", opts = config["featureCountsOptions"], 
tempDir = tempDir - log: - out = "featureCounts/logs/{sample}.out", - err = "featureCounts/logs/{sample}.err" threads: lambda wildcards: 8 if 8 {log.out} 2> {log.err}; + {input.bam}; rm -rf $MYTEMP """ @@ -36,7 +33,6 @@ rule merge_featureCounts: expand("featureCounts/{sample}.counts.txt", sample=samples) output: "featureCounts/counts.tsv" - log: "featureCounts/logs/merge_featureCounts.log" conda: CONDA_RNASEQ_ENV shell: - "Rscript "+os.path.join(maindir, "shared", "rscripts", "merge_featureCounts.R")+" {output} {input} 2> {log}" + "Rscript "+os.path.join(maindir, "shared", "rscripts", "merge_featureCounts.R")+" {output} {input}" diff --git a/snakePipes/shared/rules/featureCounts_allelic.snakefile b/snakePipes/shared/rules/featureCounts_allelic.snakefile index cabf790cb..61797635f 100644 --- a/snakePipes/shared/rules/featureCounts_allelic.snakefile +++ b/snakePipes/shared/rules/featureCounts_allelic.snakefile @@ -14,9 +14,6 @@ rule featureCounts_allele: paired_opt = lambda wildcards: "-p -B " if pairedEnd else "", opts = config["featureCountsOptions"], tempDir = tempDir - log: - out = "featureCounts/logs/{sample}.out", - err = "featureCounts/logs/{sample}.err" threads: 8 conda: CONDA_RNASEQ_ENV shell: """ @@ -29,7 +26,7 @@ rule featureCounts_allele: -a {input.gtf} \ -o {output} \ --tmpDir $MYTEMP \ - {input.bam} {input.allele1} {input.allele2} > {log.out} 2> {log.err}; + {input.bam} {input.allele1} {input.allele2}; rm -rf $MYTEMP """ @@ -38,7 +35,6 @@ rule merge_featureCounts: expand("featureCounts/{sample}.allelic_counts.txt", sample=samples) output: "featureCounts/counts_allelic.tsv" - log: "featureCounts/logs/merge_featureCounts.log" conda: CONDA_RNASEQ_ENV shell: - "Rscript "+os.path.join(maindir, "shared", "rscripts", "merge_featureCounts.R")+" {output} {input} 2> {log}" + "Rscript "+os.path.join(maindir, "shared", "rscripts", "merge_featureCounts.R")+" {output} {input}" diff --git a/snakePipes/shared/rules/filterGTF.snakefile 
b/snakePipes/shared/rules/filterGTF.snakefile index a0820999a..7dbb658b1 100644 --- a/snakePipes/shared/rules/filterGTF.snakefile +++ b/snakePipes/shared/rules/filterGTF.snakefile @@ -131,11 +131,10 @@ rule annotation_bed2fasta: genome_fasta = genome_fasta output: "Annotation/genes.filtered.fa" - log: "Annotation/logs/bed2fasta.log" benchmark: "Annotation/.benchmark/annotation_bed2fasta.benchmark" threads: 1 conda: CONDA_RNASEQ_ENV shell: - "bedtools getfasta -name -s -split -fi {input.genome_fasta} -bed <(cat {input.bed} | cut -f1-12) | sed 's/(.*)//g' | sed 's/:.*//g' > {output} 2> {log}" + "bedtools getfasta -name -s -split -fi {input.genome_fasta} -bed <(cat {input.bed} | cut -f1-12) | sed 's/(.*)//g' | sed 's/:.*//g' > {output}" diff --git a/snakePipes/shared/rules/filterGTF_spikein.snakefile b/snakePipes/shared/rules/filterGTF_spikein.snakefile index 63cdf88a7..3ec1cce78 100644 --- a/snakePipes/shared/rules/filterGTF_spikein.snakefile +++ b/snakePipes/shared/rules/filterGTF_spikein.snakefile @@ -134,10 +134,8 @@ rule TSS_to_windows: outfile = outdir+"/Annotation_spikein/TSS.filtered.bed", script = maindir+"/shared/tools/TSS_to_windows.py", size = 5000 - log: - "Annotation_spikein/logs/TSS_to_windows.err" conda: CONDA_pysam_ENV shell: """ - python {params.script} -inf {input.bed} -outf {output.TSS_bed} -size {params.size} -bam {input.bam} 2> {log} + python {params.script} -inf {input.bed} -outf {output.TSS_bed} -size {params.size} -bam {input.bam} """ diff --git a/snakePipes/shared/rules/hicexplorer.snakefile b/snakePipes/shared/rules/hicexplorer.snakefile index d524fe0b1..345e281b6 100644 --- a/snakePipes/shared/rules/hicexplorer.snakefile +++ b/snakePipes/shared/rules/hicexplorer.snakefile @@ -22,12 +22,9 @@ rule get_restrictionSite: enzyme + ".bed" params: res_seq = get_restriction_seq(enzyme) - log: - out = "log/get_restrictionSite.out", - err = "log/get_restrictionSite.err" conda: CONDA_HIC_ENV shell: - "hicFindRestSite -f {input} --searchPattern 
{params.res_seq} -o {output} > {log.out} 2> {log.err}" + "hicFindRestSite -f {input} --searchPattern {params.res_seq} -o {output}" # Map @@ -38,16 +35,13 @@ rule map_fastq_single_end: params: aligner_cmd = getAlignerCmd(aligner), aligner_index = getAlignerIndex(aligner) - log: - out = aligner+"/logs/{sample}{read}.out", - err = aligner+"/logs/{sample}{read}.err" threads: lambda wildcards: 15 if 15 {log.out} && " + "echo 'mapping {input}' && " "{params.aligner_cmd} -A1 -B4 -E50 -L0 " - "-t {threads} {params.aligner_index} {input} 2> {log.err} | " - "samtools view -Shb - > {output.out} 2>> {log.err}" + "-t {threads} {params.aligner_index} {input} | " + "samtools view -Shb - > {output.out}" ## Make HiC Matrix if(RFResolution is True): @@ -66,9 +60,6 @@ if(RFResolution is True): region = lambda wildcards: "--region " + str(restrictRegion) if restrictRegion else "", min_dist = MIN_RS_DISTANCE, max_dist = MAX_RS_DISTANCE - log: - out = "HiC_matrices/logs/{sample}_"+matrixFile_suffix+".out", - err = "HiC_matrices/logs/{sample}_"+matrixFile_suffix+".err" threads: lambda wildcards: 10 if 10 {log.out} 2> {log.err} &&" + "-o {output.matrix} &&" " rm {params.QCfolder}"+"QC_table.txt" else: rule build_matrix: @@ -101,9 +92,6 @@ else: region = lambda wildcards: "--region " + str(restrictRegion) if restrictRegion else "", min_dist = MIN_RS_DISTANCE, max_dist = MAX_RS_DISTANCE - log: - out = "HiC_matrices/logs/{sample}_"+matrixFile_suffix+".out", - err = "HiC_matrices/logs/{sample}_"+matrixFile_suffix+".err" threads: lambda wildcards: 10 if 10 {log.out} 2> {log.err} &&" + "-o {output.matrix} &&" " rm {params.QCfolder}"+"QC_table.txt" ## Merge the samples if asked @@ -126,12 +114,9 @@ rule merge_matrices: lambda wildcards: expand("HiC_matrices/{sample}_"+matrixFile_suffix+matrix_format, sample = sample_dict[wildcards.group]) output: matrix = "HiC_matrices/mergedSamples_{group}_"+matrixFile_suffix+matrix_format - log: - out = 
"HiC_matrices/logs/hicSumMatrices_{group}_"+matrixFile_suffix+".out", - err = "HiC_matrices/logs/hicSumMatrices_{group}_"+matrixFile_suffix+".err" conda: CONDA_HIC_ENV shell: - "hicSumMatrices -m {input} -o {output.matrix} > {log.out} 2> {log.err}" + "hicSumMatrices -m {input} -o {output.matrix}" ## Merge the bins if asked rule merge_bins: @@ -141,12 +126,9 @@ rule merge_bins: matrix = "HiC_matrices/{sample}_Mbins" + str(nBinsToMerge) + "_" + matrixFile_suffix+matrix_format params: num_bins=nBinsToMerge - log: - out = "HiC_matrices/logs/{sample}_Mbins" + str(nBinsToMerge) + "_" + matrixFile_suffix+".out", - err = "HiC_matrices/logs/{sample}_Mbins" + str(nBinsToMerge) + "_" + matrixFile_suffix+".err" conda: CONDA_HIC_ENV shell: - "hicMergeMatrixBins -m {input} -nb {params.num_bins} -o {output.matrix} >{log.out} 2>{log.err} " + "hicMergeMatrixBins -m {input} -nb {params.num_bins} -o {output.matrix} " ## diagnostic plots rule diagnostic_plot: @@ -198,12 +180,10 @@ else: "HiC_matrices_corrected/{sample}_"+matrixFile_suffix+".corrected"+matrix_format params: chr = lambda wildcards: " --chromosomes " + chromosomes if chromosomes else "" - log: - out = "HiC_matrices_corrected/logs/{sample}_correctoMatrix.out" conda: CONDA_HIC_ENV shell: "hicCorrectMatrix correct --correctionMethod KR " - " {params.chr} -m {input.matrix} -o {output} 2> {log.out}" + " {params.chr} -m {input.matrix} -o {output}" ## Call TADs rule call_tads: @@ -215,9 +195,6 @@ rule call_tads: prefix="TADs/{sample}_"+matrixFile_suffix, parameters=lambda wildcards: findTADParams if findTADParams else "" threads: lambda wildcards: 10 if 10 {log.out} 2> {log.err}" + "--outPrefix {params.prefix}" ##compare matrices using hicPlotDistVsCounts rule distvscounts: @@ -235,11 +212,7 @@ rule distvscounts: "dist_vs_counts.png" params: function_params = lambda wildcards: distVsCountParams if distVsCountParams else " " - log: - out = "HiC_matrices_corrected/logs/dist_vs_counts.out", - err = 
"HiC_matrices_corrected/logs/dist_vs_counts.err" - conda: CONDA_HIC_ENV shell: - "hicPlotDistVsCounts -m {input.matrices} -o {output} {params.function_params} > {log.out} 2> {log.err}" + "hicPlotDistVsCounts -m {input.matrices} -o {output} {params.function_params}" diff --git a/snakePipes/shared/rules/histoneHMM.snakefile b/snakePipes/shared/rules/histoneHMM.snakefile index 200577df2..2c7d49879 100755 --- a/snakePipes/shared/rules/histoneHMM.snakefile +++ b/snakePipes/shared/rules/histoneHMM.snakefile @@ -34,15 +34,12 @@ rule histoneHMM: params: prefix = "histoneHMM/{sample}.filtered.histoneHMM", genome_index = genome_index - log: - out = "histoneHMM/logs/histoneHMM.{sample}.filtered.out", - err = "histoneHMM/logs/histoneHMM.{sample}.filtered.err" benchmark: "histoneHMM/.benchmark/histoneHMM.{sample}.filtered.benchmark" conda: CONDA_CHIPSEQ_ENV shell: """ RHOME=`R RHOME` - $RHOME/library/histoneHMM/bin/histoneHMM_call_regions.R -b 750 -c {params.genome_index} -o {params.prefix} -P 0.1 {input} > {log.out} 2> {log.err} + $RHOME/library/histoneHMM/bin/histoneHMM_call_regions.R -b 750 -c {params.genome_index} -o {params.prefix} -P 0.1 {input} """ rule format_HMM_output: @@ -70,16 +67,13 @@ rule histoneHMM_out_gz: # the modification date of the input files post = touch("histoneHMM/{sample}.filtered.histoneHMM-em-posterior.txt.gz"), txt = touch("histoneHMM/{sample}.filtered.histoneHMM.txt.gz") - log: - out = "histoneHMM/logs/histoneHMM_out_gz.{sample}.filtered.out", - err = "histoneHMM/logs/histoneHMM_out_gz.{sample}.filtered.err" benchmark: "histoneHMM/.benchmark/histoneHMM_out_gz.{sample}.filtered.benchmark" threads: 2 conda: CONDA_SHARED_ENV shell: """ - grep -v ^\"#\" {input.gff} | sort -k1,1 -k4,4n | bgzip > {output.gff} 2> {log.err} - tabix -p gff {output.gff} > {log.out} 2>> {log.err} + grep -v ^\"#\" {input.gff} | sort -k1,1 -k4,4n | bgzip > {output.gff} + tabix -p gff {output.gff} gzip {input.post} gzip {input.txt} """ diff --git 
a/snakePipes/shared/rules/masked_genomeIndex.snakefile b/snakePipes/shared/rules/masked_genomeIndex.snakefile index 56c768f64..6eaddb72e 100644 --- a/snakePipes/shared/rules/masked_genomeIndex.snakefile +++ b/snakePipes/shared/rules/masked_genomeIndex.snakefile @@ -31,16 +31,13 @@ if allele_hybrid == 'dual': strain1 = strains[0], strain2 = strains[1], SNPpath = os.path.abspath(VCFfile) - log: - out = "SNPsplit_createSNPgenome.out", - err = "SNPsplit_createSNPgenome.err" conda: CONDA_SHARED_ENV shell: " ( [ -d snp_genome ] || mkdir -p snp_genome ) && cd snp_genome &&" " SNPsplit_genome_preparation" " --dual_hybrid --genome_build {BASENAME}" " --reference_genome {input.genome} --vcf_file {params.SNPpath}" - " --strain {params.strain1} --strain2 {params.strain2} > {log.out} 2> {log.err}" + " --strain {params.strain1} --strain2 {params.strain2}" "&& cd ../" else: rule create_snpgenome: @@ -56,17 +53,14 @@ else: temp_out=temp("all_SNPs_" + strains[0] + "_GRCm38.txt.gz"), out_bname=os.path.basename(SNPFile) - log: - out = "SNPsplit_createSNPgenome.out", - err = "SNPsplit_createSNPgenome.err" conda: CONDA_SHARED_ENV shell: " ( [ -d snp_genome ] || mkdir -p snp_genome ) && cd snp_genome &&" " SNPsplit_genome_preparation" " --genome_build {BASENAME}" " --reference_genome {input.genome} --vcf_file {params.SNPpath}" - " --strain {params.strain1} > {log.out} 2> {log.err}&& cp " - "{params.temp_out} {params.out_bname} >> {log.out} 2>> {log.err} " + " --strain {params.strain1} && cp " + "{params.temp_out} {params.out_bname}" "&& cd ../" if aligner == "STAR": @@ -75,9 +69,6 @@ if aligner == "STAR": snpgenome_dir = SNPdir output: star_index_allelic - log: - out = "snp_genome/star_Nmasked/star.index.out", - err = "snp_genome/star_Nmasked/star.index.err" threads: 10 params: @@ -90,7 +81,6 @@ if aligner == "STAR": " --genomeDir " + "snp_genome/star_Nmasked" " --genomeFastaFiles {input.snpgenome_dir}/*.fa" " --sjdbGTFfile {params.gtf}" - " > {log.out} 2> {log.err}" elif aligner == 
"Bowtie2": rule bowtie2_index: @@ -98,9 +88,6 @@ elif aligner == "Bowtie2": snpgenome_dir = SNPdir output: bowtie2_index_allelic - log: - out = "snp_genome/bowtie2_Nmasked/bowtie2.index.out", - err = "snp_genome/bowtie2_Nmasked/bowtie2.index.err" threads: lambda wildcards: 10 if 10 {log.out} 2> {log.err}" else: print("Only STAR and Bowtie2 are implemented for allele-specific mapping") diff --git a/snakePipes/shared/rules/multiQC.snakefile b/snakePipes/shared/rules/multiQC.snakefile index efc2df5b0..6a3a0290a 100755 --- a/snakePipes/shared/rules/multiQC.snakefile +++ b/snakePipes/shared/rules/multiQC.snakefile @@ -8,7 +8,7 @@ def multiqc_input_check(return_value): if pairedEnd: readsIdx = 2 - if not pipeline=="scrna-seq" and ("fromBAM" not in globals() or not fromBAM): + if not pipeline=="scrnaseq" and ("fromBAM" not in globals() or not fromBAM): if pairedEnd: if trim and fastqc: infiles.append( expand("FastQC_trimmed/{sample}{read}_fastqc.html", sample = samples, read = reads) ) @@ -27,8 +27,8 @@ def multiqc_input_check(return_value): elif fastqc: infiles.append( expand("FastQC/{sample}"+reads[0]+"_fastqc.html", sample = samples) ) indir +=" FastQC " - if pipeline=="dna-mapping": - # pipeline is DNA-mapping + if pipeline=="dnamapping": + # pipeline is DNAmapping if aligner=="Bowtie2": infiles.append("deepTools_qc/bamPEFragmentSize/fragmentSize.metric.tsv") infiles.append(expand("Bowtie2/{sample}.Bowtie2_summary.txt", sample = samples) + @@ -64,7 +64,7 @@ def multiqc_input_check(return_value): infiles.append( expand("allelic_bams/{sample}.filtered.SNPsplit_report.yaml", sample = samples) ) infiles.append( expand("allelic_bams/{sample}.filtered.SNPsplit_sort.yaml", sample = samples) ) indir += "allelic_bams" - elif pipeline=="rna-seq": + elif pipeline=="rnaseq": # must be RNA-mapping, add files as per the mode if ( "alignment" in mode or "deepTools_qc" in mode or "three-prime-seq" in mode ) and not "allelic-mapping" in mode and not "allelic-counting" in mode: 
infiles.append( expand(aligner+"/{sample}.bam", sample = samples) + @@ -88,14 +88,14 @@ def multiqc_input_check(return_value): else: infiles.append( expand("Salmon/{sample}/quant.sf", sample = samples) ) indir += " Salmon " - elif pipeline == "noncoding-rna-seq": + elif pipeline == "ncRNAseq": infiles.append(expand("deepTools_qc/estimateReadFiltering/{sample}_filtering_estimation.txt",sample=samples)) indir += " STAR deepTools_qc " elif pipeline == "hic": infiles.append(expand("HiC_matrices/QCplots/{sample}_QC/QC.log", sample = samples)) indir += " " + aligner + " " indir += " ".join(expand("HiC_matrices/QCplots/{sample}_QC ", sample = samples)) - elif pipeline == "scrna-seq": + elif pipeline == "scrnaseq": if trim: infiles.append( expand("FastQC_trimmed/{sample}"+reads[0]+"_fastqc.html", sample = samples) ) indir += " FastQC_trimmed " @@ -143,9 +143,6 @@ rule multiQC: output: "multiQC/multiqc_report.html" params: indirs = multiqc_input_check(return_value = "indir") - log: - out = "multiQC/multiQC.out", - err = "multiQC/multiQC.err" conda: CONDA_SHARED_ENV shell: - "multiqc -o multiQC -f {params.indirs} > {log.out} 2> {log.err}" + "multiqc -o multiQC -f {params.indirs}" diff --git a/snakePipes/shared/rules/nearestGene.multiComp.snakefile b/snakePipes/shared/rules/nearestGene.multiComp.snakefile index d925180ae..a9329336e 100644 --- a/snakePipes/shared/rules/nearestGene.multiComp.snakefile +++ b/snakePipes/shared/rules/nearestGene.multiComp.snakefile @@ -1,20 +1,18 @@ sample_name = os.path.splitext(os.path.basename(sampleSheet))[0] -if pipeline in ['chip-seq','ATAC-seq']: +if pipeline in ['chipseq','ATACseq']: change_direction = ["UP","DOWN","MIXED"] rule get_nearest_transcript: input: - bed="CSAW_{}_{}".format(peakCaller, sample_name + ".{compGroup}")+"/Filtered.results.{change_dir}.bed" if pipeline in ['chip-seq','ATAC-seq'] else "" + bed="CSAW_{}_{}".format(peakCaller, sample_name + ".{compGroup}")+"/Filtered.results.{change_dir}.bed" if pipeline in 
['chipseq','ATACseq'] else "" output: annotated_bed=temp("AnnotatedResults_{}_{}".format(peakCaller, sample_name + ".{compGroup}")+"/Filtered.results.{change_dir}_withNearestTranscript.bed") params: genes_bed=genes_bed, - field_offset=lambda wildcards: "18" if pipeline in ['chip-seq','ATAC-seq'] else "" - log: - err= "AnnotatedResults_{}_{}".format(peakCaller, sample_name + ".{compGroup}")+"/logs/bedtools_closest.{change_dir}.err", + field_offset=lambda wildcards: "18" if pipeline in ['chipseq','ATACseq'] else "" conda: CONDA_RNASEQ_ENV shell: """ - if [ -r {input.bed} ]; then bedtools closest -D b -a <( bedtools sort -i {input.bed} ) -b <( bedtools sort -i {params.genes_bed} ) | cut -f1-{params.field_offset},$(( {params.field_offset} + 1 ))-$(( {params.field_offset} + 4 )),$(( {params.field_offset} + 6 )),$(( {params.field_offset} + 13 )) > {output.annotated_bed};fi 2> {log.err} + if [ -r {input.bed} ]; then bedtools closest -D b -a <( bedtools sort -i {input.bed} ) -b <( bedtools sort -i {params.genes_bed} ) | cut -f1-{params.field_offset},$(( {params.field_offset} + 1 ))-$(( {params.field_offset} + 4 )),$(( {params.field_offset} + 6 )),$(( {params.field_offset} + 13 )) > {output.annotated_bed};fi """ rule get_nearest_gene: @@ -27,8 +25,5 @@ rule get_nearest_gene: params: pipeline=pipeline, wdir="AnnotatedResults_{}_{}".format(peakCaller, sample_name + ".{compGroup}") - log: - err="AnnotatedResults_{}_{}".format(peakCaller, sample_name + ".{compGroup}")+"/logs/nearestGene.{change_dir}.err", - out="AnnotatedResults_{}_{}".format(peakCaller, sample_name + ".{compGroup}")+"/logs/nearestGene.{change_dir}.out" conda: CONDA_RNASEQ_ENV script: "../rscripts/nearestGene.R" diff --git a/snakePipes/shared/rules/nearestGene.singleComp.snakefile b/snakePipes/shared/rules/nearestGene.singleComp.snakefile index 7bacd0de4..5b8544def 100644 --- a/snakePipes/shared/rules/nearestGene.singleComp.snakefile +++ b/snakePipes/shared/rules/nearestGene.singleComp.snakefile @@ -1,19 +1,17 
@@ sample_name = os.path.splitext(os.path.basename(sampleSheet))[0] -if pipeline in ['chip-seq','ATAC-seq']: +if pipeline in ['chipseq','ATACseq']: change_direction = ["UP","DOWN","MIXED"] rule get_nearest_transcript: input: - bed="CSAW_{}_{}".format(peakCaller, sample_name)+"/Filtered.results.{change_dir}.bed" if pipeline in ['chip-seq','ATAC-seq'] else "" + bed="CSAW_{}_{}".format(peakCaller, sample_name)+"/Filtered.results.{change_dir}.bed" if pipeline in ['chipseq','ATACseq'] else "" output: annotated_bed=temp("AnnotatedResults_{}_{}".format(peakCaller, sample_name)+"/Filtered.results.{change_dir}_withNearestTranscript.bed") params: genes_bed=genes_bed, - field_offset=lambda wildcards: "18" if pipeline in ['chip-seq','ATAC-seq'] else "" - log: - err= "AnnotatedResults_{}_{}".format(peakCaller, sample_name)+"/logs/bedtools_closest.{change_dir}.err", + field_offset=lambda wildcards: "18" if pipeline in ['chipseq','ATACseq'] else "" conda: CONDA_RNASEQ_ENV shell: """ - if [ -r {input.bed} ]; then bedtools closest -D b -a <( bedtools sort -i {input.bed} ) -b <( bedtools sort -i {params.genes_bed} ) | cut -f1-{params.field_offset},$(( {params.field_offset} + 1 ))-$(( {params.field_offset} + 4 )),$(( {params.field_offset} + 6 )),$(( {params.field_offset} + 13 )) > {output.annotated_bed};fi 2> {log.err} + if [ -r {input.bed} ]; then bedtools closest -D b -a <( bedtools sort -i {input.bed} ) -b <( bedtools sort -i {params.genes_bed} ) | cut -f1-{params.field_offset},$(( {params.field_offset} + 1 ))-$(( {params.field_offset} + 4 )),$(( {params.field_offset} + 6 )),$(( {params.field_offset} + 13 )) > {output.annotated_bed};fi """ rule get_nearest_gene: @@ -26,8 +24,5 @@ rule get_nearest_gene: params: pipeline=pipeline, wdir="AnnotatedResults_{}_{}".format(peakCaller, sample_name) - log: - err="AnnotatedResults_{}_{}".format(peakCaller, sample_name)+"/logs/nearestGene.{change_dir}.err", - out="AnnotatedResults_{}_{}".format(peakCaller, 
sample_name)+"/logs/nearestGene.{change_dir}.out" conda: CONDA_RNASEQ_ENV script: "../rscripts/nearestGene.R" diff --git a/snakePipes/shared/rules/pairtools.snakefile b/snakePipes/shared/rules/pairtools.snakefile new file mode 100644 index 000000000..95bb6c674 --- /dev/null +++ b/snakePipes/shared/rules/pairtools.snakefile @@ -0,0 +1,199 @@ +# based on https://github.com/caballero/snakemake-pairtools-phased/tree/df410ff + + +# Define function that returns pair files (phased or unphased), based on the reference. +def ret_pair(wildcards): + if "diploid_genome" in wildcards.ref: + # Phased setting + return f"pairs/{wildcards.sample}.{wildcards.ref}_phased.pairs.gz" + else: + return f"pairs/{wildcards.sample}.{wildcards.ref}.pairs.gz" + + +# different to bwa.snakefile +# here we skip the expensive sorting with samtools after bwa mem +# consider making this optional in bwa.snakefile +rule bwa_mapping: + input: + fq1="FASTQ_fastp/{sample}_R1.fastq.gz", + fq2="FASTQ_fastp/{sample}_R2.fastq.gz", + ix="genome/{ref}.fa.gz.bwt", + output: + bam="bam/{sample}.{ref}.bam", + threads: 30 + params: + bwathreads=config["alignerThreads"], + bwaparams=config["alignerOptions"], + fna=lambda wildcards, input: Path(input.ix).with_suffix(""), + resources: + mem_mb=3000, + benchmark: + "bam/.benchmark/bwa_mapping.{sample}.{ref}.benchmark" + conda: + CONDA_MAKEPAIRS_ENV + shell: + """ + bwa mem \ + {params.bwaparams} \ + -t {params.bwathreads} \ + {params.fna} \ + {input.fq1} \ + {input.fq2} \ + | samtools view -@ 8 -b \ + > {output.bam} + """ + + +rule pairtools_parse: + input: + bam="bam/{sample}.{ref}.bam", + chr_sizes="genome/{ref}.chromsizes", + output: + pairs="pairs/{sample}.{ref}.pairs.gz", + params: + minmapq=40, + cols=lambda wildcards: ( + "--add-columns XB,AS,XS" if "diploid_genome" in wildcards.ref else "" + ), + threads: 12 + benchmark: + "pairs/.benchmark/pairtools_parse.{sample}.{ref}.benchmark" + conda: + CONDA_MAKEPAIRS_ENV + shell: + """ + pairtools parse \ + --min-mapq 
{params.minmapq} \ + {params.cols} \ + --drop-sam \ + --walks-policy 5unique \ + -c {input.chr_sizes} \ + {input.bam} \ + -o {output.pairs} + """ + + +rule pairtools_phase: + input: + pairs="pairs/{sample}.diploid_genome.pairs.gz", + output: + pairs="pairs/{sample}.diploid_genome_phased.pairs.gz", + params: + hap1=strains[0], + hap2=strains[1], + threads: 12 + benchmark: + "pairs/.benchmark/pairtools_phase.{sample}.benchmark" + conda: + CONDA_MAKEPAIRS_ENV + shell: + """ + pairtools phase \ + --phase-suffixes _{params.hap1} _{params.hap2} \ + --tag-mode XB \ + --clean-output \ + {input.pairs} -o {output.pairs} + """ + + +rule pairtools_sort: + input: + ret_pair, + output: + pairs="pairs/{sample}.{ref}.pairs.sorted.gz", + threads: 20 + benchmark: + "pairs/.benchmark/pairtools_sort.{sample}.{ref}.benchmark" + conda: + CONDA_MAKEPAIRS_ENV + shell: + """ + pairtools sort \ + {input} \ + -o {output.pairs} \ + --memory 20G + """ + + +rule pairtools_dedup: + input: + pairs="pairs/{sample}.{ref}.pairs.sorted.gz", + output: + pairs="pairs/{sample}.{ref}.pairs.dedup.gz", + stats="pairs/{sample}.{ref}.pairs.dedup.stats", + params: + extra_cols=lambda wildcards: ( + "--extra-col-pair phase1 phase2" if "diploid" in wildcards.ref else "" + ), + threads: 12 + benchmark: + "pairs/.benchmark/pairtools_dedup.{sample}.{ref}.benchmark" + conda: + CONDA_MAKEPAIRS_ENV + shell: + """ + pairtools dedup \ + --mark-dups \ + {params.extra_cols} \ + --output-dups - \ + --output-unmapped - \ + --output-stats {output.stats} \ + -o {output.pairs} \ + {input.pairs} + """ + + +rule pairtools_filter_phased: + input: + pairs="pairs/{sample}.diploid_genome.pairs.dedup.gz", + output: + stats="phase_stats/{sample}.diploid_genome_{phasetype}.pairs.stats", + pairs="phase_stats/{sample}.diploid_genome_{phasetype}.pairs.gz", + params: + filterparam=lambda wildcards: PHASEDIC[wildcards.phasetype], + resources: + mem_mb=1000, + benchmark: + 
"phase_stats/.benchmark/pairtools_filter_phased.{sample}.diploid_genome_{phasetype}.benchmark" + threads: 12 + conda: + CONDA_MAKEPAIRS_ENV + shell: + """ + pairtools select \ + '{params.filterparam}' \ + {input.pairs} \ + -o {output.pairs} + pairtools stats {output.pairs} -o {output.stats} + """ + + +rule multiqc: + input: + stats=expand( + "pairs/{sample}.{ref}.pairs.dedup.stats", sample=samples, ref=REFERENCES + ), + phasedstats=expand( + "phase_stats/{sample}.diploid_genome_{phasetype}.pairs.stats", + sample=samples, + phasetype=PHASEDIC.keys(), + ), + output: + html="multiqc/multiqc_report.html", + params: + odir="multiqc", + benchmark: + "multiqc/.benchmark/multiqc.benchmark" + threads: 1 + conda: + CONDA_MAKEPAIRS_ENV + shell: + """ + echo input: {input.phasedstats} + multiqc \ + --module pairtools \ + --module fastqc \ + --module fastp \ + -o {params.odir} \ + . + """ diff --git a/snakePipes/shared/rules/preprocessing.snakefile b/snakePipes/shared/rules/preprocessing.snakefile index 57ae3243d..621ed561d 100755 --- a/snakePipes/shared/rules/preprocessing.snakefile +++ b/snakePipes/shared/rules/preprocessing.snakefile @@ -11,10 +11,9 @@ if sampleSheet: output: r1="mergedFASTQ/{sample}" + reads[0] + ext, r2="mergedFASTQ/{sample}" + reads[1] + ext - log: "mergedFASTQ/logs/{sample}.mergeFastq.log" shell: """ - cat {input.r1} > {output.r1} 2> {log} - cat {input.r2} > {output.r2} 2>> {log} + cat {input.r1} > {output.r1} + cat {input.r2} > {output.r2} """ else: rule mergeFastq: @@ -22,9 +21,8 @@ if sampleSheet: r1=lambda wildcards: expand(initialIndir + "/{sample}", sample=sampleDict[wildcards.sample][0]) output: r1="mergedFASTQ/{sample}" + reads[0] + ext - log: "mergedFASTQ/logs/{sample}.mergeFastq.log" shell: """ - cat {input.r1} > {output.r1} 2> {log} + cat {input.r1} > {output.r1} """ else: if pairedEnd: @@ -68,9 +66,6 @@ if optDedupDist > 0: mem=clumpifyMemory, optDedupDist=optDedupDist, clumpifyOptions=clumpifyOptions - log: - 
stdout="deduplicatedFASTQ/logs/{sample}.stdout", - stderr="deduplicatedFASTQ/logs/{sample}.stderr" benchmark: "deduplicatedFASTQ/.benchmarks/{sample}" threads: lambda wildcards: 20 if 20 0: in2={input.r2} \ out={output.tempOut} \ dupedist={params.optDedupDist} \ - threads={threads} > {log.stdout} 2> {log.stderr} + threads={threads} splitFastq --pigzThreads 4 --R1 {params.R1} --R2 {params.R2} --extension {params.extension} \ {output.tempOut} \ - deduplicatedFASTQ/{wildcards.sample} > {output.metrics} 2>> {log.stderr} + deduplicatedFASTQ/{wildcards.sample} > {output.metrics} """ else: rule clumpify: @@ -101,9 +96,6 @@ if optDedupDist > 0: mem=clumpifyMemory, optDedupDist=optDedupDist, clumpifyOptions=clumpifyOptions - log: - stdout="deduplicatedFASTQ/logs/{sample}.stdout", - stderr="deduplicatedFASTQ/logs/{sample}.stderr" benchmark: "deduplicatedFASTQ/.benchmarks/{sample}" threads: lambda wildcards: 20 if 20 0: in={input.r1} \ out={output.tempOut} \ dupedist={params.optDedupDist} \ - threads={threads} > {log.stdout} 2> {log.stderr} + threads={threads} splitFastq --SE --pigzThreads 4 --R1 {params.R1} --extension {params.extension} \ {output.tempOut} \ - deduplicatedFASTQ/{wildcards.sample} > {output.metrics} 2>> {log.stderr} + deduplicatedFASTQ/{wildcards.sample} > {output.metrics} """ else: if pairedEnd: diff --git a/snakePipes/shared/rules/rMats.multipleComp.snakefile b/snakePipes/shared/rules/rMats.multipleComp.snakefile index f83d27066..eb1bbf9de 100644 --- a/snakePipes/shared/rules/rMats.multipleComp.snakefile +++ b/snakePipes/shared/rules/rMats.multipleComp.snakefile @@ -57,7 +57,6 @@ rule rMats: end = "paired" if pairedEnd else "single", libType = wrap_libType(libraryType), tempDir = tempDir, - log: "rMats_{}/rMats.log".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}") threads: 4 conda: CONDA_RMATS_ENV shell:""" @@ -65,6 +64,6 @@ rule rMats: MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX); set +o pipefail; 
readLen=$(samtools view {params.s1} | awk \'{{print length($10)}}\' | head -10000 | awk \'{{ sum += $1 }} END {{ if (NR > 0) print int(sum / NR) }}\') - rmats.py --gtf {params.gtf} --b1 {input.b1} --b2 {input.b2} --od {params.od} --tmp $MYTEMP -t {params.end} --libType {params.libType} --readLength $readLen --variable-read-length --nthread {threads} --tstat {threads} 2> {log}; + rmats.py --gtf {params.gtf} --b1 {input.b1} --b2 {input.b2} --od {params.od} --tmp $MYTEMP -t {params.end} --libType {params.libType} --readLength $readLen --variable-read-length --nthread {threads} --tstat {threads}; rm -rf $MYTEMP """ diff --git a/snakePipes/shared/rules/rMats.singleComp.snakefile b/snakePipes/shared/rules/rMats.singleComp.snakefile index 07334192d..eb355275a 100644 --- a/snakePipes/shared/rules/rMats.singleComp.snakefile +++ b/snakePipes/shared/rules/rMats.singleComp.snakefile @@ -37,7 +37,6 @@ rule rMats: end = "paired" if pairedEnd else "single", libType = wrap_libType(libraryType), tempDir = tempDir, - log: "{}/rMats.log".format(get_outdir("rMats", sampleSheet)) threads: 4 conda: CONDA_RMATS_ENV shell:""" @@ -45,6 +44,6 @@ rule rMats: MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX); set +o pipefail; readLen=$(samtools view {params.s1} | awk \'{{print length($10)}}\' | head -10000 | awk \'{{ sum += $1 }} END {{ if (NR > 0) print int(sum / NR) }}\') - rmats.py --gtf {params.gtf} --b1 {input.b1} --b2 {input.b2} --od {params.od} --tmp $MYTEMP -t {params.end} --libType {params.libType} --readLength $readLen --variable-read-length --nthread {threads} --tstat {threads} 2> {log}; + rmats.py --gtf {params.gtf} --b1 {input.b1} --b2 {input.b2} --od {params.od} --tmp $MYTEMP -t {params.end} --libType {params.libType} --readLength $readLen --variable-read-length --nthread {threads} --tstat {threads}; rm -rf $MYTEMP """ diff --git a/snakePipes/shared/rules/rMats.snakefile b/snakePipes/shared/rules/rMats.snakefile index 211322c46..d7dc2d137 100644 --- 
a/snakePipes/shared/rules/rMats.snakefile +++ b/snakePipes/shared/rules/rMats.snakefile @@ -34,11 +34,10 @@ rule rMats: end = "paired" if pairedEnd else "single", libType = wrap_libType(libraryType), tempDir = tempDir, - log: "{}/rMats.log".format(get_outdir("rMats", sampleSheet)) threads: 1 conda: CONDA_RNASEQ_ENV shell:""" set +o pipefail; readLen=$(samtools view {params.s1} | awk \'{{print length($10)}}\' | head -10000 | awk \'{{ sum += $1 }} END {{ if (NR > 0) print int(sum / NR) }}\') - rmats.py --gtf {params.gtf} --b1 {input.b1} --b2 {input.b2} --od {params.od} --tmp {params.tempDir} -t {params.end} --libType {params.libType} --readLength $readLen --variable-read-length --nthread {threads} --tstat {threads} 2> {log} + rmats.py --gtf {params.gtf} --b1 {input.b1} --b2 {input.b2} --od {params.od} --tmp {params.tempDir} -t {params.end} --libType {params.libType} --readLength $readLen --variable-read-length --nthread {threads} --tstat {threads} """ diff --git a/snakePipes/shared/rules/sambamba.snakefile b/snakePipes/shared/rules/sambamba.snakefile index ff099aa83..31a92cbc2 100644 --- a/snakePipes/shared/rules/sambamba.snakefile +++ b/snakePipes/shared/rules/sambamba.snakefile @@ -9,9 +9,6 @@ rule sambamba_markdup: output: aligner+"/{sample}.bam"# duplicate marked threads: lambda wildcards: 10 if 10 {log.err} > {log.out} + sambamba markdup -t {threads} --sort-buffer-size=6000 --overflow-list-size 600000 --tmpdir $MYTEMP {input} {output} rm -rf "$MYTEMP" """ @@ -29,11 +26,10 @@ rule sambamba_flagstat_sorted: aligner+"/{sample}.sorted.bam" output: "Sambamba/{sample}.sorted.markdup.txt" - log: "Sambamba/logs/{sample}.flagstat_sorted.log" conda: CONDA_SAMBAMBA_ENV threads: lambda wildcards: 10 if 10 {output} 2> {log} + sambamba flagstat -p {input} -t {threads} > {output} """ rule sambamba_flagstat: @@ -41,11 +37,10 @@ rule sambamba_flagstat: aligner+"/{sample}.bam" output: "Sambamba/{sample}.markdup.txt" - log: "Sambamba/logs/{sample}.flagstat.log" conda: 
CONDA_SAMBAMBA_ENV threads: lambda wildcards: 10 if 10 {output} 2> {log} + sambamba flagstat -p {input} -t {threads} > {output} """ ## index the duplicate marked folder @@ -54,8 +49,6 @@ rule samtools_index: aligner+"/{sample}.bam" output: aligner+"/{sample}.bam.bai" - log: aligner + "/logs/{sample}.index.log" conda: CONDA_SHARED_ENV - shell: "samtools index {input} 2> {log}" - + shell: "samtools index {input}" diff --git a/snakePipes/shared/rules/scRNAseq_Alevin.snakefile b/snakePipes/shared/rules/scRNAseq_Alevin.snakefile index d5f651c43..6dd943865 100755 --- a/snakePipes/shared/rules/scRNAseq_Alevin.snakefile +++ b/snakePipes/shared/rules/scRNAseq_Alevin.snakefile @@ -35,14 +35,11 @@ rule SalmonAlevin: outdir = "Alevin/{sample}" output: quantmat = "Alevin/{sample}/alevin/quants_mat.gz", - log: - out = "Alevin/logs/alevin.{sample}.out", - err = "Alevin/logs/alevin.{sample}.err" #Use RNAseq env because Salmon already installed there (no need for duplication). conda: CONDA_SALMON_ENV threads: 8 shell:""" - salmon alevin -l {params.libtype} -1 {input.R1} -2 {input.R2} {params.protocol} -i {params.index} -p {threads} -o {params.outdir} --tgMap {input.tgMap} --dumpFeatures --dumpMtx --numCellBootstraps 100 > {log.out} 2> {log.err} + salmon alevin -l {params.libtype} -1 {input.R1} -2 {input.R2} {params.protocol} -i {params.index} -p {threads} -o {params.outdir} --tgMap {input.tgMap} --dumpFeatures --dumpMtx --numCellBootstraps 100 """ rule AlevinQC: @@ -77,8 +74,6 @@ rule AlevinQC: # gtf = lambda wildcards,input: os.path.join(outdir, input.gtf), # joint_fasta = lambda wildcards,output: os.path.join(outdir,output.joint_fasta), # joint_t2g = lambda wildcards,output: os.path.join(outdir,output.joint_t2g) -# log: -# out = "Annotation/logs/eisaR.out" # conda: CONDA_eisaR_ENV # script: "../rscripts/scRNAseq_eisaR.R" @@ -96,14 +91,11 @@ rule AlevinQC: # velo_index = "Salmon/SalmonIndex_RNAVelocity/seq.bin" # params: # salmonIndexOptions = salmonIndexOptions -# log: -# err = 
"Salmon/SalmonIndex_RNAVelocity/logs/SalmonIndex.err", -# out = "Salmon/SalmonIndex_RNAVelocity/logs/SalmonIndex.out" # threads: lambda wildcards: 16 if 16 {output.seq_fa} -# salmon index -p {threads} -t {output.seq_fa} -d {input.decoys} -i Salmon/SalmonIndex_RNAVelocity {params.salmonIndexOptions} > {log.out} 2> {log.err} +# salmon index -p {threads} -t {output.seq_fa} -d {input.decoys} -i Salmon/SalmonIndex_RNAVelocity {params.salmonIndexOptions} # """ rule AlevinForVelocity: @@ -120,14 +112,11 @@ rule AlevinForVelocity: outdir = "AlevinForVelocity/{sample}" output: quantmat = "AlevinForVelocity/{sample}/alevin/quants_mat.gz" - log: - out = "AlevinForVelocity/logs/alevin.{sample}.out", - err = "AlevinForVelocity/logs/alevin.{sample}.err" #Use RNAseq env because Salmon already installed there (no need for duplication). conda: CONDA_SALMON_ENV threads: 8 shell:""" - salmon alevin -l {params.libtype} -1 {input.R1} -2 {input.R2} {params.protocol} -i {params.velo_index} -p {threads} -o {params.outdir} --tgMap {params.tgMap} --dumpFeatures --dumpMtx --numCellBootstraps 100 > {log.out} 2> {log.err} + salmon alevin -l {params.libtype} -1 {input.R1} -2 {input.R2} {params.protocol} -i {params.velo_index} -p {threads} -o {params.outdir} --tgMap {params.tgMap} --dumpFeatures --dumpMtx --numCellBootstraps 100 """ rule velo_to_sce: @@ -144,7 +133,5 @@ rule velo_to_sce: t2g = lambda wildcards,input: os.path.join(outdir, input.t2g), g2s = lambda wildcards,input: os.path.join(outdir, input.g2s), outfile = lambda wildcards,output: os.path.join(outdir, output.merged) - log: - out = "SingleCellExperiment/AlevinForVelocity/logs/alevin2sce.out" conda: CONDA_eisaR_ENV script: "../rscripts/scRNAseq_splitAlevinVelocityMatrices.R" diff --git a/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile b/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile index aff040cb6..9591eeccd 100755 --- a/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile +++ 
b/snakePipes/shared/rules/scRNAseq_STARsolo.snakefile @@ -17,7 +17,6 @@ rule STARsolo: raw_features = "STARsolo/{sample}/{sample}.Solo.out/Gene/raw/features.tsv", filtered_features = "STARsolo/{sample}/{sample}.Solo.out/Gene/filtered/features.tsv", summary = "STARsolo/{sample}/{sample}.Solo.out/Gene/Summary.csv" - log: "STARsolo/logs/{sample}.log" params: alignerOptions = str(alignerOptions or ''), gtf = outdir+"/Annotation/genes.filtered.gtf", @@ -62,9 +61,9 @@ rule STARsolo: --soloBarcodeReadLength 0 \ --soloCBmatchWLtype Exact \ --soloStrand Forward\ - --soloUMIdedup Exact 2> {log} + --soloUMIdedup Exact - ln -s ../{params.prefix}Aligned.sortedByCoord.out.bam {output.bam} 2>> {log} + ln -s ../{params.prefix}Aligned.sortedByCoord.out.bam {output.bam} rm -rf $MYTEMP """ @@ -77,8 +76,6 @@ rule STARsolo_report: wdir = outdir + "/STARsolo", input = lambda wildcards,input: [ os.path.join(outdir,x) for x in input ], samples = samples - log: - out = "STARsolo/logs/Report.out" conda: CONDA_seurat_ENV script: "../rscripts/scRNAseq_report.R" @@ -90,12 +87,11 @@ rule filter_bam: output: bamfile = "filtered_bam/{sample}.filtered.bam", bami = "filtered_bam/{sample}.filtered.bam.bai" - log: "filtered_bam/logs/{sample}.log" threads: lambda wildcards: 8 if 8 {output.bamfile} 2> {log}; - sambamba index -t {threads} {output.bamfile} 2>> {log} + sambamba view -F "not unmapped and [CB] !=null" -t {threads} -f bam {input.bamfile} > {output.bamfile}; + sambamba index -t {threads} {output.bamfile} """ ##remove this rule as soon as STARsolo output has been fixed by Alex Dobin @@ -106,10 +102,9 @@ rule STARsolo_features_to_V3: output: raw_features = "STARsolo/{sample}/{sample}.Solo.out/Gene/raw/features.v3.tsv", filtered_features = "STARsolo/{sample}/{sample}.Solo.out/Gene/filtered/features.v3.tsv" - log:"STARsolo/logs/{sample}.features_to_v3.log" shell: """ - awk '{{print $1, $2, "."}}' {input.raw_features} | tr " " "\t" > {output.raw_features} 2> {log}; - awk '{{print $1, $2, "."}}' 
{input.filtered_features} | tr " " "\t" > {output.filtered_features} 2>> {log} + awk '{{print $1, $2, "."}}' {input.raw_features} | tr " " "\t" > {output.raw_features}; + awk '{{print $1, $2, "."}}' {input.filtered_features} | tr " " "\t" > {output.filtered_features} """ @@ -129,14 +124,13 @@ rule gzip_STARsolo_for_seurat: filtered_bc_gz = "STARsolo/{sample}/{sample}.Solo.out/Gene/filtered/barcodes.tsv.gz", raw_features_gz = "STARsolo/{sample}/{sample}.Solo.out/Gene/raw/features.tsv.gz", filtered_features_gz = "STARsolo/{sample}/{sample}.Solo.out/Gene/filtered/features.tsv.gz" - log: "STARsolo/logs/{sample}.gzip.log" shell: """ - gzip -c {params.raw_bc} > {params.raw_bc_gz} 2> {log}; - gzip -c {input.raw_features} > {params.raw_features_gz} 2>> {log}; - gzip -c {params.filtered_bc} > {params.filtered_bc_gz} 2>> {log}; - gzip -c {input.filtered_features} > {params.filtered_features_gz} 2>> {log}; - gzip -c {input.raw_counts} > {output.raw_counts_gz} 2>> {log}; - gzip -c {input.filtered_counts} > {output.filtered_counts_gz} 2>> {log} + gzip -c {params.raw_bc} > {params.raw_bc_gz}; + gzip -c {input.raw_features} > {params.raw_features_gz}; + gzip -c {params.filtered_bc} > {params.filtered_bc_gz}; + gzip -c {input.filtered_features} > {params.filtered_features_gz}; + gzip -c {input.raw_counts} > {output.raw_counts_gz}; + gzip -c {input.filtered_counts} > {output.filtered_counts_gz} """ @@ -149,8 +143,6 @@ rule STARsolo_raw_to_seurat: indirs = expand(outdir + "/STARsolo/{sample}/{sample}.Solo.out/Gene/raw",sample=samples), wdir = outdir + "/Seurat/STARsolo_raw", samples = samples - log: - out = "Seurat/STARsolo_raw/logs/seurat.out" conda: CONDA_seurat_ENV script: "../rscripts/scRNAseq_Seurat3.R" @@ -163,8 +155,6 @@ rule STARsolo_filtered_to_seurat: indirs = expand(outdir +"/STARsolo/{sample}/{sample}.Solo.out/Gene/filtered",sample=samples), wdir = outdir +"/Seurat/STARsolo_filtered", samples = samples - log: - out = "Seurat/STARsolo_filtered/logs/seurat.out" conda: 
CONDA_seurat_ENV script: "../rscripts/scRNAseq_Seurat3.R" @@ -180,8 +170,6 @@ rule remove_empty_drops: indirs = expand(outdir + "/STARsolo/{sample}/{sample}.Solo.out/Gene/raw",sample=samples), wdir = outdir + "/Seurat/STARsolo_raw_RmEmptyDrops", samples = samples - log: - out = "Seurat/STARsolo_raw_RmEmptyDrops/logs/seurat.out" conda: CONDA_seurat_ENV script: "../rscripts/scRNAseq_EmptyDrops.R" @@ -192,7 +180,6 @@ if not skipVelocyto: bam = "filtered_bam/{sample}.filtered.bam" output: bam = "filtered_bam/cellsorted_{sample}.filtered.bam" - log: "filtered_bam/logs/{sample}.cellsort.log" params: samsort_memory="10G", tempDir = tempDir @@ -201,7 +188,7 @@ if not skipVelocyto: shell: """ TMPDIR={params.tempDir} MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX) - samtools sort -m {params.samsort_memory} -@ {threads} -T $MYTEMP/{wildcards.sample} -t CB -O bam -o {output.bam} {input.bam} 2> {log} + samtools sort -m {params.samsort_memory} -@ {threads} -T $MYTEMP/{wildcards.sample} -t CB -O bam -o {output.bam} {input.bam} rm -rf $MYTEMP """ @@ -217,7 +204,6 @@ if not skipVelocyto: output: outdir = directory("VelocytoCounts/{sample}"), outdum = "VelocytoCounts/{sample}.done.txt" - log: "VelocytoCounts/logs/{sample}.log" params: tempdir = tempDir conda: CONDA_scRNASEQ_ENV @@ -226,7 +212,7 @@ if not skipVelocyto: export LANG=en_US.utf-8 export TMPDIR={params.tempdir} MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX); - velocyto run --bcfile {input.bc} --outputfolder {output.outdir} --dtype uint64 {input.bam} {input.gtf} 2> {log}; + velocyto run --bcfile {input.bc} --outputfolder {output.outdir} --dtype uint64 {input.bam} {input.gtf} ; touch {output.outdum}; rm -rf $MYTEMP """ @@ -234,14 +220,13 @@ if not skipVelocyto: rule combine_loom: input: expand("VelocytoCounts/{sample}",sample=samples) output: "VelocytoCounts_merged/merged.loom" - log: "VelocytoCounts_merged/logs/combine_loom.log" conda: CONDA_loompy_ENV params: outfile = 
outdir+"/VelocytoCounts_merged/merged.loom", script = maindir+"/shared/tools/loompy_merge.py", input_fp = lambda wildcards,input: [ os.path.join(outdir,f) for f in input ] shell: """ - python {params.script} -outf {params.outfile} {params.input_fp} 2> {log} + python {params.script} -outf {params.outfile} {params.input_fp} """ #rule velocity_to_seurat: @@ -252,7 +237,5 @@ if not skipVelocyto: # params: # wdir = outdir + "/Seurat/Velocyto", # samples = samples - # log: - # out = "Seurat/Velocyto/logs/seurat.out" # conda: CONDA_seurat3_ENV # script: "../rscripts/scRNAseq_merge_loom.R" diff --git a/snakePipes/shared/rules/sleuth.multiComp.snakefile b/snakePipes/shared/rules/sleuth.multiComp.snakefile index 3f1b9c7cb..6ad575c22 100644 --- a/snakePipes/shared/rules/sleuth.multiComp.snakefile +++ b/snakePipes/shared/rules/sleuth.multiComp.snakefile @@ -19,14 +19,10 @@ rule sleuth_Salmon: sampleSheet = lambda wildcards,input: os.path.join(outdir,str(input.sampleSheet)), fdr = 0.05 threads: 6 - log: - out = "sleuth_Salmon_{}/logs/sleuth.out".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}"), - err = "sleuth_Salmon_{}/logs/sleuth.err".format(os.path.splitext(os.path.basename(str(sampleSheet)))[0]+".{compGroup}") conda: CONDA_SLEUTH_ENV shell: "Rscript {params.script} " "{params.sampleSheet} " "{params.indir} " "{params.outdir} " - "{params.fdr} " + os.path.join(outdir,"{input.t2g}") + - " >{log.out} 2>{log.err}" + "{params.fdr} " + os.path.join(outdir,"{input.t2g}") diff --git a/snakePipes/shared/rules/sleuth.singleComp.snakefile b/snakePipes/shared/rules/sleuth.singleComp.snakefile index 8b4c53644..f38cd738a 100644 --- a/snakePipes/shared/rules/sleuth.singleComp.snakefile +++ b/snakePipes/shared/rules/sleuth.singleComp.snakefile @@ -17,17 +17,13 @@ rule sleuth_Salmon: outdir = os.path.join(outdir,"sleuth_Salmon_{}".format(sample_name)), fdr = 0.05 threads: 6 - log: - out = "sleuth_Salmon_{}/logs/sleuth.out".format(sample_name), - err = 
"sleuth_Salmon_{}/logs/sleuth.err".format(sample_name) conda: CONDA_SLEUTH_ENV shell: "Rscript {params.script} " "{input.sampleSheet} " "{params.indir} " "{params.outdir} " - "{params.fdr} " + os.path.join(outdir,"{input.t2g}") + - " >{log.out} 2>{log.err}" + "{params.fdr} " + os.path.join(outdir,"{input.t2g}") rule sleuth_SalmonAllelic: input: @@ -44,14 +40,10 @@ rule sleuth_SalmonAllelic: outdir = os.path.join(outdir,"sleuth_SalmonAllelic_{}".format(sample_name)), fdr = 0.05 threads: 6 - log: - out = "sleuth_SalmonAllelic_{}/logs/sleuth.out".format(sample_name), - err = "sleuth_SalmonAllelic_{}/logs/sleuth.err".format(sample_name) conda: CONDA_SLEUTH_ENV shell: "Rscript {params.script} " "{input.sampleSheet} " "{params.indir} " "{params.outdir} " - "{params.fdr} " + os.path.join(outdir,"{input.t2g}") + - " >{log.out} 2>{log.err}" + "{params.fdr} " + os.path.join(outdir,"{input.t2g}") diff --git a/snakePipes/shared/rules/split_bam_ops_ChIP_spikein.snakefile b/snakePipes/shared/rules/split_bam_ops_ChIP_spikein.snakefile index 4f783f961..7f3e33656 100755 --- a/snakePipes/shared/rules/split_bam_ops_ChIP_spikein.snakefile +++ b/snakePipes/shared/rules/split_bam_ops_ChIP_spikein.snakefile @@ -28,12 +28,11 @@ rule split_bamfiles_by_genome: bai = "split_bam/{sample}_{part}.bam.bai" params: region = lambda wildcards: region_dict[wildcards.part] - log: "split_bam/logs/{sample}_{part}.log" conda: CONDA_SAMBAMBA_ENV threads: 4 shell: """ - sambamba slice -o {output.bam} {input.bam} {params.region} 2> {log}; - sambamba index -t {threads} {output.bam} 2>> {log} + sambamba slice -o {output.bam} {input.bam} {params.region}; + sambamba index -t {threads} {output.bam} """ rule multiBamSummary_input: @@ -51,9 +50,6 @@ rule multiBamSummary_input: scaling_factors = "--scalingFactors split_deepTools_qc/multiBamSummary/{part}.input.scaling_factors.txt", binSize = lambda wildcards: " --binSize "+str(spikein_bin_size) if wildcards.part=="spikein" else "", spikein_region = lambda 
wildcards: " --region "+spikein_region if ((wildcards.part=="spikein") and (spikein_region != "")) else "" - log: - out = "split_deepTools_qc/logs/{part}.input_multiBamSummary.out", - err = "split_deepTools_qc/logs/{part}.input_multiBamSummary.err" benchmark: "split_deepTools_qc/.benchmark/{part}.input_multiBamSummary.benchmark" threads: lambda wildcards: 24 if 24 {output} 2> {log} """ @@ -139,9 +127,6 @@ rule bamCoverage_by_part: blacklist = "--blackListFileName {}".format(blacklist_bed) if blacklist_bed else "", scaling_factors = lambda wildcards,input: "--scaleFactor {}".format(get_scaling_factor(wildcards.sample,input.scale_factors)) ## subset for the one factor needed - log: - out = "bamCoverage/logs/bamCoverage.{sample}.BY{part}.filtered.out", - err = "bamCoverage/logs/bamCoverage.{sample}.BY{part}.filtered.err" benchmark: "bamCoverage/.benchmark/bamCoverage.{sample}.BY{part}.filtered.benchmark" threads: lambda wildcards: 16 if 16 {log.err} > {log.out} + TEcount --format BAM --mode multi -b {input.bam} --GTF {params.gtf} --TE {input.repeatGTF} --project TEcount/{wildcards.sample} """ @@ -119,7 +116,6 @@ rule sortBams: aligner + "/{sample}.unsorted.bam" output: "filtered_bam/{sample}.filtered.bam" - log: "filtered_bam/logs/{sample}.sort.log" threads: 5 params: tempDir = tempDir @@ -127,7 +123,7 @@ rule sortBams: shell: """ TMPDIR={params.tempDir} MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX); - samtools view -u -F 2304 {input} | samtools sort -@ 4 -m 2G -T $MYTEMP/{wildcards.sample} -o {output} 2> {log} + samtools view -u -F 2304 {input} | samtools sort -@ 4 -m 2G -T $MYTEMP/{wildcards.sample} -o {output} rm -rf $MYTEMP """ if fromBAM: @@ -136,9 +132,8 @@ if fromBAM: "filtered_bam/{sample}.filtered.bam" output: "filtered_bam/{sample}.filtered.bam.bai" - log: "filtered_bam/logs/{sample}.index.log" conda: CONDA_SHARED_ENV - shell: "samtools index {input} 2> {log}" + shell: "samtools index {input}" rule cpGTF: diff --git 
a/snakePipes/shared/rules/three_prime_seq.snakefile b/snakePipes/shared/rules/three_prime_seq.snakefile index 181a981f4..aa10dd90f 100644 --- a/snakePipes/shared/rules/three_prime_seq.snakefile +++ b/snakePipes/shared/rules/three_prime_seq.snakefile @@ -1,11 +1,8 @@ -# adapted from Andrew Rezansoff's 3' seq pipeline tools -# /data/hilgers/group/rezansoff/3seq_pipeline_tools/ - from pathlib import Path import pandas as pd # prevent wildcards from picking up directories -wildcard_constraints: sample="[^(\/)]+" +wildcard_constraints: sample="[^(/)]+" # read in sampleSheet metadata to merge replicates for preprocess_cluster_pas sample_metadata = pd.read_table(sampleSheet, index_col=None) @@ -26,31 +23,8 @@ def get_outdir(folder_name,sampleSheet): sample_name = os.path.splitext(os.path.basename(str(sampleSheet)))[0] return("{}_{}".format(folder_name, sample_name)) - -# TODO: -# 3. check why cmatrix_filtered is commented out - OK -# 5. re-implement clusterPAS? -# 6. implement filtering of column 4 of the geneAssociation output - things with multiple hits (i.e. commas) should be filtered out [x] -# possibly assign cluster to "next" gene annotation (i.e. the closest?) -# this would be in signal2gene.py -# 7. assign unique IDs with _0, _1 to 4th column in clusterPAS output in 5'->3' order to create unique -# example is /data/hilgers/group2/rezansoff/sakshiProj/2507_3seq/polyA_annotation_polysome2K/AllSamples_clustered_strict1_fromSortedNumeric.txt -> -# /data/hilgers/group2/rezansoff/sakshiProj/2507_3seq/polyA_annotation_polysome2K/AllSamples_clustered_strict1_fromSortedNumeric_uniqIDs.txt -# this will be run by countReadEnds -# then possibly DESeq on countReadEnds output -# understand cmatrix steps? 
- tools_dir = Path(maindir) / "shared" / "tools" -# trimming done using STAR and fastp - -# rule clusterPAS: -# input: -# "three_prime_seq/SampleAll.txt" -# conda: -# CONDA_SHARED_ENV - - rule polyAT: input: two_bit=genome_2bit, @@ -112,17 +86,12 @@ rule filterBW: bed=filterbw_which_bed output: "three_prime_seq/filtered/{sample}_direction-{direction}.bw" - log: - stdout="three_prime_seq/logs/{sample}_{direction}.stdout", - stderr="three_prime_seq/logs/{sample}_{direction}.stderr" params: script=(tools_dir / "three_prime_seq" / "filterBW.py") conda: CONDA_SHARED_ENV shell: "{params.script} {input} {output} " - "> {log.stdout} " - "2> {log.stderr} " # Associate signal with each gene (flank by some amount) @@ -219,7 +188,7 @@ rule count_read_ends: output: counts="three_prime_seq/{sample}_uniqcounts.txt" wildcard_constraints: - sample="[^\/]+" # no / + sample="[^/]+" conda: CONDA_SHARED_ENV params: diff --git a/snakePipes/shared/rules/trimming.snakefile b/snakePipes/shared/rules/trimming.snakefile index 2e3b44475..626cf6323 100755 --- a/snakePipes/shared/rules/trimming.snakefile +++ b/snakePipes/shared/rules/trimming.snakefile @@ -9,16 +9,13 @@ if pairedEnd: r2 = "FASTQ_Cutadapt/{sample}"+reads[1]+".fastq.gz" params: opts = lambda wildcards: str(trimmerOptions or '') - log: - out = "FASTQ_Cutadapt/logs/Cutadapt.{sample}.out", - err = "FASTQ_Cutadapt/logs/Cutadapt.{sample}.err" benchmark: "FASTQ_Cutadapt/.benchmark/Cutadapt.{sample}.benchmark" threads: lambda wildcards: 8 if 8 {log.out} 2> {log.err} + cutadapt -j {threads} -e 0.1 -q 16 -O 3 --trim-n --minimum-length 25 -a CTGTCTCTTATACACATCT -A CTGTCTCTTATACACATCT {params.opts} \ + -o "{output.r1}" -p "{output.r2}" "{input.r1}" "{input.r2}" """ else: rule cutadapt: @@ -28,16 +25,13 @@ else: "FASTQ_Cutadapt/{sample}"+reads[0]+".fastq.gz", params: opts = lambda wildcards: str(trimmerOptions or '') - log: - out = "FASTQ_Cutadapt/logs/Cutadapt.{sample}.out", - err = "FASTQ_Cutadapt/logs/Cutadapt.{sample}.err" benchmark: 
"FASTQ_Cutadapt/.benchmark/Cutadapt.{sample}.benchmark" threads: lambda wildcards: 8 if 8 {log.out} 2> {log.err} + cutadapt -j {threads} -e 0.1 -q 16 -O 3 --trim-n --minimum-length 25 -a CTGTCTCTTATACACATCT {params.opts} \ + -o "{output}" "{input.r1}" """ @@ -55,15 +49,12 @@ if pairedEnd: "FASTQ_fastp/{sample}fastp.html" params: opts = lambda wildcards: str(trimmerOptions or '') - log: - out = "FASTQ_fastp/logs/fastp.{sample}.out", - err = "FASTQ_fastp/logs/fastp.{sample}.err" benchmark: "FASTQ_fastp/.benchmark/fastp.{sample}.benchmark" threads: lambda wildcards: 8 if 8 {log.out} 2> {log.err} + fastp -w {threads} -i "{input[0]}" -I "{input[1]}" -o "{output[0]}" -O "{output[1]}" -j "{output[2]}" -h "{output[3]}" {params.opts} """ else: rule fastp: @@ -75,15 +66,12 @@ else: "FASTQ_fastp/{sample}fastp.html" params: opts = lambda wildcards: str(trimmerOptions or '') - log: - out = "FASTQ_fastp/logs/fastp.{sample}.out", - err = "FASTQ_fastp/logs/fastp.{sample}.err" benchmark: "FASTQ_fastp/.benchmark/fastp.{sample}.benchmark" threads: lambda wildcards: 8 if 8 {log.out} 2> {log.err} + fastp -w {threads} -i "{input[0]}" -o "{output[0]}" -j "{output[1]}" -h "{output[2]}" {params.opts} """ @@ -101,14 +89,11 @@ if pairedEnd: tmp1 = "FASTQ_TrimGalore/{sample}"+reads[0]+"_val_1.fq.gz", tmp2 = "FASTQ_TrimGalore/{sample}"+reads[1]+"_val_2.fq.gz", opts = lambda wildcards: str(trimmerOptions or '') - log: - out = "FASTQ_TrimGalore/logs/TrimGalore.{sample}.out", - err = "FASTQ_TrimGalore/logs/TrimGalore.{sample}.err" benchmark: "FASTQ_TrimGalore/.benchmark/TrimGalore.{sample}.benchmark" conda: CONDA_SHARED_ENV shell: """ - trim_galore --output_dir FASTQ_TrimGalore --paired --stringency 3 {params.opts} "{input.r1}" "{input.r2}" > {log.out} 2> {log.err} + trim_galore --output_dir FASTQ_TrimGalore --paired --stringency 3 {params.opts} "{input.r1}" "{input.r2}" mv "{params.tmp1}" "{output.r1}" mv "{params.tmp2}" "{output.r2}" """ @@ -121,9 +106,6 @@ else: params: tmp = 
"FASTQ_TrimGalore/{sample}" + reads[0] + "_trimmed.fq.gz", opts = lambda wildcards: str(trimmerOptions or '') - log: - out = "FASTQ_TrimGalore/logs/TrimGalore.{sample}.out", - err = "FASTQ_TrimGalore/logs/TrimGalore.{sample}.err" benchmark: "FASTQ_TrimGalore/.benchmark/TrimGalore.{sample}.benchmark" conda: CONDA_SHARED_ENV @@ -141,15 +123,12 @@ if pairedEnd: fastq_dir+"/{sample}{read}.fastq.gz" output: "FastQC_trimmed/{sample}{read}_fastqc.html" - log: - out = "FastQC_trimmed/logs/FastQC_trimmed.{sample}{read}.out", - err = "FastQC_trimmed/logs/FastQC_trimmed.{sample}{read}.err" benchmark: "FastQC_trimmed/.benchmark/FastQC_trimmed.{sample}{read}.benchmark" threads: lambda wildcards: 2 if 2 {log.out} 2> {log.err} + fastqc -o FastQC_trimmed "{input}" """ else: rule FastQC_on_trimmed_SE: @@ -157,13 +136,10 @@ else: fastq_dir+"/{sample}"+reads[0]+".fastq.gz" output: "FastQC_trimmed/{sample}"+reads[0]+"_fastqc.html" - log: - out = "FastQC_trimmed/logs/FastQC_trimmed.{sample}"+reads[0]+".out", - err = "FastQC_trimmed/logs/FastQC_trimmed.{sample}"+reads[0]+".err" benchmark: "FastQC_trimmed/.benchmark/FastQC_trimmed.{sample}"+reads[0]+".benchmark" threads: lambda wildcards: 2 if 2 {log.out} 2> {log.err} + fastqc -o FastQC_trimmed "{input}" """ diff --git a/snakePipes/shared/rules/umi_tools.snakefile b/snakePipes/shared/rules/umi_tools.snakefile index 1d282001a..152ea6dec 100644 --- a/snakePipes/shared/rules/umi_tools.snakefile +++ b/snakePipes/shared/rules/umi_tools.snakefile @@ -11,15 +11,12 @@ if UMIBarcode: r2 = "FASTQ/{sample}"+reads[1]+".fastq.gz" params: bcpattern = str(bcPattern) - log: - out = "FASTQ/logs/{sample}_log.out", - err = "FASTQ/logs/{sample}_log.err" conda: CONDA_SHARED_ENV shell:""" umi_tools extract -I {input.r1} --read2-in={input.r2} \ --bc-pattern={params.bcpattern} --bc-pattern2={params.bcpattern}\ --stdout={output.r1} \ - --read2-out={output.r2} -L {log.out} -E {log.err} + --read2-out={output.r2} """ else: @@ -30,13 +27,10 @@ if UMIBarcode: r1 = 
"FASTQ/{sample}"+reads[0]+".fastq.gz", params: bcpattern = str(bcPattern) - log: - out = "FASTQ/logs/{sample}_log.out", - err = "FASTQ/logs/{sample}_log.err" conda: CONDA_SHARED_ENV shell: """ umi_tools extract -I {input.r1} --stdout={output.r1} \ - --bc-pattern={params.bcpattern} -L {log.out} -E {log.err} + --bc-pattern={params.bcpattern} """ else: @@ -49,7 +43,7 @@ else: ln -s ../{input} {output} """ - if pairedEnd or pipeline=="scrna-seq": + if pairedEnd or pipeline=="scrnaseq": rule FASTQ2: input: "originalFASTQ/downsample_{sample}"+reads[1]+".fastq.gz" if downsample else "originalFASTQ/{sample}"+reads[1]+".fastq.gz" @@ -59,7 +53,7 @@ else: ln -s ../{input} {output} """ -#If DNA-mapping: +#If DNAmapping: if UMIDedup: rule filter_reads_umi: input: @@ -71,13 +65,10 @@ if UMIDedup: umitools_options = str(UMIDedupOpts or ''), umitools_paired = "--paired " if pairedEnd else " ", umi_sep = str(UMIDedupSep), - log: - out = "filtered_bam/logs/umi_dedup.{sample}.out", - err = "filtered_bam/logs/umi_dedup.{sample}.err" conda: CONDA_SHARED_ENV shell: """ umi_tools dedup -I {input.bamfile} \ - -S {output.bamfile} -L {log.out} -E {log.err} \ + -S {output.bamfile} \ --umi-separator {params.umi_sep} \ {params.umitools_paired} {params.umitools_options} """ @@ -108,6 +99,5 @@ if not (aligner=="bwameth" or aligner=="bwameth2"): "filtered_bam/{sample}.filtered.bam" output: "filtered_bam/{sample}.filtered.bam.bai" - log: "filtered_bam/logs/{sample}.index.log" conda: CONDA_SHARED_ENV - shell: "samtools index {input} 2> {log}" + shell: "samtools index {input}" diff --git a/snakePipes/shared/tools/TSS_to_windows.py b/snakePipes/shared/tools/TSS_to_windows.py index 337f095c4..4eddaa43c 100644 --- a/snakePipes/shared/tools/TSS_to_windows.py +++ b/snakePipes/shared/tools/TSS_to_windows.py @@ -16,10 +16,10 @@ size = args.size bam = pysam.AlignmentFile(bamf) -chroms_sizes = dict(zip(bam.references, bam.lengths)) +chroms_sizes = dict(zip(bam.references, bam.lengths, strict=False)) with 
open(inf) as f, open(outf, 'w') as of: - for idx, line in enumerate(f): + for line in f: linesplit = line.split('\t') chr = linesplit[0] if chr in chroms_sizes.keys(): diff --git a/snakePipes/shared/tools/correct_sc_counts.py b/snakePipes/shared/tools/correct_sc_counts.py index 714c8dab6..cc809a215 100755 --- a/snakePipes/shared/tools/correct_sc_counts.py +++ b/snakePipes/shared/tools/correct_sc_counts.py @@ -35,7 +35,7 @@ UMICounts[cols[0]] = [0] * (len(cols) - 2) # Add the read counts - readCounts[cols[0]] = [x + int(y) for x, y in zip(readCounts[cols[0]], cols[2:])] + readCounts[cols[0]] = [x + int(y) for x, y in zip(readCounts[cols[0]], cols[2:], strict=False)] for idx, cnt in enumerate(cols[2:]): if cnt == '0': diff --git a/snakePipes/shared/tools/deeptools_cmds.snakefile b/snakePipes/shared/tools/deeptools_cmds.snakefile index 7925f5348..fd6f51968 100755 --- a/snakePipes/shared/tools/deeptools_cmds.snakefile +++ b/snakePipes/shared/tools/deeptools_cmds.snakefile @@ -13,7 +13,7 @@ bamcompare_log2_cmd = """ --binSize {params.bwBinSize} \ -p {threads} \ {params.read_extension} \ - {params.blacklist} > {log.out} 2> {log.err} + {params.blacklist} """ # bamcompare subtract @@ -27,7 +27,7 @@ bamcompare_subtract_cmd = """ --binSize {params.bwBinSize} \ -p {threads} \ {params.read_extension} \ - {params.blacklist} > {log.out} 2> {log.err} + {params.blacklist} """ # bamCoverage RAW @@ -35,7 +35,7 @@ bamcov_raw_cmd = """ bamCoverage -b {input.bam} \ -o {output} \ --binSize {params.bwBinSize} \ - -p {threads} > {log.out} 2> {log.err} + -p {threads} """ # bamCoverage RPKM @@ -43,7 +43,7 @@ bamcov_RPKM_cmd = """ bamCoverage -b {input.bam} \ -o {output} --binSize {params.bwBinSize} \ -p {threads} --normalizeUsing RPKM {params.ignoreForNorm} \ - {params.blacklist} > {log.out} 2> {log.err} + {params.blacklist} """ # bamCoverage three-prime-seq non-unique, strand-specific mappings @@ -53,7 +53,7 @@ bamcov_3pseq_cmd = """ bamCoverage -b {input.bam} \ -o {output.bw_fwd} 
--binSize {params.bwBinSize} \ --Offset 1 --samFlagExclude 128 --filterRNAstrand {params.direction} \ - -p {threads} --skipNAs > {log.out} 2> {log.err} + -p {threads} --skipNAs """ @@ -62,11 +62,11 @@ bamcov_unique_cmd = """ bamCoverage -b {input.bam} \ -o {output.bw_fwd} --binSize {params.bwBinSize} \ --minMappingQuality 10 --samFlagExclude 2304 --filterRNAstrand forward \ - -p {threads} > {log.out} 2> {log.err} + -p {threads} bamCoverage -b {input.bam} \ -o {output.bw_rev} --binSize {params.bwBinSize} \ --minMappingQuality 10 --samFlagExclude 2304 --filterRNAstrand reverse \ - -p {threads} >> {log.out} 2>> {log.err} + -p {threads} """ @@ -76,11 +76,11 @@ bamcov_cmd = """ -o {output} \ --binSize {params.bwBinSize} \ -p {threads} \ - --normalizeUsing RPGC \ + --normalizeUsing RPKM \ --effectiveGenomeSize {params.genome_size} \ {params.ignoreForNorm} \ {params.blacklist} \ - {params.read_extension} > {log.out} 2> {log.err} + {params.read_extension} """ bamcov_spikein_cmd = """ @@ -93,7 +93,7 @@ bamcov_spikein_cmd = """ {params.ignoreForNorm} \ {params.blacklist} \ {params.scaling_factors} \ - {params.read_extension} > {log.out} 2> {log.err} + {params.read_extension} """ ## computeGC bias (DNA), requires params.median_fragment_length @@ -106,7 +106,7 @@ gcbias_cmd = """ {params.median_fragment_length} \ --sampleSize {params.sampleSize} \ {params.blacklist} \ - -p {threads} > {log.out} 2> {log.err} + -p {threads} """ # plot Enrichment (RNAseq) @@ -118,7 +118,7 @@ plotEnrich_cmd = """ --labels {params.labels} \ --plotTitle 'Fraction of reads in regions' \ --outRawCounts {output} \ - --variableScales > {log.out} 2> {log.err} + --variableScales """ # plot Enrichment (ChIPSeq) @@ -134,10 +134,10 @@ plotEnrich_chip_cmd = """ {params.blacklist} \ -p {threads} \ {params.read_extension} \ - --ignoreDuplicates > {log.out} 2> {log.err} + --ignoreDuplicates """ -#plot fingerprint (ChIP-seq) +#plot fingerprint (ChIPseq) plotFingerprint_cmd = """ plotFingerprint \ -b {input.bams} 
\ @@ -149,7 +149,7 @@ plotFingerprint_cmd = """ {params.blacklist} \ {params.png} \ {params.read_extension} \ - {params.jsd} > {log.out} 2> {log.err} + {params.jsd} """ @@ -164,7 +164,7 @@ multiBamSummary_cmd = """ {params.binSize} \ {params.spikein_region} \ -p {threads} \ - {params.read_extension} > {log.out} 2> {log.err} + {params.read_extension} """ # multiBAMsum ChIP with spikein @@ -178,7 +178,7 @@ multiBamSummary_spikein_cmd = """ {params.scaling_factors} \ {params.binSize} \ -p {threads} \ - {params.read_extension} > {log.out} 2> {log.err} + {params.read_extension} """ # multiBWsum RNA @@ -189,7 +189,7 @@ multiBWsum_bed_cmd = """ -o {output} \ --labels {params.labels} \ --binSize 1000 \ - -p {threads} > {log.out} 2> {log.err} + -p {threads} """ # multiBamSum RNA @@ -202,7 +202,7 @@ multiBamSum_bed_cmd = """ --binSize 100 \ --scalingFactors {output.scalingFactors} \ {params.blacklist} \ - -p {threads} > {log.out} 2> {log.err} + -p {threads} """ ## plot Corr (both), requires params.label @@ -216,7 +216,7 @@ plotCorr_cmd = """ --plotTitle 'Pearson correlation of {params.title} coverage' \ --outFileCorMatrix {output} \ --colorMap PuBuGn \ - --plotNumbers > {log.out} 2> {log.err} + --plotNumbers """ ## plot Corr Spearman (both), requires params.label @@ -230,7 +230,7 @@ plotCorrSP_cmd = """ --plotTitle 'Spearman correlation of {params.title} coverage' \ --outFileCorMatrix {output} \ --colorMap PuBuGn \ - --plotNumbers > {log.out} 2> {log.err} + --plotNumbers """ # plot PCA (both), requires params.label @@ -240,7 +240,7 @@ plotPCA_cmd = """ --transpose \ --outFileNameData {output} \ --plotWidth 10 \ - -T 'PCA of {params.title} coverage' > {log.out} 2> {log.err} + -T 'PCA of {params.title} coverage' """ # plot Coverage @@ -252,13 +252,13 @@ plotCoverage_cmd = """ --plotTitle 'Genome fragment coverage without duplicates' \ -p {threads} \ {params.read_extension} \ - --ignoreDuplicates > {log.out} 2> {log.err} + --ignoreDuplicates """ #EstimateReadFiltering 
estimateReadFiltering_cmd = """ estimateReadFiltering -b {input.bam} \ - -o {output} > {log.out} 2> {log.err} + -o {output} """ #bamPEFragmentSize @@ -267,5 +267,5 @@ bamPEFragmentSize_cmd = """ --bamfiles {input.bams} \ --binSize 1000000 \ {params.plotcmd} \ - --table {output} -p {threads} > {log.out} 2> {log.err} + --table {output} -p {threads} """ diff --git a/snakePipes/shared/tools/three_prime_seq/clusterPAS.py b/snakePipes/shared/tools/three_prime_seq/clusterPAS.py index 0ef722644..21e226240 100755 --- a/snakePipes/shared/tools/three_prime_seq/clusterPAS.py +++ b/snakePipes/shared/tools/three_prime_seq/clusterPAS.py @@ -25,7 +25,7 @@ def cluster_pas(args): gene = entry[4] annotation = entry[7] - if count > minReads: + if count > minReads: if gene not in clusters: clusters[gene] = {} #cluster start, cluster end, max count, gene, annotation, summit @@ -34,7 +34,7 @@ def cluster_pas(args): newCluster = True for c in clusters[gene]: if pas > clusters[gene][c][0] - window and pas < clusters[gene][c][1] + window: - clusters[gene][c][0] = min(pas, clusters[gene][c][0]) + clusters[gene][c][0] = min(pas, clusters[gene][c][0]) clusters[gene][c][1] = max(pas + 1, clusters[gene][c][1]) clusters[gene][c][2] += count if count > clusters[gene][c][2]: @@ -46,7 +46,7 @@ def cluster_pas(args): #not close to any existing cluster => new cluster if newCluster: - clusters[gene][pas] = [pas, pas + 1, count, gene, annotation, pas, strand, chrom] + clusters[gene][pas] = [pas, pas + 1, count, gene, annotation, pas, strand, chrom] line = In.readline() diff --git a/snakePipes/shared/tools/three_prime_seq/findSitesMM.py b/snakePipes/shared/tools/three_prime_seq/findSitesMM.py index be1f097d6..7fc66759a 100755 --- a/snakePipes/shared/tools/three_prime_seq/findSitesMM.py +++ b/snakePipes/shared/tools/three_prime_seq/findSitesMM.py @@ -4,6 +4,7 @@ import py2bit from deeptoolsintervals import GTF, tree from deeptoolsintervals.parse import openPossiblyCompressed, parseExonBounds, 
findRandomLabel +import sys parser = argparse.ArgumentParser(description="Generate a blacklist file of polyX stretches of a given minimum length not within a specified distance of a TES") parser.add_argument("--output", "-o", help="Output file", required=True) @@ -113,7 +114,7 @@ def __init__(self, fname, minDistance=100, strand="+"): assert(line) # This will only fail on empty files line = line.strip() - ftype = self.inferType(fp, line, labelColumn) + self.ftype = self.inferType(fp, line, labelColumn) self.parseBED(fp, line, 12, labelColumn) fp.close() @@ -163,7 +164,7 @@ def processLast(last, chrom, idx, idx2, o, bed): for chrom, chromLength in tb.chroms().items(): s = tb.sequence(chrom) - + idx = 0 idx2 = 0 while idx < chromLength - args.windowLength: diff --git a/snakePipes/shared/tools/three_prime_seq/mergeReadEnds.py b/snakePipes/shared/tools/three_prime_seq/mergeReadEnds.py index fae2cd27d..b5786a727 100644 --- a/snakePipes/shared/tools/three_prime_seq/mergeReadEnds.py +++ b/snakePipes/shared/tools/three_prime_seq/mergeReadEnds.py @@ -10,7 +10,7 @@ def munge(infiles, samples): final_df = list() - for fn, sample in zip(infiles, samples): + for fn, sample in zip(infiles, samples, strict=False): df = pd.read_table(fn, header=0, index_col=None) df['Sample'] = sample df = df[['Gene', 'Counts', 'Sample']] @@ -35,4 +35,4 @@ def main(argv): if __name__ == "__main__": - main(sys.argv[1:]) \ No newline at end of file + main(sys.argv[1:]) diff --git a/bin/snakePipes b/snakePipes/snakePipes.py similarity index 74% rename from bin/snakePipes rename to snakePipes/snakePipes.py index f74a022dd..63b49f6d1 100755 --- a/bin/snakePipes +++ b/snakePipes/snakePipes.py @@ -9,8 +9,8 @@ import hashlib import shutil import snakePipes.common_functions as cof -from snakePipes import __version__ - +from importlib.metadata import version +from pathlib import Path def parse_arguments(): parser = argparse.ArgumentParser( @@ -20,7 +20,7 @@ def parse_arguments(): subparsers = 
parser.add_subparsers(title="Commands", dest="command") - infoParser = subparsers.add_parser( + subparsers.add_parser( "info", help="Print the location of the various yaml files" ) @@ -31,21 +31,11 @@ def parse_arguments(): "option will result in ALL conda environments being recreated.", ) - envInfoParser = subparsers.add_parser( + subparsers.add_parser( "envInfo", help="Prints the location in which each conda environment is actually stored.", ) - - createEnvsParser.add_argument( - "--autodetectCondaEnvDir", - action="store_true", - help="If specified, this will set condaEnvDir to system conda prefix," - "and will overwrite the condaEnvDir entry in defaults.yaml ." - "Use with caution." - ) - - createEnvsParser.add_argument( "--only", nargs="+", @@ -54,12 +44,6 @@ def parse_arguments(): "possible environments are: {}".format(cof.set_env_yamls().keys()), ) -# createEnvsParser.add_argument( -# "--force", -# action="store_true", -# help="Force creation of conda environments, even if they apparently exist.", -# ) - createEnvsParser.add_argument( "--info", "-i", @@ -127,15 +111,6 @@ def parse_arguments(): default=defaults["organismsDir"], ) - configParser.add_argument( - "--clusterConfig", - help="The YAML file containing the snakeMake cluster command and global " - "memory settings. Both absolute and relative paths are supported. " - "In the latter case the path is then relative to the snakePipes " - "installation directory. (Default: %(default)s)", - default=defaults["clusterConfig"], - ) - configParser.add_argument( "--tempDir", help="A custom directory where temporary files should be written. This " @@ -208,11 +183,21 @@ def info(): """ Print the locations of EVERY yaml file. Break these up a bit so it's clear what they actually belong to. Print path to tempDir and check that it exists. 
""" + print(25*"-" + " Info " + 25*"-" + "\n") baseDir = os.path.dirname(snakePipes.__file__) cfg = cof.load_configfile( os.path.join(baseDir, "shared", "defaults.yaml"), False, "defaults" ) + # defaults.yaml under shared + print(f"The global configuration file is:\n {Path(baseDir) / 'shared' / 'defaults.yaml'}") + + # tempDir + tempDir = cfg["tempDir"] + print(f" --> tempDir in the global configuration = {tempDir}") + snakemakeProfile = cfg["snakemakeProfile"] + print(f" --> The snakemake profile used = {cof.resolveSnakemakeProfile(snakemakeProfile, baseDir)}\n") + # Organism yaml files print("Organism YAML files:") orgDir = cfg["organismsDir"] @@ -221,34 +206,7 @@ def info(): for f in glob.glob(os.path.join(orgDir, "*.yaml")): print(" {}".format(f)) - # defaults.yaml under shared - print( - "\nThe workflow-generic defaults.yaml file is:\n {}".format( - os.path.join(baseDir, "shared/defaults.yaml") - ) - ) - # cluster.yaml - clusterConfig = cfg["clusterConfig"] - if not os.path.isfile(clusterConfig): - clusterConfig = os.path.join(baseDir, clusterConfig) - print( - "\nThe default cluster.yaml file. 
Its defaults are overridden by the per-workflow cluster.yaml files:\n {}".format( - clusterConfig - ) - ) - print("\nWorkflow-specific cluster.yaml and defaults.yaml files are in:") - for f in glob.glob(os.path.join(baseDir, "workflows/*/cluster.yaml")): - print(" {}".format(os.path.dirname(f))) - - # tempDir - tempDir = cfg["tempDir"] - msg = ["\nTemp dir under {} ".format(tempDir)] - if os.path.isdir(tempDir): - msg.append("exists and will be used.") - else: - msg.append("does not exist and /tmp will be used instead.") - print("".join(msg)) def envInfo(): @@ -261,8 +219,17 @@ def envInfo(): cf = yaml.load(f, Loader=yaml.FullLoader) f.close() - condaEnvDir=cf["condaEnvDir"] + # Properly resolve the snakemake profile path + profilePath = cof.resolveSnakemakeProfile(cf['snakemakeProfile'], baseDir) + # Find out condaEnvDir from snakemake profile + f = open(profilePath / 'config.yaml') + _p = yaml.load(f, Loader=yaml.FullLoader) + f.close() + if 'conda-prefix' in _p: + condaEnvDir = _p['conda-prefix'].replace("$USER", os.environ.get("USER")) + else: + condaEnvDir = detectCondaDir() for env in cof.set_env_yamls().values(): # Hash the file ala snakemake @@ -272,8 +239,7 @@ def envInfo(): md5hash.update(f.read()) f.close() h = md5hash.hexdigest() - - print("{} is in:\n {}\n".format(env, os.path.join(condaEnvDir, h))) + print(f"{env}: {Path(condaEnvDir, h)}") def fixSitePy(envPath): @@ -301,62 +267,98 @@ def createCondaEnvs(args): """ Create all of the conda environments """ + print(25*"-" + " createEnvs " + 25*"-" + "\n") + baseDir = os.path.dirname(snakePipes.__file__) f = open(os.path.join(baseDir, "shared/defaults.yaml")) cf = yaml.load(f, Loader=yaml.FullLoader) f.close() - condaEnvDir=cf["condaEnvDir"] - condaDirUse=condaEnvDir + # Properly resolve the snakemake profile path + profilePath = cof.resolveSnakemakeProfile(cf['snakemakeProfile'], baseDir) + + # Find out condaEnvDir from snakemake profile + f = open(profilePath / 'config.yaml') + _p = yaml.load(f, 
Loader=yaml.FullLoader) + f.close() + if 'conda-prefix' in _p: + # For now $USER can be set in this path, resolve this explicitely. + condaEnvDir = _p['conda-prefix'].replace("$USER", os.environ.get("USER")) + _prefsource = f"Snakemakeprofile: {profilePath.name}" + else: + # no condaEnvDir set in profile, thus assume we can detect it + condaEnvDir = detectCondaDir() + _prefsource = f"Environment: $CONDA_PREFIX = {os.environ.get('CONDA_PREFIX')}" - if args.autodetectCondaEnvDir: - condaDirUse=detectCondaDir() - # rewrite defaults.yaml - cof.write_configfile(os.path.join(baseDir, "shared/defaults.yaml"), cf) + # Remove trailing slashes as they screw up the hash calculation + if condaEnvDir[-1] == '/': + condaEnvDir = condaEnvDir[:-1] + print(f"profile used: {profilePath}") + print(f"CondaEnvDir detected as: {condaEnvDir}, from {_prefsource}\n") + + # if mamba is not installed, conda-frontend should be set + if not shutil.which('mamba') and 'conda-frontend' not in _p: + print( + f"WARNING: No mamba detected in your path and conda-frontend not set. Set 'conda-fronted: conda' in {profilePath.name}" + ) + if 'use-conda' not in _p: + print( + f"WARNING: Your profile ({profilePath.name}) should have 'use-conda: True' !" + ) + if 'conda-prefix' not in _p: + print( + f"WARNING: Your profile ({profilePath.name}) does not have 'conda-prefix' set. Environments will go in your default envs folder." 
+ ) + + numberEnvs = len(cof.set_env_yamls().keys()) + if args.only is not None: + numberEnvs = len(args.only) + envNum = 0 for envName, env in cof.set_env_yamls().items(): if args.only is not None and envName not in args.only: continue + envNum += 1 # Hash the file ala snakemake md5hash = hashlib.md5() - md5hash.update(condaDirUse.encode()) + md5hash.update(condaEnvDir.encode()) f = open(os.path.join(baseDir, "shared/rules", env), "rb") md5hash.update(f.read()) f.close() h = md5hash.hexdigest() - sys.stderr.write( - "Creating environment from {} in {}\n".format( - os.path.join(baseDir, "shared/rules", env), condaDirUse - ) - ) cmd = [ - "mamba", + "conda", "env", "create", + '-q', "--file", os.path.join(baseDir, "shared/rules", env), ] - cmd += ["--prefix", os.path.join(condaDirUse, h)] + cmd += ["--prefix", os.path.join(condaEnvDir, h)] - # Don't actually create the env if either --info is set or it already exists and --force is NOT set + # Don't actually create the env if either --info is set if not args.info: - if not os.path.exists(os.path.join(condaDirUse, h)): + if not os.path.exists(os.path.join(condaEnvDir, h)): + print(f"Creating environment ({envNum}/{numberEnvs}) from {env} with hash {h}") try: - os.makedirs(os.path.join(condaDirUse, h), exist_ok=True) + os.makedirs(os.path.join(condaEnvDir, h), exist_ok=True) subprocess.check_call(cmd) except: # Ensure an environment is fully removed on error - shutil.rmtree(os.path.join(condaDirUse, h), ignore_errors=False) + shutil.rmtree(os.path.join(condaEnvDir, h), ignore_errors=False) sys.exit("There was an error when creating the environments!\n") + else: + print(f"Environment ({envNum}/{numberEnvs}) from {env} with hash {h} already exists!") + else: + if not os.path.exists(os.path.join(condaEnvDir, h)): + print(f"Would create environment ({envNum}/{numberEnvs}) from {env} with hash {h}") + else: + print(f"Environment ({envNum}/{numberEnvs}) from {env} with hash {h} already exists!") # Ignore site-packages if 
args.noSitePackages and not args.info: - fixSitePy(os.path.join(condaDirUse, h)) - - # Ignore site-packages in this env - if args.noSitePackages and not args.info: - fixSitePy(rootDir) + fixSitePy(os.path.join(condaEnvDir, h)) def detectCondaDir(): "Detect the default conda folder." @@ -381,7 +383,6 @@ def updateConfig(args): "snakemakeOptions": args.snakemakeOptions, "condaEnvDir": args.condaEnvDir, "organismsDir": args.organismsDir, - "clusterConfig": args.clusterConfig, "tempDir": args.tempDir, "smtpServer": args.smtpServer, "smtpPort": args.smtpPort, @@ -400,26 +401,20 @@ def updateConfig(args): if args.organismsDir: od = {"organismsDir": args.organismsDir} d.update(od) - if args.clusterConfig: - od = {"clusterConfig": args.clusterConfig} - d.update(od) if not currentDict.keys() & d.keys(): sys.exit("The old and the new config have no matching keys!!!\n") else: sys.exit("Config file not found\n") updatedDict = cof.merge_dicts(currentDict, d) cof.write_configfile(os.path.join(baseDir, "shared", "defaults.yaml"), updatedDict) - newDict = cof.load_configfile( + cof.load_configfile( os.path.join(baseDir, "shared", "defaults.yaml"), True, "Final Updated Config" ) - -def version(): - print("version {}".format(__version__)) - - -def main(args): - args = parse_arguments().parse_args(args) +def main(): + if len(sys.argv) == 1: + sys.argv.append("--help") + args = parse_arguments().parse_args(sys.argv[1:]) if args.command == "info": info() elif args.command == "envInfo": @@ -429,12 +424,7 @@ def main(args): elif args.command == "config": updateConfig(args) elif args.command == "version": - version() + _v = version("snakePipes") + print(f"snakePipes version {_v}") else: createCondaEnvs(args) - - -if __name__ == "__main__": - if len(sys.argv) == 1: - sys.argv.append("--help") - main(sys.argv[1:]) diff --git a/snakePipes/workflows/ATAC-seq/cluster.yaml b/snakePipes/workflows/ATAC-seq/cluster.yaml deleted file mode 100644 index e49ee30a8..000000000 --- 
a/snakePipes/workflows/ATAC-seq/cluster.yaml +++ /dev/null @@ -1,14 +0,0 @@ -ATAC_qc: - memory: 2G -MACS2: - memory: 8G -CSAW: - memory: 30G -plot_heatmap_cov_CSAW: - memory: 5G -Genrich_peaks: - memory: 20G -namesort_bams: - memory: 6G -filterCoveragePerScaffolds: - memory: 6G diff --git a/snakePipes/workflows/ATAC-seq/ATAC-seq b/snakePipes/workflows/ATACseq/ATACseq.py similarity index 96% rename from snakePipes/workflows/ATAC-seq/ATAC-seq rename to snakePipes/workflows/ATACseq/ATACseq.py index 386f8b457..1b67cff80 100755 --- a/snakePipes/workflows/ATAC-seq/ATAC-seq +++ b/snakePipes/workflows/ATACseq/ATACseq.py @@ -1,10 +1,8 @@ -#!/usr/bin/env python3 - __description__ = """ -MPI-IE workflow for ATAC-seq Analysis +MPI-IE workflow for ATACseq Analysis usage example: - ATAC-seq -d working-dir mm10 + ATACseq -d working-dir mm10 """ import argparse @@ -69,7 +67,7 @@ def parse_args(defaults={"verbose": False, "configFile": None, optional.add_argument("--sampleSheet", dest="sampleSheet", - help="Invoke differential accessibility analysis by providing information on samples; see 'https://github.com/maxplanck-ie/snakepipes/tree/master/docs/content/sampleSheet.example.tsv' for example. IMPORTANT: The first entry defines which group of samples are control. With this, the order of comparison and likewise the sign of values can be changed! Also, the condition `control` should not be used (reserved to mark input samples in the ChIP-Seq workflow (default: '%(default)s').", + help="Invoke differential accessibility analysis by providing information on samples; see 'https://github.com/maxplanck-ie/snakepipes/tree/master/docs/content/sampleSheet.example.tsv' for example. IMPORTANT: The first entry defines which group of samples are control. With this, the order of comparison and likewise the sign of values can be changed! 
Also, the condition `control` should not be used (reserved to mark input samples in the ChIPSeq workflow (default: '%(default)s').", default=defaults["sampleSheet"]) optional.add_argument("--externalBed", @@ -123,7 +121,7 @@ def main(): cf.runAndCleanup(args, snakemake_cmd, logfile_name) #CreateDAG - cf.print_DAG(args,snakemake_cmd, __file__,defaults) + cf.plot_DAG(args,snakemake_cmd, __file__,defaults) if __name__ == "__main__": diff --git a/snakePipes/workflows/ATAC-seq/Snakefile b/snakePipes/workflows/ATACseq/Snakefile similarity index 96% rename from snakePipes/workflows/ATAC-seq/Snakefile rename to snakePipes/workflows/ATACseq/Snakefile index eda4bf494..a47f9fae4 100755 --- a/snakePipes/workflows/ATAC-seq/Snakefile +++ b/snakePipes/workflows/ATACseq/Snakefile @@ -40,7 +40,7 @@ include: os.path.join(maindir, "shared", "rules", "deepTools_ATAC.snakefile") #import multiQC include: os.path.join(maindir, "shared", "rules", "multiQC.snakefile") -# ATAC-seq open chromatin +# ATACseq open chromatin include: os.path.join(maindir, "shared", "rules", "ATAC.snakefile") # ATAC QC open chromatin @@ -175,7 +175,7 @@ onstart: if toolsVersion: usedEnvs = [CONDA_SHARED_ENV, CONDA_ATAC_ENV, CONDA_RMD_ENV] - cf.writeTools(usedEnvs, workingdir, "ATAC-seq", maindir) + cf.writeTools(usedEnvs, workingdir, "ATACseq", maindir) if sampleSheet: cf.copySampleSheet(sampleSheet, workingdir) @@ -204,9 +204,8 @@ rule all: ### execute after workflow finished ############################################ ################################################################################ onsuccess: - cf.cleanLogs(workingdir, cluster_config) if "verbose" in config and config["verbose"]: - print("\n--- The ATAC-seq open chromatin workflow finished successfully! --------------------------------\n") + print("\n--- The ATACseq open chromatin workflow finished successfully! --------------------------------\n") onerror: - print("\n !!! ERROR in the ATAC-seq open chromatin workflow! 
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n") + print("\n !!! ERROR in the ATACseq open chromatin workflow! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n") diff --git a/snakePipes/workflows/ATAC-seq/defaults.yaml b/snakePipes/workflows/ATACseq/defaults.yaml similarity index 88% rename from snakePipes/workflows/ATAC-seq/defaults.yaml rename to snakePipes/workflows/ATACseq/defaults.yaml index 64e332020..f8e68b9bd 100644 --- a/snakePipes/workflows/ATAC-seq/defaults.yaml +++ b/snakePipes/workflows/ATACseq/defaults.yaml @@ -1,8 +1,8 @@ ################################################################################ -# This file is the default configuration of the ATAC-seq workflow! +# This file is the default configuration of the ATACseq workflow! # # In order to adjust some parameters, please either use the wrapper script -# (eg. /path/to/snakemake_workflows/workflows/ATAC-seq/ATAC-seq) +# (eg. /path/to/snakemake_workflows/workflows/ATACseq/ATACseq) # or save a copy of this file, modify necessary parameters and then provide # this file to the wrapper or snakmake via '--configFile' option # (see below how to call the snakefile directly) @@ -11,12 +11,12 @@ # can be used in new/extended snakemake rules! ################################################################################ ## General/Snakemake parameters, only used/set by wrapper or in Snakemake cmdl, but not in Snakefile -pipeline: ATAC-seq +pipeline: ATACseq configFile: clusterConfigFile: local: false maxJobs: 5 -## workingdir need to be required DNA-mapping output dir, 'outdir' is set to workingdir internally +## workingdir need to be required DNAmapping output dir, 'outdir' is set to workingdir internally workingdir: ## preconfigured target genomes (mm9,mm10,dm3,...) , see /path/to/snakemake_workflows/shared/organisms/ ## Value can be also path to your own genome config file! 
diff --git a/snakePipes/workflows/ATAC-seq/internals.snakefile b/snakePipes/workflows/ATACseq/internals.snakefile similarity index 100% rename from snakePipes/workflows/ATAC-seq/internals.snakefile rename to snakePipes/workflows/ATACseq/internals.snakefile diff --git a/snakePipes/workflows/ChIP-seq/cluster.yaml b/snakePipes/workflows/ChIP-seq/cluster.yaml deleted file mode 100644 index 065265642..000000000 --- a/snakePipes/workflows/ChIP-seq/cluster.yaml +++ /dev/null @@ -1,14 +0,0 @@ -MACS2: - memory: 8G -histoneHMM: - memory: 4G -CSAW: - memory: 10G -Genrich_peaks: - memory: 20G -namesort_bams: - memory: 6G -SEACR_peaks_stringent: - memory: 10G -SEACR_peaks_lenient: - memory: 20G diff --git a/snakePipes/workflows/ChIP-seq/ChIP-seq b/snakePipes/workflows/ChIPseq/ChIPseq.py similarity index 97% rename from snakePipes/workflows/ChIP-seq/ChIP-seq rename to snakePipes/workflows/ChIPseq/ChIPseq.py index 3e0d0d020..8772754b3 100755 --- a/snakePipes/workflows/ChIP-seq/ChIP-seq +++ b/snakePipes/workflows/ChIPseq/ChIPseq.py @@ -1,10 +1,8 @@ -#!/usr/bin/env python3 - __description__ = """ -MPI-IE workflow for ChIP-seq analysis +MPI-IE workflow for ChIPseq analysis Usage example: - ChIP-seq -d working-dir mm10 samples.yaml + ChIPseq -d working-dir mm10 samples.yaml """ import argparse @@ -124,7 +122,7 @@ def parse_args(defaults={"verbose": False, "configFile": None, optional.add_argument("--predictChIPDict", nargs='?', action='store', - help="Use existing bam files to predict a CHiP-seq sample configuration file. Write it to the workingdir. " + help="Use existing bam files to predict a ChIPseq sample configuration file. Write it to the workingdir. " "If no value is given, samples that contain 'input' are used as ChIP input/ctrl. 
Provide a custom pattern like 'input,H3$,H4$' to change that!", default= None, const="input") @@ -191,7 +189,7 @@ def main(): cf.runAndCleanup(args, snakemake_cmd, logfile_name) #CreateDAG - cf.print_DAG(args,snakemake_cmd, __file__,defaults) + cf.plot_DAG(args,snakemake_cmd, __file__,defaults) if __name__ == "__main__": diff --git a/snakePipes/workflows/ChIP-seq/Snakefile b/snakePipes/workflows/ChIPseq/Snakefile similarity index 98% rename from snakePipes/workflows/ChIP-seq/Snakefile rename to snakePipes/workflows/ChIPseq/Snakefile index 610b852aa..beb038a9f 100755 --- a/snakePipes/workflows/ChIP-seq/Snakefile +++ b/snakePipes/workflows/ChIPseq/Snakefile @@ -250,7 +250,7 @@ onstart: if toolsVersion: usedEnvs = [CONDA_SHARED_ENV, CONDA_CHIPSEQ_ENV] - cf.writeTools(usedEnvs, workingdir, "ChIP-seq", maindir) + cf.writeTools(usedEnvs, workingdir, "ChIPseq", maindir) if sampleSheet: cf.copySampleSheet(sampleSheet, workingdir) @@ -276,12 +276,11 @@ rule all: ### execute after workflow finished ############################################ ################################################################################ onsuccess: - cf.cleanLogs(workingdir, cluster_config) if "verbose" in config and config["verbose"]: - print("\n--- ChIP-seq workflow finished successfully! -----------------------------------\n") + print("\n--- ChIPseq workflow finished successfully! -----------------------------------\n") onerror: - print("\n !!! ERROR in ChIP-seq workflow! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n") + print("\n !!! ERROR in ChIPseq workflow! 
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n") ## benchmark ################################################################### diff --git a/snakePipes/workflows/ChIP-seq/defaults.yaml b/snakePipes/workflows/ChIPseq/defaults.yaml similarity index 89% rename from snakePipes/workflows/ChIP-seq/defaults.yaml rename to snakePipes/workflows/ChIPseq/defaults.yaml index 3608e0a67..56dc3401d 100755 --- a/snakePipes/workflows/ChIP-seq/defaults.yaml +++ b/snakePipes/workflows/ChIPseq/defaults.yaml @@ -1,8 +1,8 @@ ################################################################################ -# This file is the default configuration of the ChIP-seq workflow! +# This file is the default configuration of the ChIPseq workflow! # # In order to adjust some parameters, please either use the wrapper script -# (eg. /path/to/snakemake_workflows/workflows/ChIP-seq/ChIP-seq) +# (eg. /path/to/snakemake_workflows/workflows/ChIPseq/ChIPseq) # or save a copy of this file, modify necessary parameters and then provide # this file to the wrapper or snakmake via '--configFile' option # (see below how to call the snakefile directly) @@ -11,13 +11,13 @@ # can be used in new/extended snakemake rules! ################################################################################ ## General/Snakemake parameters, only used/set by wrapper or in Snakemake cmdl, but not in Snakefile -pipeline: chip-seq +pipeline: chipseq cutntag: False # if set to True, overwrites the peakCaller and peakCallerOptions. configFile: clusterConfigFile: local: false maxJobs: 5 -## workingdir need to be required DNA-mapping output dir, 'outdir' is set to workingdir internally +## workingdir need to be required DNAmapping output dir, 'outdir' is set to workingdir internally workingdir: ## preconfigured target genomes (mm9,mm10,dm3,...) , see /path/to/snakemake_workflows/shared/organisms/ ## Value can be also path to your own genome config file! @@ -64,8 +64,8 @@ absBestLFC: 1 # via '--configFile' parameter! 
# example call: # -# snakemake --snakefile /path/to/snakemake_workflows/workflows/ChIP-seq/Snakefile -# --configFile /path/to/snakemake_workflows/workflows/ChIP-seq/defaults.yaml +# snakemake --snakefile /path/to/snakemake_workflows/workflows/ChIPseq/Snakefile +# --configFile /path/to/snakemake_workflows/workflows/ChIPseq/defaults.yaml # --directory /path/to/outputdir # --cores 32 ################################################################################ diff --git a/snakePipes/workflows/ChIP-seq/internals.snakefile b/snakePipes/workflows/ChIPseq/internals.snakefile similarity index 95% rename from snakePipes/workflows/ChIP-seq/internals.snakefile rename to snakePipes/workflows/ChIPseq/internals.snakefile index 56877b89a..8c8eae6f5 100755 --- a/snakePipes/workflows/ChIP-seq/internals.snakefile +++ b/snakePipes/workflows/ChIPseq/internals.snakefile @@ -11,8 +11,8 @@ import warnings def get_control(sample): """ - Return control sample name for a given ChIP-seq sample - Return False if given ChIP-seq sample has no control + Return control sample name for a given ChIPseq sample + Return False if given ChIPseq sample has no control """ if sample in chip_samples_w_ctrl: return chip_dict[sample]['control'] @@ -22,8 +22,8 @@ def get_control(sample): def get_control_name(sample): """ - Return control sample alias for a given ChIP-seq sample - Return False if given ChIP-seq sample has no control + Return control sample alias for a given ChIPseq sample + Return False if given ChIPseq sample has no control """ if sample in chip_samples_w_ctrl: if 'control' in chip_dict[sample] and chip_dict[sample]['control'] != None: @@ -36,7 +36,7 @@ def get_control_name(sample): def is_broad(sample): """ - Return True if given ChIP-seq sample is annotated as sample with + Return True if given ChIPseq sample is annotated as sample with broad enrichment, else return False """ if sample in chip_dict: @@ -47,7 +47,7 @@ def is_broad(sample): def is_chip(sample): """ - Return True if a 
given sample is a ChIP-seq sample + Return True if a given sample is a ChIPseq sample Else return False """ return (sample in chip_samples) @@ -75,8 +75,8 @@ def get_pe_frag_length(sample, frag_len_file): allele_info=is_allelic(workingdir) -# TODO: catch exception if ChIP-seq samples are not unique -# read ChIP-seq dictionary from config.yaml: +# TODO: catch exception if ChIPseq samples are not unique +# read ChIPseq dictionary from config.yaml: # { ChIP1: { control: Input1, broad: True }, ChIP2: { control: Input2, broad: false } #config["chip_dict"] = {} diff --git a/snakePipes/workflows/DNA-mapping/cluster.yaml b/snakePipes/workflows/DNA-mapping/cluster.yaml deleted file mode 100644 index a2bdaa3d5..000000000 --- a/snakePipes/workflows/DNA-mapping/cluster.yaml +++ /dev/null @@ -1,22 +0,0 @@ -bamCoverage: - memory: 4G -bamCoverage_filtered: - memory: 4G -bamPE_fragment_size: - memory: 10G -Bowtie2: - memory: 4G -bwa: - memory: 4G -bwamem2: - memory: 6G -CollectAlignmentSummaryMetrics: - memory: 2G -CollectInsertSizeMetrics: - memory: 1G -filter_reads: - memory: 3G -sambamba_flagstat: - memory: 3G -sambamba_flagstat_sorted: - memory: 3G diff --git a/snakePipes/workflows/DNA-mapping/DNA-mapping b/snakePipes/workflows/DNAmapping/DNAmapping.py similarity index 94% rename from snakePipes/workflows/DNA-mapping/DNA-mapping rename to snakePipes/workflows/DNAmapping/DNAmapping.py index ff2dd2763..09419c81f 100755 --- a/snakePipes/workflows/DNA-mapping/DNA-mapping +++ b/snakePipes/workflows/DNAmapping/DNAmapping.py @@ -1,10 +1,8 @@ -#!/usr/bin/env python3 - __description__ = """ MPI-IE workflow for DNA mapping usage example: - DNA-mapping -i input-dir -o output-dir mm10 + DNAmapping -i input-dir -o output-dir mm10 """ @@ -58,7 +56,7 @@ def parse_args(defaults={"verbose": False, "configFile": None, help="Options that will be passed to Bowtie2 or bwa. You can specify things such as `--local` or " "`--very-sensitive` here. 
The mate orientation and maximum insert size are specified " "elsewhere. Read group information is set automatically. Note that you may need to escape " - "the first - (e.g., '\--very-fast'). Default: '%(default)s'.", + r"the first - (e.g., '\--very-fast'). Default: '%(default)s'.", default=defaults["alignerOpts"]) optional.add_argument("--cutntag", @@ -84,7 +82,7 @@ def parse_args(defaults={"verbose": False, "configFile": None, action="store_true", help="retain only de-duplicated reads/read pairs " "(given single-/paired-end data), recommended for " - "ChIP-seq data (default: '%(default)s')", + "ChIPseq data (default: '%(default)s')", default=defaults["dedup"]) optional.add_argument("--properPairs", @@ -97,7 +95,7 @@ def parse_args(defaults={"verbose": False, "configFile": None, metavar="INT", help="retain only reads with at least the given " "mapping quality. We recommend using" - "mapq of 3 or more for ChIP-seq to remove all true " + "mapq of 3 or more for ChIPseq to remove all true " "multimapping reads. 
(default: '%(default)s')", type=int, default=defaults["mapq"]) @@ -158,7 +156,7 @@ def main(): cf.runAndCleanup(args, snakemake_cmd, logfile_name) #CreateDAG - cf.print_DAG(args,snakemake_cmd, __file__,defaults) + cf.plot_DAG(args,snakemake_cmd, __file__,defaults) if __name__ == "__main__": diff --git a/snakePipes/workflows/DNA-mapping/Snakefile b/snakePipes/workflows/DNAmapping/Snakefile similarity index 98% rename from snakePipes/workflows/DNA-mapping/Snakefile rename to snakePipes/workflows/DNAmapping/Snakefile index 044918b37..f5f2c2c9e 100755 --- a/snakePipes/workflows/DNA-mapping/Snakefile +++ b/snakePipes/workflows/DNAmapping/Snakefile @@ -178,7 +178,7 @@ onstart: if toolsVersion: usedEnvs = [CONDA_SHARED_ENV, CONDA_DNA_MAPPING_ENV] - cf.writeTools(usedEnvs, outdir, "DNA-mapping", maindir) + cf.writeTools(usedEnvs, outdir, "DNAmapping", maindir) ### main rule ################################################################## @@ -203,7 +203,6 @@ rule all: ### execute after workflow finished ############################################ ################################################################################ onsuccess: - cf.cleanLogs(outdir, cluster_config) if "verbose" in config and config["verbose"]: print("\n--- DNA mapping workflow finished successfully! --------------------------------\n") diff --git a/snakePipes/workflows/DNA-mapping/defaults.yaml b/snakePipes/workflows/DNAmapping/defaults.yaml similarity index 92% rename from snakePipes/workflows/DNA-mapping/defaults.yaml rename to snakePipes/workflows/DNAmapping/defaults.yaml index 225ef6fd3..7b437190a 100755 --- a/snakePipes/workflows/DNA-mapping/defaults.yaml +++ b/snakePipes/workflows/DNAmapping/defaults.yaml @@ -1,8 +1,8 @@ ################################################################################ -# This file is the default configuration of the DNA-mapping workflow! +# This file is the default configuration of the DNAmapping workflow! 
# # In order to adjust some parameters, please either use the wrapper script -# (eg. /path/to/snakemake_workflows/workflows/DNA-mapping/DNA-mapping) +# (eg. /path/to/snakemake_workflows/workflows/DNAmapping/DNAmapping) # or save a copy of this file, modify necessary parameters and then provide # this file to the wrapper or snakmake via '--configFile' option # (see below how to call the snakefile directly) @@ -11,7 +11,7 @@ # can be used in new/extended snakemake rules! ################################################################################ ## General/Snakemake parameters, only used/set by wrapper or in Snakemake cmdl, but not in Snakefile -pipeline: dna-mapping +pipeline: dnamapping outdir: configFile: clusterConfigFile: @@ -71,8 +71,8 @@ verbose: false # via '--configFile' parameter! # example call: # -# snakemake --snakefile /path/to/snakemake_workflows/workflows/DNA-mapping/Snakefile -# --configFile /path/to/snakemake_workflows/workflows/DNA-mapping/defaults.yaml +# snakemake --snakefile /path/to/snakemake_workflows/workflows/DNAmapping/Snakefile +# --configFile /path/to/snakemake_workflows/workflows/DNAmapping/defaults.yaml # --directory /path/to/outputdir # --cores 32 ################################################################################ diff --git a/snakePipes/workflows/DNA-mapping/internals.snakefile b/snakePipes/workflows/DNAmapping/internals.snakefile similarity index 100% rename from snakePipes/workflows/DNA-mapping/internals.snakefile rename to snakePipes/workflows/DNAmapping/internals.snakefile diff --git a/snakePipes/workflows/HiC/HiC b/snakePipes/workflows/HiC/HiC.py similarity index 90% rename from snakePipes/workflows/HiC/HiC rename to snakePipes/workflows/HiC/HiC.py index e6e0b7fd4..bf67d607e 100755 --- a/snakePipes/workflows/HiC/HiC +++ b/snakePipes/workflows/HiC/HiC.py @@ -21,7 +21,8 @@ def parse_args(defaults={"verbose": False, "configFile": None, "snakemakeOptions": "--use-conda", "tempDir": None, "downsample": False, "trim": 
False, "trimmer": "cutadapt", "trimmerOptions": "", - "fastqc": False, "aligner": None, "binSize": 10000, "noTAD": False, + "fastqc": False, "aligner": None, "binSize": 10000, + "noTAD": False, "RFResolution": False, "correctionMethod": "KR", "enzyme": "HindIII", "restrictRegion": None, "mergeSamples": False, "nBinsToMerge": 0, @@ -48,13 +49,13 @@ def parse_args(defaults={"verbose": False, "configFile": None, optional = parser.add_argument_group('Options') parserCommon.commonOptions(optional, defaults, bw=False, plots=False) - + optional.add_argument("--aligner", - help="Program used for mapping: bwa or bwa-mem2 (default: '%(default)s').", - choices=["bwa","bwa-mem2"], + help="Program used for mapping: bwa or bwa-mem2 \ + (default: '%(default)s').", + choices=["bwa", "bwa-mem2"], default=defaults["aligner"]) - optional.add_argument("--RFResolution", action="store_true", help="Create Hi-C matrices at the restriction " @@ -74,7 +75,8 @@ def parse_args(defaults={"verbose": False, "configFile": None, metavar="INT", help="Create Hi-C matrices at the given binSize. " "This option is mutally exclusive with the " - "`--RFResolution` option (default: '%(default)s')", + "`--RFResolution` option \ + (default: '%(default)s')", default=defaults["binSize"]) optional.add_argument("--restrictRegion", @@ -88,7 +90,7 @@ def parse_args(defaults={"verbose": False, "configFile": None, optional.add_argument("--mergeSamples", action="store_true", - help="Merge the HiC matrices and create a new matrix." + help="Merge HiC matrices and create a new matrix." " If this option is specified togather with " "`--sampleInfo` (see below), the samples would " "be merged based on the defined groups. " @@ -105,12 +107,14 @@ def parse_args(defaults={"verbose": False, "configFile": None, optional.add_argument("--findTADParams", type=str, metavar="STR", - help="parameters for HiCFindTADs. (default: '%(default)s')", + help="parameters for HiCFindTADs. 
\ + (default: '%(default)s')", default=defaults["findTADParams"]) optional.add_argument("--noTAD", action="store_true", - help="Stop the pipeline before TAD calling. (default: '%(default)s')", + help="Stop the pipeline before TAD calling. \ + (default: '%(default)s')", default=defaults["noTAD"]) optional.add_argument("--noCorrect", @@ -168,21 +172,22 @@ def main(): args = parser.parse_args() args, defaults = cf.handleUserArgs(args, defaults, parse_args) - # we also add these paths to config, although we don't use them in the Snakefile + # add these paths to config, although we don't use them in the Snakefile args.baseDir = baseDir # Common arguments cf.checkCommonArguments(args, baseDir, outDir=True) # Handle YAML and log files - snakemake_cmd = cf.commonYAMLandLogs(baseDir, workflowDir, defaults, args, __file__) + snakemake_cmd = cf.commonYAMLandLogs( + baseDir, workflowDir, defaults, args, __file__) logfile_name = cf.logAndExport(args, os.path.basename(__file__)) # Run everything cf.runAndCleanup(args, snakemake_cmd, logfile_name) - #CreateDAG - cf.print_DAG(args,snakemake_cmd, __file__,defaults) + # CreateDAG + cf.plot_DAG(args, snakemake_cmd, __file__, defaults) if __name__ == "__main__": diff --git a/snakePipes/workflows/HiC/Snakefile b/snakePipes/workflows/HiC/Snakefile index 2e3f0c12e..9f20b065b 100755 --- a/snakePipes/workflows/HiC/Snakefile +++ b/snakePipes/workflows/HiC/Snakefile @@ -154,7 +154,6 @@ rule all: ### execute after workflow finished ############################################ ################################################################################ onsuccess: - cf.cleanLogs(outdir, cluster_config) if "verbose" in config and config["verbose"]: print("\n--- Hi-C workflow finished successfully! 
--------------------------------\n") diff --git a/snakePipes/workflows/HiC/cluster.yaml b/snakePipes/workflows/HiC/cluster.yaml deleted file mode 100644 index 9e0d6a073..000000000 --- a/snakePipes/workflows/HiC/cluster.yaml +++ /dev/null @@ -1,18 +0,0 @@ -build_matrix: - memory: 9G -call_tads: - memory: 3G -correct_matrix: - memory: 7G -diagnostic_plot: - memory: 2G -map_fastq_single_end: - memory: 10G -merge_bins: - memory: 7G -merge_matrices: - memory: 3G -sambamaba_sort_hic_r1: - memory: 4G -sambamaba_sort_hic_r2: - memory: 4G diff --git a/snakePipes/workflows/WGBS/Snakefile b/snakePipes/workflows/WGBS/Snakefile index ee4b1ea82..ecbf00c22 100644 --- a/snakePipes/workflows/WGBS/Snakefile +++ b/snakePipes/workflows/WGBS/Snakefile @@ -179,7 +179,6 @@ rule all: ### execute after workflow finished ############################################ ################################################################################ onsuccess: - cf.cleanLogs(outdir, cluster_config) if "verbose" in config and config["verbose"]: print("--- WGBS workflow finished successfully! 
--------------------------------")#\n \n diff --git a/snakePipes/workflows/WGBS/WGBS b/snakePipes/workflows/WGBS/WGBS.py similarity index 99% rename from snakePipes/workflows/WGBS/WGBS rename to snakePipes/workflows/WGBS/WGBS.py index 651a7ec6b..08010b0f8 100755 --- a/snakePipes/workflows/WGBS/WGBS +++ b/snakePipes/workflows/WGBS/WGBS.py @@ -180,7 +180,7 @@ def main(): cf.runAndCleanup(args, snakemake_cmd, logfile_name) #CreateDAG - cf.print_DAG(args,snakemake_cmd, __file__,defaults) + cf.plot_DAG(args,snakemake_cmd, __file__,defaults) if __name__ == "__main__": diff --git a/snakePipes/workflows/WGBS/cluster.yaml b/snakePipes/workflows/WGBS/cluster.yaml deleted file mode 100644 index 423a401dd..000000000 --- a/snakePipes/workflows/WGBS/cluster.yaml +++ /dev/null @@ -1,22 +0,0 @@ -bwameth: - memory: 3G -metileneReport: - memory: 6G -DepthOfCov: - memory: 3G -DepthOfCovGenome: - memory: 3G -markDupes: - memory: 3G -prepForMetilene: - memory: 3G -CpG_stats: - memory: 30G -CpG_report: - memory: 6G -DSS: - memory: 10G -dmrseq: - memory: 10G -produceReport: - memory: 20G diff --git a/snakePipes/workflows/createIndices/Snakefile b/snakePipes/workflows/createIndices/Snakefile index e49debf4c..94af0085e 100755 --- a/snakePipes/workflows/createIndices/Snakefile +++ b/snakePipes/workflows/createIndices/Snakefile @@ -168,7 +168,6 @@ rule all: ### execute after workflow finished ############################################ ################################################################################ onsuccess: - cf.cleanLogs(outdir, cluster_config) f = open(os.path.join(outdir, "genome_fasta", "effectiveSize")) organismDictionary['genome_size'] = int(float(f.read().strip())) f.close() diff --git a/snakePipes/workflows/createIndices/cluster.yaml b/snakePipes/workflows/createIndices/cluster.yaml deleted file mode 100644 index 2f637b510..000000000 --- a/snakePipes/workflows/createIndices/cluster.yaml +++ /dev/null @@ -1,30 +0,0 @@ -__default__: - memory: 1G -createGenomeFasta: 
- memory: 20G -make2bit: - memory: 8G -downloadGTF: - memory: 20G -bowtie2Index: - memory: 10G -hisat2Index: - memory: 2G -starIndex: - memory: 8G -SalmonIndex: - memory: 3G -run_eisaR: - memory: 30G -Salmon_index_joint_fa: - memory: 6G -bwaIndex: - memory: 8G -bwamem2Index: - memory: 90G -bwamethIndex: - memory: 20G -bwameth2Index: - memory: 120G -fastaDict: - memory: 4G diff --git a/snakePipes/workflows/createIndices/createIndices b/snakePipes/workflows/createIndices/createIndices.py similarity index 96% rename from snakePipes/workflows/createIndices/createIndices rename to snakePipes/workflows/createIndices/createIndices.py index 1e7a46f4f..f9ef8212e 100755 --- a/snakePipes/workflows/createIndices/createIndices +++ b/snakePipes/workflows/createIndices/createIndices.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 - __description__ = """ Create indices for use by snakePipes. A YAML file will be created by default in the default location where snakePipes looks for organism YAML files. @@ -17,7 +15,7 @@ def parse_args(defaults={"configFile": None, "clusterConfigFile": None, - "maxJobs": 5, "snakemakeOptions": "--use-conda", + "maxJobs": 5, "snakemakeOptions": "", "tempDir": None, "verbose": False, "spikeinExt": None, "salmonIndexOptions": None, "eisaR_flank_length": None }): """ Parse arguments from the command line. @@ -41,7 +39,7 @@ def parse_args(defaults={"configFile": None, "clusterConfigFile": None, help="URL or local path to where the genome fasta file is located. The file may optionally be gzipped.") required.add_argument("--gtfURL", - help="URL or local path to where the genome annotation in GTF format is located. GFF is NOT supported. The file may optionally be gzipped. If this file is not specified, then RNA-seq related tools will NOT be usable.") + help="URL or local path to where the genome annotation in GTF format is located. GFF is NOT supported. The file may optionally be gzipped. 
If this file is not specified, then RNAseq related tools will NOT be usable.") # Workflow options optional = parser.add_argument_group('Options') @@ -77,7 +75,7 @@ def parse_args(defaults={"configFile": None, "clusterConfigFile": None, help="An optional file list, with one entry per line, the chromosomes to ignore during normalization. These are typically sex chromosomes, mitochondrial DNA, and unplaced contigs.") optional.add_argument("--rmskURL", - help="URL or local path to where the repeat masker output file is located. This is only required if you plan to run the non-coding RNA-seq workflow.") + help="URL or local path to where the repeat masker output file is located. This is only required if you plan to run the ncRNAseq workflow.") optional.add_argument("--userYAML", action="store_true", @@ -127,7 +125,7 @@ def main(): cf.runAndCleanup(args, snakemake_cmd, logfile_name) #CreateDAG - cf.print_DAG(args,snakemake_cmd, __file__,defaults) + cf.plot_DAG(args,snakemake_cmd, __file__,defaults) if __name__ == "__main__": diff --git a/snakePipes/workflows/createIndices/defaults.yaml b/snakePipes/workflows/createIndices/defaults.yaml index d3062c533..72720d88b 100644 --- a/snakePipes/workflows/createIndices/defaults.yaml +++ b/snakePipes/workflows/createIndices/defaults.yaml @@ -29,7 +29,7 @@ gtfURL: spikeinGtfURL: ## The effective genome size effectiveGenomeSize: 0 -## Regions to blacklist in the ChIP-seq and related workflows +## Regions to blacklist in the ChIPseq and related workflows blacklist: spikeinBlacklist: ## Regions to ignore during normalization (e.g., with bamCompare) diff --git a/snakePipes/workflows/mRNA-seq/cluster.yaml b/snakePipes/workflows/mRNA-seq/cluster.yaml deleted file mode 100644 index 3fdf4c6ce..000000000 --- a/snakePipes/workflows/mRNA-seq/cluster.yaml +++ /dev/null @@ -1,32 +0,0 @@ -SalmonQuant: - memory: 2G -bamPE_fragment_size: - memory: 10G -HISAT2: - memory: 6G -STAR: - memory: 6G -create_annotation_bed: - memory: 4G 
-annotation_bed2fasta: - memory: 4G -sleuth_Salmon: - memory: 4G -sleuth_SalmonAllelic: - memory: 10G -DESeq2: - memory: 5G -DESeq2_Salmon_basic: - memory: 3G -DESeq2_Salmon_allelic: - memory: 3G -star_index: - memory: 15G -STAR_allele: - memory: 30G -Salmon_TPM: - memory: 5G -Salmon_counts: - memory: 5G -filterBW: - memory: 6G diff --git a/snakePipes/workflows/mRNA-seq/Snakefile b/snakePipes/workflows/mRNAseq/Snakefile similarity index 99% rename from snakePipes/workflows/mRNA-seq/Snakefile rename to snakePipes/workflows/mRNAseq/Snakefile index 932a08c25..9a914a587 100755 --- a/snakePipes/workflows/mRNA-seq/Snakefile +++ b/snakePipes/workflows/mRNAseq/Snakefile @@ -355,7 +355,7 @@ onstart: if toolsVersion: usedEnvs = [CONDA_SHARED_ENV, CONDA_RNASEQ_ENV] - cf.writeTools(usedEnvs, outdir, "mRNA-seq", maindir) + cf.writeTools(usedEnvs, outdir, "mRNAseq", maindir) if sampleSheet: cf.copySampleSheet(sampleSheet, outdir) @@ -389,7 +389,6 @@ else: ### execute after finished #################################################### ################################################################################ onsuccess: - cf.cleanLogs(outdir, cluster_config) if "verbose" in config and config["verbose"]: print("\n--- RNA-seq workflow finished successfully! ------------------------------------\n") diff --git a/snakePipes/workflows/mRNA-seq/defaults.yaml b/snakePipes/workflows/mRNAseq/defaults.yaml similarity index 99% rename from snakePipes/workflows/mRNA-seq/defaults.yaml rename to snakePipes/workflows/mRNAseq/defaults.yaml index e86bb2fe0..150251ff9 100644 --- a/snakePipes/workflows/mRNA-seq/defaults.yaml +++ b/snakePipes/workflows/mRNAseq/defaults.yaml @@ -11,7 +11,7 @@ # can be used in new/extended snakemake rules! 
################################################################################ ## General/Snakemake parameters, only used/set by wrapper or in Snakemake cmdl, but not in Snakefile -pipeline: rna-seq +pipeline: rnaseq outdir: configFile: clusterConfigFile: diff --git a/snakePipes/workflows/mRNA-seq/internals.snakefile b/snakePipes/workflows/mRNAseq/internals.snakefile similarity index 100% rename from snakePipes/workflows/mRNA-seq/internals.snakefile rename to snakePipes/workflows/mRNAseq/internals.snakefile diff --git a/snakePipes/workflows/mRNA-seq/mRNA-seq b/snakePipes/workflows/mRNAseq/mRNAseq.py similarity index 95% rename from snakePipes/workflows/mRNA-seq/mRNA-seq rename to snakePipes/workflows/mRNAseq/mRNAseq.py index 2a0926b05..fe3392351 100755 --- a/snakePipes/workflows/mRNA-seq/mRNA-seq +++ b/snakePipes/workflows/mRNAseq/mRNAseq.py @@ -1,13 +1,10 @@ -#!/usr/bin/env python3 - __description__ = """ MPI-IE workflow for RNA mapping and analysis usage example: - RNA-seq -i input-dir -o output-dir mm10 + RNAseq -i input-dir -o output-dir mm10 """ - import argparse import os import sys @@ -138,7 +135,6 @@ def parse_args(defaults={"verbose": False, "configFile": None, def main(): baseDir, workflowDir, defaults = cf.setDefaults(os.path.basename(__file__)) - # get command line arguments parser = parse_args(defaults) args = parser.parse_args() @@ -173,7 +169,7 @@ def main(): if args.fromBAM: args.aligner = "EXTERNAL_BAM" if "allelic-counting" in modeTemp and not args.fromBAM: - warnings.warn("--fromBAM is required with allelic-counting mode. Setting to True.") + warnings.warn("--fromBAM is required with allelic-counting mode. 
Setting to True.", stacklevel=2) args.fromBAM = True if "allelic-counting" in modeTemp: args.bamExt = ".sorted.bam" @@ -184,12 +180,12 @@ def main(): if not args.sampleSheet: sys.exit("mode three-prime-seq requires a sampleSheet " "(specified with --sampleSheet).\n") - aligner = "STAR" - alignerOptions = defaults['threePrimeAlignerOptions'] - trimmerOptions = defaults['threePrimeTrimmerOptions'] - trimmer = "fastp" - trim = True - + args.aligner = "STAR" + args.alignerOptions = defaults['threePrimeAlignerOptions'] + args.trimmerOptions = defaults['threePrimeTrimmerOptions'] + args.trimmer = "fastp" + args.trim = True + ## End workflow-specific checks # Handle YAML and log files @@ -200,8 +196,4 @@ def main(): cf.runAndCleanup(args, snakemake_cmd, logfile_name) #CreateDAG - cf.print_DAG(args,snakemake_cmd, __file__,defaults) - - -if __name__ == "__main__": - main() + cf.plot_DAG(args,snakemake_cmd, __file__,defaults) diff --git a/snakePipes/workflows/makePairs/Snakefile b/snakePipes/workflows/makePairs/Snakefile new file mode 100644 index 000000000..a02ee6331 --- /dev/null +++ b/snakePipes/workflows/makePairs/Snakefile @@ -0,0 +1,125 @@ +# Snakemake workflow for phased mapping with pairtools +# based on https://github.com/caballero/snakemake-pairtools-phased/tree/df410ff +# Juan Caballero +# (C) 2024 + +import os +import snakePipes.common_functions as cf + +### snakemake_workflows initialization ######################################## +maindir = os.path.dirname(os.path.dirname(workflow.basedir)) + +# load conda ENVs (path is relative to "shared/rules" directory) +globals().update(cf.set_env_yamls()) + +# load config file +globals().update( + cf.load_configfile(workflow.overwrite_configfiles[0], config["verbose"]) +) +# load organism-specific data, i.e. genome indices, annotation, etc. 
+globals().update(cf.load_organism_data(genome, maindir, config["verbose"])) +# return the pipeline version in the log +cf.get_version() + + +# do workflow specific stuff now +include: os.path.join(workflow.basedir, "internals.snakefile") +# FASTQ: either downsample FASTQ files or create symlinks to input files +include: os.path.join(maindir, "shared", "rules", "FASTQ.snakefile") + + +# FastQC +if fastqc: + + include: os.path.join(maindir, "shared", "rules", "FastQC.snakefile") + + +# trimming +if trim: + + include: os.path.join(maindir, "shared", "rules", "trimming.snakefile") + + +# umi_tools: needed for FASTQ handling +include: os.path.join(maindir, "shared", "rules", "umi_tools.snakefile") +# diploid_genome: concatenation and indexing +include: os.path.join(maindir, "shared", "rules", "diploid_genome.snakefile") +# pairtools: bwa mapping (without sorting), pairtools rules & multiqc +include: os.path.join(maindir, "shared", "rules", "pairtools.snakefile") + + +def run_FastQC(fastqc): + if fastqc: + return expand("FastQC/{sample}{read}_fastqc.html", sample=samples, read=reads) + else: + return [] + + +def run_Trimming(trim, fastqc): + if trim and fastqc: + return expand( + fastq_dir + "/{sample}{read}.fastq.gz", sample=samples, read=reads + ) + expand( + "FastQC_trimmed/{sample}{read}_fastqc.html", sample=samples, read=reads + ) + elif trim: + return expand( + fastq_dir + "/{sample}{read}.fastq.gz", sample=samples, read=reads + ) + else: + return [] + + +### execute before workflow starts ############################################# +# does NOT seem to execute "onstart" +################################################################################ +onstart: + if "verbose" in config and config["verbose"]: + print( + "--- Workflow parameters --------------------------------------------------------" + ) + print("samples:", samples) + print("reads:", reads) + print("fastq dir:", fastq_dir) + print("-" * 80, "\n") + + print( + "--- Environment 
----------------------------------------------------------------" + ) + print("$TMPDIR: ", os.getenv("TMPDIR", "")) + print("$HOSTNAME: ", os.getenv("HOSTNAME", "")) + print("-" * 80, "\n") + + if toolsVersion: + usedEnvs = [CONDA_SHARED_ENV, CONDA_MAKEPAIRS_ENV] + cf.writeTools(usedEnvs, outdir, "makePairs", maindir) + + if sampleSheet: + cf.copySampleSheet(sampleSheet, outdir) + + +### main rule ################################################################## +################################################################################ + + +rule all: + input: + # set soft-links + expand("originalFASTQ/{sample}{read}.fastq.gz", sample=samples, read=reads), + run_FastQC(fastqc), + run_Trimming(trim, fastqc), + "genome/diploid_genome.chromsizes", + "multiqc/multiqc_report.html", + + +### execute after workflow finished ############################################ +################################################################################ +onsuccess: + if "verbose" in config and config["verbose"]: + print( + "\n--- makePairs finished successfully! --------------------------------\n" + ) + + +onerror: + print("\n !!! ERROR in makePairs workflow! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n") diff --git a/snakePipes/workflows/makePairs/defaults.yaml b/snakePipes/workflows/makePairs/defaults.yaml new file mode 100644 index 000000000..7f51a043e --- /dev/null +++ b/snakePipes/workflows/makePairs/defaults.yaml @@ -0,0 +1,39 @@ +pipeline: makePairs +outdir: +configFile: +clusterConfigFile: +local: False +maxJobs: 5 +## directory with fastq files +indir: +## preconfigured target genomes (mm9,mm10,dm3,...) , see /path/to/snakemake_workflows/shared/organisms/ +## Value can be also path to your own genome config file! 
+genome: +## FASTQ file extension (default: ".fastq.gz") +ext: '.fastq.gz' +## paired-end read name extension (default: ["_R1", "_R2"]) +reads: ["_R1","_R2"] +## assume paired end reads +pairedEnd: True +## Number of reads to downsample from each FASTQ file +downsample: +## Options for trimming +trim: True +trimmer: fastp +trimmerOptions: + +verbose: False +fastqc: True +UMIBarcode: False +bcPattern: "NNNNCCCCCCCCC" +UMIDedup: False +UMIDedupSep: "_" +UMIDedupOpts: "_" +plotFormat: png +bwBinSize: 1000 +aligner: 'bwa' +alignerOptions: '-SPu -T0' +alignerThreads: 30 + +fromBAM: False +sampleSheet: diff --git a/snakePipes/workflows/makePairs/internals.snakefile b/snakePipes/workflows/makePairs/internals.snakefile new file mode 100644 index 000000000..7f30e121d --- /dev/null +++ b/snakePipes/workflows/makePairs/internals.snakefile @@ -0,0 +1,40 @@ +import glob +import os + +## trim +fastq_dir = "FASTQ" +if trim: + fastq_indir_trim = "FASTQ" + if trimmer == "trimgalore": + fastq_dir = "FASTQ_TrimGalore" + elif trimmer == "cutadapt": + fastq_dir = "FASTQ_Cutadapt" + elif trimmer == "fastp": + fastq_dir = "FASTQ_fastp" + + +## genome names for allele-sp mapping +strains = list(map(str.strip, re.split(",|;", config["strains"]))) + + +infiles = sorted(glob.glob(os.path.join(str(indir or ""), "*" + ext))) +samples = cf.get_sample_names(infiles, ext, reads) +pairedEnd = cf.is_paired(infiles, ext, reads) +del infiles + + +# reference genomes: diploid_genome, nmasked, reference +# currently only the case for diploid_genome is covered +REFERENCES = ["diploid_genome"] + +# possible phasetypes of contacts +PHASETYPES = [strains[0], strains[1], "unphased", "trans"] + +PHASEFILTER = [ + '(phase1=="0") and (phase2=="0")', + '(phase1=="1") and (phase2=="1")', + '(phase1==".") or (phase2==".")', + '(phase1!=phase2) and (phase1!=".") and (phase2!=".") and (phase1!="!") and (phase2!="!")', +] + +PHASEDIC = dict(map(lambda i, j: (i, j), PHASETYPES, PHASEFILTER)) diff --git 
a/snakePipes/workflows/makePairs/makePairs.py b/snakePipes/workflows/makePairs/makePairs.py new file mode 100644 index 000000000..5281536e6 --- /dev/null +++ b/snakePipes/workflows/makePairs/makePairs.py @@ -0,0 +1,102 @@ +__description__ = """ +MPI-IE workflow for creating HiC matrices with pairtools + +usage example: + makePairs -i input-dir -o output-dir --VCFfile vcf --strains s1,s2 dm6 +""" + +import argparse +import os +import sys +import textwrap +import snakePipes.common_functions as cf +import snakePipes.parserCommon as parserCommon + + +def parse_args(defaults={"verbose": False, "configFile": None, + "clusterConfigFile": None, "maxJobs": 5, + "snakemakeOptions": "--use-conda", "tempDir": None, + "downsample": False, + "trim": False, "trimmer": "fastp", + "trimmerOptions": None, "fastqc": False, + "reads": ["_R1", "_R2"], "ext": ".fastq.gz", + "fromBAM": False, "bamExt": ".bam", + "aligner": "bwa", + "alignerOptions": "-SPu -T0", + "plotFormat": "png", + "UMIDedup": False, + "UMIDedupOpts": "", "bcPattern": "NNNNCCCCCCCCC", + "UMIDedupSep": "_", "UMIBarcode": False}): + """ + Parse arguments from the command line. + """ + mainArgs = parserCommon.mainArguments(defaults, workingDir=False) + snpArgs = parserCommon.snpArguments(defaults) + + parser = argparse.ArgumentParser( + prog=sys.argv[0], + formatter_class=argparse.RawDescriptionHelpFormatter, + description=textwrap.dedent(__description__), + parents=[mainArgs, snpArgs], + add_help=False + ) + + # Workflow options + optional = parser.add_argument_group('Options') + + parserCommon.commonOptions(optional, defaults, bw=False) + + optional.add_argument("--aligner", + help="Program used for mapping: BWA \ + (default: '%(default)s'). 
If you change this, please change \ + --alignerOptions to match.", + choices=["bwa"], + default=defaults["aligner"]) + + optional.add_argument("--alignerOptions", + help="aligner option string, \ + e.g.: '-SPu -T0' (default: '%(default)s')", + default=defaults["alignerOptions"]) + + optional.add_argument("--fromBAM", + action="store_true", + help="Input folder with bam files. If provided, \ + the analysis will start from this point. If bam files contain single \ + ends, please specify --singleEnd additionally.", + default=defaults["fromBAM"]) + + return parser + + +def main(): + baseDir, workflowDir, defaults = cf.setDefaults(os.path.basename(__file__)) + parser = parse_args(defaults) + args = parser.parse_args() + args, defaults = cf.handleUserArgs(args, defaults, parse_args) + + # add baseDir to config, although we don't use them in the Snakefile + args.baseDir = baseDir + + # Common arguments + cf.checkCommonArguments(args, baseDir, outDir=True) + args.VCFfile = os.path.abspath(args.VCFfile) + + if args.fromBAM: + args.aligner = "EXTERNAL_BAM" + +# ## End workflow-specific checks + + # Handle YAML and log files + snakemake_cmd = \ + cf.commonYAMLandLogs(baseDir, workflowDir, defaults, args, __file__) + logfile_name = cf.logAndExport(args, os.path.basename(__file__)) + + # Run everything + cf.runAndCleanup(args, snakemake_cmd, logfile_name) + + # CreateDAG (with --DAG flag) + cf.plot_DAG(args, snakemake_cmd, __file__, defaults) + + +if __name__ == "__main__": + main() diff --git a/snakePipes/workflows/noncoding-RNA-seq/Snakefile b/snakePipes/workflows/ncRNAseq/Snakefile similarity index 95% rename from snakePipes/workflows/noncoding-RNA-seq/Snakefile rename to snakePipes/workflows/ncRNAseq/Snakefile index b1445aaea..1f0a951ea 100755 --- a/snakePipes/workflows/noncoding-RNA-seq/Snakefile +++ b/snakePipes/workflows/ncRNAseq/Snakefile @@ -144,7 +144,7 @@ onstart: if toolsVersion: usedEnvs = [CONDA_SHARED_ENV, CONDA_NONCODING_RNASEQ_ENV] - cf.writeTools(usedEnvs, 
outdir, "noncoding-RNA-seq", maindir) + cf.writeTools(usedEnvs, outdir, "ncRNAseq", maindir) if sampleSheet: cf.copySampleSheet(sampleSheet, outdir) @@ -173,9 +173,8 @@ else: ### execute after finished #################################################### ################################################################################ onsuccess: - cf.cleanLogs(outdir, cluster_config) if "verbose" in config and config["verbose"]: - print("\n--- noncoding-RNA-seq workflow finished successfully! ------------------------------------\n") + print("\n--- ncRNAseq workflow finished successfully! ------------------------------------\n") onerror: - print("\n !!! ERROR in noncoding-RNA-seq workflow! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n") + print("\n !!! ERROR in ncRNAseq workflow! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n") diff --git a/snakePipes/workflows/noncoding-RNA-seq/defaults.yaml b/snakePipes/workflows/ncRNAseq/defaults.yaml similarity index 98% rename from snakePipes/workflows/noncoding-RNA-seq/defaults.yaml rename to snakePipes/workflows/ncRNAseq/defaults.yaml index 638ad376c..c63ff7206 100644 --- a/snakePipes/workflows/noncoding-RNA-seq/defaults.yaml +++ b/snakePipes/workflows/ncRNAseq/defaults.yaml @@ -11,7 +11,7 @@ # can be used in new/extended snakemake rules! 
################################################################################ ## General/Snakemake parameters, only used/set by wrapper or in Snakemake cmdl, but not in Snakefile -pipeline: noncoding-rna-seq +pipeline: ncRNAseq outdir: configFile: clusterConfigFile: diff --git a/snakePipes/workflows/noncoding-RNA-seq/internals.snakefile b/snakePipes/workflows/ncRNAseq/internals.snakefile similarity index 100% rename from snakePipes/workflows/noncoding-RNA-seq/internals.snakefile rename to snakePipes/workflows/ncRNAseq/internals.snakefile diff --git a/snakePipes/workflows/noncoding-RNA-seq/noncoding-RNA-seq b/snakePipes/workflows/ncRNAseq/ncRNAseq.py similarity index 96% rename from snakePipes/workflows/noncoding-RNA-seq/noncoding-RNA-seq rename to snakePipes/workflows/ncRNAseq/ncRNAseq.py index cc177fed4..925f7af83 100755 --- a/snakePipes/workflows/noncoding-RNA-seq/noncoding-RNA-seq +++ b/snakePipes/workflows/ncRNAseq/ncRNAseq.py @@ -1,10 +1,8 @@ -#!/usr/bin/env python3 - __description__ = """ -MPI-IE workflow for noncoding RNA mapping and analysis +MPI-IE workflow for ncRNAseq mapping and analysis usage example: - noncoding-RNA-seq -i input-dir -o output-dir mm10 + ncRNAseq -i input-dir -o output-dir mm10 """ @@ -123,7 +121,7 @@ def main(): cf.runAndCleanup(args, snakemake_cmd, logfile_name) #CreateDAG - cf.print_DAG(args,snakemake_cmd, __file__,defaults) + cf.plot_DAG(args,snakemake_cmd, __file__,defaults) if __name__ == "__main__": diff --git a/snakePipes/workflows/noncoding-RNA-seq/cluster.yaml b/snakePipes/workflows/noncoding-RNA-seq/cluster.yaml deleted file mode 100644 index 141f61701..000000000 --- a/snakePipes/workflows/noncoding-RNA-seq/cluster.yaml +++ /dev/null @@ -1,8 +0,0 @@ -bamPE_fragment_size: - memory: 10G -STAR: - memory: 6G -DESeq2: - memory: 5G -TEcounts: - memory: 16G diff --git a/snakePipes/workflows/preprocessing/Snakefile b/snakePipes/workflows/preprocessing/Snakefile index 507939755..a655be387 100755 --- 
a/snakePipes/workflows/preprocessing/Snakefile +++ b/snakePipes/workflows/preprocessing/Snakefile @@ -101,7 +101,6 @@ rule all: ### execute after finished #################################################### ################################################################################ onsuccess: - cf.cleanLogs(outdir, cluster_config) if "verbose" in config and config["verbose"]: print("\n--- Preprocessing workflow finished successfully! ------------------------------------\n") diff --git a/snakePipes/workflows/preprocessing/cluster.yaml b/snakePipes/workflows/preprocessing/cluster.yaml deleted file mode 100644 index c0ae97335..000000000 --- a/snakePipes/workflows/preprocessing/cluster.yaml +++ /dev/null @@ -1,2 +0,0 @@ -clumpify: - memory: 2G diff --git a/snakePipes/workflows/preprocessing/preprocessing b/snakePipes/workflows/preprocessing/preprocessing.py similarity index 99% rename from snakePipes/workflows/preprocessing/preprocessing rename to snakePipes/workflows/preprocessing/preprocessing.py index ad95a65b2..96fdee3f3 100755 --- a/snakePipes/workflows/preprocessing/preprocessing +++ b/snakePipes/workflows/preprocessing/preprocessing.py @@ -137,7 +137,7 @@ def main(): cf.runAndCleanup(args, snakemake_cmd, logfile_name) #CreateDAG - cf.print_DAG(args,snakemake_cmd, __file__,defaults) + cf.plot_DAG(args,snakemake_cmd, __file__,defaults) if __name__ == "__main__": diff --git a/snakePipes/workflows/scRNAseq/Snakefile b/snakePipes/workflows/scRNAseq/Snakefile index d0338d82f..ed773686e 100755 --- a/snakePipes/workflows/scRNAseq/Snakefile +++ b/snakePipes/workflows/scRNAseq/Snakefile @@ -90,7 +90,7 @@ onstart: print("Read extension:", reads) print("Genome:", genome) print("Downsample:", downsample) - print("Mode:", mode) + print("Mode:", mode) print("Input directory for mapping:", fastq_dir) print("BigWig bin size:", bwBinSize) @@ -138,7 +138,6 @@ elif mode=="Alevin": ### execute after workflow finished ############################################ 
################################################################################ onsuccess: - cf.cleanLogs(outdir, cluster_config) if "verbose" in config and config["verbose"]: print("\n--- scRNAseq workflow finished successfully! --------------------------------\n") diff --git a/snakePipes/workflows/scRNAseq/cluster.yaml b/snakePipes/workflows/scRNAseq/cluster.yaml deleted file mode 100644 index 6f8e6e868..000000000 --- a/snakePipes/workflows/scRNAseq/cluster.yaml +++ /dev/null @@ -1,20 +0,0 @@ -STAR: - memory: 6G -bamPE_fragment_size: - memory: 3G -annotation_bed2fasta: - memory: 4G -STARsolo: - memory: 6G -SalmonAlevin: - memory: 10G -AlevinForVelocity: - memory: 10G -cellsort_bam: - memory: 10G -velocyto: - memory: 20G -STARsolo_raw_to_seurat: - memory: 10G -velo_to_sce: - memory: 30G diff --git a/snakePipes/workflows/scRNAseq/defaults.yaml b/snakePipes/workflows/scRNAseq/defaults.yaml index 6f2ca0f68..88163c0f3 100644 --- a/snakePipes/workflows/scRNAseq/defaults.yaml +++ b/snakePipes/workflows/scRNAseq/defaults.yaml @@ -1,5 +1,5 @@ ################################################################################ -# This file is the default configuration of the scRNA-seq workflow! +# This file is the default configuration of the scRNAseq workflow! # # In order to adjust some parameters, please either use the wrapper script # (eg. /path/to/snakemake_workflows/workflows/scRNAseq/scRNAseq-mapcount) @@ -11,7 +11,7 @@ # can be used in new/extended snakemake rules! 
################################################################################ ## General/Snakemake parameters, only used/set by wrapper or in Snakemake cmdl, but not in Snakefile -pipeline: scrna-seq +pipeline: scrnaseq outdir: configFile: clusterConfigFile: diff --git a/snakePipes/workflows/scRNAseq/scRNAseq b/snakePipes/workflows/scRNAseq/scRNAseq.py similarity index 95% rename from snakePipes/workflows/scRNAseq/scRNAseq rename to snakePipes/workflows/scRNAseq/scRNAseq.py index 7638d74e6..d1f63b6a4 100755 --- a/snakePipes/workflows/scRNAseq/scRNAseq +++ b/snakePipes/workflows/scRNAseq/scRNAseq.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 __description__ = """ -MPI-IE workflow for scRNA-seq (CEL-Seq2 and related protocols) +MPI-IE workflow for scRNAseq (CEL-Seq2 and related protocols) usage example: scRNAseq -i input-dir -o output-dir mm10 @@ -20,12 +20,12 @@ def parse_args(defaults={"verbose": False, "configFile": None, "clusterConfigFile": None, "maxJobs": 5, "snakemakeOptions": "--use-conda", "tempdir": None, "downsample": False, "trim": False, - "reads": ["_R1", "_R2"], "ext": ".fastq.gz", "ext": ".fastq.gz", + "reads": ["_R1", "_R2"], "ext": ".fastq.gz", "alignerOptions": "--outBAMsortingBinsN 30 --twopassMode Basic", "filterGTF": "-v -P 'decay|pseudogene' ", "bwBinSize": 10, "plotFormat": "png", - "mode": "STARsolo", "BCwhiteList": None, "STARsoloCoords": ["1","7","8","7"], "myKit": "CellSeq384", "skipVelocyto": False, + "mode": "STARsolo", "BCwhiteList": None, "STARsoloCoords": ["1","7","8","7"], "myKit": "CellSeq384", "skipVelocyto": False, "alevinLibraryType":"ISR", "prepProtocol":None, "expectCells" : None }): """ Parse arguments from the command line. @@ -47,7 +47,7 @@ def parse_args(defaults={"verbose": False, "configFile": None, dest="mode", metavar="STR", help="Analysis mode. 
Possible settings are 'Gruen, STARsolo and Alevin' Default: %(default)s", - choices=['STARsolo','Alevin'], + choices=['STARsolo','Alevin'], type=str, default=defaults["mode"]) @@ -150,7 +150,7 @@ def main(): cf.runAndCleanup(args, snakemake_cmd, logfile_name) #CreateDAG - cf.print_DAG(args,snakemake_cmd, __file__,defaults) + cf.plot_DAG(args,snakemake_cmd, __file__,defaults) if __name__ == "__main__": diff --git a/tests/data/cluster_config.yaml b/tests/data/cluster_config.yaml deleted file mode 100644 index 751da28eb..000000000 --- a/tests/data/cluster_config.yaml +++ /dev/null @@ -1 +0,0 @@ -snakemake_cluster_cmd: '' diff --git a/tests/test_jobcounts.py b/tests/test_jobcounts.py index 24b368ce7..cffc110ce 100644 --- a/tests/test_jobcounts.py +++ b/tests/test_jobcounts.py @@ -8,27 +8,24 @@ RMSK = "http://hgdownload.soe.ucsc.edu/goldenPath/dm6/database/rmsk.txt.gz" SPIKEINGENOME = "ftp://ftp.ensembl.org/pub/release-79/fasta/drosophila_melanogaster/dna/Drosophila_melanogaster.BDGP6.dna_sm.toplevel.fa.gz" SPIKEINGTF = "ftp://ftp.ensembl.org/pub/release-96/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.22.96.gtf.gz" -SMKOPTS = " --dryrun --conda-prefix /tmp -q " - +SMKOPTS = " --dryrun " def parseSpOut(_s) -> int: ''' parse subprocess run output. Take stdout, split, and take the 'jobnumber' field. + The jobnumber field is assumed to be the list entry after 'total' Returns as int. - snakemake's last line in a quiet dryrun has: - totaljobs, jobcount, min threads and max threads. 
''' - try: - return (int(_s.stdout.split()[-3])) - except IndexError: - return (0) + for i in range(len(_s.stdout.split())-1): + if 'total' in _s.stdout.split()[i]: + return(int(_s.stdout.split()[i+1])) + return (0) + def createTestData(fp, samples=9) -> None: yaml = YAML() yaml.boolean_representation = ['False', 'True'] - #yaml.default_flow_style = False - #yaml.default_style = None # single end folder (fp / 'SE').mkdir() # paired end folder @@ -46,7 +43,7 @@ def createTestData(fp, samples=9) -> None: (fp / 'allelic_bam_input' / 'deepTools_qc' / 'bamPEFragmentSize' / 'fragmentSize.metric.tsv' ).touch() (fp / 'allelic_bam_input' / 'Sambamba').mkdir(parents=True) (fp / 'allelic_bam_input' / 'bamCoverage' / 'allele_specific').mkdir(parents=True) - + (fp / 'ref').mkdir() (fp / 'ref' / 'genes.gtf').touch() with open(fp / 'ref' / 'genome.fa', 'w') as f: @@ -77,18 +74,18 @@ def createTestData(fp, samples=9) -> None: sample = s+1 # SE (fp / "SE" / "sample{}_R1.fastq.gz".format(sample)).touch() - + # PE (fp / "PE" / "sample{}_R1.fastq.gz".format(sample)).touch() (fp / "PE" / "sample{}_R2.fastq.gz".format(sample)).touch() - + # bam_input (fp / "bam_input" / "sample{}.bam".format(sample)).touch() (fp / "bam_input" / "filtered_bam" / "sample{}.filtered.bam".format(sample)).touch() (fp / "bam_input" / "filtered_bam" / "sample{}.filtered.bam.bai".format(sample)).touch() (fp / "bam_input" / "Sambamba" / "sample{}.markdup.txt".format(sample)).touch() (fp / "bam_input" / "bamCoverage" / "sample{}.filtered.seq_depth_norm.bw".format(sample)).touch() - + # allelic_bams (fp / "allelic_bam_input" / "sample{}.bam".format(sample)).touch() (fp / "allelic_bam_input" / "allelic_bams" / "sample{}.genome1.sorted.bam".format(sample)).touch() @@ -215,12 +212,54 @@ def ifs(tmp_path_factory): createTestData(fp) return fp +class TestmakePairs(): + def test_default(self, ifs): + ci = [ + "makePairs", + '-i', + ifs / 'PE', + '-o', + ifs / 'output', + ifs / 'org.yaml', + '--VCFfile', + ifs / 
'allelic_input' / 'file.vcf.gz', + '--strains', + 'strain1,strain2', + '--snakemakeOptions', + SMKOPTS + ] + print(' '.join([str(i) for i in ci])) + _p = sp.run(ci, capture_output=True, text=True) + assert _p.returncode == 0 + assert parseSpOut(_p) == 176 + + def test_dag(self, ifs): + ci = [ + "makePairs", + '-i', + ifs / 'PE', + '-o', + ifs / 'output', + '--DAG', + ifs / 'org.yaml', + '--VCFfile', + ifs / 'allelic_input' / 'file.vcf.gz', + '--strains', + 'strain1,strain2', + '--snakemakeOptions', + SMKOPTS + ] + print(' '.join([str(i) for i in ci])) + _p = sp.run(ci, capture_output=True, text=True) + assert _p.returncode == 0 + assert parseSpOut(_p) == 176 + class TestCreateindices: - def test_default(self): + def test_default(self, ifs): ci = [ 'createIndices', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, '--genome', @@ -233,11 +272,11 @@ def test_default(self): _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 assert parseSpOut(_p) == 21 - def test_rmsk(self): + def test_rmsk(self, ifs): ci = [ 'createIndices', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, '--genome', @@ -251,12 +290,12 @@ def test_rmsk(self): print(' '.join([str(i) for i in ci])) _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 - assert parseSpOut(_p) == 22 - def test_DAG(self): + assert parseSpOut(_p) == 22 + def test_DAG(self, ifs): ci = [ 'createIndices', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, '--genome', @@ -272,11 +311,11 @@ def test_DAG(self): _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 assert parseSpOut(_p) == 22 - def test_spikein(self): + def test_spikein(self, ifs): ci = [ 'createIndices', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, '--genome', @@ -301,11 +340,11 @@ class TestDNAmapping(): def test_default(self, ifs): # PE ci = [ - "DNA-mapping", + "DNAmapping", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 
'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml' @@ -316,11 +355,11 @@ def test_default(self, ifs): assert parseSpOut(_p) == 143 def test_properPairs(self, ifs): ci = [ - "DNA-mapping", + "DNAmapping", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -336,11 +375,11 @@ def test_properPairs(self, ifs): assert parseSpOut(_p) == 152 def test_bcExtract(self, ifs): ci = [ - "DNA-mapping", + "DNAmapping", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -357,11 +396,11 @@ def test_bcExtract(self, ifs): assert parseSpOut(_p) == 143 def test_UMIDedup(self, ifs): ci = [ - "DNA-mapping", + "DNAmapping", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -377,11 +416,11 @@ def test_UMIDedup(self, ifs): assert parseSpOut(_p) == 161 def test_UMIDedupbcExtract(self, ifs): ci = [ - "DNA-mapping", + "DNAmapping", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -398,11 +437,11 @@ def test_UMIDedupbcExtract(self, ifs): assert parseSpOut(_p) == 152 def test_DAG(self, ifs): ci = [ - "DNA-mapping", + "DNAmapping", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -419,11 +458,11 @@ def test_DAG(self, ifs): assert parseSpOut(_p) == 161 def test_bwa(self, ifs): ci = [ - "DNA-mapping", + "DNAmapping", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -438,11 +477,11 @@ def test_bwa(self, ifs): assert parseSpOut(_p) == 143 def test_bwa2(self, ifs): ci = [ - "DNA-mapping", + "DNAmapping", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -457,11 +496,11 @@ def test_bwa2(self, ifs): assert parseSpOut(_p) == 143 def test_se(self, ifs): ci = [ - "DNA-mapping", + "DNAmapping", '-i', ifs / 'SE', 
'-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -472,11 +511,11 @@ def test_se(self, ifs): assert parseSpOut(_p) == 125 def test_seproperPairs(self, ifs): ci = [ - "DNA-mapping", + "DNAmapping", '-i', ifs / 'SE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -494,7 +533,7 @@ def test_seproperPairs(self, ifs): class TestChIPseq: def test_default(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -510,7 +549,7 @@ def test_default(self, ifs): assert parseSpOut(_p) == 59 def test_nosamplesheet(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--snakemakeOptions', @@ -524,7 +563,7 @@ def test_nosamplesheet(self, ifs): assert parseSpOut(_p) == 37 def test_nosamplesheet_genrich(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--snakemakeOptions', @@ -540,7 +579,7 @@ def test_nosamplesheet_genrich(self, ifs): assert parseSpOut(_p) == 35 def test_broad(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -556,7 +595,7 @@ def test_broad(self, ifs): assert parseSpOut(_p) == 77 def test_genrich(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -574,7 +613,7 @@ def test_genrich(self, ifs): assert parseSpOut(_p) == 56 def test_seacr(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -592,7 +631,7 @@ def test_seacr(self, ifs): assert parseSpOut(_p) == 89 def test_seacr_spikein(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -611,7 +650,7 @@ def test_seacr_spikein(self, ifs): assert parseSpOut(_p) == 130 def test_SE(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -628,7 +667,7 @@ def test_SE(self, ifs): assert parseSpOut(_p) == 59 def test_l2ratio(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 
'bam_input', '--sampleSheet', @@ -646,7 +685,7 @@ def test_l2ratio(self, ifs): assert parseSpOut(_p) == 53 def test_default_noInput(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -662,7 +701,7 @@ def test_default_noInput(self, ifs): assert parseSpOut(_p) == 38 def test_genrich_noInput(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -680,7 +719,7 @@ def test_genrich_noInput(self, ifs): assert parseSpOut(_p) == 32 def test_seacr_noInput(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -698,7 +737,7 @@ def test_seacr_noInput(self, ifs): assert parseSpOut(_p) == 62 def test_seacr_spikein_noInput(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -717,9 +756,9 @@ def test_seacr_spikein_noInput(self, ifs): assert parseSpOut(_p) == 83 def test_frombam(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 'filtered_bam', '--sampleSheet', @@ -735,9 +774,9 @@ def test_frombam(self, ifs): assert parseSpOut(_p) == 119 def test_frombam_noInput(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 'filtered_bam', '--sampleSheet', @@ -753,7 +792,7 @@ def test_frombam_noInput(self, ifs): assert parseSpOut(_p) == 80 def test_spikein(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '--useSpikeInForNorm', '-d', ifs / 'bam_input', @@ -770,7 +809,7 @@ def test_spikein(self, ifs): assert parseSpOut(_p) == 106 def test_spikein_noInput(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '--useSpikeInForNorm', '-d', ifs / 'bam_input', @@ -787,10 +826,10 @@ def test_spikein_noInput(self, ifs): assert parseSpOut(_p) == 65 def test_spikeinfrombam(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '--useSpikeInForNorm', '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 'filtered_bam', '--sampleSheet', 
@@ -806,12 +845,12 @@ def test_spikeinfrombam(self, ifs): assert parseSpOut(_p) == 142 def test_spikeinfrombamTSSnorm(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '--useSpikeInForNorm', '--getSizeFactorsFrom', 'TSS', '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 'filtered_bam', '--sampleSheet', @@ -827,12 +866,12 @@ def test_spikeinfrombamTSSnorm(self, ifs): assert parseSpOut(_p) == 118 def test_spikeinfrombaminputnorm(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '--useSpikeInForNorm', '--getSizeFactorsFrom', 'input', '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 'filtered_bam', '--sampleSheet', @@ -848,7 +887,7 @@ def test_spikeinfrombaminputnorm(self, ifs): assert parseSpOut(_p) == 115 def test_allelic(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'allelic_bam_input', '--sampleSheet', @@ -864,7 +903,7 @@ def test_allelic(self, ifs): assert parseSpOut(_p) == 37 def test_multicomp(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -880,7 +919,7 @@ def test_multicomp(self, ifs): assert parseSpOut(_p) == 81 def test_multicomp_genrich(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -898,7 +937,7 @@ def test_multicomp_genrich(self, ifs): assert parseSpOut(_p) == 82 def test_multicomp_broad(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -914,9 +953,9 @@ def test_multicomp_broad(self, ifs): assert parseSpOut(_p) == 99 def test_multicomp_fromBam(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 'filtered_bam', '--sampleSheet', @@ -932,9 +971,9 @@ def test_multicomp_fromBam(self, ifs): assert parseSpOut(_p) == 141 def test_multicomp_fromBam_Genrich(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 'filtered_bam', '--sampleSheet', @@ -952,7 +991,7 @@ def 
test_multicomp_fromBam_Genrich(self, ifs): assert parseSpOut(_p) == 142 def test_multicomp_spikein(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -969,7 +1008,7 @@ def test_multicomp_spikein(self, ifs): assert parseSpOut(_p) == 126 def test_multicomp_spikein_genrich(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -988,7 +1027,7 @@ def test_multicomp_spikein_genrich(self, ifs): assert parseSpOut(_p) == 127 def test_multicomp_spikein_noInput(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -1005,7 +1044,7 @@ def test_multicomp_spikein_noInput(self, ifs): assert parseSpOut(_p) == 81 def test_multicomp_spikein_noInput_Genrich(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -1024,9 +1063,9 @@ def test_multicomp_spikein_noInput_Genrich(self, ifs): assert parseSpOut(_p) == 79 def test_multicomp_spikein_fromBam(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 'filtered_bam', '--sampleSheet', @@ -1043,9 +1082,9 @@ def test_multicomp_spikein_fromBam(self, ifs): assert parseSpOut(_p) == 162 def test_multicomp_spikein_fromBam_genrich(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 'filtered_bam', '--sampleSheet', @@ -1064,9 +1103,9 @@ def test_multicomp_spikein_fromBam_genrich(self, ifs): assert parseSpOut(_p) == 163 def test_multicomp_spikein_fromBam_noInput(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 'filtered_bam', '--sampleSheet', @@ -1083,9 +1122,9 @@ def test_multicomp_spikein_fromBam_noInput(self, ifs): assert parseSpOut(_p) == 105 def test_multicomp_spikein_fromBam_noInput_genrich(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 
'filtered_bam', '--sampleSheet', @@ -1104,9 +1143,9 @@ def test_multicomp_spikein_fromBam_noInput_genrich(self, ifs): assert parseSpOut(_p) == 103 def test_multicomp_fromBam_noInput_SEACR(self, ifs): ci = [ - "ChIP-seq", + "ChIPseq", '-d', - 'outdir', + ifs / 'outdir', '--fromBAM', ifs / 'bam_input' / 'filtered_bam', '--sampleSheet', @@ -1126,11 +1165,11 @@ def test_multicomp_fromBam_noInput_SEACR(self, ifs): class TestmRNAseq: def test_default(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml' @@ -1141,11 +1180,11 @@ def test_default(self, ifs): assert parseSpOut(_p) == 167 def test_DE(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1158,11 +1197,11 @@ def test_DE(self, ifs): assert parseSpOut(_p) == 168 def test_rMats(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--rMats', @@ -1176,11 +1215,11 @@ def test_rMats(self, ifs): assert parseSpOut(_p) == 170 def test_almode(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1195,11 +1234,11 @@ def test_almode(self, ifs): assert parseSpOut(_p) == 125 def test_trim(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1213,11 +1252,11 @@ def test_trim(self, ifs): assert parseSpOut(_p) == 177 def test_alfreemode(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1232,11 +1271,11 @@ def test_alfreemode(self, ifs): assert parseSpOut(_p) == 188 def test_bcExtract(self, ifs): 
ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1251,11 +1290,11 @@ def test_bcExtract(self, ifs): assert parseSpOut(_p) == 168 def test_bcExtractUMIdedup(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1271,11 +1310,11 @@ def test_bcExtractUMIdedup(self, ifs): assert parseSpOut(_p) == 177 def test_multicomp(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet_mc.tsv', '--snakemakeOptions', @@ -1291,11 +1330,11 @@ def test_multicomp(self, ifs): assert parseSpOut(_p) == 165 def test_SE(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'SE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1308,11 +1347,11 @@ def test_SE(self, ifs): assert parseSpOut(_p) == 149 def test_SEalmode(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'SE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1327,11 +1366,11 @@ def test_SEalmode(self, ifs): assert parseSpOut(_p) == 107 def test_SEtrim(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'SE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1345,11 +1384,11 @@ def test_SEtrim(self, ifs): assert parseSpOut(_p) == 158 def test_SEalfreemode(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'SE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1364,11 +1403,11 @@ def test_SEalfreemode(self, ifs): assert parseSpOut(_p) == 169 def test_SEfastqc(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'SE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 
'sampleSheet.tsv', '--snakemakeOptions', @@ -1383,11 +1422,11 @@ def test_SEfastqc(self, ifs): assert parseSpOut(_p) == 176 def test_SEfrombam(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'bam_input' / 'filtered_bam', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1401,11 +1440,11 @@ def test_SEfrombam(self, ifs): assert parseSpOut(_p) == 114 def test_threeprime(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1420,11 +1459,11 @@ def test_threeprime(self, ifs): assert parseSpOut(_p) == 189 def test_threeprimeqc(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1439,11 +1478,11 @@ def test_threeprimeqc(self, ifs): assert parseSpOut(_p) == 232 def test_allelic(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -1460,11 +1499,11 @@ def test_allelic(self, ifs): assert parseSpOut(_p) == 253 def test_allelicfrombam(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'allelic_bam_input' / 'filtered_bam', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -1482,11 +1521,11 @@ def test_allelicfrombam(self, ifs): assert parseSpOut(_p) == 197 def test_allelicDE(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -1505,11 +1544,11 @@ def test_allelicDE(self, ifs): assert parseSpOut(_p) == 254 def test_allelicDE_SNPfile(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -1528,11 +1567,11 @@ def test_allelicDE_SNPfile(self, ifs): assert 
parseSpOut(_p) == 252 def test_allelicDEsinglestrain(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -1551,11 +1590,11 @@ def test_allelicDEsinglestrain(self, ifs): assert parseSpOut(_p) == 254 def test_allelicDEalfree(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', @@ -1574,11 +1613,11 @@ def test_allelicDEalfree(self, ifs): assert parseSpOut(_p) == 330 def test_allelic_count_fromBam_singlecomp(self, ifs): ci = [ - "mRNA-seq", - '-i', + "mRNAseq", + '-i', ifs / 'allelic_bam_input' / 'allelic_bams', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, '--fromBAM', @@ -1596,11 +1635,11 @@ def test_allelic_count_fromBam_singlecomp(self, ifs): assert parseSpOut(_p) == 105 def test_allelic_count_fromBam_multicomp(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'allelic_bam_input' / 'allelic_bams', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, '--fromBAM', @@ -1618,11 +1657,11 @@ def test_allelic_count_fromBam_multicomp(self, ifs): assert parseSpOut(_p) == 108 def test_allelic_mapping_fromBam_multicomp(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'allelic_bam_input' / 'filtered_bam', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, '--fromBAM', @@ -1644,17 +1683,17 @@ def test_allelic_mapping_fromBam_multicomp(self, ifs): assert parseSpOut(_p) == 201 def test_allelic_alfree_multicomp(self, ifs): ci = [ - "mRNA-seq", + "mRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml', '--sampleSheet', ifs / 'sampleSheet_mc.tsv', - '-m', + '-m', 'allelic-mapping,deepTools_qc,alignment-free', '--VCFfile', ifs / 'allelic_input' / 'file.vcf.gz', @@ -1669,11 +1708,11 @@ def test_allelic_alfree_multicomp(self, ifs): class TestncRNAseq(): def 
test_default(self, ifs): ci = [ - "noncoding-RNA-seq", + "ncRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml' @@ -1684,11 +1723,11 @@ def test_default(self, ifs): assert parseSpOut(_p) == 137 def test_DE(self, ifs): ci = [ - "noncoding-RNA-seq", + "ncRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1701,11 +1740,11 @@ def test_DE(self, ifs): assert parseSpOut(_p) == 139 def test_SE(self, ifs): ci = [ - "noncoding-RNA-seq", + "ncRNAseq", '-i', ifs / 'SE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1718,12 +1757,12 @@ def test_SE(self, ifs): assert parseSpOut(_p) == 120 def test_frombam(self, ifs): ci = [ - "noncoding-RNA-seq", + "ncRNAseq", '-i', ifs / 'bam_input' / 'filtered_bam', '--fromBAM', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1736,11 +1775,11 @@ def test_frombam(self, ifs): assert parseSpOut(_p) == 94 def test_multicomp(self, ifs): ci = [ - "noncoding-RNA-seq", + "ncRNAseq", '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet_mc.tsv', '--snakemakeOptions', @@ -1759,7 +1798,7 @@ def test_default(self, ifs): '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--mode', 'STARsolo', '--snakemakeOptions', @@ -1776,7 +1815,7 @@ def test_skipvelo(self, ifs): '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--mode', 'STARsolo', '--skipVelocyto', @@ -1794,7 +1833,7 @@ def test_alevin(self, ifs): '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--mode', 'Alevin', '--snakemakeOptions', @@ -1811,7 +1850,7 @@ def test_alevinskipvelo(self, ifs): '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--mode', 'Alevin', '--skipVelocyto', @@ -1831,7 +1870,7 @@ def test_default(self, ifs): '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', 
ifs / 'sampleSheet.tsv', '--snakemakeOptions', @@ -1848,7 +1887,7 @@ def test_no_sampleSheet(self, ifs): '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml' @@ -1863,7 +1902,7 @@ def test_bwameth2(self, ifs): '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--aligner', 'bwameth2', '--sampleSheet', @@ -1882,7 +1921,7 @@ def test_trimgcbias(self, ifs): '-i', ifs / 'PE', '-o', - 'outdir', + ifs / 'outdir', '--trim', '--GCbias', '--sampleSheet', @@ -1901,7 +1940,7 @@ def test_frombam(self, ifs): '-i', ifs / 'bam_input' / 'filtered_bam', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--fromBAM', @@ -1920,7 +1959,7 @@ def test_frombamfqc(self, ifs): '-i', ifs / 'bam_input' / 'filtered_bam', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--fromBAM', @@ -1940,7 +1979,7 @@ def test_frombamskipqc(self, ifs): '-i', ifs / 'bam_input' / 'filtered_bam', '-o', - 'outdir', + ifs / 'outdir', '--sampleSheet', ifs / 'sampleSheet.tsv', '--fromBAM', @@ -1958,7 +1997,7 @@ def test_frombamskipqc(self, ifs): class TestATAC(): def test_default(self, ifs): ci = [ - "ATAC-seq", + "ATACseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -1973,7 +2012,7 @@ def test_default(self, ifs): assert parseSpOut(_p) == 63 def test_no_sampleSheet(self, ifs): ci = [ - "ATAC-seq", + "ATACseq", '-d', ifs / 'bam_input', '--snakemakeOptions', @@ -1986,7 +2025,7 @@ def test_no_sampleSheet(self, ifs): assert parseSpOut(_p) == 47 def test_genrich(self, ifs): ci = [ - "ATAC-seq", + "ATACseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -2003,7 +2042,7 @@ def test_genrich(self, ifs): assert parseSpOut(_p) == 74 def test_HMMRATAC(self, ifs): ci = [ - "ATAC-seq", + "ATACseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -2020,7 +2059,7 @@ def test_HMMRATAC(self, ifs): assert parseSpOut(_p) == 73 def test_sieve(self, ifs): ci = [ - "ATAC-seq", + "ATACseq", '-d', ifs / 'bam_input', '--sampleSheet', 
@@ -2039,7 +2078,7 @@ def test_sieve(self, ifs): assert parseSpOut(_p) == 63 def test_frombam(self, ifs): ci = [ - "ATAC-seq", + "ATACseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -2056,8 +2095,8 @@ def test_frombam(self, ifs): assert parseSpOut(_p) == 114 def test_multicomp_default(self, ifs): ci = [ - "ATAC-seq", - '-d', + "ATACseq", + '-d', ifs / 'bam_input', '--sampleSheet', ifs / 'sampleSheet_mc.tsv', @@ -2071,8 +2110,8 @@ def test_multicomp_default(self, ifs): assert parseSpOut(_p) == 79 def test_multicomp_genrich(self, ifs): ci = [ - "ATAC-seq", - '-d', + "ATACseq", + '-d', ifs / 'bam_input', '--sampleSheet', ifs / 'sampleSheet_mc.tsv', @@ -2088,8 +2127,8 @@ def test_multicomp_genrich(self, ifs): assert parseSpOut(_p) == 92 def test_multicomp_HMMRATAC(self, ifs): ci = [ - "ATAC-seq", - '-d', + "ATACseq", + '-d', ifs / 'bam_input', '--sampleSheet', ifs / 'sampleSheet_mc.tsv', @@ -2105,7 +2144,7 @@ def test_multicomp_HMMRATAC(self, ifs): assert parseSpOut(_p) == 89 def test_multicomp_sieve(self, ifs): ci = [ - "ATAC-seq", + "ATACseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -2124,7 +2163,7 @@ def test_multicomp_sieve(self, ifs): assert parseSpOut(_p) == 79 def test_multicomp_frombam(self, ifs): ci = [ - "ATAC-seq", + "ATACseq", '-d', ifs / 'bam_input', '--sampleSheet', @@ -2147,7 +2186,7 @@ def test_default(self, ifs): '-i', ifs / 'PE', '-o', - 'output', + ifs / 'output', '--snakemakeOptions', SMKOPTS, ifs / 'org.yaml' @@ -2156,13 +2195,14 @@ def test_default(self, ifs): _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 assert parseSpOut(_p) == 102 + def test_ice(self, ifs): ci = [ "HiC", '-i', ifs / 'PE', '-o', - 'output', + ifs / 'output', '--snakemakeOptions', SMKOPTS, '--correctionMethod', @@ -2173,13 +2213,14 @@ def test_ice(self, ifs): _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 assert parseSpOut(_p) == 111 + def test_trim(self, ifs): ci = [ "HiC", '-i', ifs / 'PE', '-o', - 'output', + ifs / 
'output', '--snakemakeOptions', SMKOPTS, '--trim', @@ -2189,13 +2230,14 @@ def test_trim(self, ifs): _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 assert parseSpOut(_p) == 111 + def test_dpnii(self, ifs): ci = [ "HiC", '-i', ifs / 'PE', '-o', - 'output', + ifs / 'output', '--snakemakeOptions', SMKOPTS, '--enzyme', @@ -2206,13 +2248,14 @@ def test_dpnii(self, ifs): _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 assert parseSpOut(_p) == 102 + def test_notad(self, ifs): ci = [ "HiC", '-i', ifs / 'PE', '-o', - 'output', + ifs / 'output', '--snakemakeOptions', SMKOPTS, '--noTAD', @@ -2222,13 +2265,14 @@ def test_notad(self, ifs): _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 assert parseSpOut(_p) == 93 + def test_bwamem2(self, ifs): ci = [ "HiC", '-i', ifs / 'PE', '-o', - 'output', + ifs / 'output', '--snakemakeOptions', SMKOPTS, '--aligner', @@ -2247,7 +2291,7 @@ def test_default(self, ifs): '-i', ifs / 'PE', '-o', - 'output', + ifs / 'output', '--snakemakeOptions', SMKOPTS, '--fastqc', @@ -2258,13 +2302,14 @@ def test_default(self, ifs): _p = sp.run(ci, capture_output=True, text=True) assert _p.returncode == 0 assert parseSpOut(_p) == 84 + def test_DAG(self, ifs): ci = [ "preprocessing", '-i', ifs / 'PE', '-o', - 'output', + ifs / 'output', '--snakemakeOptions', SMKOPTS, '--fastqc', diff --git a/tests/test_mRNA.py b/tests/test_mRNA.py index 15069885d..973633268 100644 --- a/tests/test_mRNA.py +++ b/tests/test_mRNA.py @@ -36,7 +36,7 @@ def createTestData(fp): "genes_gtf" : gtfout.as_posix(), "extended_coding_regions_gtf" : "", "blacklist_bed": "", - "ignoreForNormalization": "" + "ignoreForNormalization": "" } # set up yaml yaml = YAML() @@ -53,35 +53,29 @@ def ifs(tmp_path_factory): class TestmRNAseq: def test_mrna(self, ifs): org = ifs / 'org.yaml' - clusterconfig = Path('tests') / 'data' / 'cluster_config.yaml' sp.run( [ - 'mRNA-seq', + 'mRNAseq', '-i', Path('tests') / 'data' / 
'mRNA_mIFNB', '-o', 'test_mrna', - '--clusterConfig', - clusterconfig, org ] ) - assert Path('test_mrna/mRNA-seq_snakePipes.done').is_file() == True - + assert Path('test_mrna/mRNAseq_snakePipes.done').is_file() + def test_mrna4(self, ifs): org = ifs / 'org.yaml' - clusterconfig = Path('tests') / 'data' / 'cluster_config.yaml' sp.run( [ - 'mRNA-seq', + 'mRNAseq', '-i', Path('tests') / 'data' / 'mRNA_BcellPancreas', '-o', 'test_mrna_4sample', - '--clusterConfig', - clusterconfig, org ] ) - assert Path('test_mrna_4sample/mRNA-seq_snakePipes.done').is_file() == True + assert Path('test_mrna_4sample/mRNAseq_snakePipes.done').is_file()