diff --git a/.github/workflows/conventional-prs.yaml b/.github/workflows/conventional-prs.yaml index 210988fa..2a544c53 100644 --- a/.github/workflows/conventional-prs.yaml +++ b/.github/workflows/conventional-prs.yaml @@ -13,6 +13,6 @@ jobs: name: Validate PR title runs-on: ubuntu-latest steps: - - uses: amannn/action-semantic-pull-request@v5 + - uses: amannn/action-semantic-pull-request@v6 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7ff41d23..f3d83378 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,60 +11,106 @@ jobs: formatting: runs-on: ubuntu-latest steps: - - name: Checkout with submodules - uses: actions/checkout@v3 - with: - submodules: recursive - fetch-depth: 0 - - name: Formatting - uses: github/super-linter@v5 - env: - VALIDATE_ALL_CODEBASE: false - DEFAULT_BRANCH: master - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - VALIDATE_SNAKEMAKE_SNAKEFMT: true + - name: Checkout with submodules + uses: actions/checkout@v6 + with: + submodules: recursive + fetch-depth: 0 + - name: Formatting + uses: super-linter/super-linter@v8 + env: + VALIDATE_ALL_CODEBASE: false + DEFAULT_BRANCH: master + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + VALIDATE_SNAKEMAKE_SNAKEFMT: true linting: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Linting - uses: snakemake/snakemake-github-action@v1.22.0 - with: - directory: .test - snakefile: workflow/Snakefile - args: "--configfile .test/config_complex/config.yaml --lint" + - uses: actions/checkout@v6 + - name: Linting + uses: snakemake/snakemake-github-action@v2 + with: + directory: .test + snakefile: workflow/Snakefile + args: "--configfile .test/config_complex/config.yaml --lint" - run-workflow: + testing: runs-on: ubuntu-latest needs: - linting - formatting + strategy: + fail-fast: true + matrix: + case: + - name: basic model, no batch_effects + args: >- + --configfile .test/config_basic/config.yaml + 
report: true + free_disk_space: false + + - name: multiple variables_of_interest, include batch_effects + args: >- + --configfile .test/config_complex/config.yaml + report: true + free_disk_space: false + - name: sra file download, no batch_effects + args: >- + --configfile .test/config_sra/config.yaml + report: true + free_disk_space: true + + name: test ${{ matrix.case.name }} + steps: - - name: Checkout repository with submodules - uses: actions/checkout@v3 - with: - submodules: recursive - - name: Test workflow (basic model, no batch_effects) - uses: snakemake/snakemake-github-action@v1.22.0 - with: - directory: .test - snakefile: workflow/Snakefile - args: "--configfile .test/config_basic/config.yaml --use-conda --show-failed-logs --cores 2 --conda-cleanup-pkgs cache" - - name: Test report (basic model, no batch_effects) - uses: snakemake/snakemake-github-action@v1.22.0 - with: - directory: .test - snakefile: workflow/Snakefile - args: "--configfile .test/config_basic/config.yaml --report report.zip" - - name: Test workflow (multiple variables_of_interest, include batch_effects) - uses: snakemake/snakemake-github-action@v1.22.0 - with: - directory: .test - snakefile: workflow/Snakefile - args: "--configfile .test/config_complex/config.yaml --use-conda --show-failed-logs --cores 2 --conda-cleanup-pkgs cache" - - name: Test report (multiple variables_of_interest, include batch_effects) - uses: snakemake/snakemake-github-action@v1.22.0 - with: - directory: .test - snakefile: workflow/Snakefile - args: "--configfile .test/config_complex/config.yaml --report report.zip" + - name: update apt + if: ${{ matrix.case.free_disk_space }} + run: sudo apt-get update + + - name: Free Disk Space (Ubuntu) + if: ${{ matrix.case.free_disk_space }} + uses: jlumbroso/free-disk-space@v1.3.0 + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + + # all of these default to true, but feel free to set to + # 
"false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: false + swap-storage: true + + - name: Checkout repository with submodules + uses: actions/checkout@v6 + with: + submodules: recursive + + - name: run workflow + uses: snakemake/snakemake-github-action@v2 + with: + directory: .test + snakefile: workflow/Snakefile + args: >- + --conda-cleanup-pkgs cache + --software-deployment-method conda + --show-failed-logs + --cores 8 + ${{ matrix.case.args }} + show-disk-usage-on-error: true + + - name: generate report + if: ${{ matrix.case.report }} + uses: snakemake/snakemake-github-action@v2 + with: + directory: .test + snakefile: workflow/Snakefile + args: >- + --software-deployment-method conda + --cores 1 + --report report.zip + ${{ matrix.case.args }} + show-disk-usage-on-error: true diff --git a/.github/workflows/release-please.yaml b/.github/workflows/release-please.yaml index 0a16d028..546a7103 100644 --- a/.github/workflows/release-please.yaml +++ b/.github/workflows/release-please.yaml @@ -13,7 +13,7 @@ jobs: release-please: runs-on: ubuntu-latest steps: - - uses: google-github-actions/release-please-action@v3 + - uses: googleapis/release-please-action@v4 with: release-type: simple token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.test/config_basic/config.yaml b/.test/config_basic/config.yaml index 4e784c40..7f3b4181 100644 --- a/.test/config_basic/config.yaml +++ b/.test/config_basic/config.yaml @@ -4,13 +4,11 @@ samples: config_basic/samples.tsv units: config_basic/units.tsv - ref: species: saccharomyces_cerevisiae - release: 100 + release: 115 build: R64-1-1 - trimming: activate: True diff --git a/.test/config_complex/config.yaml b/.test/config_complex/config.yaml index e3afb8e4..4f4f376b 100644 --- a/.test/config_complex/config.yaml +++ b/.test/config_complex/config.yaml @@ -4,13 +4,11 @@ samples: config_complex/samples.tsv units: config_complex/units.tsv - ref: species: 
saccharomyces_cerevisiae - release: 100 + release: 115 build: R64-1-1 - trimming: activate: False @@ -44,4 +42,3 @@ params: star: index: "" align: "" - diff --git a/.test/config_sra/config.yaml b/.test/config_sra/config.yaml new file mode 100644 index 00000000..22cc23f5 --- /dev/null +++ b/.test/config_sra/config.yaml @@ -0,0 +1,40 @@ +# For a fully commented config.yaml file, see the main `config/config.yaml` +# example file. + +samples: config_sra/samples.tsv +# This data comes from: +# * https://www.ncbi.nlm.nih.gov/bioproject/PRJEB30262 +# * https://www.ncbi.nlm.nih.gov/Traces/study/?acc=PRJEB30262&o=acc_s%3Aa +units: config_sra/units.tsv + +ref: + species: saccharomyces_cerevisiae + release: 115 + build: R64-1-1 + +trimming: + activate: True + +mergeReads: + activate: False + +pca: + activate: True + labels: + - genotype + +diffexp: + variables_of_interest: + genotype: + base_level: control + batch_effects: "" + contrasts: + stb5_vs_control: + variable_of_interest: genotype + level_of_interest: stb5 + model: ~genotype + +params: + star: + index: "" + align: "" diff --git a/.test/config_sra/samples.tsv b/.test/config_sra/samples.tsv new file mode 100644 index 00000000..d29b5e20 --- /dev/null +++ b/.test/config_sra/samples.tsv @@ -0,0 +1,5 @@ +sample_name genotype +control_g_1 control +control_g_2 control +stb5_g_1 stb5 +stb5_g_2 stb5 \ No newline at end of file diff --git a/.test/config_sra/units.tsv b/.test/config_sra/units.tsv new file mode 100644 index 00000000..80d235a2 --- /dev/null +++ b/.test/config_sra/units.tsv @@ -0,0 +1,9 @@ +sample_name unit_name fq1 fq2 sra fastp_adapters fastp_extra strandedness +control_g_1 lane1 ERR2985811 +control_g_1 lane2 ERR2985812 +control_g_2 lane1 ERR2985815 +control_g_2 lane2 ERR2985816 +stb5_g_1 lane1 ERR2985827 +stb5_g_1 lane2 ERR2985828 +stb5_g_2 lane1 ERR2985831 +stb5_g_2 lane2 ERR2985832 \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index b0b9266b..4b593948 100644 --- 
a/config/config.yaml +++ b/config/config.yaml @@ -5,13 +5,36 @@ samples: config/samples.tsv # sample). units: config/units.tsv - ref: - # Ensembl species name + # Ensembl species name: + # This species needs to be available for download via the Ensembl FTP site. + # For a quick check, see the Ensembl species list: + # https://www.ensembl.org/info/about/species.html + # For full valid species names, consult the respective table for the release + # you specify, for example for "115" this is at: + # https://ftp.ensembl.org/pub/release-115/species_EnsemblVertebrates.txt + # And to browse available downloads in more detail, see the FTP server: + # https://ftp.ensembl.org/pub/ species: homo_sapiens - # Ensembl release (make sure to take one where snpeff data is available, check 'snpEff databases' output) - release: 100 - # Genome build + # Ensembl release version: + # Update this to the latest working version, when you first set up a new + # analysis on a dataset. You can usually find the latest release in the + # Ensembl blog, by looking at the latest posts of the release category: + # https://www.ensembl.info/category/01-release/ + # Later, it only makes sense to update (or downgrade) the release version + # if either (a) the version you are using consistently fails to download + # (some Ensembl release versions are just broken) or (b) you know that a + # newer version will include changes that will fix some error or add + # transcripts that will be relevant to your analysis. + release: 115 + # genome build: + # Usually, this should just be the main build listed in: + # https://ftp.ensembl.org/pub/release-115/species_EnsemblVertebrates.txt + # For example, for homo_sapiens, you strip the assembly column entry + # "GRCh38.p12" down to "GRCh38". If in doubt, navigate to the respective + # cdna folder on the FTP server, and look for the correct build in the + # file names there.
For example "GRCh38" in: + # https://ftp.ensembl.org/pub/release-115/fasta/homo_sapiens/cdna/ build: GRCh38 trimming: @@ -64,7 +87,6 @@ diffexp: # model: ~jointly_handled + treatment_1 + treatment_2 model: "" - # passing extra parameters to respective rules in the workflow params: star: diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 0757353b..581fb217 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -57,7 +57,7 @@ def get_units_fastqs(wildcards): return expand( "sra/{accession}_{read}.fastq", accession=accession, - read=["R1", "R2"], + read=["1", "2"], ) if not is_paired_end(wildcards.sample): return [ diff --git a/workflow/rules/trim.smk b/workflow/rules/trim.smk index 96048e2a..e9380981 100644 --- a/workflow/rules/trim.smk +++ b/workflow/rules/trim.smk @@ -4,6 +4,8 @@ rule get_sra: "sra/{accession}_2.fastq", log: "logs/get-sra/{accession}.log", + params: + extra="-x", wrapper: "v7.2.0/bio/sra-tools/fasterq-dump"