From 50e214c5a67bf0d5774dab9b1137efaed17ef902 Mon Sep 17 00:00:00 2001 From: Wenjie Sun Date: Tue, 14 Apr 2026 10:37:47 -0400 Subject: [PATCH 1/3] update .gitignore --- .gitignore | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index 3718ea7..4e32d51 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,10 @@ +index_files/*.csv +index_files/*.xlsx +raw_data/*.gz +.nextflow/* +.nextflow.log* +work/* +results/* bk/ .nextflow/ .nextflow.log* From b4f03cf0899e8323ac1a8bac7014536e30c5600b Mon Sep 17 00:00:00 2001 From: Wenjie Sun Date: Mon, 27 Apr 2026 15:44:08 -0400 Subject: [PATCH 2/3] Update documents --- containers/cuttag-preprocess.def | 1 - nextflow.config | 1 - 2 files changed, 2 deletions(-) diff --git a/containers/cuttag-preprocess.def b/containers/cuttag-preprocess.def index b44bdce..6e399c5 100644 --- a/containers/cuttag-preprocess.def +++ b/containers/cuttag-preprocess.def @@ -2,7 +2,6 @@ Bootstrap: docker From: mambaorg/micromamba:1.5.10 %labels - Author OpenAI Codex Description CUT&Tag / CoCnT preprocessing environment for Nextflow %files diff --git a/nextflow.config b/nextflow.config index 9fbafed..971247e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,6 +1,5 @@ manifest { name = 'cocnt-cuttag-preprocess' - author = 'OpenAI Codex' homePage = '.' description = 'Nextflow pipeline for CoCnT/CUT&Tag barcode rewriting, trimming, alignment, BED, and BigWig generation.' 
version = '0.1.0' From e926c05c1fa018a9db55bedc4e60eb8b1db07a22 Mon Sep 17 00:00:00 2001 From: Wenjie Sun Date: Mon, 27 Apr 2026 15:57:55 -0400 Subject: [PATCH 3/3] Remove Singularity support and Update the document --- METHODS.md | 51 ---------------- README.md | 80 +------------------------ containers/README.md | 51 ---------------- containers/build_singularity_image.sh | 40 ------------- containers/cuttag-preprocess.def | 41 ------------- method.txt | 18 ------ nextflow.config => nextflow.config.temp | 23 ------- 7 files changed, 1 insertion(+), 303 deletions(-) delete mode 100644 containers/README.md delete mode 100755 containers/build_singularity_image.sh delete mode 100644 containers/cuttag-preprocess.def delete mode 100644 method.txt rename nextflow.config => nextflow.config.temp (63%) diff --git a/METHODS.md b/METHODS.md index ee4bce8..1d769f3 100644 --- a/METHODS.md +++ b/METHODS.md @@ -24,8 +24,6 @@ The merged workflow supports two input modes: - `src/rewrite_fastq_barcodes.cpp`: fast C++ implementation for barcode rewriting. - `tools/build_rewrite_fastq_barcodes.sh`: build script for the C++ binary. - `envs/cuttag-preprocess.yml`: Conda environment for all required tools. -- `containers/cuttag-preprocess.sif`: default Singularity/Apptainer image path used by the config. -- `containers/cuttag-preprocess.def`: definition file used to build the Singularity/Apptainer image. ## Required software @@ -44,30 +42,6 @@ For faster barcode rewriting, build the compiled helper once: The wrapper used by Nextflow prefers the compiled binary and falls back to Python only if the binary is unavailable. -If you use `singularity` or `apptainer`, the default image path is `containers/cuttag-preprocess.sif`. 
- -You can build that image from the repository with: - -```bash -containers/build_singularity_image.sh -``` - -or manually on a local Linux machine with root or sudo privileges: - -```bash -sudo singularity build containers/cuttag-preprocess.sif containers/cuttag-preprocess.def -``` - -That image should contain: - -- `python3` -- `cutadapt` -- `bowtie2` -- `samtools` -- `bedtools` -- `bgzip` -- `bedGraphToBigWig` - ## Required inputs - `--input_mode`: `paired_fastq` or `demux`. @@ -184,29 +158,6 @@ nextflow run main.nf -profile slurm,conda \ --out_dir results ``` -Run with Singularity or Apptainer: - -```bash -nextflow run main.nf -profile singularity \ - --input_dir /path/to/fastq \ - --barcode_matrix /path/to/barcode_matrix.csv \ - --ref /path/to/bowtie2/index_basename \ - --chrom_sizes /path/to/genome.chrom.sizes \ - --out_dir results -``` - -Override the default Singularity image location if needed: - -```bash -nextflow run main.nf -profile singularity \ - --singularity_image /path/to/container.sif \ - --input_dir /path/to/fastq \ - --barcode_matrix /path/to/barcode_matrix.csv \ - --ref /path/to/bowtie2/index_basename \ - --chrom_sizes /path/to/genome.chrom.sizes \ - --out_dir results -``` - ## Notes - The pipeline keeps the same adapter sequence and Bowtie2 arguments used in the existing shell script. @@ -216,6 +167,4 @@ nextflow run main.nf -profile singularity \ - The barcode rewrite step is a required part of the workflow and prefers a compiled C++ implementation for speed, while preserving the original Python code as a fallback. - The alignment step preserves the original high-memory setting (`256 GB`, `16 CPUs`, `18h`) but these can be changed in `nextflow.config`. - Intermediate files are published into subdirectories under `--out_dir`. -- The Singularity and Apptainer profiles use `containers/cuttag-preprocess.sif` by default and can be overridden with `--singularity_image`. 
-- The Singularity and Apptainer profiles bind `./data` from the Nextflow launch directory by default. Override with `--container_bind_paths` if references or data are stored elsewhere. - Sample-name filtering is configurable through `--enable_sample_filter` and `--skip_patterns`, but no samples are excluded unless patterns are provided explicitly. diff --git a/README.md b/README.md index 15b092f..4956704 100644 --- a/README.md +++ b/README.md @@ -20,16 +20,13 @@ The pipeline covers: ## Main Files - `main.nf`: main Nextflow workflow -- `nextflow.config`: runtime profiles and resource defaults +- `nextflow.config.temp`: template for runtime profiles and resource defaults; copy it to `nextflow.config` and edit as needed - `bin/rewrite_fastq_barcodes`: wrapper that prefers the compiled barcode-rewrite binary - `bin/rewrite_fastq_barcodes.py`: barcode rewrite helper extracted from the notebook - `bin/modify_scict_header.sh`: generic header normalizer used for sciCT demultiplexing mode - `src/rewrite_fastq_barcodes.cpp`: fast C++ implementation of barcode rewriting - `tools/build_rewrite_fastq_barcodes.sh`: build script for the C++ binary - `envs/cuttag-preprocess.yml`: Conda environment definition -- `envs/cuttag-preprocess-container.yml`: lighter Conda environment used inside the Singularity/Apptainer image -- `containers/cuttag-preprocess.def`: Singularity/Apptainer definition file -- `containers/cuttag-preprocess.sif`: default Singularity image path expected by the config - `METHODS.md`: extended workflow notes and examples ## Faster Barcode Rewriting @@ -183,79 +180,6 @@ nextflow run main.nf -profile slurm,conda \ --out_dir results ``` -### Singularity / Apptainer - -Build the image from the included definition file: - -```bash -containers/build_singularity_image.sh -``` - -On a local Linux machine where you have sudo privileges, the helper runs: - -```bash -sudo singularity build containers/cuttag-preprocess.sif containers/cuttag-preprocess.def -``` - -If you use
Apptainer instead: - -```bash -sudo apptainer build containers/cuttag-preprocess.sif containers/cuttag-preprocess.def -``` - -If you cannot build locally, use a remote builder or ask your HPC admins to build it: - -```bash -singularity build --remote containers/cuttag-preprocess.sif containers/cuttag-preprocess.def -``` - -Equivalent helper command: - -```bash -containers/build_singularity_image.sh remote -``` - -Then run: - -```bash -nextflow run main.nf -profile singularity \ - --input_dir /path/to/fastq \ - --barcode_matrix /path/to/barcode_matrix.csv \ - --ref /path/to/bowtie2/index_basename \ - --chrom_sizes /path/to/genome.chrom.sizes \ - --out_dir results -``` - -To use a different image location: - -```bash -nextflow run main.nf -profile singularity \ - --singularity_image /path/to/container.sif \ - --input_dir /path/to/fastq \ - --barcode_matrix /path/to/barcode_matrix.csv \ - --ref /path/to/bowtie2/index_basename \ - --chrom_sizes /path/to/genome.chrom.sizes \ - --out_dir results -``` - -If your references or data are outside the working directory, bind those filesystem roots into the container. The default bind path is `./data` relative to the directory where you launch Nextflow. 
- -```bash -nextflow run main.nf -profile singularity \ - --container_bind_paths ./data \ - --input_dir /path/to/fastq \ - --barcode_matrix /path/to/barcode_matrix.csv \ - --ref /varidata/research/projects/bbc/versioned_references/latest/data/hg38_gencode/indexes/bowtie2/hg38_gencode \ - --chrom_sizes /varidata/research/projects/bbc/versioned_references/2024-10-31_10.56.03_v17/data/hg38_gencode/sequence/hg38_gencode.fa.fai \ - --out_dir results -``` - -For multiple roots, provide a comma-separated list: - -```bash ---container_bind_paths ./data,/scratch,/home -``` - ## Defaults - sample-name filtering is available but no filename patterns are excluded unless `--skip_patterns` is provided @@ -269,6 +193,4 @@ For multiple roots, provide a comma-separated list: - `nextflow` must be installed on the host system. - The Conda profile creates the software environment automatically from `envs/cuttag-preprocess.yml`. -- The Singularity and Apptainer profiles use `containers/cuttag-preprocess.sif` by default. -- Singularity and Apptainer bind `./data` from the launch directory by default; override with `--container_bind_paths` if needed. - Sample filtering is controlled by `--enable_sample_filter` and `--skip_patterns`. diff --git a/containers/README.md b/containers/README.md deleted file mode 100644 index 7462216..0000000 --- a/containers/README.md +++ /dev/null @@ -1,51 +0,0 @@ -Place the Singularity or Apptainer image for this pipeline at: - -`containers/cuttag-preprocess.sif` - -The image should include: - -- python3 -- cutadapt -- bowtie2 -- samtools -- bedtools -- bgzip -- bedGraphToBigWig - -The container build uses `envs/cuttag-preprocess-container.yml`, a smaller runtime environment than the Nextflow Conda profile. The Conda profile environment keeps the compiler package needed to build the optional fast barcode rewrite binary on HPC. 
- -Build the image from the included definition file on a local Linux machine where you have root or sudo privileges: - -```bash -containers/build_singularity_image.sh -``` - -Equivalent manual command: - -```bash -sudo singularity build containers/cuttag-preprocess.sif containers/cuttag-preprocess.def -``` - -If you use Apptainer instead: - -```bash -sudo apptainer build containers/cuttag-preprocess.sif containers/cuttag-preprocess.def -``` - -If you cannot build locally, use your site's remote builder or ask the cluster admins to build the image: - -```bash -singularity build --remote containers/cuttag-preprocess.sif containers/cuttag-preprocess.def -``` - -Equivalent helper command: - -```bash -containers/build_singularity_image.sh remote -``` - -If you store the image elsewhere, override it with: - -```bash -nextflow run main.nf -profile singularity --singularity_image /path/to/container.sif ... -``` diff --git a/containers/build_singularity_image.sh b/containers/build_singularity_image.sh deleted file mode 100755 index 77f7d4a..0000000 --- a/containers/build_singularity_image.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env bash - -set -euo pipefail - -script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -image="${script_dir}/cuttag-preprocess.sif" -definition="${script_dir}/cuttag-preprocess.def" -mode="${1:-sudo}" - -if command -v apptainer >/dev/null 2>&1; then - runtime="apptainer" -elif command -v singularity >/dev/null 2>&1; then - runtime="singularity" -else - echo "Neither apptainer nor singularity is available in PATH." 
>&2 - exit 1 -fi - -case "${mode}" in - sudo) - exec sudo "${runtime}" build "${image}" "${definition}" - ;; - remote) - exec "${runtime}" build --remote "${image}" "${definition}" - ;; - plain) - exec "${runtime}" build "${image}" "${definition}" - ;; - *) - cat >&2 </dev/null 2>&1; then - MAMBA_BIN="$(command -v micromamba)" - elif [ -x /usr/local/bin/micromamba ]; then - MAMBA_BIN=/usr/local/bin/micromamba - elif [ -x /opt/conda/bin/micromamba ]; then - MAMBA_BIN=/opt/conda/bin/micromamba - else - echo "micromamba was not found in the base image" >&2 - exit 1 - fi - - export MAMBA_ROOT_PREFIX=/opt/conda - "${MAMBA_BIN}" create -y -n cuttag -f /opt/cuttag-preprocess.yml - "${MAMBA_BIN}" clean --all --yes - - ln -sf /opt/conda/envs/cuttag/bin/python /usr/local/bin/python - ln -sf /opt/conda/envs/cuttag/bin/python /usr/local/bin/python3 - ln -sf /opt/conda/envs/cuttag/bin/cutadapt /usr/local/bin/cutadapt - ln -sf /opt/conda/envs/cuttag/bin/bowtie2 /usr/local/bin/bowtie2 - ln -sf /opt/conda/envs/cuttag/bin/samtools /usr/local/bin/samtools - ln -sf /opt/conda/envs/cuttag/bin/bedtools /usr/local/bin/bedtools - ln -sf /opt/conda/envs/cuttag/bin/bgzip /usr/local/bin/bgzip - ln -sf /opt/conda/envs/cuttag/bin/bedGraphToBigWig /usr/local/bin/bedGraphToBigWig - -%environment - export PATH=/opt/conda/envs/cuttag/bin:$PATH - -%runscript - exec "$@" diff --git a/method.txt b/method.txt deleted file mode 100644 index 6c020b7..0000000 --- a/method.txt +++ /dev/null @@ -1,18 +0,0 @@ -Sequencing data were processed with a reproducible Nextflow pipeline. The workflow was designed for paired-end FASTQ files and performs sample filtering, optional barcode correction, adapter trimming, alignment, fragment generation, and normalized signal track generation. - -Input FASTQ files were identified as paired files matching the pattern *_R1.fq.gz and *_R2.fq.gz. The pipeline supports optional exclusion of samples by filename. 
By default, files containing the substrings PosCtrl, NegCtrl, Fiducial, PBS, or Undetermined are excluded, although these patterns can be modified or disabled by the user through pipeline parameters. - -For libraries requiring barcode correction, FASTQ headers were rewritten using a Well-ID barcode matrix. This step replaces the terminal s7/s5 barcode combination in the read header with the corresponding Well-ID from a user-supplied CSV file containing the columns PAGE-1-s7, PAGE-1-s5, PAGE-2-s7, PAGE-2-s5, and Well-ID. All four valid PAGE-1/PAGE-2 s7/s5 combinations are recognized. The rewrite stage is implemented with a compiled C++ helper for performance, with the original Python implementation retained as a fallback. Barcode rewriting can be disabled if the FASTQ headers have already been corrected. An optional suffix can also be appended to the assigned Well-ID. - -Adapter trimming was performed with Cutadapt using paired-end mode, a minimum read length of 20 bases, and the adapter sequence CTGTCTCTTATACACATCT applied to both read 1 and read 2. The trimming step uses 8 CPU cores by default. - -Trimmed read pairs were aligned to a user-specified Bowtie2 reference index with Bowtie2 v2-style very-sensitive local alignment parameters. The alignment command used the options --very-sensitive-local, --soft-clipped-unmapped-tlen, --no-mixed, --no-discordant, --dovetail, --phred33, -I 10, and -X 1000. Alignment was configured with 16 CPU cores, 256 GB memory, and an 18 hour wall-time limit by default. - -Alignment output in SAM format was converted to BAM with Samtools. BAM files were then converted to paired-end BED fragments with Bedtools bamtobed -bedpe. Fragment records were reformatted to retain genomic coordinates and barcode-derived identifiers, sorted, collapsed to unique fragments, and written as BED files. The resulting BED files were compressed with bgzip. - -Normalized coverage tracks were generated from the compressed BED files. 
Total fragment coverage was first calculated as the summed fragment length across all BED intervals. A per-sample scaling factor was then computed as (1 / total_coverage) x 10^10. Bedtools genomecov was used with the -bg option to generate scaled BedGraph coverage using a user-specified chromosome sizes file, and UCSC bedGraphToBigWig was used to convert the BedGraph output to BigWig format. - -The principal user-configurable parameters in the pipeline are: input_dir, specifying the input FASTQ directory; out_dir, specifying the output directory; barcode_matrix, specifying the Well-ID mapping CSV; barcode_suffix, specifying an optional suffix appended during barcode rewriting; skip_barcode_rewrite, disabling header correction when not needed; enable_sample_filter, enabling or disabling filename-based exclusion; skip_patterns, specifying custom exclusion patterns; adapter_seq, specifying the adapter sequence for trimming; ref, specifying the Bowtie2 index basename; chrom_sizes, specifying the chromosome sizes file required for coverage generation; publish_mode, controlling how Nextflow publishes outputs; conda_env, specifying the Conda environment file; and singularity_image, specifying the Singularity or Apptainer image path. - -By default, the pipeline publishes intermediate and final outputs into structured subdirectories under the selected output directory, including rewritten FASTQ files, trimmed FASTQ files, SAM files, BAM files, compressed BED files, and BigWig files. 
- diff --git a/nextflow.config b/nextflow.config.temp similarity index 63% rename from nextflow.config rename to nextflow.config.temp index 971247e..7f628db 100644 --- a/nextflow.config +++ b/nextflow.config.temp @@ -29,10 +29,6 @@ params { chrom_sizes = null publish_mode = 'copy' conda_env = "${projectDir}/envs/cuttag-preprocess.yml" - singularity_image = "${projectDir}/containers/cuttag-preprocess.sif" - // Bind the launch directory's data/ tree by default so local references, - // annotations, and test assets are visible inside Singularity/Apptainer. - container_bind_paths = "${launchDir}/data" } process { @@ -68,7 +64,6 @@ profiles { slurm { process.executor = 'slurm' - singularity.enabled = false conda.enabled = false } @@ -76,22 +71,4 @@ profiles { conda.enabled = true process.conda = params.conda_env } - - singularity { - singularity.enabled = true - apptainer.enabled = false - singularity.autoMounts = true - // Additional roots can be supplied with --container_bind_paths. - singularity.runOptions = "--bind ${params.container_bind_paths}" - process.container = params.singularity_image - } - - apptainer { - apptainer.enabled = true - singularity.enabled = false - apptainer.autoMounts = true - // Additional roots can be supplied with --container_bind_paths. - apptainer.runOptions = "--bind ${params.container_bind_paths}" - process.container = params.singularity_image - } }