Skip to content

Commit

Permalink
[MODIF] fix issue in json and bwt2 index
Browse files Browse the repository at this point in the history
  • Loading branch information
nservant committed Apr 14, 2021
1 parent cb65bd7 commit a35879e
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 76 deletions.
3 changes: 3 additions & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,7 @@ params {
res_tads = '1000'
tads_caller = 'insulation,hicexplorer'
res_compartments = '1000'

// Ignore `--input` as otherwise the parameter validation will throw an error
schema_ignore_params = 'genomes,digest,input_paths,input'
}
22 changes: 10 additions & 12 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,26 @@ params {
config_profile_name = 'Full test profile'
config_profile_description = 'Full test dataset to check pipeline function'

// TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
// TODO nf-core: Give any required params for the test so that command line flags are not needed
// Input data for full size test
input_paths = [
['SRR4292758_00', ['https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz', 'https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz']]
]

// Annotations
fasta = 'https://github.com/nf-core/test-datasets/raw/hic/reference/W303_SGD_2015_JRIU00000000.fsa'
restriction_site = 'A^AGCTT'
ligation_site = 'AAGCTAGCTT'

min_mapq = 2
rm_dup = true
rm_singleton = true
rm_multi = true

digestion = 'hindiii'
min_mapq = 10
min_restriction_fragment_size = 100
max_restriction_fragment_size = 100000
min_insert_size = 100
max_insert_size = 600

bin_size = '1000'
res_dist_decay = '1000'
res_tads = '1000'
tads_caller = 'insulation,hicexplorer'
res_compartments = '1000'

// Options
skip_cool = true
// Ignore `--input` as otherwise the parameter validation will throw an error
schema_ignore_params = 'genomes,digest,input_paths,input'
}
32 changes: 18 additions & 14 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -121,19 +121,19 @@ if (params.split_fastq ){

// Reference genome
if ( params.bwt2_index ){
lastPath = params.bwt2_index.lastIndexOf(File.separator)
bwt2_dir = params.bwt2_index.substring(0,lastPath+1)
bwt2_base = params.bwt2_index.substring(lastPath+1)
//lastPath = params.bwt2_index.lastIndexOf(File.separator)
//bwt2_dir = params.bwt2_index.substring(0,lastPath+1)
//bwt2_base = params.bwt2_index.substring(lastPath+1)

Channel.fromPath( bwt2_dir , checkIfExists: true)
Channel.fromPath( params.bwt2_index , checkIfExists: true)
.ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" }
.into { bwt2_index_end2end; bwt2_index_trim }

}
else if ( params.fasta ) {
lastPath = params.fasta.lastIndexOf(File.separator)
fasta_base = params.fasta.substring(lastPath+1)
bwt2_base = fasta_base.toString() - ~/(\.fa)?(\.fasta)?(\.fas)?(\.fsa)?$/
//lastPath = params.fasta.lastIndexOf(File.separator)
//fasta_base = params.fasta.substring(lastPath+1)
//fasta_base = fasta_base.toString() - ~/(\.fa)?(\.fasta)?(\.fas)?(\.fsa)?$/

Channel.fromPath( params.fasta )
.ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" }
Expand Down Expand Up @@ -329,7 +329,7 @@ process get_software_versions {

if(!params.bwt2_index && params.fasta){
process makeBowtie2Index {
tag "$bwt2_base"
tag "$fasta_base"
label 'process_highmem'
publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir },
saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode
Expand All @@ -342,9 +342,10 @@ if(!params.bwt2_index && params.fasta){
file "bowtie2_index" into bwt2_index_trim

script:
fasta_base = fasta.toString() - ~/(\.fa)?(\.fasta)?(\.fas)?(\.fsa)?$/
"""
mkdir bowtie2_index
bowtie2-build ${fasta} bowtie2_index/${bwt2_base}
bowtie2-build ${fasta} bowtie2_index/${fasta_base}
"""
}
}
Expand Down Expand Up @@ -418,19 +419,21 @@ process bowtie2_end_to_end {
def bwt2_opts = params.bwt2_opts_end2end
if (!params.dnase){
"""
INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'`
bowtie2 --rg-id BMG --rg SM:${prefix} \\
${bwt2_opts} \\
-p ${task.cpus} \\
-x ${index}/${bwt2_base} \\
-x \${INDEX} \\
--un ${prefix}_unmap.fastq \\
-U ${reads} | samtools view -F 4 -bS - > ${prefix}.bam
"""
}else{
"""
INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'`
bowtie2 --rg-id BMG --rg SM:${prefix} \\
${bwt2_opts} \\
-p ${task.cpus} \\
-x ${index}/${bwt2_base} \\
-x \${INDEX} \\
--un ${prefix}_unmap.fastq \\
-U ${reads} > ${prefix}.bam
"""
Expand Down Expand Up @@ -480,10 +483,11 @@ process bowtie2_on_trimmed_reads {
script:
prefix = reads.toString() - ~/(_trimmed)?(\.fq)?(\.fastq)?(\.gz)?$/
"""
INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'`
bowtie2 --rg-id BMG --rg SM:${prefix} \\
${params.bwt2_opts_trimmed} \\
-p ${task.cpus} \\
-x ${index}/${bwt2_base} \\
-x \${INDEX} \\
-U ${reads} | samtools view -bS - > ${prefix}_trimmed.bam
"""
}
Expand Down Expand Up @@ -784,9 +788,9 @@ process run_ice{
script:
prefix = rmaps.toString() - ~/(\.matrix)?$/
"""
ice --filter_low_counts_perc ${params.ice_filer_low_count_perc} \
ice --filter_low_counts_perc ${params.ice_filter_low_count_perc} \
--results_filename ${prefix}_iced.matrix \
--filter_high_counts_perc ${params.ice_filer_high_count_perc} \
--filter_high_counts_perc ${params.ice_filter_high_count_perc} \
--max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps}
"""
}
Expand Down
8 changes: 4 additions & 4 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ params {
bin_size = '1000000'
res_zoomify = '5000'
ice_max_iter = 100
ice_filer_low_count_perc = 0.02
ice_filer_high_count_perc = 0
ice_filter_low_count_perc = 0.02
ice_filter_high_count_perc = 0
ice_eps = 0.1

// Downstream Analysis
Expand All @@ -86,7 +86,6 @@ params {
// Boilerplate options
publish_dir_mode = 'copy'
multiqc_config = false
name = false
email = false
email_on_fail = false
max_multiqc_email_size = 25.MB
Expand All @@ -101,12 +100,13 @@ params {
custom_config_version = 'master'
custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
hostnames = false
config_profile_name = false
config_profile_description = false
config_profile_contact = false
config_profile_url = false
validate_params = true
show_hidden_params = false
schema_ignore_params = 'genomes,input_paths'
schema_ignore_params = 'genomes,digest,input_paths'

// Defaults only, expecting to be overwritten
max_memory = 24.GB
Expand Down
77 changes: 31 additions & 46 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@
},
"min_cis_dist": {
"type": "integer",
"default": "O",
"description": "Minimum distance between loci to consider. Useful for --dnase mode to remove spurious ligation products. Only values > 0 are considered"
}
}
Expand All @@ -148,17 +147,16 @@
"split_fastq": {
"type": "boolean",
"description": "Split the reads into chunks before running the pipeline",
"fa_icon": "fas fa-dna",
"default": "false"
"fa_icon": "fas fa-dna"
},
"fastq_chunks_size": {
"type": "integer",
"description": "Number of reads per chunk if split_fastq is used",
"default": "20000000"
"default": 20000000
},
"min_mapq": {
"type": "integer",
"default": "10",
"default": 10,
"description": "Keep aligned reads with a minimum quality value"
},
"bwt2_opts_end2end": {
Expand All @@ -185,33 +183,27 @@
"fa_icon": "fas fa-signature",
"properties": {
"keep_dups": {
"type": "string",
"description": "Keep duplicated reads",
"default": "False"
"type": "boolean",
"description": "Keep duplicated reads"
},
"keep_multi": {
"type": "string",
"description": "Keep multi-aligned reads",
"default": "False"
"type": "boolean",
"description": "Keep multi-aligned reads"
},
"max_insert_size": {
"type": "integer",
"default": "0",
"description": "Maximum fragment size to consider. Only values > 0 are considered"
},
"min_insert_size": {
"type": "integer",
"default": "0",
"description": "Minimum fragment size to consider. Only values > 0 are considered"
},
"max_restriction_fragment_size": {
"type": "integer",
"default": "0",
"description": "Maximum restriction fragment size to consider. Only values > 0 are considered"
},
"min_restriction_fragment_size": {
"type": "integer",
"default": "0",
"description": "Minimum restriction fragment size to consider. Only values > 0 are considered"
},
"save_interaction_bam": {
Expand All @@ -232,29 +224,28 @@
"default": "'1000000,500000'",
"description": "Resolution to build the maps (comma separated)"
},
"ice_filer_low_count_perc": {
"type": "string",
"ice_filter_low_count_perc": {
"type": "number",
"default": 0.02,
"description": "Filter low counts rows before normalization"
},
"ice_filer_high_count_perc": {
"ice_filter_high_count_perc": {
"type": "integer",
"default": "0",
"description": "Filter high counts rows before normalization"
},
"ice_eps": {
"type": "string",
"default": "0.1",
"type": "number",
"default": 0.1,
"description": "Threshold for ICE convergence"
},
"ice_max_iter": {
"type": "integer",
"default": "100",
"default": 100,
"description": "Maximum number of iterations for ICE normalization"
},
"res_zoomify": {
"type": "integer",
"default": 5000,
"type": "string",
"default": "5000",
"description": "Maximum resolution to build mcool file"
}
}
Expand All @@ -266,8 +257,8 @@
"default": "",
"properties": {
"res_dist_decay": {
"type": "integer",
"default": 1000000,
"type": "string",
"default": "1000000",
"description": "Resolution to build count/distance plot"
},
"tads_caller": {
Expand All @@ -281,8 +272,8 @@
"description": "Resolution to run TADs callers (comma separated)"
},
"res_compartments": {
"type": "integer",
"default": 250000,
"type": "string",
"default": "250000",
"description": "Resolution for compartments calling"
}
}
Expand All @@ -299,33 +290,28 @@
"description": "Do not build contact maps"
},
"skip_ice": {
"type": "string",
"description": "Do not run ICE normalization",
"default": "False"
"type": "boolean",
"description": "Do not run ICE normalization"
},
"skip_dist_decay": {
"type": "string",
"description": "Do not run distance/decay plot",
"default": "False"
"type": "boolean",
"description": "Do not run distance/decay plot"
},
"skip_tads": {
"type": "string",
"description": "Do not run TADs calling",
"default": "False"
"type": "boolean",
"description": "Do not run TADs calling"
},
"skip_compartments": {
"type": "string",
"description": "Do not run compartments calling"
},
"skip_balancing": {
"type": "string",
"description": "Do not run cooler balancing normalization",
"default": "False"
"type": "boolean",
"description": "Do not run cooler balancing normalization"
},
"skip_mcool": {
"type": "string",
"description": "Do not generate mcool file for Higlass visualization",
"default": "False"
"type": "boolean",
"description": "Do not generate mcool file for HiGlass visualization"
},
"skip_multiqc": {
"type": "boolean",
Expand Down Expand Up @@ -487,9 +473,8 @@
},
"config_profile_name": {
"type": "string",
"description": "Institutional config name.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
"description": "Institutional config name",
"hidden": true
},
"config_profile_description": {
"type": "string",
Expand Down

0 comments on commit a35879e

Please sign in to comment.