-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #140 from MitraDarja/ins
[FIX] Fix input pathways for insert and delete.
- Loading branch information
Showing
16 changed files
with
1,861 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash
# Times the preprocessing step of needle and of the run_bcalm4.sh / run_kmc4.sh
# helper scripts. Each measurement is written by `/usr/bin/time -v` to a
# *.time file in the current directory.

needle="Set path to needle executable"
input_dir="Set to directory that contains the sequence files"

# The four sequence files handed to every needle run below. Kept in an array
# so each path stays a single argument even if input_dir contains spaces.
reads=(
  "$input_dir/SRR1313229.fastq.gz"
  "$input_dir/SRR1313228.fastq.gz"
  "$input_dir/SRR1313227.fastq.gz"
  "$input_dir/SRR1313226.fastq.gz"
)

# Comparison for 4 threads (-t 4). To obtain numbers for a different thread
# count, change the -t value in each of the three commands.
/usr/bin/time -v -o needle_21_21_preprocess4.time \
  "$needle" minimiser -k 21 -w 21 -t 4 --cutoff 49 "${reads[@]}"
/usr/bin/time -v -o needle_25_21_preprocess4.time \
  "$needle" minimiser -k 21 -w 25 -t 4 --cutoff 49 "${reads[@]}"
/usr/bin/time -v -o needle_21_41_preprocess4.time \
  "$needle" minimiser -k 21 -w 41 -t 4 --cutoff 49 "${reads[@]}"

# For both bcalm and kmc a file named "files.lst" with the paths to the four
# files needs to be created first; both helper scripts read it.
/usr/bin/time -v -o bcalm_preprocess4.time bash run_bcalm4.sh
/usr/bin/time -v -o kmc_preprocess4.time bash run_kmc4.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/bin/bash
# Builds a joint metagraph index with k-mer counts from the cleaned contigs in
# single_dbgs/clean/, converts the annotation to row_diff_int_brwt, and times
# count queries against it. Timings go to *.time files and tool output to
# *.log files in the current directory; the index itself goes to out_dir.
# The shebang is required because the `&>` redirections are a bash feature.

metagraph="Set path to metagraph executable"
out_dir="Set out directory"

# Joint graph over all samples, then one annotation column per input file.
# -p also creates out_dir itself and makes reruns not complain.
mkdir -p "$out_dir/clean_anno"
/usr/bin/time -v -o jointbuild.time "$metagraph" build -p 4 -k 21 \
  --count-kmers --count-width 32 --outfile-base "$out_dir/graph" \
  single_dbgs/clean/SRR*.fasta.gz &> jointbuilt.log
/usr/bin/time -v -o annotate_joint.time "$metagraph" annotate --separately -p 4 \
  -i "$out_dir/graph.dbg" --anno-filename --count-kmers --count-width 32 \
  -o "$out_dir/clean_anno" single_dbgs/clean/SRR*.fasta.gz &> anno.log

# row_diff conversion runs in three stages (0, 1, 2); each stage reads the
# column annotations from clean_anno and writes into its own rd<stage> directory.
for stage in 0 1 2; do
  mkdir -p "$out_dir/rd${stage}"
  /usr/bin/time -v -o "rd${stage}.time" "$metagraph" transform_anno -v \
    --anno-type row_diff --count-kmers --row-diff-stage "$stage" --mem-cap-gb 500 \
    -o "$out_dir/rd${stage}/rd" -i "$out_dir/graph.dbg" -p 4 --disk-swap "" \
    "$out_dir"/clean_anno/*.column.annodbg &> "rd${stage}.log"
done

# Final annotation from the stage-2 columns, followed by the relax step.
/usr/bin/time -v -o anno.time "$metagraph" transform_anno --anno-type row_diff_int_brwt \
  --greedy --fast --subsample 1000000 -i "$out_dir/graph.dbg" \
  -o "$out_dir/annotation_final" "$out_dir"/rd2/*.column.annodbg \
  -p 4 --parallel-nodes 10 &> last_anno.log
/usr/bin/time -v -o relax.time "$metagraph" relax_brwt -v --relax-arity 32 -p 4 \
  -o "$out_dir/annotation_final_relaxed" \
  "$out_dir/annotation_final.row_diff_int_brwt.annodbg" &> relax.log

# Note data/*.fa are the files from here: https://github.com/MitraDarja/analysis_needle/tree/main
# Query with four threads; the query results are printed to stdout.
for n in 1 100 1000; do
  /usr/bin/time -v -o "query_${n}.time" "$metagraph" query -p 4 --query-counts \
    -i "$out_dir/graph.dbg" \
    -a "$out_dir/annotation_final_relaxed.row_diff_int_brwt.annodbg" "data/query_${n}.fa"
done

# Query with one thread
for n in 1 100 1000; do
  /usr/bin/time -v -o "query_${n}_1.time" "$metagraph" query -p 1 --query-counts \
    -i "$out_dir/graph.dbg" \
    -a "$out_dir/annotation_final_relaxed.row_diff_int_brwt.annodbg" "data/query_${n}.fa"
done
|
||
|
||
# Construction for smooth option
#
# Builds, annotates, converts and queries a second index from the contigs in
# single_dbgs/clean_smooth/. Uses the metagraph and out_dir variables assigned
# at the top of this script. Every output of this section (graph, annotation
# directories, *.time and *.log files) carries a "smooth" marker, so nothing
# written for the non-smooth index is overwritten.

mkdir -p "$out_dir/clean_anno_smooth"
/usr/bin/time -v -o jointbuild_smooth.time "$metagraph" build -p 4 -k 21 \
  --count-kmers --count-width 32 --outfile-base "$out_dir/graph_smooth" \
  single_dbgs/clean_smooth/SRR*.fasta.gz &> jointbuilt_smooth.log
# Annotate against graph_smooth.dbg, the graph built just above: every later
# step in this section reads graph_smooth.dbg, so the columns must be created
# on that same graph rather than on the non-smooth graph.dbg.
/usr/bin/time -v -o annotate_joint_smooth.time "$metagraph" annotate --separately -p 4 \
  -i "$out_dir/graph_smooth.dbg" --anno-filename --count-kmers --count-width 32 \
  -o "$out_dir/clean_anno_smooth" single_dbgs/clean_smooth/SRR*.fasta.gz &> anno_smooth.log

# row_diff conversion in three stages (0, 1, 2), each writing into its own
# smooth_rd<stage> directory.
for stage in 0 1 2; do
  mkdir -p "$out_dir/smooth_rd${stage}"
  /usr/bin/time -v -o "rd${stage}_smooth.time" "$metagraph" transform_anno -v \
    --anno-type row_diff --count-kmers --row-diff-stage "$stage" --mem-cap-gb 500 \
    -o "$out_dir/smooth_rd${stage}/rd" -i "$out_dir/graph_smooth.dbg" -p 4 --disk-swap "" \
    "$out_dir"/clean_anno_smooth/*.column.annodbg &> "rd${stage}_smooth.log"
done

# Final annotation is built from the smooth stage-2 columns (smooth_rd2), not
# from the rd2 directory of the non-smooth index.
/usr/bin/time -v -o anno_smooth.time "$metagraph" transform_anno --anno-type row_diff_int_brwt \
  --greedy --fast --subsample 1000000 -i "$out_dir/graph_smooth.dbg" \
  -o "$out_dir/annotation_final_smooth" "$out_dir"/smooth_rd2/*.column.annodbg \
  -p 4 --parallel-nodes 10 &> last_anno_smooth.log
/usr/bin/time -v -o relax_smooth.time "$metagraph" relax_brwt -v --relax-arity 32 -p 4 \
  -o "$out_dir/annotation_final_relaxed_smooth" \
  "$out_dir/annotation_final_smooth.row_diff_int_brwt.annodbg" &> relax_smooth.log

# Note data/*.fa are the files from here: https://github.com/MitraDarja/analysis_needle/tree/main
# Query with four threads; the query results are printed to stdout.
for n in 1 100 1000; do
  /usr/bin/time -v -o "query_${n}_smooth.time" "$metagraph" query -p 4 --query-counts \
    -i "$out_dir/graph_smooth.dbg" \
    -a "$out_dir/annotation_final_relaxed_smooth.row_diff_int_brwt.annodbg" "data/query_${n}.fa"
done

# Query with one thread
for n in 1 100 1000; do
  /usr/bin/time -v -o "query_${n}_smooth_1.time" "$metagraph" query -p 1 --query-counts \
    -i "$out_dir/graph_smooth.dbg" \
    -a "$out_dir/annotation_final_relaxed_smooth.row_diff_int_brwt.annodbg" "data/query_${n}.fa"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/bin/bash
# Based on https://github.com/kamimrcht/REINDEER/blob/master/reproduce_manuscript_results/bcalm_2585.sh
#
# Runs bcalm once per input file. Input paths are read from files.lst in the
# current directory, one path per line.

bcalm="Set path to bcalm2 executable"

# Read each fastq.gz path and launch bcalm on it.
# IFS= keeps leading/trailing whitespace in a path intact, and the extra
# [[ -n ... ]] test also processes a last line that lacks a trailing newline.
# NOTE(review): -nb-cores is 2 here; if this is the script timed as the
# 4-thread bcalm run, confirm that value is intended.
while IFS= read -r filename || [[ -n "$filename" ]]; do
  # Skip blank lines instead of calling bcalm with an empty input path.
  [[ -n "$filename" ]] || continue
  "$bcalm" -in "$filename" -kmer-size 21 -abundance-min 50 -nb-cores 2 \
    -max-memory 500000 -out-tmp bcalm2 -out-dir bcalm2
done < files.lst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash
# Builds one metagraph graph per KMC database listed in kmc_files.lst and
# writes it to single_dbgs/<basename of the listed path>.
set -eu

metagraph="Set path to metagraph executable"
# Note: You need to have a kmc_files.lst, which is a file where in each line there is a path to the kmc_files with the ending ".kmc_suf"

# -p: under `set -e` a plain mkdir would abort the script on a rerun, when
# the directory already exists.
mkdir -p single_dbgs/
while IFS= read -r filename || [[ -n "$filename" ]]; do
  # Skip blank lines instead of calling metagraph with an empty input path.
  [[ -n "$filename" ]] || continue
  "$metagraph" build --state fast --mode canonical --parallel 16 --count-kmers \
    --count-width 32 -k 21 --mem-cap-gb 8 \
    -o "single_dbgs/$(basename "$filename")" "$filename"
  # Progress marker: print the file that has just been processed.
  printf '%s\n' "$filename"
done < kmc_files.lst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash
# Runs `metagraph clean` with --smoothing-window 1 on every graph listed in
# dbg_files.txt and writes the result to single_dbgs/clean/<basename>.
set -eu

metagraph="Set path to metagraph executable"
# Note: You need to have a dbg_files.txt, which is a file where in each line there is a path to the dbg files with the ending ".dbg" (created with run_build.sh)

# -p creates single_dbgs/ as well if needed and, under `set -e`, keeps a
# rerun from aborting because the directory already exists.
mkdir -p single_dbgs/clean/

while IFS= read -r filename || [[ -n "$filename" ]]; do
  # Skip blank lines instead of calling metagraph with an empty input path.
  [[ -n "$filename" ]] || continue
  "$metagraph" clean -p4 --to-fasta --primary-kmers --smoothing-window 1 \
    -o "single_dbgs/clean/$(basename "$filename")" \
    --count-kmers --count-width 32 "$filename"
done < dbg_files.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash
# Runs `metagraph clean` with --smoothing-window 1000000000 on every graph
# listed in dbg_files.txt and writes the result to
# single_dbgs/clean_smooth/<basename>.
set -eu

metagraph="Set path to metagraph executable"
# Note: You need to have a dbg_files.txt, which is a file where in each line there is a path to the dbg files with the ending ".dbg" (created with run_build.sh)

# -p creates single_dbgs/ as well if needed and, under `set -e`, keeps a
# rerun from aborting because the directory already exists.
mkdir -p single_dbgs/clean_smooth/

while IFS= read -r filename || [[ -n "$filename" ]]; do
  # Skip blank lines instead of calling metagraph with an empty input path.
  [[ -n "$filename" ]] || continue
  "$metagraph" clean -p 4 --to-fasta --primary-kmers --smoothing-window 1000000000 \
    -o "single_dbgs/clean_smooth/$(basename "$filename")" \
    --count-kmers --count-width 32 "$filename"
done < dbg_files.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash
# Runs kmc on every sample listed in samples.in. Each line of samples.in
# holds two whitespace-separated fields: the input file and the threshold that
# is passed to kmc as -ci<threshold>. Output goes to kmc_files/<basename>.
set -eu

kmc="Path to kmc executable"

# -p: under `set -e` a plain mkdir would abort the script on a rerun, when
# the directories already exist.
mkdir -p kmc_files kmc_tmp
# No IFS= here: read must split each line into the two fields.
while read -r filename threshold || [[ -n "$filename" ]]; do
  # Skip blank lines; they would otherwise produce an empty -ci value.
  [[ -n "$filename" ]] || continue
  "$kmc" -t64 -r -k21 "-ci${threshold}" -cs65535 -hp "$filename" \
    "kmc_files/$(basename "$filename")" kmc_tmp/
  # Progress marker: print the file that has just been processed.
  printf '%s\n' "$filename"
done < samples.in
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#!/bin/bash
# Runs kmc with 4 threads (-t4) on every file listed in files.lst, one path
# per line. The output name is the input's basename with a trailing ".res"
# removed, if present, and is written to the current directory.

kmc="Set path to kmc executable"

# kmc is given kmc_tmp/ as its last argument; create it up front so the run
# does not depend on a directory left behind by another script.
mkdir -p kmc_tmp

# Read each fastq.gz path and launch kmc on it.
while IFS= read -r filename || [[ -n "$filename" ]]; do
  # Skip blank lines instead of calling kmc with an empty input path.
  [[ -n "$filename" ]] || continue
  "$kmc" -t4 -r -k21 -ci50 -cs65535 -hp -m500 "$filename" \
    "$(basename "$filename" .res)" kmc_tmp/
done < files.lst
Oops, something went wrong.