Skip to content
This repository has been archived by the owner on Aug 31, 2022. It is now read-only.

Commit

Permalink
Correct path and name of pretrained model file
Browse files (browse the repository at this point in the history)
Resolves #1
Also updates example model to rgrgr network.
  • Loading branch information
tmassingham-ont committed Sep 11, 2018
1 parent 5874bbd commit b28e640
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions scripts/example_training.sh
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@ then
fi


-git clone https://github.com/nanoporetech/sloika
+git clone --depth 1 https://github.com/nanoporetech/sloika
(
cd sloika
make cleanDevEnv
@@ -19,22 +19,22 @@ THEANO_FLAGS_GPU=device=gpu,floatX=float32,mode=FAST_RUN,blas.ldflags='-lblas',s


# Generate reference sequences -- replace with your own method
-sloika/bin/extract_reference.py reads references.fa
+extract_reference.py reads references.fa

# Map reads using model -- takes a few hours
-THEANO_FLAGS=${THEANO_FLAGS_CPU} sloika/bin/chunkify.py raw_remap --jobs ${NCPU} --chunk_len 4000 --downsample_factor 5 --output_strand_list unfiltered_strands.txt reads remapped_unfiltered.hdf5 sloika/models/retrained.pkl references.fa
+THEANO_FLAGS=${THEANO_FLAGS_CPU} chunkify.py raw_remap --jobs ${NCPU} --chunk_len 4000 --downsample_factor 5 --output_strand_list unfiltered_strands.txt reads remapped_unfiltered.hdf5 sloika/models/pretrained.pkl references.fa

# Filter reads -- criterion from distribution of mapping scores, coverage and proportion of stays
( head -n 1 unfiltered_strands.txt ; cat unfiltered_strands.txt | awk '$3 > 0.5 && $3 < 1.2 && ($7 - $6) > 0.95 * $5 && $5 / ($7 - $6 + $5) < 0.55' ) > filtered_strand_list.txt

# Remap selected reads -- takes a few hours
-THEANO_FLAGS=${THEANO_FLAGS_CPU} sloika/bin/chunkify.py raw_remap --jobs ${NCPU} --chunk_len 4000 --downsample_factor 5 --input_strand_list filtered_strand_list.txt --output_strand_list filtered_strands.txt reads remapped_filtered.hdf5 model/baseline.pkl references.fa
+THEANO_FLAGS=${THEANO_FLAGS_CPU} chunkify.py raw_remap --jobs ${NCPU} --chunk_len 4000 --downsample_factor 5 --input_strand_list filtered_strand_list.txt --output_strand_list filtered_strands.txt reads remapped_filtered.hdf5 sloika/models/pretrained.pkl references.fa

# Train a model
-THEANO_FLAGS=${THEANO_FLAGS_GPU} sloika/bin/train_network.py raw --min_prob 1e-5 sloika/models/raw_1.00_rGr.py training remapped_filtered.hdf5
+THEANO_FLAGS=${THEANO_FLAGS_GPU} train_network.py raw --min_prob 1e-5 sloika/models/raw_0.98_rgrgr.py training remapped_filtered.hdf5

# Convert model to CPU
THEANO_FLAGS=${THEANO_FLAGS_GPU} sloika/misc/model_convert.py --target cpu training/model_final.pkl training/model_final_cpu.pkl

# Basecall (slowly)
-THEANO_FLAGS=${THEANO_FLAGS_CPU} sloika/bin/basecall_network.py raw --jobs ${NCPU} training/model_final_cpu.pkl test_reads > basecalls.fa
+THEANO_FLAGS=${THEANO_FLAGS_CPU} basecall_network.py raw --jobs ${NCPU} training/model_final_cpu.pkl test_reads > basecalls.fa

0 comments on commit b28e640

Please sign in to comment.