diff --git a/MANIFEST.in b/MANIFEST.in index c0a70a4..bb1a975 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,5 +3,6 @@ include README.rst include MANIFEST.in include setup.py recursive-include *.py *.fasta +exclude tests exclude ipynbs exclude venv \ No newline at end of file diff --git a/README.rst b/README.rst index 07e778e..9cc0cb3 100644 --- a/README.rst +++ b/README.rst @@ -269,13 +269,51 @@ Analysis of all FASTA/FASTQ files in a directory Metadata addition to analysis ----------------------------- -*Works with any of the analyses above +Add subtype metadata to your analysis results with ``-M your-subtype-metadata.tsv``: .. code-block:: bash - hansel -s heidelberg -M -vv --threads -o results.tab -O match_results.tab -D /path/to/fastas_or_fastqs/ - -``biohansel`` works best on TSV metadata files. If possible, use a tab separated metadata file or your analysis may fail. + hansel -s heidelberg \ + -M your-subtype-metadata.tsv \ + -o results.tab \ + -O match_results.tab \ + -D ~/your-reads-directory/ + +Your metadata table **must** contain a field with the field name ``subtype``, e.g. + +.. list-table:: + :header-rows: 1 + + * - subtype + - host_association + - geoloc + - genotype_alternative + * - 1 + - human + - Canada + - A + * - 2 + - cow + - USA + - B + +``biohansel`` accepts metadata table files with the following formats and extensions: + +.. 
list-table:: + :header-rows: 1 + + * - Format + - Extension + - Example Filename + * - Tab-delimited table/tab-separated values (TSV) + - ``.tsv`` + - ``my-metadata-table.tsv`` + * - Tab-delimited table/tab-separated values (TSV) + - ``.tab`` + - ``my-metadata-table.tab`` + * - Comma-separated values (CSV) + - ``.csv`` + - ``my-metadata-table.csv`` Development diff --git a/bio_hansel/main.py b/bio_hansel/main.py index d02bf96..1096a6e 100644 --- a/bio_hansel/main.py +++ b/bio_hansel/main.py @@ -51,7 +51,7 @@ def init_parser(): parser.add_argument('--scheme-name', help='Custom user-specified SNP substyping scheme name') parser.add_argument('-M', '--scheme-metadata', - help='Scheme subtype metadata table (.TSV format accepted; contain column called "subtype")') + help='Scheme subtype metadata table (tab-delimited file with ".tsv" or ".tab" extension or CSV with ".csv" extension format accepted; MUST contain column called "subtype")') parser.add_argument('-p', '--paired-reads', nargs=2, metavar=('forward_reads', 'reverse_reads'), diff --git a/setup.py b/setup.py index 2f1be9d..9e0d29f 100644 --- a/setup.py +++ b/setup.py @@ -38,8 +38,7 @@ long_description=readme, name='bio_hansel', package_data={'bio_hansel': ['data/*/*.fasta', 'data/*/*.tsv',]}, - package_dir={'bio_hansel': 'bio_hansel'}, - packages=find_packages(include=['bio_hansel']), + packages=find_packages(exclude=['test_*.py', 'tests']), setup_requires=setup_requirements, test_suite='tests', tests_require=test_requirements,