diff --git a/Dockerfile b/Dockerfile index e83bb6f2..233cc924 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ RUN apt-get -qq update \ rm -rf /var/lib/apt/lists/* RUN mkdir -p /opt/truvari-source/truvari/ -COPY setup.py README.md /opt/truvari-source +COPY setup.py README.md pyproject.toml /opt/truvari-source/ COPY truvari/ /opt/truvari-source/truvari/ WORKDIR /opt/truvari-source diff --git a/README.md b/README.md index 8e69d4a7..17eefad5 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![pylint](imgs/pylint.svg)](https://github.com/acenglish/truvari/actions/workflows/pylint.yml) [![FuncTests](https://github.com/acenglish/truvari/actions/workflows/func_tests.yml/badge.svg?branch=develop&event=push)](https://github.com/acenglish/truvari/actions/workflows/func_tests.yml) [![coverage](imgs/coverage.svg)](https://github.com/acenglish/truvari/actions/workflows/func_tests.yml) -[![develop](https://img.shields.io/github/commits-since/acenglish/truvari/v4.1.0)](https://github.com/ACEnglish/truvari/compare/v4.1.0...develop) +[![develop](https://img.shields.io/github/commits-since/acenglish/truvari/v4.2.0)](https://github.com/ACEnglish/truvari/compare/v4.2.0...develop) [![Downloads](https://static.pepy.tech/badge/truvari)](https://pepy.tech/project/truvari) ![Logo](https://raw.githubusercontent.com/ACEnglish/truvari/develop/imgs/BoxScale1_DarkBG.png) @@ -10,7 +10,7 @@ Toolkit for benchmarking, merging, and annotating Structural Variants 📚 [WIKI page](https://github.com/acenglish/truvari/wiki) has detailed documentation. 📈 See [Updates](https://github.com/acenglish/truvari/wiki/Updates) on new versions. -📝 Read our [Paper](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-022-02840-6) to learn more. +📝 Read our Papers ([#1](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-022-02840-6), [#2](https://www.biorxiv.org/content/10.1101/2023.10.29.564632v1)) to learn more. 
## 💻 Installation Truvari uses Python 3.6+ and can be installed with pip: diff --git a/docs/api/truvari.rst b/docs/api/truvari.rst index ded7f385..5159989d 100644 --- a/docs/api/truvari.rst +++ b/docs/api/truvari.rst @@ -50,6 +50,10 @@ entry_size_similarity ^^^^^^^^^^^^^^^^^^^^^ .. autofunction:: entry_size_similarity +entry_shared_ref_context +^^^^^^^^^^^^^^^^^^^^^^^^ +.. autofunction:: entry_shared_ref_context + entry_to_hash ^^^^^^^^^^^^^ .. autofunction:: entry_to_hash @@ -62,9 +66,9 @@ entry_variant_type ^^^^^^^^^^^^^^^^^^ .. autofunction:: entry_variant_type -entry_shared_ref_context -^^^^^^^^^^^^^^^^^^^^^^^^ -.. autofunction:: entry_shared_ref_context +entry_within +^^^^^^^^^^^^ +.. autofunction:: entry_within Extra Methods ------------- @@ -166,6 +170,14 @@ cmd_exe ^^^^^^^ .. autofunction:: cmd_exe +consolidate_phab_vcfs +^^^^^^^^^^^^^^^^^^^^^ +.. autofunction:: consolidate_phab_vcfs + +coords_within +^^^^^^^^^^^^^ +.. autofunction:: coords_within + count_entries ^^^^^^^^^^^^^ .. autofunction:: count_entries @@ -194,6 +206,10 @@ performance_metrics ^^^^^^^^^^^^^^^^^^^ .. autofunction:: performance_metrics +region_filter +^^^^^^^^^^^^^ +.. autofunction:: region_filter + restricted_float ^^^^^^^^^^^^^^^^ .. autofunction:: restricted_float diff --git a/docs/requirements.txt b/docs/requirements.txt index b2fd0154..05035962 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,7 @@ pywfa>=0.5.1 -sphinx==4.2.0 -sphinx_rtd_theme==1.0.0 -readthedocs-sphinx-search==0.1.1 +sphinx>=7.2 +sphinx_rtd_theme>=2 +readthedocs-sphinx-search==0.3.2 pywfa>=0.5.1 rich>=12.5.1 edlib>=1.3.9 diff --git a/docs/v4.2.0/Citations.md b/docs/v4.2.0/Citations.md new file mode 100644 index 00000000..d600b860 --- /dev/null +++ b/docs/v4.2.0/Citations.md @@ -0,0 +1,30 @@ +# Citing Truvari + +English, A.C., Menon, V.K., Gibbs, R.A. et al. Truvari: refined structural variant comparison preserves allelic diversity. Genome Biol 23, 271 (2022). 
https://doi.org/10.1186/s13059-022-02840-6 + +# Citations + +List of publications using Truvari. Most of these are just pulled from a [Google Scholar Search](https://scholar.google.com/scholar?q=truvari). Please post in the [show-and-tell](https://github.com/spiralgenetics/truvari/discussions/categories/show-and-tell) to have your publication added to the list. +* [A robust benchmark for detection of germline large deletions and insertions](https://www.nature.com/articles/s41587-020-0538-8) +* [Leveraging a WGS compression and indexing format with dynamic graph references to call structural variants](https://www.biorxiv.org/content/10.1101/2020.04.24.060202v1.abstract) +* [Duphold: scalable, depth-based annotation and curation of high-confidence structural variant calls](https://academic.oup.com/gigascience/article/8/4/giz040/5477467?login=true) +* [Parliament2: Accurate structural variant calling at scale](https://academic.oup.com/gigascience/article/9/12/giaa145/6042728) +* [Learning What a Good Structural Variant Looks Like](https://www.biorxiv.org/content/10.1101/2020.05.22.111260v1.full) +* [Long-read trio sequencing of individuals with unsolved intellectual disability](https://www.nature.com/articles/s41431-020-00770-0) +* [lra: A long read aligner for sequences and contigs](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009078) +* [Samplot: a platform for structural variant visual validation and automated filtering](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02380-5) +* [AsmMix: A pipeline for high quality diploid de novo assembly](https://www.biorxiv.org/content/10.1101/2021.01.15.426893v1.abstract) +* [Accurate chromosome-scale haplotype-resolved assembly of human genomes](https://www.nature.com/articles/s41587-020-0711-0) +* [Accurate circular consensus long-read sequencing improves variant detection and assembly of a human genome](https://www.nature.com/articles/s41587-019-0217-9) +* [NPSV: A 
simulation-driven approach to genotyping structural variants in whole-genome sequencing data](https://academic.oup.com/bioinformatics/article-abstract/37/11/1497/5466452) +* [SVIM-asm: structural variant detection from haploid and diploid genome assemblies](https://academic.oup.com/bioinformatics/article/36/22-23/5519/6042701?login=true) +* [Readfish enables targeted nanopore sequencing of gigabase-sized genomes](https://www.nature.com/articles/s41587-020-00746-x) +* [stLFRsv: A Germline Structural Variant Analysis Pipeline Using Co-barcoded Reads](https://internal-journal.frontiersin.org/articles/10.3389/fgene.2021.636239/full) +* [Long-read-based human genomic structural variation detection with cuteSV](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02107-y) +* [An international virtual hackathon to build tools for the analysis of structural variants within species ranging from coronaviruses to vertebrates](https://f1000research.com/articles/10-246) +* [Paragraph: a graph-based structural variant genotyper for short-read sequence data](https://link.springer.com/article/10.1186/s13059-019-1909-7) +* [Genome-wide investigation identifies a rare copy-number variant burden associated with human spina bifida](https://www.nature.com/articles/s41436-021-01126-9) +* [TT-Mars: Structural Variants Assessment Based on Haplotype-resolved Assemblies](https://www.biorxiv.org/content/10.1101/2021.09.27.462044v1.abstract) +* [An ensemble deep learning framework to refine large deletions in linked-reads](https://www.biorxiv.org/content/10.1101/2021.09.27.462057v1.abstract) +* [MAMnet: detecting and genotyping deletions and insertions based on long reads and a deep learning approach](https://academic.oup.com/bib/advance-article-abstract/doi/10.1093/bib/bbac195/6587170) +* [Automated filtering of genome-wide large deletions through an ensemble deep learning 
framework](https://www.sciencedirect.com/science/article/pii/S1046202322001712#b0110) diff --git a/docs/v4.2.0/Comparing-two-SV-programs.md b/docs/v4.2.0/Comparing-two-SV-programs.md new file mode 100644 index 00000000..6463f90d --- /dev/null +++ b/docs/v4.2.0/Comparing-two-SV-programs.md @@ -0,0 +1,67 @@ +A frequent application of comparing SVs is to perform a 'bakeoff' of performance +between two SV programs against a single set of base calls. + +Beyond looking at the Truvari results/report, you may like to investigate what calls +are different between the programs. + +Below is a set of scripts that may help you generate those results. For our examples, +we'll be comparing arbitrary programs Asvs and Bsvs against base calls Gsvs. + +*_Note_* - This assumes that each record in Gsvs has a unique ID in the vcf. + +Generate the Truvari report for Asvs and Bsvs +============================================= + +```bash +truvari bench -b Gsvs.vcf.gz -c Asvs.vcf.gz -o cmp_A/ ... +truvari bench -b Gsvs.vcf.gz -c Bsvs.vcf.gz -o cmp_B/ ... +``` +Consistency +=========== +The simplest way to compare the programs is to get the intersection of TPbase calls from the two reports. +```bash +truvari consistency cmp_A/tp-base.vcf cmp_B/tp-base.vcf +``` +See [[consistency wiki|consistency]] for details on the report created. 
+ +Below are older notes to manually create a similar report to what one can make using `truvari consistency` + +Combine the TPs within each report +================================== + +```bash +cd cmp_A/ +paste <(grep -v "#" tp-base.vcf) <(grep -v "#" tp-comp.vcf) > combined_tps.txt +cd ../cmp_B/ +paste <(grep -v "#" tp-base.vcf) <(grep -v "#" tp-comp.vcf) > combined_tps.txt +``` + +Grab the FNs missed by only one program +======================================= + +```bash +(grep -v "#" cmp_A/fn.vcf && grep -v "#" cmp_B/fn.vcf) | cut -f3 | sort | uniq -c | grep "^ *1 " | cut -f2- -d1 > missed_names.txt +``` + +Pull the TP sets' difference +============================ + +```bash +cat missed_names.txt | xargs -I {} grep -w {} cmp_A/combined_tps.txt > missed_by_B.txt +cat missed_names.txt | xargs -I {} grep -w {} cmp_B/combined_tps.txt > missed_by_A.txt +``` + +To look at the base-calls that Bsvs found, but Asvs didn't, run `cut -f1-12 missed_by_A.txt`. + +To look at the Asvs that Bsvs didn't find, run `cut -f13- missed_by_B.txt`. + +Shared FPs between the programs +=============================== + +All of the work above has been about how to analyze the TruePositives. If you'd like to see which calls are shared between Asvs and Bsvs that aren't in Gsvs, simply run Truvari again. + +```bash +bgzip cmp_A/fp.vcf && tabix -p vcf cmp_A/fp.vcf.gz +bgzip cmp_B/fp.vcf && tabix -p vcf cmp_B/fp.vcf.gz +truvari bench -b cmp_A/fp.vcf.gz -c cmp_B/fp.vcf.gz -o shared_fps ... +``` \ No newline at end of file diff --git a/docs/v4.2.0/Development.md b/docs/v4.2.0/Development.md new file mode 100644 index 00000000..ccb38493 --- /dev/null +++ b/docs/v4.2.0/Development.md @@ -0,0 +1,90 @@ +# Truvari API +Many of the helper methods/objects are documented such that developers can reuse truvari in their own code. To see developer documentation, visit [readthedocs](https://truvari.readthedocs.io/en/latest/). 
+ +Documentation can also be seen using +```python +import truvari +help(truvari) +``` + +# docker + +A Dockerfile exists to build an image of Truvari. To make a Docker image, clone the repository and run +```bash +docker build -t truvari . +``` + +You can then run Truvari through docker using +```bash +docker run -v `pwd`:/data -it truvari +``` +Where `pwd` can be whatever directory you'd like to mount in the docker to the path `/data/`, which is the working directory for the Truvari run. You can provide parameters directly to the entry point. +```bash +docker run -v `pwd`:/data -it truvari anno svinfo -i example.vcf.gz +``` + +If you'd like to interact within the docker container for things like running the CI/CD scripts +```bash +docker run -v `pwd`:/data --entrypoint /bin/bash -it truvari +``` +You'll now be inside the container and can run FuncTests or run Truvari directly +```bash +bash repo_utils/truvari_ssshtests.sh +truvari anno svinfo -i example.vcf.gz +``` + +# CI/CD + +Scripts that help ensure the tool's quality. Extra dependencies need to be installed in order to run Truvari's CI/CD scripts. + +```bash +pip install pylint anybadge coverage +``` + +Check code formatting with +```bash +python repo_utils/pylint_maker.py +``` +We use [autopep8](https://pypi.org/project/autopep8/) (via [vim-autopep8](https://github.com/tell-k/vim-autopep8)) for formatting. + +Test the code and generate a coverage report with +```bash +bash repo_utils/truvari_ssshtests.sh +``` + +Truvari leverages github actions to perform these checks when new code is pushed to the repository. We've noticed that the actions sometimes hang through no fault of the code. If this happens, cancel and resubmit the job. Once FuncTests are successful, it uploads an artifact of the `coverage html` report which you can download to see a line-by-line accounting of test coverage. 
+ +# git flow + +To organize the commits for the repository, we use [git-flow](https://danielkummer.github.io/git-flow-cheatsheet/). Therefore, `develop` is the default branch, the latest tagged release is on `master`, and new, in-development features are within `feature/` + +When contributing to the code, be sure you're working off of develop and have run `git flow init`. + +# versioning + +Truvari uses [Semantic Versioning](https://semver.org/) and tries to stay compliant to [PEP440](https://peps.python.org/pep-0440/). As of v3.0.0, a single version is kept in the code under `truvari/__init__.__version__`. We try to keep the suffix `-dev` on the version in the develop branch. When cutting a new release, we may replace the suffix with `-rc` if we've built a release candidate that may need more testing/development. Once we've committed to a full release that will be pushed to PyPi, no suffix is placed on the version. If you install Truvari from the develop branch, the git repo hash is appended to the installed version as well as '.uc' if there are un-staged commits in the repo. + +# docs + +The github wiki serves the documentation most relevant to the `develop/` branch. When cutting a new release, we freeze and version the wiki's documentation with the helper utility `docs/freeze_wiki.sh`. 
+ +# Creating a release +Follow these steps to create a release + +0) Bump release version +1) Run tests locally +2) Update API Docs +3) Change Updates Wiki +4) Freeze the Wiki +5) Ensure all code is checked in +6) Do a [git-flow release](https://danielkummer.github.io/git-flow-cheatsheet/) +7) Use github action to make a testpypi release +8) Check test release +```bash +python3 -m venv test_truvari +python3 -m pip install --index-url https://test.pypi.org/simple --extra-index-url https://pypi.org/simple/ truvari +``` +9) Use GitHub action to make a pypi release +10) Download release-tarball.zip from step #9’s action +11) Create release (include #9) from the tag +12) Checkout develop and Bump to dev version and README ‘commits since’ badge \ No newline at end of file diff --git a/docs/v4.2.0/Edit-Distance-Ratio-vs-Sequence-Similarity.md b/docs/v4.2.0/Edit-Distance-Ratio-vs-Sequence-Similarity.md new file mode 100644 index 00000000..eb45ce30 --- /dev/null +++ b/docs/v4.2.0/Edit-Distance-Ratio-vs-Sequence-Similarity.md @@ -0,0 +1,55 @@ +By default, Truvari uses [edlib](https://github.com/Martinsos/edlib) to calculate the edit distance between two SV calls. Optionally, the [Levenshtein edit distance ratio](https://en.wikipedia.org/wiki/Levenshtein_distance) can be used to compute the `--pctsim` between two variants. These measures are different than the sequence similarity calculated by [Smith-Waterman alignment](https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm). + +To show this difference, consider the following two sequences: + +``` + AGATACAGGAGTACGAACAGTACAGTACGA + |||||||||||||||*|||||||||||||| +ATCACAGATACAGGAGTACGTACAGTACAGTACGA + +30bp Aligned +1bp Mismatched (96% similarity) +5bp Left-Trimmed (~14% of the bottom sequence) +``` + +The code below runs swalign, Levenshtein, and edlib to compute the `--pctsim` between the two sequences. 
+ + +```python +import swalign +import Levenshtein +import edlib + +seq1 = "AGATACAGGAGTACGAACAGTACAGTACGA" +seq2 = "ATCACAGATACAGGAGTACGTACAGTACAGTACGA" + +scoring = swalign.NucleotideScoringMatrix(2, -1) +alner = swalign.LocalAlignment(scoring, gap_penalty=-2, gap_extension_decay=0.5) +aln = alner.align(seq1, seq2) +mat_tot = aln.matches +mis_tot = aln.mismatches +denom = float(mis_tot + mat_tot) +if denom == 0: + ident = 0 +else: + ident = mat_tot / denom +scr = edlib.align(seq1, seq2) +totlen = len(seq1) + len(seq2) + +print('swalign', ident) +# swalign 0.966666666667 +print('levedit', Levenshtein.ratio(seq1, seq2)) +# levedit 0.892307692308 +print('edlib', (totlen - scr["editDistance"]) / totlen) +# edlib 0.9076923076923077 +``` + +Because the swalign procedure only considers the number of matches and mismatches, the `--pctsim` is higher than the edlib and Levenshtein ratio. + +If we were to account for the 5 'trimmed' bases from the Smith-Waterman alignment when calculating the `--pctsim` by counting each trimmed base as a mismatch, we would see the similarity drop to ~83%. + +[This post](https://stackoverflow.com/questions/14260126/how-python-levenshtein-ratio-is-computed) has a nice response describing exactly how the Levenshtein ratio is computed. + +The Smith-Waterman alignment is much more expensive to compute compared to the Levenshtein ratio, and does not account for 'trimmed' sequence difference. + +However, edlib is the fastest comparison method and is used by default. Levenshtein can be specified with `--use-lev` in `bench` and `collapse`. \ No newline at end of file diff --git a/docs/v4.2.0/Home.md b/docs/v4.2.0/Home.md new file mode 100644 index 00000000..47fbc626 --- /dev/null +++ b/docs/v4.2.0/Home.md @@ -0,0 +1,35 @@ +The wiki holds documentation most relevant for develop. 
For information on a specific version of Truvari, see [`docs/`](https://github.com/spiralgenetics/truvari/tree/develop/docs) + +Citation: +English, A.C., Menon, V.K., Gibbs, R.A. et al. Truvari: refined structural variant comparison preserves allelic diversity. Genome Biol 23, 271 (2022). https://doi.org/10.1186/s13059-022-02840-6 + +# Before you start +VCFs aren't always created with a strong adherence to the format's specification. + +Truvari expects input VCFs to be valid so that it will only output valid VCFs. + +We've developed a separate tool that runs multiple validation programs and standard VCF parsing libraries in order to validate a VCF. + +Run [this program](https://github.com/acenglish/usable_vcf) over any VCFs that are giving Truvari trouble. + +Furthermore, Truvari expects 'resolved' SVs (e.g. DEL/INS) and will not interpret BND signals across SVTYPEs (e.g. combining two BND lines to match a DEL call). A brief description of Truvari bench methodology is linked below. + +Finally, Truvari does not handle multi-allelic VCF entries and as of v4.0 will throw an error if multi-allelics are encountered. Please use `bcftools norm` to split multi-allelic entries. 
+ +# Index + +- [[Updates|Updates]] +- [[Installation|Installation]] +- Truvari Commands: + - [[anno|anno]] + - [[bench|bench]] + - [[collapse|collapse]] + - [[consistency|consistency]] + - [[divide|divide]] + - [[phab|phab]] + - [[refine|refine]] + - [[segment|segment]] + - [[stratify|stratify]] + - [[vcf2df|vcf2df]] +- [[Development|Development]] +- [[Citations|Citations]] \ No newline at end of file diff --git a/docs/v4.2.0/Installation.md b/docs/v4.2.0/Installation.md new file mode 100644 index 00000000..a929d245 --- /dev/null +++ b/docs/v4.2.0/Installation.md @@ -0,0 +1,56 @@ +Recommended +=========== +For stable versions of Truvari, use pip +``` +python3 -m pip install truvari +``` +Specific versions can be installed via +``` +python3 -m pip install truvari==3.2.0 +``` +See [pypi](https://pypi.org/project/Truvari/#history) for a history of all distributed releases. + +Manual Installation +=================== +To build Truvari directly, clone the repository and switch to a specific tag. +``` +git clone https://github.com/spiralgenetics/truvari.git +git checkout tags/v3.0.0 +python3 -m pip install . +``` + +To see a list of all available tags, run: +``` +git tag -l +``` + +If you have an older clone of the repository and don't see the version you're looking for in tags, make sure to pull the latest changes: +``` +git pull +git fetch --all --tags +``` + +Mamba / Conda +============= +NOTE!! There is a very old version of Truvari on bioconda that - for unknown reasons - supersedes the newer, supported versions. Users may need to specify to conda which release to build. See [this ticket](https://github.com/ACEnglish/truvari/issues/130#issuecomment-1196607866) for details. + +Truvari releases are automatically deployed to bioconda. +Users can follow instructions here (https://mamba.readthedocs.io/en/latest/installation.html) to install mamba. (A faster alternative conda compatible package manager.) + +Creating an environment with Truvari and its dependencies. 
+``` +mamba create -c conda-forge -c bioconda -n truvari truvari +``` + +Alternatively, see the [conda page](https://anaconda.org/bioconda/truvari) for details +``` +conda install -c bioconda truvari +``` + +Building from develop +===================== +The default branch is `develop`, which holds in-development changes. This is for developers or those wishing to try experimental features and is not recommended for production. Development is versioned higher than the most recent stable release with an added suffix (e.g. Current stable release is `3.0.0`, develop holds `3.1.0-dev`). If you'd like to install develop, repeat the steps above but without `git checkout tags/v3.0.0`. See [wiki](https://github.com/spiralgenetics/truvari/wiki/Development#git-flow) for details on how branching is handled. + +Docker +====== +See [Development](https://github.com/spiralgenetics/truvari/wiki/Development#docker) for details on building a docker container. diff --git a/docs/v4.2.0/MatchIds.md b/docs/v4.2.0/MatchIds.md new file mode 100644 index 00000000..ca52076f --- /dev/null +++ b/docs/v4.2.0/MatchIds.md @@ -0,0 +1,74 @@ +MatchIds are used to tie base/comparison calls together in post-processing for debugging or other exploring. MatchIds have a structure of `{chunkid}.{callid}`. The chunkid is unique id per-chunk of calls. All calls sharing chunkid were within `--chunksize` distance and were compared. The callid is unique to a call in a chunk for each VCF. Because `bench` processes two VCFs (the base and comparison VCFs), the `MatchId` has two values: the first is the base variant's MatchId and the second the comparison variant's MatchId. + +For `--pick single`, the two MatchIds will be identical in the e.g. tp-base.vcf.gz and tp-comp.vcf.gz. However, for `--pick ac|multi`, it's possible to have cases such as one base variant matching to multiple comparison variants. 
That would give us MatchIds like: + +``` +# tp-base.vcf +MatchId=4.0,4.1 + +# tp-comp.vcf +MatchId=4.0,4.1 +MatchId=4.0,4.2 +``` + +This example tells us that the tp-comp variants are both pointing to `4.0` in tp-base. The tp-base variant has a higher match to the tp-comp `4.1` variant. + +One easy way to combine matched variants is to use `truvari vcf2df` to convert a benchmarking result to a pandas DataFrame and leverage pandas' merge operation. First, we convert the `truvari bench` result. + +```bash +truvari vcf2df --info --bench-dir bench_result/ data.jl +``` + +Next, we combine rows of matched variants: +```python +import joblib +import pandas as pd + +# Load the data +data = joblib.load("data.jl") + +# Separate out the variants from the base VCF and add new columns of the base/comp ids +base = data[data['state'].isin(['tpbase', 'fn'])].copy() +base['base_id'] = base['MatchId'].apply(lambda x: x[0]) +base['comp_id'] = base['MatchId'].apply(lambda x: x[1]) + +# Separate out the variants from the comparison VCF and add new columns of the base/comp ids +comp = data[data['state'].isin(['tp', 'fp'])].copy() +comp['base_id'] = comp['MatchId'].apply(lambda x: x[0]) +comp['comp_id'] = comp['MatchId'].apply(lambda x: x[1]) + +# Merge the base/comparison variants +combined = pd.merge(base, comp, left_on='base_id', right_on='comp_id', suffixes=('_base', '_comp')) + +# How many comp variants matched to multiple base variants? +counts1 = combined['base_id_comp'].value_counts() +print('multi-matched comp count', (counts1 != 1).sum()) + +# How many base variants matched to multiple comp variants? +counts2 = combined['comp_id_base'].value_counts() +print('multi-matched base count', (counts2 != 1).sum()) +``` + +The `MatchId` is also used by `truvari collapse`. However there are two differences. First, in the main `collapse` output, the relevant INFO field is named `CollapseId`. Second, because collapse only has a single input VCF, it is much easier to merge DataFrames. 
To merge collapse results kept variants with those that were removed, we again need to convert the VCFs to DataFrames: + +```bash +truvari vcf2df -i kept.vcf.gz kept.jl +truvari vcf2df -i removed.vcf.gz remov.jl +``` + +Then we combine them: +```python +import joblib +import pandas as pd + +# Load the kept variants and set the index. +kept = joblib.load("kept.jl").set_index('CollapseId') + +# Load the removed variants and set the index. +remov = joblib.load("remov.jl") +remov['CollapseId'] = remov['MatchId'].apply(lambda x: x[0]) +remov.set_index('CollapseId', inplace=True) + +# Join the two sets of variants +result_df = kept.join(remov, how='right', rsuffix='_removed') +``` \ No newline at end of file diff --git a/docs/v4.2.0/Multi-allelic-VCFs.md b/docs/v4.2.0/Multi-allelic-VCFs.md new file mode 100644 index 00000000..fd0eb23e --- /dev/null +++ b/docs/v4.2.0/Multi-allelic-VCFs.md @@ -0,0 +1,11 @@ +Truvari only compares the first alternate allele in VCFs. If a VCF contains multi-allelic sites such as: + +``` +chr2 1948201 . T TACAACACGTACGATCAGTAGAC,TCAACACACAACACGTACGATCAGTAGAC .... 
+``` + +Then pre-process the VCFs with bcftools: + +```bash +bcftools norm -m-any base_calls.vcf.gz | bgzip > base_calls_split.vcf.gz +``` \ No newline at end of file diff --git a/docs/v4.2.0/Updates.md b/docs/v4.2.0/Updates.md new file mode 100644 index 00000000..6f126054 --- /dev/null +++ b/docs/v4.2.0/Updates.md @@ -0,0 +1,245 @@ +# Truvari 4.2 +*January 12, 2024* +* `collapse` + * New parameter `--gt` disallows intra-sample events to collapse ([details](https://github.com/ACEnglish/truvari/wiki/collapse#--gt)) + * New parameter `--intra` for consolidating SAMPLE information during intra-sample collapsing ([details](https://github.com/ACEnglish/truvari/wiki/collapse#--intra)) + * Preserve phasing information when available + * Faster O(n-1) algorithm instead of O(n^2) + * Faster sub-chunking strategy makes smaller chunks of variants needing fewer comparisons + * Fixed rare non-determinism error in cases where multiple variants are at the same position and equal qual/ac could be ordered differently. +* `phab` + * Correct sample handling with `--bSamples` `--cSamples` parameters + * Faster generation of consensus sequence + * Resolved 'overlapping' variant issue causing variants to be dropped + * New `poa` approach to harmonization. Faster than mafft but less accurate. Slower than wfa but more accurate. +* `bench` + * New, easier `MatchId` field to track which baseline/comparison variants match up [details](https://github.com/ACEnglish/truvari/wiki/MatchIds) + * `entry_is_present` method now considers partial missing variants (e.g. `./1`) as present + * Removed the 'weighted' metrics from `summary.json` +* `consistency` + * Fixed issue with counting duplicate records + * Added flag to optionally ignore duplicate records +* `anno svinfo` now overwrites existing SVLEN/SVTYPE info fields +* general + * Reduced fn matches for unroll sequence similarity by reporting maximum of multiple manipulations of variant sequence (roll up/down/none). 
Comes at a small, but reasonable, expense of some more fp matches. + * Bump pysam version + * Fixed bug in `unroll` sequence similarity that sometimes rolled from the wrong end + * Fixed bug for handling of None in ALT field + * `truvari.compress_index_vcf` forces overwriting of tabix index to prevent annoying crashes + + +# Truvari 4.1 +*August 7, 2023* + +* `bench` + * Creates `candidate.refine.bed` which hooks into `refine` on whole-genome VCFs [details](https://github.com/ACEnglish/truvari/wiki/bench#refining-bench-output) + * `--recount` for correctly assessing whole-genome refinement results + * experimental 'weighted' summary metrics [details](https://github.com/ACEnglish/truvari/wiki/bench#weighted-performance) + * Unresolved SVs (e.g. `ALT == <DEL>`) are filtered when `--pctseq != 0` +* `phab` + * ~2x faster via reduced IO from operating in stages instead of per-region + * Removed most external calls (e.g. samtools doesn't need to be in the environment anymore) + * new `--align wfa` allows much faster (but slightly less accurate) variant harmonization + * increased determinism of results [details](https://github.com/ACEnglish/truvari/commit/81a9ab85b91b0c530f9faeedfa4e7e0d68a5e8c2) +* `refine` + * Faster bed file intersection of `--includebed` and `--regions` + * Refine pre-flight check + * Correct refine.regions.txt end position from IntervalTree correction + * Better refine region selection with `--use-original` + * `--use-includebed` switched to `--use-region-coords` so that default behavior is to prefer the includebed's coordinates + * `--use-original-vcfs` to use the original pre-bench VCFs + * `refine.variant_summary.json` is cleaned of uninformative metrics +* `stratify` + * parallel parsing of truvari directory to make processing ~4x faster +* `msa2vcf` Fixed REPL decomposition bug to now preserve haplotypes +* `anno grpaf` - expanded annotation info fields +* `anno density` - new parameter `--stepsize` for sliding windows +* `collapse` + * New optional 
`--median-info` fields [#146](https://github.com/ACEnglish/truvari/issues/146) +* Minor updates + * Fix some `anno` threading on macOS [#154](https://github.com/ACEnglish/truvari/issues/154) + * Monomorphic/multiallelic check fix in `bench` + * `PHAB_WRITE_MAFFT` environment variable to facilitate updating functional test answer key + * Slightly slimmer docker container + +# Truvari 4.0 +*March 13, 2023* + +As part of the GIAB TR effort, we have made many changes to Truvari's tooling to enable comparison of variants in TR regions down to 5bp. Additionally, in order to keep Truvari user friendly we have made changes to the UI. Namely, we've updated some default parameters, some command-line arguments, and some outputs. There are also a few new tools and how a couple of tools work has changed. Therefore, we decided to bump to a new major release. If you're using Truvari in any kind of production capacity, be sure to test your pipeline before moving to v4.0. + +* New `refine` command for refining benchmarking results. [Details](refine) +* `bench` + * [Unroll](bench#unroll) is now the default sequence comparison approach. + * New `--pick` parameter to control the number of matches a variant can participate in [details](bench#controlling-the-number-of-matches) + * The `summary.txt` is now named `summary.json` + * Outputs parameters to `params.json` + * Output VCFs are sorted, compressed, and indexed + * Ambiguous use of 'call' in outputs corrected to 'comp' (e.g. `tp-call.vcf.gz` is now `tp-comp.vcf.gz`) + * Renamed `--pctsim` parameter to `--pctseq` + * Fixed bug where FP/FN weren't getting the correct, highest scoring match reported + * Fixed bug where `INFO/Multi` wasn't being properly applied + * Fixed bug where variants spanning exactly one `--includebed` region were erroneously being counted. 
+ * Removed parameters: `--giabreport`, `--gtcomp`,`--multimatch`, `--use-lev`, `--prog`, `--unroll` +* `collapse` + * Renamed `--pctsim` parameter to `--pctseq` + * Runtime reduction by ~40% with short-circuiting during `Matcher.build_match` + * Better output sorting which may allow pipelines to be a little faster. +* `vcf2df` + * More granular sizebins for `[0,50)` including better handling of SNPs + * `--multisample` is removed. Now automatically add all samples with `--format` + * key index column removed and replaced by chrom, start, end. Makes rows easier to read and easier to work with e.g. pyranges +* `anno` + * Simplified ui. Commands that work on a single VCF and can stream (stdin/stdout) no longer use `--input` but a positional argument. + * Added `addid` +* `consistency` + * Slight speed improvement + * Better json output format +* `segment` + * Added `--passonly` flag + * Changed UI, including writing to stdout by default + * Fixed END and 1bp DEL bugs, now adds N to segmented variants' REF, and info fields SVTYPE/SVLEN +* API + * Began a focused effort on improving re-usability of Truvari code. + * Entry point to run benchmarking programmatically with [Bench object](https://truvari.readthedocs.io/en/latest/truvari.html#bench). + * Better development version tracking. [details](https://github.com/ACEnglish/truvari/commit/4bbf8d9a5be3b6a3f935afbd3a9b323811b676a0) + * Improved developer documentation. See [readthedocs](https://truvari.readthedocs.io/) +* general + * msa2vcf now left-trims and decomposes variants into indels + * Functional tests reorganization + * Fix for off-by-one errors when using pyintervaltree. See [ticket](https://github.com/ACEnglish/truvari/issues/137) + * Removed progressbar and Levenshtein dependencies as they are no longer used. 
+ +# Truvari 3.5 +*August 27, 2022* + +* `bench` + * `--dup-to-ins` flag automatically treats SVTYPE==DUP as INS, which helps compare some programs/benchmarks + * New `--unroll` sequence comparison method for `bench` and `collapse` ([details](bench#unroll)) +* Major `anno trf` refactor (TODO write docs) including: + * annotation of DEL is fixed (was reporting the ALT copy numbers, not the sample's copy numbers after incorporating the ALT + * allow 'denovo' annotation by applying any TRF annotations found, not just those with corresponding annotations +* New `anno grpaf` annotates vcf with allele frequency info for groups of samples +* New `phab` for variant harmonization ([details](../phab)) +* backend + * `truvari.entry_size` returns the length of the event in the cases where len(REF) == len(ALT) (e.g. SNPs entry_size is 1) + * New key utility for `truvari.build_anno_trees` +* general + * Float metrics written to the VCF (e.g. PctSizeSimilarity) are rounded to precision of 4 + * Nice colors in some `--help` with [rich](https://github.com/Textualize/rich/) +* `divide` + * output shards are now more easily sorted (i.e. `ls divide_result/*.vcf.gz` will return the shards in the order they were made) + * compression/indexing of sub-VCFs in separate threads, reducing runtime +* user issues + * Monomorphic reference ALT alleles no longer throw an error in `bench` ([#131](https://github.com/ACEnglish/truvari/issues/131)) + * `SVLEN Number=A` fix ([#132](https://github.com/ACEnglish/truvari/issues/132)) + +# Truvari 3.4 +*July 7, 2022* + +* Improved performance of `consistency` (see [#127](https://github.com/ACEnglish/truvari/pull/127)) +* Added optional json output of `consistency` report +* Allow GT to be missing, which is allowed by VCF format specification +* TRF now uses `truvari.entry_variant_type` instead of trying to use `pysam.VariantRecord.info["SVLEN"]` +directly which allows greater flexibility. +* vcf2df now parses fields with `Number=\d` (e.g. 
2+), which is a valid description +* `truvari.seqsim` is now case insensitive (see [#128](https://github.com/ACEnglish/truvari/issues/128)) +* Collapse option to skip consolidation of genotype information so kept alleles are unaltered +* `truvari anno dpcnt --present` will only count the depths of non ./. variants +* New collapse annotation `NumConsolidate` records how many FORMATs were consolidated +* Official [conda](https://anaconda.org/bioconda/truvari) support + +# Truvari 3.3 +*May 25, 2022* + +* New utilities `vcf_ranges` and `make_temp_filename` +* New annotations `dpcnt` and `lcr` +* Fixed a bug in `truvari collapse --keep` that prevented the `maxqual` or `common` options from working +* Increased determinism for `truvari collapse` so that in cases of tied variant position the longer allele is returned. If the alleles also have the same length, they are sorted alphabetically by the REF +* New `truvari bench --extend` functionality. See [discussion](https://github.com/ACEnglish/truvari/discussions/99) for details + +# Truvari 3.2 +*Apr 1, 2022* + +* Removed `truvari.copy_entry` for `pysam.VariantRecord.translate` a 10x faster operation +* Faster `truvari collapse` ([@c8b319b](https://github.com/ACEnglish/truvari/commit/c8b319b0e717a9e342f52e4a5e927f154eeb0e4a)) +* When building `MatchResult` between variants with shared start/end positions, we save processing work by skipping haplotype creation and just compare REFs/ALTs directly. +* Updated documentation to reference the paper https://doi.org/10.1101/2022.02.21.481353 +* New `truvari anno density` for identifying regions with 'sparse' and 'dense' overlapping SVs ([details](https://github.com/spiralgenetics/truvari/wiki/anno#truvari-anno-density)) +* Better `bench` genotype reporting with `summary.txt` having a `gt_matrix` of Base GT x Comp GT for all Base calls' best, TP match. 
+* New `truvari anno bpovl` for intersecting against tab-delimited files ([details](https://github.com/spiralgenetics/truvari/wiki/anno#truvari-anno-bpovl)) +* New `truvari divide` command to split VCFs into independent parts ([details](https://github.com/ACEnglish/truvari/wiki/divide)) +* Replaced `--buffer` parameter with `--minhaplen` for slightly better matching specificity +* Bugfix - `truvari anno trf` no longer duplicates entries spanning multiple parallelization regions +* Bugfix - `collapse` MatchId/CollapseId annotation wasn't working +* Bugfixes - from [wwliao](https://github.com/wwliao) ([@4dd9968](https://github.com/ACEnglish/truvari/commit/4dd99683912236f433166889bb0b5667e9fa936d) [@ef2cfb3](https://github.com/ACEnglish/truvari/commit/ef2cfb366b60a5af4671d65d3ed987b08da72227)) +* Bugfixes - Issues [#107](https://github.com/ACEnglish/truvari/issues/107), [#108](https://github.com/ACEnglish/truvari/issues/108) + +# Truvari 3.1 +*Dec 22, 2021* + +* `bench` now annotates FPs by working a little differently. See [[bench|bench#methodology]] for details. +* Recalibrated TruScore and new reciprocal overlap measurement for sequence resolved `INS` ([details](https://github.com/spiralgenetics/truvari/discussions/92)) +* Match objects are now usable via the SDK. See [#94](https://github.com/spiralgenetics/truvari/discussions/94) for an example of using Truvari programmatically +* `file_zipper` VCF iteration strategy (`GenomeTree` -> `RegionVCFIterator`) that improves speed, particularly when using `--includebed` +* `collapse` refactored to use Match object and for prettier code, cleaner output. +* `anno remap` now optionally adds `INFO` field of the location of the top N hits. +* An experimental tool `truvari segment` added to help SV association analysis. +* `vcf2df` now supports pulling `FORMAT` fields from multiple samples. +* `vcf2df` now adds `('_ref', '_alt')`, or `('_ref', '_het', '_hom')` for `INFO,Number=[R|G]` fields, respectively.
+* Improved documentation, including http://truvari.readthedocs.io/ for developers. +* Increasing/diversifying test coverage exposed minor bugs which were fixed. +* `bench --no-ref --cSample` bug fixes. +* Minor usability feature implemented in `help_unknown_cmd`. + +# Truvari 3.0 +*Sep 15, 2021* + +As Truvari's adoption and functionality grow, we decided to spend time working on sustainability and performance of the tool. Multiple [Actions](https://github.com/spiralgenetics/truvari/actions) for CI/CD have been added. Many components have been refactored for speed, and other 'cruft' code has been removed. Some of these changes (particularly the switch to using edlib for sequence similarity) affect the results. Therefore, we've bumped to a new major release version. + +* Working on speed improvements +* Added edlib as the default when calculating pctseq_sim, keeping Levenshtein as an option (`--use-lev`). +* `truvari bench` summary's gt_precision/gt_recall are replaced by gt_concordance, which is just the percent of TP-comp calls with a concordant genotype. `--no-ref` has better functionality. `--giabreport` is different. +* Added `--keep common` to `truvari collapse`, which allows one to choose to keep the allele with the highest MAC. +* `truvari collapse --hap` wasn't working correctly. The assumptions about the calls being phased weren't being +properly used (e.g. don't collapse 1|1) and the NumCollapsed was being populated before the single-best +match was chosen. The latter is a reporting problem, but the former had an effect on the results with +~3% of collapsed calls being mis-collapsed. +* `truvari anno trf` is now faster and simpler in its approach and what's reported... and hopefully more useful. +* `truvari anno grm` has min_size and regions arguments added. +* truv2df has become `truvari vcf2df` where the default is vcf conversion with options to run on a `truvari bench` output directory.
It also allows a specific sample to be parsed with `--format` and better Number=A handling. +* NeighId added to `truvari anno numneigh`, which works like bedtools cluster. +* The method af_calc now makes MAC/AC. +* Added 'partial' to `truvari anno remap`. +* Added `truvari anno svinfo`. +* Removed `truvari stats` as `truvari vcf2df` is better and began building [community-driven summaries](https://github.com/spiralgenetics/truvari/discussions/categories/vcf2df-recipes). +* Ubiquitous single version. +* Added a Dockerfile and instructions for making a Truvari docker container. +* Code and repository cleaning. +* Github actions for automated pylint, testing, and releases to pypi. +* Preserving per-version documentation from the wiki in `docs/`. + + +# Truvari 2.1 +*Jan 27, 2021* + +We've expanded and improved Truvari's [annotations](https://github.com/spiralgenetics/truvari/wiki/anno). We've added an [SV "collapsing" tool](https://github.com/spiralgenetics/truvari/wiki/collapse). And we've added a way to [turn VCFs into pandas DataFrames](https://github.com/spiralgenetics/truvari/wiki/truv2df) easily for downstream analysis/QC. + +# Truvari 2.0 +*May 14, 2020* + +After performing a drastic code refactor, we were able to create several helper methods from Truvari's core functionality around SV comparisons and VCF manipulations. This reusable code gave us an opportunity to create tools relevant for SV analysis. + +Truvari now contains multiple subcommands. In addition to the original benchmarking functionality (`truvari bench`), Truvari can generate SV relevant summary statistics, compute consistency of calls within VCFs, and we've begun to develop annotations for SVs. Details on these tools are on the [WIKI](https://github.com/spiralgenetics/truvari/wiki). + +We are committed to continually improving Truvari with the hopes of advancing the study and analysis of structural variation. + +# Truvari 1.3 +*September 25th, 2019* + +Truvari has some big changes. 
In order to keep up with the retirement of Python 2.7 https://pythonclock.org/ +We're now only supporting Python 3. + +Additionally, we now package Truvari so it and its dependencies can be installed directly. See Installation +below. This will enable us to refactor the code for easier maintenance and reusability. + +Finally, we now automatically report genotype comparisons in the summary stats. \ No newline at end of file diff --git a/docs/v4.2.0/anno.md b/docs/v4.2.0/anno.md new file mode 100644 index 00000000..55835902 --- /dev/null +++ b/docs/v4.2.0/anno.md @@ -0,0 +1,494 @@ + +Truvari annotations: +* [gcpct](anno#truvari-anno-gcpct) - GC Percent +* [gtcnt](anno#truvari-anno-gtcnt) - Genotype Counts +* [trf](anno#truvari-anno-trf) - Tandem Repeats +* [grm](anno#truvari-anno-grm) - Mappability +* [repmask](anno#truvari-anno-repmask) - Repeats +* [remap](anno#truvari-anno-remap) - Allele Remapping +* [hompct](anno#truvari-anno-hompct) - Homozygous Percent +* [numneigh](anno#truvari-anno-numneigh) - Number of Neighbors +* [svinfo](anno#truvari-anno-svinfo) - SVINFO Fields +* [bpovl](anno#truvari-anno-bpovl) - Annotation Intersection +* [density](anno#truvari-anno-density) - Call Density +* [dpcnt](anno#truvari-anno-dpcnt) - Depth (DP) and Alt-Depth (AD) Counts +* [lcr](anno#truvari-anno-lcr) - Low-complexity Regions +* [grpaf](anno#truvari-anno-grpaf) - Sample Group Allele-Frequency Annotations + +# truvari anno gcpct + +This will add an INFO tag `GCPCT` to each element in a VCF of the GC percent of the call's sequence. + +For deletions, this is the GC percent of the reference range of the call. For insertions, the ALT sequence is analyzed.
+``` +usage: gcpct [-h] [-o OUTPUT] -r REFERENCE [input] + +Annotates GC content of SVs + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output filename (stdout) + -r REFERENCE, --reference REFERENCE + Reference fasta +``` + +# truvari anno gtcnt +This will add an INFO tag `GTCNT` to each element in a VCF with the count of genotypes found across all samples. The value is a list of Counts of genotypes for the allele across all samples (UNK, REF, HET, HOM). This is most useful for pVCFs. + +``` +usage: gtcnt [-h] [-o OUTPUT] [input] + +Annotates GTCounts of alleles + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output filename (stdout) +``` + +# truvari anno trf +Adds a tandem repeat annotation to sequence resolved Insertion/Deletion variants in a VCF. + +### Annotations added +| Field Name | Description | +|------------|-------------------------------------------------------------| +| TRF | Entry hits a tandem repeat region | +| TRFdiff | ALT TR copy difference from reference | +| TRFrepeat | Repeat motif | +| TRFovl | Percent of ALT covered by TRF annotation | +| TRFstart | Start position of discovered repeat | +| TRFend | End position of discovered repeat | +| TRFperiod | Period size of the repeat | +| TRFcopies | Number of copies aligned with the consensus pattern | +| TRFscore | Alignment score | +| TRFentropy | Entropy measure | +| TRFsim | Similarity of ALT sequence to generated motif faux sequence | + +### Details +Given a set of tandem repeat regions and a VCF, this annotates the tandem repeat motif and copy number change of insertions and deletions as expansions/contractions. The expected input catalog of tandem repeats is from a subset of columns in the Adotto TR catalog ([link](https://github.com/ACEnglish/adotto/blob/main/regions/DataDescription.md)).
This file can be formatted for `truvari anno trf` via: +```bash +zcat adotto_TRregions_v1.1.bed.gz | cut -f1-3,18 | bgzip > anno.trf.bed.gz +tabix anno.trf.bed.gz +``` +For deletions, the tool simply finds the motif annotation with the highest overlap over the variant's boundaries. It then removes that sequence from the reference and calculates how many copies of the motif are removed with the formula `round(-(ovl_pct * svlen) / anno["period"], 1)`. If a deletion overlaps multiple motifs, the highest scoring motif is chosen based on higher reciprocal overlap percent first and TRF score second (see [code](https://github.com/ACEnglish/truvari/blob/2219f52850252c18dcd8c679da6644bb1cee5b68/truvari/annotations/trf.py#L29)). + +For insertions, by default the tool first tries to estimate which motif is contained in the alternate sequence. For each overlapping annotation, the copy number difference of the motif is calculated via `copy_diff = len(entry.alts[0][1:]) / anno["period"]`. Next, a 'faux sequence' is made from `copy_diff` copies of the motif. If the sequence is above the `--motif-similarity` with the insertion sequence, that is considered the insertion's motif. If no estimate is above the `--motif-similarity`, the insertion is incorporated into the reference and TRF is run. If the discovered TRF hits match a motif in the tandem repeat regions file, that annotation is used. If the highest scoring TRF hit doesn't match the tandem repeats region file, the nominally de novo annotation is added to the insertion's vcf entry.
+ +``` +usage: trf [-h] -i INPUT [-o OUTPUT] [-e EXECUTABLE] [-T TRF_PARAMS] -r REPEATS -f REFERENCE [-s MOTIF_SIMILARITY] + [-m MIN_LENGTH] [-R] [--no-estimate] [-C CHUNK_SIZE] [-t THREADS] [--debug] + +Intersect vcf with reference tandem repeats and annotate +variants with the best fitting repeat motif and its copy number +relative to the reference + +options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + VCF to annotate + -o OUTPUT, --output OUTPUT + Output filename (stdout) + -e EXECUTABLE, --executable EXECUTABLE + Path to tandem repeat finder (trf409.linux64) + -T TRF_PARAMS, --trf-params TRF_PARAMS + Default parameters to send to trf (3 7 7 80 5 40 500 -h -ngs) + -r REPEATS, --repeats REPEATS + Reference repeat annotations + -f REFERENCE, --reference REFERENCE + Reference fasta file + -s MOTIF_SIMILARITY, --motif-similarity MOTIF_SIMILARITY + Motif similarity threshold (0.9) + -m MIN_LENGTH, --min-length MIN_LENGTH + Minimum size of entry to annotate (50) + -R, --regions-only Only write variants within --repeats regions (False) + --no-estimate Skip INS estimation procedure and run everything through TRF. (False) + -C CHUNK_SIZE, --chunk-size CHUNK_SIZE + Size (in mbs) of reference chunks for parallelization (5) + -t THREADS, --threads THREADS + Number of threads to use (1) + --debug Verbose logging +``` + +# truvari anno grm + +For every SV, we create a kmer over the the upstream and downstream reference and alternate breakpoints. +We then remap that kmer to the reference genome and report alignment information. +This does not alter the VCF traditional annotations, but instead will create a pandas +DataFrame and save it to a joblib object. + +There are four queries made per-SV. For both reference (r), alternate (a) we create upstream (up) and downstream (dn) kmers. +So the columns are all prefixed with one of "rup_", "rdn_", "aup_", "adn_". 
+ +In the alignment information per-query, there are three 'hit' counts: +- nhits : number of query hits +- dir_hits : direct strand hit count +- com_hits : complement strand hit count + +The rest of the alignment information is reported by average (avg), maximum (max), and minimum (min) + +The suffixes are: +- q : mapping quality score of the hits +- ed : edit distance of the hits +- mat : number of matches +- mis : number of mismatches + +For example, "aup_avg_q" is the alternate's upstream breakend kmer's average mapping quality score. + +``` +usage: grm [-h] -i INPUT -r REFERENCE [-R REGIONS] [-o OUTPUT] [-k KMERSIZE] [-m MIN_SIZE] [-t THREADS] [--debug] + +Maps graph edge kmers with BWA to assess Graph Reference Mappability + +options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + Input VCF + -r REFERENCE, --reference REFERENCE + BWA indexed reference + -R REGIONS, --regions REGIONS + Bed file of regions to parse (None) + -o OUTPUT, --output OUTPUT + Output dataframe (results.jl) + -k KMERSIZE, --kmersize KMERSIZE + Size of kmer to map (50) + -m MIN_SIZE, --min-size MIN_SIZE + Minimum size of variants to map (25) + -t THREADS, --threads THREADS + Number of threads (1) + --debug Verbose logging +``` + +# truvari anno repmask + +``` +usage: repmask [-h] -i INPUT [-o OUTPUT] [-e EXECUTABLE] [-m MIN_LENGTH] [-M MAX_LENGTH] [-t THRESHOLD] [-p PARAMS] [-T THREADS] + [--debug] + + Wrapper around RepeatMasker to annotate insertion sequences in a VCF + +options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + VCF to annotate (None) + -o OUTPUT, --output OUTPUT + Output filename (/dev/stdout) + -e EXECUTABLE, --executable EXECUTABLE + Path to RepeatMasker (RepeatMasker) + -m MIN_LENGTH, --min-length MIN_LENGTH + Minimum size of entry to annotate (50) + -M MAX_LENGTH, --max-length MAX_LENGTH + Maximum size of entry to annotate (50000) + -t THRESHOLD, --threshold THRESHOLD + Threshold for pct of allele covered (0.8) +
-p PARAMS, --params PARAMS + Default parameter string to send to RepeatMasker (-pa {threads} -qq -e hmmer -species human -lcambig + -nocut -div 50 -no_id -s {fasta}) + -T THREADS, --threads THREADS + Number of threads to use (1) + --debug Verbose logging +``` + +# truvari anno remap + +Taking the Allele’s sequence, remap it to the reference and annotate based on the closest alignment. + +![](https://github.com/spiralgenetics/truvari/blob/develop/imgs/remap_example.png) + +``` +usage: remap [-h] -r REFERENCE [-o OUTPUT] [-m MINLENGTH] [-t THRESHOLD] [-d DIST] [-H HITS] [--debug] [input] + +Remap VCF'S alleles sequence to the reference to annotate REMAP + +- novel : Allele has no hits in reference +- tandem : Allele's closest hit is within len(allele) bp of the SV's position +- interspersed : Allele's closest hit is not tandem +- partial : Allele only has partial hit(s) less than --threshold + +Which alleles and alignments to consider can be altered with: +- --minlength : minimum SV length to considred (50) +- --dist : For deletion SVs, do not consider alignments that hit within Nbp of the SV's position +(a.k.a. 
alignments back to the source sequence) (10) +- --threshold : Minimum percent of allele's sequence used by alignment to be considered (.8) + +positional arguments: + input Input VCF (/dev/stdin) + +options: + -h, --help show this help message and exit + -r REFERENCE, --reference REFERENCE + BWA indexed reference + -o OUTPUT, --output OUTPUT + Output VCF (/dev/stdout) + -m MINLENGTH, --minlength MINLENGTH + Smallest length of allele to remap (50) + -t THRESHOLD, --threshold THRESHOLD + Threshold for pct of allele covered to consider hit (0.8) + -d DIST, --dist DIST Minimum distance an alignment must be from a DEL's position to be considered (10)) + -H HITS, --hits HITS Report top hits as chr:start-end.pct (max 0) + --debug Verbose logging +``` +# truvari anno hompct + +``` +usage: hompct [-h] -i INPUT [-o OUTPUT] [-b BUFFER] [-m MINANNO] [-M MAXGT] [-c MINCOUNT] [--debug] + +Calcluate the the Hom / (Het + Hom) of variants in the region of SVs +Requires the VCF to contain SVs beside SNPs/Indels + +options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + Compressed, indexed VCF to annotate + -o OUTPUT, --output OUTPUT + Output filename (stdout) + -b BUFFER, --buffer BUFFER + Number of base-pairs up/dn-stream to query (5000) + -m MINANNO, --minanno MINANNO + Minimum size of event to annotate (50) + -M MAXGT, --maxgt MAXGT + Largest event size to count for genotyping (1) + -c MINCOUNT, --mincount MINCOUNT + Minimum number of genotyping events to report HOMPCT (0) + --debug Verbose logging +``` + +# truvari anno numneigh + +``` +usage: numneigh [-h] [-o OUTPUT] [-r REFDIST] [-s SIZEMIN] [--passonly] [--debug] [input] + +For every call within size boundaries, +Add NumNeighbors info field of how many calls are within the distance +Add NeighId clustering field in the same chained neighborhood +For example, +:: + -- is a call, refdist is 2 + - - - - - - + nn: 1 2 1 0 1 1 + id: 0 0 0 1 2 2 + +positional arguments: + input VCF to annotate + +options: + 
-h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output vcf (stdout) + -r REFDIST, --refdist REFDIST + Max reference location distance (1000) + -s SIZEMIN, --sizemin SIZEMIN + Minimum variant size to consider for annotation (50) + --passonly Only count calls with FILTER == PASS + --debug Verbose logging +``` + +# truvari anno svinfo + +Uses `truvari.entry_size` and `truvari.entry_variant_type` on entries >= `args.minsize` to add 'SVLEN' and 'SVTYPE' annotations to a VCF's INFO. + +How SVLEN is determined: +- Starts by trying to use INFO/SVLEN +- If SVLEN is unavailable and ALT field is an SV (e.g. `<DEL>`, `<INS>`, etc), use abs(vcf.start - vcf.end). The INFO/END tag needs to be available, especially for INS. +- Otherwise, return the size difference of the sequence resolved call using abs(len(vcf.REF) - len(str(vcf.ALT[0]))) + +How SVTYPE is determined: +- Starts by trying to use INFO/SVTYPE +- If SVTYPE is unavailable, infer if entry is an insertion or deletion by looking at the REF/ALT sequence size differences +- If REF/ALT sequences are not available, try to parse the `<DEL>`, `<INS>`, etc from the ALT column. +- Otherwise, assume 'UNK' + +``` +usage: svinfo [-h] [-o OUTPUT] [-m MINSIZE] [input] + +Adds SVTYPE and SVLEN INFO fields + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output filename (stdout) + -m MINSIZE, --minsize MINSIZE + Minimum size of entry to annotate (50) +``` + +# truvari anno bpovl + +After turning a tab-delimited annotation file into an IntervalTree, intersect the start/end and overlap of SVs. +The output is a light-weight pandas DataFrame saved with joblib.
The columns in the output are: + +- vcf_key : Variant key from `truvari.entry_to_key` +- intersection : Type of intersection between the SV and the annotation + - start_bnd - SV's start breakpoint hits the annotation + - end_bnd - SV's end breakpoint hits the annotation + - overlaps - Annotation's start/end boundaries are completely within the SV + - contains - Annotation's start/end boundaries span the entire SV +- anno_key : Annotation file's line index + +The idea with this tool is to annotate variants against tab-delimited files, especially when there's a 1-to-N relationship between a variant and its annotations. This tool is useful when used in conjunction with `truvari vcf2df` and pandas DataFrames. + +For example, suppose we have a VCF of SVs and a GTF of genes/gene features from Ensembl. Any SV may intersect multiple features, which doesn't lend itself well to directly annotating the VCF's INFO. After using `bpovl`, we'll use Truvari to convert the SVs to a DataFrame. + +```bash +truvari anno bpovl -i variants.vcf.gz -a genes.gtf.gz -o anno.jl -p gff +truvari vcf2df variants.vcf.gz variants.jl +``` + +We can then treat the files similar to a database and do queries and joins to report which variants intersect which annotations. + +```python +import joblib +from gtfparse import read_gtf +variants = joblib.load("variants.jl") +genes = read_gtf("genes.gtf.gz") +annos = joblib.load("anno.jl") +to_check = annos.iloc[0] + +print(to_check) +# vcf_key chr20:958486-958487.A +# intersection start_bnd +# anno_key 11 + +print(variants.loc[to_check['vcf_key']]) +# id None +# svtype INS +# ... etc + +print(annos.loc[to_check['anno_key']]) +# seqname chr20 +# source ensembl_havana +# feature exon +# start 958452 +# ... etc +``` + +Similar to tabix, `bpovl` has presets for known file types like bed and gff. But any tab-delimited file with sequence/chromosome, start position, and end position can be parsed. Just set the "Annotation File Arguments" to the 0-based column indexes.
For example, a bed file +has arguments `-s 0 -b 1 -e 2 -c #`. + +``` +usage: bpovl [-h] -a ANNO -o OUTPUT [--sizemin SIZEMIN] [--spanmax SPANMAX] [-p {bed,gff}] [-c COMMENT] [-s SEQUENCE] [-b BEGIN] + [-e END] [-1] + [input] + +Creates intersection of features in an annotation file with SVs' breakpoints and overlap + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -a ANNO, --anno ANNO Tab-delimited annotation file + -o OUTPUT, --output OUTPUT + Output joblib DataFrame + --sizemin SIZEMIN Minimum size of variant to annotate (50) + --spanmax SPANMAX Maximum span of SVs to annotate (50000) + +Annotation File Arguments: + -p {bed,gff}, --preset {bed,gff} + Annotation format. This option overwrites -s, -b, -e, -c and -1 (None) + -c COMMENT, --comment COMMENT + Skip lines started with character. (#) + -s SEQUENCE, --sequence SEQUENCE + Column of sequence/chromosome name. (0) + -b BEGIN, --begin BEGIN + Column of start chromosomal position. (1) + -e END, --end END Column of end chromosomal position. (2) + -1, --one-based The position in the anno file is 1-based rather than 0-based. (False) +``` +# truvari anno density +Partitions a `--genome` into `--windowsize` regions and count how many variants overlap. Annotate +regions with no variants as 'sparse' and with greater than or equal to (mean + `--threshold` * standard +deviation) number of variants as 'dense'. Outputs a joblib DataFrame with columns +`chrom, start, end, count, anno`. 
+ +``` +usage: density [-h] -g GENOME -o OUTPUT [-m MASK] [-w WINDOWSIZE] [-s STEPSIZE] [-t THRESHOLD] [input] + +Identify 'dense' and 'sparse' variant windows of the genome + +positional arguments: + input Input VCF (/dev/stdin) + +optional arguments: + -h, --help show this help message and exit + -g GENOME, --genome GENOME + Genome bed file + -o OUTPUT, --output OUTPUT + Output joblib DataFrame + -m MASK, --mask MASK Mask bed file + -w WINDOWSIZE, --windowsize WINDOWSIZE + Window size (10000) + -s STEPSIZE, --stepsize STEPSIZE + Window step size (10000) + -t THRESHOLD, --threshold THRESHOLD + std for identifying 'dense' regions (3) +``` + +# truvari anno dpcnt + +For multi-sample VCFs, it is often useful to have summarized depth (DP) information across samples per-variant. This adds a `INFO/DPCNT` with counts of how many samples have `FORMAT/DP` for each of the user-defined bins. Bins are incremented using `bisect` e.g. `pos = bisect.bisect(bins, dp); bins[pos] += 1; + +``` +usage: dpcnt [-h] [-b BINS] [--no-ad] [-p] [-o OUTPUT] [input] + +Quick utility to count how many samples have >= Nx coverage per-variant + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -b BINS, --bins BINS Coverage bins to bisect left the counts (0,5,10,15) + --no-ad Skip adding ADCNT bins + -p, --present Only count sites with present (non ./.) genotypes + -o OUTPUT, --output OUTPUT + Output filename (stdout) +``` + +# truvari anno lcr + +``` +usage: lcr [-h] [-o OUTPUT] [input] + +Annotate low complexity region entropy score for variants +Credit: https://jszym.com/blog/dna_protein_complexity/ + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output filename (stdout) +``` + +# truvari anno grpaf + +Add INFO tags of allele frequency annotations for groups of samples. 
For every group in `--labels` tab-delimited file, calculate the AF,MAF,ExcHet,HWE,MAC,AC for the samples in the group. Adds INFO tags with suffix of the group identifier (e.g. `AF_EAS`). `--strict` will hard fail if there are samples in the `--labels` not present in the vcf header. + +``` +usage: grpaf [-h] [-o OUTPUT] -l LABELS [-t TAGS] [--strict] [--debug] [input] + +Add allele frequency annotations for subsets of samples + +positional arguments: + input VCF to annotate + +options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output filename (stdout) + -l LABELS, --labels LABELS + Tab-delimited file of sample and group + -t TAGS, --tags TAGS Comma-separated list of tags to add from AF,MAF,ExcHet,HWE,MAC,AC,AN (all) + --strict Exit if sample listed in labels is not present in VCF (False) + --debug Verbose logging +``` \ No newline at end of file diff --git a/docs/v4.2.0/bench.md b/docs/v4.2.0/bench.md new file mode 100644 index 00000000..0d027cb6 --- /dev/null +++ b/docs/v4.2.0/bench.md @@ -0,0 +1,293 @@ + +Quick start +=========== +Run this command where base is your 'truth set' SVs and comp is the comparison set of SVs. +```bash +truvari bench -b base_calls.vcf -c comp_calls.vcf -o output_dir/ +``` + +Matching Parameters +=================== +Picking matching parameters can be more of an art than a science. It really depends on the precision of your callers and the tolerance you wish to allow them such that it is a fair comparison. + +For example, depth of coverage callers (such as CNVnator) will have very 'fuzzy' boundaries, and don't report the exact deleted sequence but only varying regions. So thresholds of `pctseq=0`, `pctsize=.5`, `pctovl=.5`, `refdist=1000` may seem fair. + +[BioGraph](https://github.com/spiralgenetics/biograph) and many long-read callers report precise breakpoints and full alternate allele sequences. When benchmarking those results, we want to ensure our accuracy by using the stricter default thresholds. 
+ +If you're still having trouble picking thresholds, it may be beneficial to do a few runs of Truvari bench over different values. Start with the strict defaults and gradually increase the leniency. From there, you can look at the performance metrics and manually inspect differences between the runs to find out what level you find acceptable. Truvari is meant to be flexible for comparison. More importantly, Truvari helps one clearly report the thresholds used for reproducibility. + +Here is a rundown of each matching parameter. +| Parameter | Default | Definition | +|------------|---------|------------| +| refdist | 500 | Maximum distance comparison calls must be within from base call's start/end | +| pctseq | 0.7 | Edit distance ratio between the REF/ALT haplotype sequences of base and
comparison call. See "Comparing Sequences of Variants" below. | +| pctsize | 0.7 | Ratio of min(base_size, comp_size)/max(base_size, comp_size) | +| pctovl | 0.0 | Ratio of two calls' (overlapping bases)/(longest span) | +| typeignore | False | Types don't need to match to compare calls. | + +Below are matching parameter diagrams to illustrate (approximately) how they work. + +``` + █ = Deletion ^ = Insertion + +--refdist REFDIST (500) + Max reference location distance + + ACTGATCATGAT + |--████--| + █████ + + Calls are within reference distance of 2 + +--pctsize PCTSIZE (0.7) + Min pct allele size similarity + + ACTGATCATGA sizes + █████ -> 5bp + ████ -> 4bp + + variants have 0.8 size similarity + + +--pctovl PCTOVL (0.0) + Min pct reciprocal overlap + + ACTGATCATGA ranges + █████ [2,7) + ████ [4,8) + + variants have 0.6 reciprocal overlap + + +--pctseq PCTSEQ (0.7) + Min percent allele sequence similarity + + A-CTG-ACTG + ^ ^ haplotypes + | └ACTG -> CTGACTGA + └CTGA -> CTGACTGA + + haplotypes have 100% sequence similarity +``` + +Outputs +======= +Truvari bench writes the following files to the `--output` directory. +| File | Description | +|----------------------|---------------------------------------------| +| tp-base.vcf.gz | True positive calls from the base VCF | +| tp-comp.vcf.gz | True positive calls from the comparison VCF | +| fp.vcf.gz | False positive calls from comparison | +| fn.vcf.gz | False negative calls from base | +| summary.json | Json output of performance stats | +| params.json | Json output of parameters used | +| candidate.refine.bed | Bed file of regions for `refine` | +| log.txt | Run's log | + +summary.json +------------ +Stats generated by benchmarking are written to `summary.json`.
+ +| Metric | Definition | +|----------------|------------------------------------------------------------| +| TP-base | Number of matching calls from the base vcf | +| TP-comp | Number of matching calls from the comp vcf | +| FP | Number of non-matching calls from the comp vcf | +| FN | Number of non-matching calls from the base vcf | +| precision | TP-comp / (TP-comp + FP) | +| recall | TP-base / (TP-base + FN) | +| f1 | 2 * ((recall * precision) / (recall + precision)) | +| base cnt | Number of calls in the base vcf | +| comp cnt | Number of calls in the comp vcf | +| TP-comp_TP-gt | TP-comp with genotype match | +| TP-comp_FP-gt | TP-comp without genotype match | +| TP-base_TP-gt | TP-base with genotype match | +| TP-base_FP-gt | TP-base without genotype match | +| gt_concordance | TP-comp_TP-gt / (TP-comp_TP-gt + TP-comp_FP-gt) | +| gt_matrix | Base GT x Comp GT Matrix of all Base calls' best, TP match | +| weighted | Metrics weighed by variant sequence/size similarity | + +The `gt_matrix` is a table. For example: +```json +"gt_matrix": { + "(0, 1)": { + "(0, 1)": 500, + "(1, 1)": 10 + }, + "(1, 1)": { + "(1, 1)": 800, + "(0, 1)": 20 + } +} +``` +Represents -> +``` +comp (0,1) (1,1) +base +(0,1) 500 10 +(1,1) 20 800 +``` + +Added annotations +----------------- +The output vcfs are annotated with INFO fields and then sorted, compressed, and indexed inside of the output directory. + +| Anno | Definition | +|-------------------|-----------------------------------------------------------------------------------------------------------------| +| TruScore | Truvari score for similarity of match. 
`((pctseq + pctsize + pctovl) / 3 * 100)` | +| PctSeqSimilarity | Pct sequence similarity between this variant and its closest match | +| PctSizeSimilarity | Pct size similarity between this variant and its closest match | +| PctRecOverlap | Percent reciprocal overlap percent of the two calls | +| StartDistance | Distance of the base call's start from comparison call's start | +| EndDistance | Distance of the base call's end from comparison call's end | +| SizeDiff | Difference in size of base and comp calls | +| GTMatch | Base/comp calls' Genotypes match | +| MatchId | Id to help tie base/comp calls together {chunkid}.{baseid}.{compid} See [[MatchIds wiki\|MatchIds]] for details. | + + +Refining bench output +===================== +As described in the [[refine wiki|refine]], a limitation of Truvari bench is 1-to-1 variant comparison. However, `truvari refine` can harmonize the variants to give them more consistent representations. A bed file named `candidate.refine.bed` is created by `truvari bench` and holds a set of regions which may benefit from refinement. To use it, simply run +```bash +truvari bench -b base.vcf.gz -c comp.vcf.gz -o result/ +truvari refine --regions result/candidate.refine.bed \ + --reference reference.fasta \ + --recount --use-region-coords \ + result/ +``` +See [[refine wiki|refine]] for details. + +Comparing Sequences of Variants +=============================== + +Truvari has implemented two approaches to compare variant sequences. The default comparison is called 'unroll'. Optionally, a `--reference` can be provided and Truvari will use the reference context of a pair of variants for comparison. + +## Unroll +The method of giving a pair of calls the same reference context can be achieved using an 'unroll' method. For a formal description, see [this gist](https://gist.github.com/ACEnglish/1e7421c46ee10c71bee4c03982e5df6c). 
+ +The main idea is that in order to move variants upstream/downstream, the reference sequence flanking the variant will need to be moved downstream/upstream respectively. Or, to say this another way, we can think of the alternate sequences as being circular instead of linear. This means that in order to move the variant e.g. 1bp downstream for an INS, we could remove the first base from the ALT and append it to the end. So in the 'ab' example used to describe "Reference context" below, we only need to unroll the insertion at a position by the distance between it and another variant e.g. the INS `ab` at POS 2 becomes identical to the INS `ba` at POS 1 by rolling `2-1 = 1` bases from the start to the end. + +This unroll method has a number of benefits and a couple of considerations, including: +* not needing a `--reference` for comparison, which saves I/O time +* increasing the number of correctly matching SVs +* decreasing the number of 'suspect' matches in smaller size regimes +* providing a simpler pattern between PctSizeSimilarity and PctSeqSimilarity + +## Reference context +For the reference context method, consider a hypothetical tandem repeat expansion of the reference sequence 'AB'. Here, we'll represent the 'insertion' sequence as lower-case 'ab', though it should be considered equivalent to 'AB'. Three equally valid descriptions of this +variant would be: + +```text +#POS INS Haplotype + 0 ab abAB + 1 ba AbaB + 2 ab ABab +``` + +Therefore, to compare the sequence similarity, Truvari builds the haplotypes over the range of a pair of calls' +`min(starts):max(ends)` before making the sequence change introduced by the variants. In Python, this line +looks like: + +``` python +hap1_seq = ref.get_seq(a1_chrom, start + 1, a1_start).seq + a1_seq + ref.get_seq(a1_chrom, a1_end + 1, end).seq +``` + +Where `a1_seq` is the longer of the REF or ALT allele. + +## SVs without sequences + +If the base or comp vcfs do not have sequence resolved calls (e.g.
`<DEL>`), simply set `--pctseq=0` to turn off +sequence comparison. The `--reference` does not need to be provided when not using sequence comparison. If +`--pctseq != 0` and an unresolved SV is encountered, a warning will be raised and the variant will not be compared. + +Controlling the number of matches +================================= + +How many matches a variant is allowed to participate in is controlled by the `--pick` parameter. The available pickers are `single`, `ac`, and `multi`. + +* `single` (the default option) allows each variant to participate in up to one match. +* `ac` uses the genotype allele count to control how many matches a variant can have. This means a homozygous alternate variant can participate in two matches (its GT is 1/1 so AC=2). A heterozygous variant can only participate in one match (GT 0/1, AC=1). And, a homozygous reference variant cannot be matched. Note that missing genotypes are considered reference alleles and do not add to the AC e.g. (GT ./1, AC=1). +* `multi` allows variants to participate in all available matches. + +As an example, imagine we have three variants in a pVCF with two samples we want to compare. + +``` +CHROM POS ID REF ALT base comp +chr20 17785968 ID1 A ACGCGCGCGCG 1/1 1/0 +chr20 17785968 ID2 A ACGCGCGCGCGCG 0/0 0/1 +chr20 17785969 ID3 C CGCGCGCGCGCGC 0/0 1/1 +``` + +To compare samples inside the same vcf, we would use the command: +```bash +truvari bench -b input.vcf.gz -c input.vcf.gz -o output/ --bSample base --cSample comp --no-ref a +``` + +This VCF produces different results depending on the `--pick` parameter: + +| Parameter | ID1 State | ID2 State | ID3 State | +|-----------|-----------|-----------|-----------| +| single | TP | FP | FP | +| ac | TP | TP | FP | +| multi | TP | TP | TP | + +--dup-to-ins +============ + +Most SV benchmarks only report DEL and INS SVTYPEs. The flag `--dup-to-ins` will interpret SVs with SVTYPE == DUP as SVTYPE == INS. Note that DUPs generally aren't sequence resolved (i.e.
the ALT isn't a sequence) like INS. Therefore, `--dup-to-ins` typically should be used without sequence comparison via `--pctseq 0` + +--sizemin and --sizefilt +======================== + +`--sizemin` is the minimum size of a base call to be considered. + +`--sizefilt` is the minimum size of a comparison call that will be matched to base calls. It can +be less than `sizemin` for edge case variants. + +For example: Imagine `sizemin` is set at 50 and `sizefilt` at 30, and a 50bp base call is 98% similar to a 49bp comparison +call at the same position. + +These two calls could be considered matching. However, if we removed comparison calls less than `sizemin`, +we'd incorrectly classify the 50bp base call as a false negative. Instead, we allow comparison calls between `[sizefilt,sizemin)` to find matches. + +This has the side effect of artificially inflating specificity. For example, if that same 49bp call described +above were below the similarity threshold, it would not be classified as a FP since it is below the `sizemin` +threshold. So we're giving the call a better chance to be useful and less chance to be detrimental +to final statistics. + +Include Bed & VCF Header Contigs +================================ + +If an `--includebed` is provided, only base and comp calls contained within the defined regions are used +for comparison. This is similar to pre-filtering your base/comp calls using: + +```bash +(zgrep "#" my_calls.vcf.gz && bedtools intersect -u -a my_calls.vcf.gz -b include.bed) | bgzip > filtered.vcf.gz +``` + +with the exception that Truvari requires the start and the end to be contained in the same includebed region +whereas `bedtools intersect` does not. + +If an `--includebed` is not provided, the comparison is restricted to only the contigs present in the base VCF +header. Therefore, any comparison calls on contigs not in the base calls will not be counted toward summary +statistics and will not be present in any output vcfs. 
+ +Extending an Include Bed +------------------------ +The option `--extend` extends the regions of interest (set in the `--includebed` argument) by the given number of bases on each side, allowing base variants to match comparison variants that are just outside of the original region. If a comparison variant is in the extended regions, it can potentially match a base variant that is in the original regions, turning it into a TP. Comparison variants in the extended regions that don't have a match are not counted as FP. This strategy is similar to the one implemented for size matching where only the base variants longer than sizemin (equal to 50 by default) are considered, but they are allowed to match shorter comparison variants of size sizefilt (30bp by default) or longer. + +See this [discussion](https://github.com/ACEnglish/truvari/discussions/99) for details. + +Methodology +=========== +Here is a high-level pseudocode description of the steps Truvari bench conducts to compare the two VCFs. +``` +* zip the Base and Comp calls together in sorted order +* create chunks of all calls overlapping within ±`--chunksize` basepairs +* make a |BaseCall| x |CompCall| match matrix for each chunk +* build a Match for each call pair in the chunk - annotate as TP if >= all thresholds +* if the chunk has no Base or Comp calls +** return them all as FNs/FPs +* use `--pick` method to sort and annotate variants with their best match +``` +![](https://github.com/acenglish/truvari/blob/develop/imgs/TruvariBenchMethod.png) \ No newline at end of file diff --git a/docs/v4.2.0/collapse.md b/docs/v4.2.0/collapse.md new file mode 100644 index 00000000..f660f340 --- /dev/null +++ b/docs/v4.2.0/collapse.md @@ -0,0 +1,163 @@ +`collapse` is Truvari's approach to SV merging. After leveraging `bcftools` to merge VCFs, `truvari collapse` can then iterate over the calls and create clusters of SVs that match over the [provided thresholds](https://github.com/spiralgenetics/truvari/wiki/bench#matching-parameters).
This is also useful when removing potentially redundant calls within a single sample. + +Example +======= +To start, we merge multiple VCFs (each with their own sample) and ensure there are no multi-allelic entries via: +```bash +bcftools merge -m none one.vcf.gz two.vcf.gz | bgzip > merge.vcf.gz +``` + +This will `paste` SAMPLE information between vcfs when calls have the exact same chrom, pos, ref, and alt. +For example, consider two vcfs: + + >> one.vcf: + chr1 1 ... GT 0/1 + chr1 5 ... GT 1/1 + >> two.vcf: + chr1 1 ... GT 1/1 + chr1 7 ... GT 0/1 + +`bcftools merge` creates: + + >> merge.vcf: + chr1 1 ... GT 0/1 1/1 + chr1 5 ... GT 1/1 ./. + chr1 7 ... GT ./. 0/1 + +This VCF can then be collapsed to allow 'fuzzier' matching than the exact merge just performed. + +```bash +truvari collapse -i merge.vcf.gz -o truvari_merge.vcf -c truvari_collapsed.vcf -f /path/to/reference.fa +``` + +For example, if we collapsed our example merge.vcf by matching any calls within 3bp, we'd create: + + >> truvari_merge.vcf + chr1 1 ... GT 0/1 1/1 + chr1 5 ... GT 1/1 0/1 + >> truvari_collapsed.vcf + chr1 7 ... GT ./. 0/1 + +--keep behavior +=============== +When collapsing, the default `--keep` behavior (`first`) is to take the first variant from a cluster to +be written to the output while the others will be placed in the collapsed output. +Other options are `maxqual` (the call with the highest quality score) or `common` (the call with the highest minor allele count). + +Samples with no genotype information in the kept variant will be filled by the first +collapsed variant containing genotype information. + +--gt +==== +For some results, one may not want to collapse variants with conflicting genotypes from a single sample. With the `--gt all` parameter, variants which are present (non `0/0` or `./.`) in the same sample are not collapsed. With the `--gt het` parameter, only variants which are both heterozygous in a sample (e.g. `0/1` and `0/1`) are prevented from collapsing.
The `--gt het` is useful for some SV callers which will redundantly call variants and typically genotype them all as `1/1`. + +--intra +======= +When a single sample is run through multiple SV callers, one may wish to consolidate those results. After the `bcftools merge` of the VCFs, there will be one SAMPLE column per-input. With `--intra`, collapse will consolidate the sample information so that only a single sample column is present in the output. Since the multiple callers may have different genotypes or other FORMAT fields with conflicting information, `--intra` takes the first column from the VCF, then second, etc. For example, if we have an entry with: +``` +FORMAT RESULT1 RESULT2 +GT:GQ:AD ./.:.:3,0 1/1:20:0,30 +``` +The `--intra` output would be: +``` +FORMAT RESULT1 +GT:GQ:AD 1/1:20:3,0 +``` +As you can see in this example, 1) the first sample name is the only one preserved, and 2) conflicting FORMAT fields can be consolidated in a non-useful way (here the AD of `3,0` isn't informative to a `1/1` genotype). We're working to provide an API to help users write custom intra-sample consolidation scripts. + +--hap mode +========== +When using `--hap`, we assume phased variants from a single individual. Only the +single best matching call from the other haplotype will be collapsed, +and the consolidated genotype will become 1/1. + +For example, if we collapse anything at the same position: + + chr1 1 .. GT 0|1 + chr1 1 .. GT 1|0 + chr1 2 .. GT 1|0 + +will become: + + chr1 1 .. GT 1/1 + chr1 2 .. GT 1|0 + +--chain mode +============ +Normally, every variant in a set of variants that are collapsed together matches every other variant in the set. However, when using `--chain` mode, we allow 'transitive matching'. This means that each variant only needs to match at least one other variant in the set.
In situations where a 'middle' variant has two matches that don't match each other, without `--chain` the locus will produce two variants whereas using `--chain` will produce one. +For example, if we have: + + chr1 5 .. + chr1 7 .. + chr1 9 .. + +When we collapse anything within 2bp of each other, without `--chain`, we output: + + chr1 5 .. + chr1 9 .. + +With `--chain`, we would collapse `chr1 9` as well, producing: + + chr1 5 .. + +Annotations +=========== +`collapse` produces two files. The output file has kept variants along with unanalyzed (< sizemin) variants. The collapsed file contains the variants that were collapsed into the kept variants. + +The output file has three annotations added to the `INFO`: +- `CollapseId` - Identifier of the variant when comparing to the collapse outputs. +- `NumCollapsed` - Number of variants collapsed into this variant +- `NumConsolidated` - Number of samples' genotypes consolidated into this call's genotypes + +The collapsed file has all of the annotations added by [[bench|bench#definition-of-annotations-added-to-tp-vcfs]]. Note that `MatchId` is tied to the output file's `CollapseId`. See [MatchIds](https://github.com/spiralgenetics/truvari/wiki/MatchIds) for details.
+ +``` +usage: collapse [-h] -i INPUT [-o OUTPUT] [-c COLLAPSED_OUTPUT] [-f REFERENCE] [-k {first,maxqual,common}] [--debug] + [-r REFDIST] [-p PCTSIM] [-B MINHAPLEN] [-P PCTSIZE] [-O PCTOVL] [-t] [--use-lev] [--hap] [--chain] + [--no-consolidate] [--null-consolidate NULL_CONSOLIDATE] [-s SIZEMIN] [-S SIZEMAX] [--passonly] + +Structural variant collapser + +Will collapse all variants within sizemin/max that match over thresholds + +options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + Comparison set of calls + -o OUTPUT, --output OUTPUT + Output vcf (stdout) + -c COLLAPSED_OUTPUT, --collapsed-output COLLAPSED_OUTPUT + Where collapsed variants are written (collapsed.vcf) + -f REFERENCE, --reference REFERENCE + Indexed fasta used to call variants + -k {first,maxqual,common}, --keep {first,maxqual,common} + When collapsing calls, which one to keep (first) + --debug Verbose logging + --hap Collapsing a single individual's haplotype resolved calls (False) + --chain Chain comparisons to extend possible collapsing (False) + --no-consolidate Skip consolidation of sample genotype fields (True) + --null-consolidate NULL_CONSOLIDATE + Comma separated list of FORMAT fields to consolidate into the kept entry by taking the first non-null + from all neighbors (None) + +Comparison Threshold Arguments: + -r REFDIST, --refdist REFDIST + Max reference location distance (500) + -p PCTSIM, --pctsim PCTSIM + Min percent allele sequence similarity. Set to 0 to ignore. 
(0.95) + -B MINHAPLEN, --minhaplen MINHAPLEN + Minimum haplotype sequence length to create (50) + -P PCTSIZE, --pctsize PCTSIZE + Min pct allele size similarity (minvarsize/maxvarsize) (0.95) + -O PCTOVL, --pctovl PCTOVL + Min pct reciprocal overlap (0.0) for DEL events + -t, --typeignore Variant types don't need to match to compare (False) + --use-lev Use the Levenshtein distance ratio instead of edlib editDistance ratio (False) + +Filtering Arguments: + -s SIZEMIN, --sizemin SIZEMIN + Minimum variant size to consider for comparison (50) + -S SIZEMAX, --sizemax SIZEMAX + Maximum variant size to consider for comparison (50000) + --passonly Only consider calls with FILTER == PASS +``` \ No newline at end of file diff --git a/docs/v4.2.0/consistency.md b/docs/v4.2.0/consistency.md new file mode 100644 index 00000000..41fd599c --- /dev/null +++ b/docs/v4.2.0/consistency.md @@ -0,0 +1,170 @@ + +In addition to looking at performance of a single set of variation against a baseline, one may wish to measure the consistency between multiple sets of variation. The tool `truvari consistency` can automatically create that result. + +Running +======= + +``` +usage: consistency [-h] [-j] VCFs [VCFs ...] + +Over multiple vcfs, calculate their intersection/consistency. + +Calls will match between VCFs if they have a matching key of: + CHROM:POS ID REF ALT + +positional arguments: + VCFs VCFs to intersect + +optional arguments: + -h, --help show this help message and exit + -j, --json Output report in json format +``` +Example +======= + +```bash +truvari consistency fileA.vcf fileB.vcf fileC.vcf +``` + +Matching Entries +================ + +VCF entries will be considered matching if and only if they have the exact same key of `CHROM:POS ID REF ALT`. Because of this stringency, it is recommended that you compare the tp-base.vcf or fn.vcf results from each individual VCF's Truvari output.
+ +Output Report +============= + +Below is an example report: + +```text +# +# Total 5534 calls across 3 VCFs +# +#File NumCalls +fileA.vcf 4706 +fileB.vcf 4827 +fileC.vcf 4882 +# +# Summary of consistency +# +#VCFs Calls Pct +3 3973 71.79% +2 935 16.90% +1 626 11.31% +# +# Breakdown of VCFs' consistency +# +#Group Total TotalPct PctOfFileCalls +111 3973 71.79% 84.42% 82.31% 81.38% +011 351 6.34% 7.27% 7.19% +101 308 5.57% 6.54% 6.31% +110 276 4.99% 5.86% 5.72% +001 250 4.52% 5.12% +010 227 4.10% 4.70% +100 149 2.69% 3.17% +``` + +At the top we see that we compared 5,534 unique variants between the 3 files, with fileC.vcf having the most calls at 4,882. + +The "Summary of consistency" shows us that 3,973 (71.79%) of all the calls are shared between the 3 VCFs, while 626 (11.31%) are only found in one of the VCFs. + +Reading the "Breakdown of VCFs' consistency", a `Group` is a unique key for presence (1) or absence (0) of a call within each of the listed `#Files`. For example: `Group 111` is calls present in all VCFs; `Group 011` is calls present in only the 2nd and 3rd VCFs (i.e. fileB.vcf and fileC.vcf). + +We see that `Group 101` has calls belonging to the 1st and 3rd `#Files` (i.e. fileA.vcf and fileC.vcf). This group has a total of 308 calls that intersect, or 5.57% of all calls in all VCFs. This 308 represents 6.54% of calls in fileA.vcf and 6.31% of calls in fileC.vcf. + +Finally, we see that fileA.vcf has the least amount of calls unique to it on the `Group 100` line. + +Json +==== +Below is a consistency report in json format. 
+```json +{ + "vcfs": [ + "repo_utils/test_files/variants/input1.vcf.gz", + "repo_utils/test_files/variants/input2.vcf.gz", + "repo_utils/test_files/variants/input3.vcf.gz" + ], + "total_calls": 3513, + "num_vcfs": 3, + "vcf_counts": { + "repo_utils/test_files/variants/input1.vcf.gz": 2151, + "repo_utils/test_files/variants/input2.vcf.gz": 1783, + "repo_utils/test_files/variants/input3.vcf.gz": 2065 + }, + "shared": [ + { + "vcf_count": 3, + "num_calls": 701, + "call_pct": 0.1995445488186735 + }, + { + "vcf_count": 2, + "num_calls": 1084, + "call_pct": 0.3085681753487048 + }, + { + "vcf_count": 1, + "num_calls": 1728, + "call_pct": 0.4918872758326217 + } + ], + "detailed": [ + { + "group": "111", + "total": 701, + "total_pct": 0.1995445488186735, + "repo_utils/test_files/variants/input1.vcf.gz": 0.32589493258949326, + "repo_utils/test_files/variants/input2.vcf.gz": 0.393157599551318, + "repo_utils/test_files/variants/input3.vcf.gz": 0.3394673123486683 + }, + { + "group": "001", + "total": 645, + "total_pct": 0.18360375747224594, + "repo_utils/test_files/variants/input1.vcf.gz": 0, + "repo_utils/test_files/variants/input2.vcf.gz": 0, + "repo_utils/test_files/variants/input3.vcf.gz": 0.31234866828087166 + }, + { + "group": "100", + "total": 598, + "total_pct": 0.17022487902077996, + "repo_utils/test_files/variants/input1.vcf.gz": 0.2780102278010228, + "repo_utils/test_files/variants/input2.vcf.gz": 0, + "repo_utils/test_files/variants/input3.vcf.gz": 0 + }, + { + "group": "101", + "total": 487, + "total_pct": 0.1386279533162539, + "repo_utils/test_files/variants/input1.vcf.gz": 0.22640632264063226, + "repo_utils/test_files/variants/input2.vcf.gz": 0, + "repo_utils/test_files/variants/input3.vcf.gz": 0.2358353510895884 + }, + { + "group": "010", + "total": 485, + "total_pct": 0.13805863933959578, + "repo_utils/test_files/variants/input1.vcf.gz": 0, + "repo_utils/test_files/variants/input2.vcf.gz": 0.27201346045989905, + "repo_utils/test_files/variants/input3.vcf.gz": 
0 + }, + { + "group": "110", + "total": 365, + "total_pct": 0.10389980074010817, + "repo_utils/test_files/variants/input1.vcf.gz": 0.1696885169688517, + "repo_utils/test_files/variants/input2.vcf.gz": 0.2047111609646663, + "repo_utils/test_files/variants/input3.vcf.gz": 0 + }, + { + "group": "011", + "total": 232, + "total_pct": 0.06604042129234272, + "repo_utils/test_files/variants/input1.vcf.gz": 0, + "repo_utils/test_files/variants/input2.vcf.gz": 0.13011777902411667, + "repo_utils/test_files/variants/input3.vcf.gz": 0.11234866828087167 + } + ] +} +``` \ No newline at end of file diff --git a/docs/v4.2.0/divide.md b/docs/v4.2.0/divide.md new file mode 100644 index 00000000..3bbad3cb --- /dev/null +++ b/docs/v4.2.0/divide.md @@ -0,0 +1,58 @@ +Divide a VCF into independent shards. + +Unfortunately, `pysam.VariantRecord` objects aren't pickle-able. This means that if we wanted to have Truvari leverage python's multiprocessing we'd need to make a custom VCF parser. However, the command `truvari divide` allows us to take an input VCF and divide it into multiple independent parts (or shards) which can then be processed over multiple processes. + +`truvari divide` works by parsing a VCF and splitting it into multiple, smaller sub-VCFs. If any variants are within `--buffer` base-pairs, they're output to the same sub-VCF. This allows variants in the same region which would need to be compared to one another (see `--refdist`) to stay in the same sub-VCF. The `--min` parameter allows us to control the minimum number of variants per sub-VCF so that we don't make too many tiny VCFs. Once the sub-VCFs are created, we can process each independently through any truvari command. + +For example, let's say we want to run `truvari collapse` on a very large VCF with many variants and many samples.
First, we divide the VCF: + +```bash +truvari divide big_input.vcf.gz sub_vcfs_directory/ +``` + +Inside of `sub_vcfs_directory/` we'll have multiple VCFs, which we can process with a simple bash script: + +```bash +NJOBS=$(nproc) # use all processors by default +mkdir -p output_vcfs/ +mkdir -p collap_vcfs/ +mkdir -p logs/ + +for in_vcf in sub_vcfs_directory/*.vcf.gz +do + # Setup file names + base_name=$(basename $in_vcf) + base_name=${base_name%.vcf.gz} + output_name=output_vcfs/${base_name}.vcf + collap_name=collap_vcfs/${base_name}.vcf + log_name=logs/${base_name}.log + # Run the command and send it to the background + truvari collapse -i $in_vcf -o $output_name -c $collap_name -f reference.fa &> ${log_name} & + # If too many jobs are running, wait for one to finish + while [ $( jobs | wc -l ) -ge ${NJOBS} ] + do + sleep 5 + done +done +``` + +Obviously the logs and `while` loop are tricks for running on a single machine. If you have access to a cluster, I'm sure you can imagine how to create/submit the commands. + +``` +usage: divide [-h] [-b BUFFER] [-m MIN] [--no-compress] [-T THREADS] VCF DIR + +Divide a VCF into independent parts + +positional arguments: + VCF VCF to split + DIR Output directory to save parts + +options: + -h, --help show this help message and exit + -b BUFFER, --buffer BUFFER + Buffer to make mini-clusters (1000) + -m MIN, --min MIN Minimum number of entries per-vcf (100) + --no-compress Don't attempt to compress/index sub-VCFs + -T THREADS, --threads THREADS + Number of threads for compressing/indexing sub-VCFs (1) +``` \ No newline at end of file diff --git a/docs/v4.2.0/phab.md b/docs/v4.2.0/phab.md new file mode 100644 index 00000000..bdcaec9e --- /dev/null +++ b/docs/v4.2.0/phab.md @@ -0,0 +1,157 @@ +Introduction +------------ + +Truvari's comparison engine can match variants using a wide range of thresholds. However, some alleles can produce radically different variant representations.
We could dramatically lower our thresholds to identify the match, but this would cause variants from non-identical alleles to be falsely matched. + +This problem is easiest to conceptualize in the case of 'split' variants: imagine a pipeline calls a single 100bp DEL that can also be represented as two 50bp DELs. To match these variants, we would need to loosen our thresholds to `--pick multi --pctsim 0.50 --pctsize 0.50`. Plus, these thresholds leave no margin for error. If the variant caller erroneously deleted an extra base to make a 101bp DEL we would have to lower our thresholds even further. These thresholds are already too low because there are plenty of distinct alleles with >= 50% homology. + +So how do we deal with inconsistent representations? In an ideal world, we would simply get rid of them by harmonizing the variants. This is the aim of `truvari phab`. + +`truvari phab` is designed to remove variant representation inconsistencies through harmonization. By reconstructing haplotypes from variants, running multiple-sequence alignment of the haplotypes along with the reference, and then recalling variants, we expect to remove discordance between variant representations and simplify the work required to perform variant comparison. + +Requirements +------------ +Since `truvari phab` uses mafft v7.505 via a command-line call, it expects it to be in the environment path. Download [mafft](https://mafft.cbrc.jp/alignment/software/) and have its executable available in the `$PATH`. + +Alternatively, you can use the Truvari [Docker container](Development#docker) which already has mafft ready for use. + +Also, you can use wave front aligner (pyWFA) or partial order alignment (pyabpoa). While wfa is the fastest approach, it will independently align haplotypes and therefore may produce less parsimonious alignments. And while poa is more accurate than wfa and faster than mafft, it is less accurate than mafft.
+ +Example +------- +As an example, we'll use Truvari's test files in `repo_utils/test_files/phab*` which were created from real data over a tandem repeat at GRCh38 chr1:26399065-26401053 and translated to a small test genome with coordinates chr1:1-1988. + +* `phab_base.vcf.gz` - an 86 sample squared-off pVCF +* `phab_comp.vcf.gz` - a single sample's VCF +* `phab_ref.fa` - a subset of the GRCh38 reference + +This dataset is interesting because the `HG002` sample in `phab_base.vcf.gz` uses the same sequencing experiment ([HPRC](https://github.com/human-pangenomics/HPP_Year1_Assemblies)) as the sample `syndip` in `phab_comp.vcf.gz`, but processed with a different pipeline. And as we will see, the pipeline can make all the difference. + +To start, let's use `truvari bench` to see how similar the variant calls are in this region. +```bash +truvari bench --base phab_base.vcf.gz \ + --comp phab_comp.vcf.gz \ + --sizemin 1 --sizefilt 1 \ + --bSample HG002 \ + --cSample syndip \ + --no-ref a \ + --output initial_bench +``` +This will compare all variants of at least 1bp (`-S 1 -s 1`, which includes SNPs) from the `HG002` sample to the `syndip` sample. We're also excluding any uncalled or reference homozygous sites with `--no-ref a`. The report in `initial_bench/summary.json` shows: +```json +{ + "TP-base": 5, + "TP-comp": 5, + "FP": 2, + "FN": 22, + "precision": 0.7142857142857143, + "recall": 0.18518518518518517, + "f1": 0.2941176470588235, +} +``` + +These variants are pretty poorly matched, especially considering the `HG002` and `syndip` samples are using the same sequencing experiment. We can also inspect the `initial_bench/fn.vcf.gz` and see a lot of these discordant calls are concentrated in a 200bp window. Let's use `truvari phab` to harmonize the variants in this region.
+```bash +truvari phab --base phab_base.vcf.gz \ + --comp phab_comp.vcf.gz \ + --bSample HG002 \ + --cSample syndip \ + --reference phab_ref.fa \ + --region chr1:700-900 \ + -o harmonized.vcf.gz +``` + +In our `harmonized.vcf.gz` we can see there are now only 9 variants. Let's run `truvari bench` again on the output to see how well the variants match after being harmonized. + +```bash +truvari bench -b harmonized.vcf.gz \ + -c harmonized.vcf.gz \ + -S 1 -s 1 \ + --no-ref a \ + --bSample HG002 \ + --cSample syndip \ + -o harmonized_bench/ +``` +Looking at `harmonized_bench/summary.json` shows: +```json +{ + "TP-base": 8, + "TP-comp": 8, + "FP": 0, + "FN": 0, + "precision": 1.0, + "recall": 1.0, + "f1": 1.0, +} +``` +Now there is no difference between our two sets of variants in this region. + +For this variant call-set, `truvari phab` makes `truvari bench` overkill since the variants create identical haplotypes. In fact, we can benchmark simply by counting the genotypes. +```bash +$ bcftools query -f "[%GT ]\n" harmonized.vcf.gz | sort | uniq -c + 1 0/1 1/0 + 1 1/0 0/1 + 6 1/1 1/1 +``` +(We can ignore the phasing differences (`0/1` vs. `1/0`). These pipelines reported the parental alleles in a different order.) + +MSA +--- + +If you read the `truvari phab --help`, you may have noticed that the `--comp` VCF is optional. This is by design so that we can also harmonize the variants inside a single VCF. By performing a multiple-sequence alignment across samples, we can better represent variation across a population.
To see this in action, let's run `phab` on all 86 samples in the `repo_utils/test_files/phab_base.vcf.gz` +```bash +truvari phab -b phab_base.vcf.gz \ + -f phab_ref.fa \ + -r chr1:700-900 \ + -o msa_example.vcf.gz +``` + +As a simple check, we can count the number of variants before/after `phab`: +```bash +bcftools view -r chr1:700-900 phab_base.vcf.gz | grep -vc "#" +bcftools view -r chr1:700-900 msa_example.vcf.gz | grep -vc "#" +``` +The `160` original variants given to `phab` became just `60`. + +Better yet, these fewer variants occur on fewer positions: +```bash + +bcftools query -r chr1:700-900 -f "%POS\n" phab_base.vcf.gz | sort | uniq | wc -l +bcftools query -r chr1:700-900 -f "%POS\n" msa_example.vcf.gz | sort | uniq | wc -l +``` +This returns that the variants were over `98` positions but now sit at just `16` + +We can also observe changes in the allele frequency after running `phab`: +```bash +bcftools +fill-tags -r chr1:700-900 phab_base.vcf.gz | bcftools query -f "%AC\n" | sort -n | uniq -c +bcftools +fill-tags -r chr1:700-900 msa_example.vcf.gz | bcftools query -f "%AC\n" | sort -n | uniq -c +``` +The allele-count (AC) shows a 15% reduction in singletons and removal of all variants with an AF > 0.50 which would have suggested the reference holds a minor allele. +```txt + original phab + # AC # AC + 39 1 33 1 + 18 2 4 2 + 3 3 2 3 + 3 4 2 4 + 2 5 1 5 + ... + 3 69 1 35 + 1 89 1 40 + 8 109 1 53 + 1 132 1 56 + 1 150 1 81 +``` + +(TODO: pull the adotto TR region annotations and run this example through `truvari anno trf`. I bet we'll get a nice spectrum of copy-diff of the same motif in the `phab` calls.) + +`--align` +========= +By default, `phab` will make the haplotypes and use an external call `mafft` to perform a multiple sequence alignment between them and the reference to harmonize the variants. While this is the most accurate alignment technique, it isn't fast. 
If you're willing to sacrifice some accuracy for a huge speed increase, you can use `--align wfa`, which also doesn't require an external tool. Another option is `--align poa` which performs a partial order alignment which is faster than mafft but less accurate and slower than wfa but more accurate. However, `poa` appears to be non-deterministic which is not ideal for some benchmarking purposes. + +Limitations +----------- +* Creating and aligning haplotypes is impractical for very long sequences and maybe practically impossible for entire human chromosomes. Therefore, `truvari phab` is recommended to only be run on sub-regions. +* By giving the variants new representations, variant counts will likely change. +* Early testing on `phab` is on phased variants. While it can run on unphased variants, we can't yet recommend it. If regions contain unphased Hets or overlapping variants, it becomes more difficult to build a consensus sequence. So you can try out unphased variants, but proceed with caution. + diff --git a/docs/v4.2.0/refine.md b/docs/v4.2.0/refine.md new file mode 100644 index 00000000..3fa818e7 --- /dev/null +++ b/docs/v4.2.0/refine.md @@ -0,0 +1,114 @@ +As described in the [[phab|phab]] documentation, a constraint on Truvari `bench` finding matches is that there needs to be some consistency in how the variants are represented. To help automate the process of running Truvari `phab` on a benchmarking result and recomputing benchmarking performance on harmonized variants, we present the tool `refine`. + +Quick Start +=========== + +After making a `bench` result: +```bash +truvari bench -b base.vcf.gz -c comp.vcf.gz -o result/ +``` +Use `refine` on the `result/` +```bash +truvari refine -r subset.bed -f ref.fa result/ +``` + +Output +====== +* `refine.variant_summary.json` - result of re-evaluating calls within the specified regions. 
Same structure as [[summary.json|bench#summaryjson]] +* `refine.regions.txt` - Tab-delimited file with per-region variant counts +* `refine.region_summary.json` - Per-region performance metrics +* `phab_bench/` - Bench results on the subset of variants harmonized + +To see an example output, look at [test data](https://github.com/ACEnglish/truvari/tree/develop/answer_key/refine/refine_output_one) + +Using `refine.regions.txt` +========================== +| Column            | Description                             | +| ----------------- | --------------------------------------- | +| chrom             | Region's chromosome                     | +| start             | Region's start                          | +| end               | Region's end                            | +| in_tpbase         | Input's True Positive base count        | +| in_tp             | Input's True Positive comparison count  | +| in_fp             | Input's false positive count            | +| in_fn             | Input's false negative count            | +| refined           | Boolean for if region was re-evaluated  | +| out_tpbase        | Output's true positive base count       | +| out_tp            | Output's true positive comparison count | +| out_fn            | Output's false negative count           | +| out_fp            | Output's false positive count           | +| state             | True/False state of the region          | + + +Performance by Regions +====================== + +Because `truvari phab` can alter variant counts during harmonization, one may wish to assess the performance on a per-region basis rather than the per-variant basis. In the `refine.regions.txt`, a column `state` will have a TP/FN/FP value as defined by the following rules: + +```python +false_pos = (data['out_fp'] != 0) +false_neg = (data['out_fn'] != 0) +any_false = false_pos | false_neg + +true_positives = (data['out_tp'] != 0) & (data['out_tpbase'] != 0) & ~any_false + +true_negatives = (data[['out_tpbase', 'out_tp', 'out_fn', 'out_fp']] == 0).all(axis=1) + +baseP = (data['out_tpbase'] != 0) | (data['out_fn'] != 0) +compP = (data['out_tp'] != 0) | (data['out_fp'] != 0) +``` + +This logic has two edge cases to consider.
1) a region with at least one false-positive and one false-negative will be counted as both a false-positive and a false-negative. 2) Regions within `--refdist` may experience 'variant bleed' where they e.g. have an out_tp, but no other variants because a neighboring region actually contains the corresponding `out_tpbase`. For the first case, we simply count the region twice and set its state in `refine.regions.txt` to "FP,FN". For the second case, we set the state to 'UNK' and ignore it when calculating the region summary. Future versions may figure out exactly how to handle (prevent?) 'UNK' regions. + +These by-region state counts are summarized and written to `refine.region_summary.json`. The definition of metrics inside this json are: +| Key    | Definition                                   | Formula                         | +|--------|----------------------------------------------|---------------------------------| +| TP     | True Positive region count                   |                                 | +| TN     | True Negative region count                   |                                 | +| FP     | False Positive region count                  |                                 | +| FN     | False Negative region count                  |                                 | +| base P | Regions with base variant(s)                 |                                 | +| base N | Regions without base variant(s)              |                                 | +| comp P | Regions with comparison variant(s)           |                                 | +| comp N | Regions without comparison variant(s)        |                                 | +| PPV    | Positive Predictive Value (a.k.a. precision) | TP / comp P                     | +| TPR    | True Positive Rate (a.k.a. recall)           | TP / base P                     | +| TNR    | True Negative Rate (a.k.a. specificity)      | TN / base N                     | +| NPV    | Negative Predictive Value                    | TN / comp N                     | +| ACC    | Accuracy                                     | (TP + TN) / (base P + base N)   | +| BA     | Balanced Accuracy                            | (TPR + TNR) / 2                 | +| F1     | f1 score                                     | 2 * ((PPV * TPR) / (PPV + TPR)) | +| UND    | Regions with an undetermined state           |                                 | + +Even though PPV is synonymous with precision, we use these abbreviated names when dealing with per-region performance in order to help users differentiate from the by-variant performance reports.
+ +`--align` +========= +By default, Truvari will make the haplotypes and use an external call to `mafft` to perform a multiple sequence alignment between them and the reference to harmonize the variants. While this is the most accurate alignment technique, it isn't fast. If you're willing to sacrifice some accuracy for a huge speed increase, you can use `--align wfa`, which also doesn't require an external tool. Another option is `--align poa` which performs a partial order alignment which is faster than mafft but less accurate and slower than wfa but more accurate. However, `poa` appears to be non-deterministic which is not ideal for some benchmarking purposes. + +`--use-original-vcfs` +===================== + +By default, `refine` will use the base/comparison variants from the `bench` results `tp-base.vcf.gz`, `fn.vcf.gz`, `tp-comp.vcf.gz`, and `fp.vcf.gz` as input for `phab`. However, this contains a filtered subset of variants originally provided to `bench` since it removes variants e.g. below `--sizemin` or not `--passonly`. + +With the `--use-original-vcfs` parameter, all of the original calls from the input vcfs are fetched. This parameter is useful in recovering matches in situations when variants in one call set are split into two variants which are smaller than the minimum size analyzed by `bench`. For example, imagine a base VCF with a 20bp DEL, a comp VCF with two 10bp DELs, and `bench --sizemin 20` was used. `--use-original-vcfs` will consider the two 10bp comp variants during phab harmonization with the 20bp base DEL. + + +`--regions` +=========== + +This parameter specifies which regions to re-evaluate. If this is not provided, the original `bench` result's `--includebed` is used. If both `--regions` and `--includebed` are provided, the `--includebed` is subset to only those intersecting `--regions`. + +This parameter is helpful for cases when the `--includebed` is not the same set of regions that a caller analyzes.
For example, if a TR caller only discovers short tandem repeats (STR), but a benchmark has TRs of all lengths, it isn't useful to benchmark against the non-STR variants. Therefore, you can run `bench` on the full benchmark's regions (`--includebed`), and automatically subset to only the regions analyzed by the caller with `refine --regions`. + +Note that the larger these regions are the slower MAFFT (used by `phab`) will run. Also, when performing the intersection as described above, there may be edge effects in the reported `refine.variant_summary.json`. For example, if a `--region` partially overlaps an `--includebed` region, you may not be analyzing a subset of calls looked at during the original `bench` run. Therefore, the `*summary.json` should be compared with caution. + +`--use-region-coords` +===================== + +When intersecting `--includebed` with `--regions`, use `--regions` coordinates. By default, `refine` will prefer the `--includebed` coordinates. This is helpful for when the original bench result's `--includebed` boundaries should be used instead of the `--regions` + +`--reference` +============= + +By default, the reference is pulled from the original `bench` result's `params.json`. If a reference wasn't used with `bench`, it must be specified with `refine` as it's used by `phab` to realign variants. \ No newline at end of file diff --git a/docs/v4.2.0/segment.md b/docs/v4.2.0/segment.md new file mode 100644 index 00000000..953e5822 --- /dev/null +++ b/docs/v4.2.0/segment.md @@ -0,0 +1,18 @@ +Segmentation: Normalization of SVs into disjointed genomic regions + +For SVs with a span in the genome (currently only DELs), split the overlaps into disjointed regions. This is an experimental tool that explores the possibility of assisting SV association analysis. + +This tool adds an INFO field `SEGCNT` which holds the number of original SVs that overlap the newly reported region. 
It also adds a FORMAT field `SEG`, which is the 'allele coverage' per-sample. For example, if a sample has two overlapping heterozygous deletions, the shared region will have `SEG=2`. If the two deletions were homozygous then `SEG=4`. + +In the below example, we have three deletions found across three samples. + +![](https://github.com/spiralgenetics/truvari/blob/develop/imgs/segment_example.png) + +The `segment` added annotations for the regions would then be: +| Region | INFO/SEGCNT | S1/SEG | S2/SEG | S3/SEG | +|--------|-------------|--------|--------|--------| +| A      | 1           | 2      | 0      | 0      | +| B      | 2           | 2      | 1      | 0      | +| C      | 3           | 2      | 2      | 2      | +| D      | 2           | 2      | 1      | 0      | +| E      | 1           | 0      | 1      | 0      | \ No newline at end of file diff --git a/docs/v4.2.0/stratify.md b/docs/v4.2.0/stratify.md new file mode 100644 index 00000000..353dcab5 --- /dev/null +++ b/docs/v4.2.0/stratify.md @@ -0,0 +1,58 @@ +`stratify` is a helper utility for counting variants within bed regions which is essentially the same as running `bedtools intersect -c`. When working with benchmarking results, there are four vcfs to count (tp-base, tp-comp, fn, fp). Instead of running bedtools four times and collating the results, `stratify` can be given a single `bench` result directory to generate the counts.
+ +For example: +```bash +$ truvari stratify input.bed bench/ +chrom start end tpbase tp fn fp +chr20 280300 280900 0 0 0 0 +chr20 100000 200000 1 1 0 0 +chr20 642000 642350 1 1 2 1 +``` + +The output from this can then be parsed to generate more details: + +```python +import pandas as pd +import truvari + +df = pd.read_csv("stratify.output.txt", sep='\t') + +# If the input.bed didn't have a header and so we couldn't use the `--header` parameter, we need to name columns +df.columns = ['chrom', 'start', 'end', 'tpbase', 'tp', 'fn', 'fp'] + +# Create the precision, recall, and f1 for each row +metrics = df[["tpbase", "tp", "fn", "fp"]].apply((lambda x: truvari.performance_metrics(*x)), axis=1) + +# metrics is now a DataFrame with a single column of tuples, lets separate them into columns +metrics = pd.DataFrame(metrics.to_list(), columns=["precision", "recall", "f1"]) + +# Extend the dataframe's columns +df = df.join(metrics) +df.head() +``` +Which gives the result: +``` + chrom start end tpbase tp fn fp precision recall f1 +0 chr20 135221 239308 1 1 0 0 1.00 1.00 1.000000 +1 chr20 260797 465632 3 3 3 1 0.75 0.50 0.600000 +2 chr20 465866 622410 1 1 0 0 1.00 1.00 1.000000 +3 chr20 623134 655257 1 1 3 1 0.50 0.25 0.333333 +4 chr20 708338 732041 1 1 1 0 1.00 0.50 0.666667 +``` + +``` +usage: stratify [-h] [-o OUT] [--header] [-w] [--debug] BED VCF + +Count variants per-region in vcf + +positional arguments: + BED Regions to process + VCF Truvari bench result directory or a single VCF + +optional arguments: + -h, --help show this help message and exit + -o OUT, --output OUT Output bed-like file + --header Input regions have header to preserve in output + -w, --within Only count variants contained completely within region boundaries + --debug Verbose logging +``` \ No newline at end of file diff --git a/docs/v4.2.0/vcf2df.md b/docs/v4.2.0/vcf2df.md new file mode 100644 index 00000000..14d9f06c --- /dev/null +++ b/docs/v4.2.0/vcf2df.md @@ -0,0 +1,81 @@ +We enjoy using 
[pandas](https://pandas.pydata.org/)/[seaborn](https://seaborn.pydata.org/) for python plotting, so we've made the command `truvari vcf2df`. This will turn a VCF into a pandas DataFrame and save it to a file using joblib. The resulting DataFrame will always have the columns: +* chrom: variant chromosome +* start: 0-based start from pysam.VariantRecord.start +* end: 0-based end from pysam.VariantRecord.stop +* id : VCF column ID +* svtype : SVTYPE as determined by `truvari.entry_variant_type` +* svlen : SVLEN as determined by `truvari.entry_size` +* szbin : SVLEN's size bin as determined by `truvari.get_sizebin` +* qual : VCF column QUAL +* filter : VCF column FILTER +* is_pass : boolean of if the filter is empty or PASS + +Optionally, `vcf2df` can attempt to pull `INFO` and `FORMAT` fields from the VCF and put each field into the DataFrame as a new column. For FORMAT fields, the VCF header definition's `Number` is considered and multiple columns may be added. For example, the `AD` field, typically holding Allele Depth has `Number=A`, indicating that there will be one value for each allele. Truvari assumes that all VCFs hold one allele per-line, so there are only 2 alleles described per-line, the reference and alternate allele. Therefore, two columns are added to the DataFrame, `AD_ref` and `AD_alt` corresponding to the 0th and 1st values from the AD field's list of values. Similarly, for PL (genotype likelihood) with `Number=G`, there's three values and columns are created named `PL_ref`, `PL_het`, `PL_hom`. + +After you've created your benchmarking results with `truvari bench`, you'll often want to plot different views of your results.
`vcf2df --bench-dir` can parse a truvari output directory's multiple VCF files and add a 'state' column +* state : The truvari state assigned to the variant + * tpbase : Parsed from the tp-base.vcf + * tp : Parsed from the tp-comp.vcf + * fp : Parsed from the fp.vcf + * fn : Parsed from the fn.vcf + +The created DataFrame is saved into a joblib file, which can then be plotted as simply as: +```python +import joblib +import seaborn as sb +import matplotlib.pyplot as plt + +data = joblib.load("test.jl") +p = sb.countplot(data=data[data["state"] == 'tp'], x="szbin", hue="svtype", hue_order=["DEL", "INS"]) +plt.xticks(rotation=45, ha='right') +p.set(title="True Positives by svtype and szbin") +``` +![](https://github.com/spiralgenetics/truvari/blob/develop/imgs/truv2df_example.png) + +This enables concatenation of Truvari results across multiple benchmarking experiments for advanced comparison. For example, imagine there's multiple parameters used for SV discovery over multiple samples. After running `truvari bench` on each of the results with the output directories named to `params/sample/` and each converted to DataFrames with `truvari vcf2df`, we can expand/concatenate the saved joblib DataFrames with: + +```python +import glob +import joblib +import pandas as pd + +files = glob.glob("*/*/data.jl") +dfs = [] +for f in files: + params, sample, frame = f.split('/') + d = joblib.load(f) + d["params"] = params + d["sample"] = sample + dfs.append(d) +df = pd.concat(dfs) +joblib.dump(df, "results.jl") +``` + +To facilitate range queries, PyRanges is helpful. `vcf2df` results can be parsed quickly by pyranges with the command: +```python +result = pyranges.PyRanges(df.rename(columns={'chrom':"Chromosome", "start":"Start", "end":"End"})) +``` + +``` +usage: vcf2df [-h] [-b] [-i] [-f] [-s SAMPLE] [-n] [-S] [-c LVL] [--debug] VCF JL + +Takes a vcf and creates a data frame. 
Can parse a bench output directory + +positional arguments: + VCF VCF to parse + JL Output joblib to save + +optional arguments: + -h, --help show this help message and exit + -b, --bench-dir Input is a truvari bench directory + -i, --info Attempt to put the INFO fields into the dataframe + -f, --format Attempt to put the FORMAT fileds into the dataframe + -s SAMPLE, --sample SAMPLE + SAMPLE name to parse when building columns for --format + -n, --no-prefix Don't prepend sample name to format columns + -S, --skip-compression + Skip the attempt to optimize the dataframe's size + -c LVL, --compress LVL + Compression level for joblib 0-9 (3) + --debug Verbose logging +``` \ No newline at end of file diff --git a/docs/v4.2.1/Citations.md b/docs/v4.2.1/Citations.md new file mode 100644 index 00000000..d600b860 --- /dev/null +++ b/docs/v4.2.1/Citations.md @@ -0,0 +1,30 @@ +# Citing Truvari + +English, A.C., Menon, V.K., Gibbs, R.A. et al. Truvari: refined structural variant comparison preserves allelic diversity. Genome Biol 23, 271 (2022). https://doi.org/10.1186/s13059-022-02840-6 + +# Citations + +List of publications using Truvari. Most of these are just pulled from a [Google Scholar Search](https://scholar.google.com/scholar?q=truvari). Please post in the [show-and-tell](https://github.com/spiralgenetics/truvari/discussions/categories/show-and-tell) to have your publication added to the list. 
+* [A robust benchmark for detection of germline large deletions and insertions](https://www.nature.com/articles/s41587-020-0538-8) +* [Leveraging a WGS compression and indexing format with dynamic graph references to call structural variants](https://www.biorxiv.org/content/10.1101/2020.04.24.060202v1.abstract) +* [Duphold: scalable, depth-based annotation and curation of high-confidence structural variant calls](https://academic.oup.com/gigascience/article/8/4/giz040/5477467?login=true) +* [Parliament2: Accurate structural variant calling at scale](https://academic.oup.com/gigascience/article/9/12/giaa145/6042728) +* [Learning What a Good Structural Variant Looks Like](https://www.biorxiv.org/content/10.1101/2020.05.22.111260v1.full) +* [Long-read trio sequencing of individuals with unsolved intellectual disability](https://www.nature.com/articles/s41431-020-00770-0) +* [lra: A long read aligner for sequences and contigs](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009078) +* [Samplot: a platform for structural variant visual validation and automated filtering](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02380-5) +* [AsmMix: A pipeline for high quality diploid de novo assembly](https://www.biorxiv.org/content/10.1101/2021.01.15.426893v1.abstract) +* [Accurate chromosome-scale haplotype-resolved assembly of human genomes](https://www.nature.com/articles/s41587-020-0711-0) +* [Accurate circular consensus long-read sequencing improves variant detection and assembly of a human genome](https://www.nature.com/articles/s41587-019-0217-9) +* [NPSV: A simulation-driven approach to genotyping structural variants in whole-genome sequencing data](https://academic.oup.com/bioinformatics/article-abstract/37/11/1497/5466452) +* [SVIM-asm: structural variant detection from haploid and diploid genome assemblies](https://academic.oup.com/bioinformatics/article/36/22-23/5519/6042701?login=true) +* [Readfish enables targeted 
nanopore sequencing of gigabase-sized genomes](https://www.nature.com/articles/s41587-020-00746-x) +* [stLFRsv: A Germline Structural Variant Analysis Pipeline Using Co-barcoded Reads](https://internal-journal.frontiersin.org/articles/10.3389/fgene.2021.636239/full) +* [Long-read-based human genomic structural variation detection with cuteSV](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02107-y) +* [An international virtual hackathon to build tools for the analysis of structural variants within species ranging from coronaviruses to vertebrates](https://f1000research.com/articles/10-246) +* [Paragraph: a graph-based structural variant genotyper for short-read sequence data](https://link.springer.com/article/10.1186/s13059-019-1909-7) +* [Genome-wide investigation identifies a rare copy-number variant burden associated with human spina bifida](https://www.nature.com/articles/s41436-021-01126-9) +* [TT-Mars: Structural Variants Assessment Based on Haplotype-resolved Assemblies](https://www.biorxiv.org/content/10.1101/2021.09.27.462044v1.abstract) +* [An ensemble deep learning framework to refine large deletions in linked-reads](https://www.biorxiv.org/content/10.1101/2021.09.27.462057v1.abstract) +* [MAMnet: detecting and genotyping deletions and insertions based on long reads and a deep learning approach](https://academic.oup.com/bib/advance-article-abstract/doi/10.1093/bib/bbac195/6587170) +* [Automated filtering of genome-wide large deletions through an ensemble deep learning framework](https://www.sciencedirect.com/science/article/pii/S1046202322001712#b0110) diff --git a/docs/v4.2.1/Development.md b/docs/v4.2.1/Development.md new file mode 100644 index 00000000..ccb38493 --- /dev/null +++ b/docs/v4.2.1/Development.md @@ -0,0 +1,90 @@ +# Truvari API +Many of the helper methods/objects are documented such that developers can reuse truvari in their own
code. To see developer documentation, visit [readthedocs](https://truvari.readthedocs.io/en/latest/). + +Documentation can also be seen using +```python +import truvari +help(truvari) +``` + +# docker + +A Dockerfile exists to build an image of Truvari. To make a Docker image, clone the repository and run +```bash +docker build -t truvari . +``` + +You can then run Truvari through docker using +```bash +docker run -v `pwd`:/data -it truvari +``` +Where `pwd` can be whatever directory you'd like to mount in the docker to the path `/data/`, which is the working directory for the Truvari run. You can provide parameters directly to the entry point. +```bash +docker run -v `pwd`:/data -it truvari anno svinfo -i example.vcf.gz +``` + +If you'd like to interact within the docker container for things like running the CI/CD scripts +```bash +docker run -v `pwd`:/data --entrypoint /bin/bash -it truvari +``` +You'll now be inside the container and can run FuncTests or run Truvari directly +```bash +bash repo_utils/truvari_ssshtests.sh +truvari anno svinfo -i example.vcf.gz +``` + +# CI/CD + +Scripts that help ensure the tool's quality. Extra dependencies need to be installed in order to run Truvari's CI/CD scripts. + +```bash +pip install pylint anybadge coverage +``` + +Check code formatting with +```bash +python repo_utils/pylint_maker.py +``` +We use [autopep8](https://pypi.org/project/autopep8/) (via [vim-autopep8](https://github.com/tell-k/vim-autopep8)) for formatting. + +Test the code and generate a coverage report with +```bash +bash repo_utils/truvari_ssshtests.sh +``` + +Truvari leverages github actions to perform these checks when new code is pushed to the repository. We've noticed that the actions sometimes hang through no fault of the code. If this happens, cancel and resubmit the job. Once FuncTests are successful, it uploads an artifact of the `coverage html` report which you can download to see a line-by-line accounting of test coverage.
+ +# git flow + +To organize the commits for the repository, we use [git-flow](https://danielkummer.github.io/git-flow-cheatsheet/). Therefore, `develop` is the default branch, the latest tagged release is on `master`, and new, in-development features are within `feature/` + +When contributing to the code, be sure you're working off of develop and have run `git flow init`. + +# versioning + +Truvari uses [Semantic Versioning](https://semver.org/) and tries to stay compliant to [PEP440](https://peps.python.org/pep-0440/). As of v3.0.0, a single version is kept in the code under `truvari/__init__.__version__`. We try to keep the suffix `-dev` on the version in the develop branch. When cutting a new release, we may replace the suffix with `-rc` if we've built a release candidate that may need more testing/development. Once we've committed to a full release that will be pushed to PyPi, no suffix is placed on the version. If you install Truvari from the develop branch, the git repo hash is appended to the installed version as well as '.uc' if there are un-staged commits in the repo. + +# docs + +The github wiki serves the documentation most relevant to the `develop/` branch. When cutting a new release, we freeze and version the wiki's documentation with the helper utility `docs/freeze_wiki.sh`. 
+ +# Creating a release +Follow these steps to create a release + +0) Bump release version +1) Run tests locally +2) Update API Docs +3) Change Updates Wiki +4) Freeze the Wiki +5) Ensure all code is checked in +6) Do a [git-flow release](https://danielkummer.github.io/git-flow-cheatsheet/) +7) Use github action to make a testpypi release +8) Check test release +```bash +python3 -m venv test_truvari +python3 -m pip install --index-url https://test.pypi.org/simple --extra-index-url https://pypi.org/simple/ truvari +``` +9) Use GitHub action to make a pypi release +10) Download release-tarball.zip from step #9’s action +11) Create release (include #9) from the tag +12) Checkout develop and Bump to dev version and README ‘commits since’ badge \ No newline at end of file diff --git a/docs/v4.2.1/Home.md b/docs/v4.2.1/Home.md new file mode 100644 index 00000000..47fbc626 --- /dev/null +++ b/docs/v4.2.1/Home.md @@ -0,0 +1,35 @@ +The wiki holds documentation most relevant for develop. For information on a specific version of Truvari, see [`docs/`](https://github.com/spiralgenetics/truvari/tree/develop/docs) + +Citation: +English, A.C., Menon, V.K., Gibbs, R.A. et al. Truvari: refined structural variant comparison preserves allelic diversity. Genome Biol 23, 271 (2022). https://doi.org/10.1186/s13059-022-02840-6 + +# Before you start +VCFs aren't always created with a strong adherence to the format's specification. + +Truvari expects input VCFs to be valid so that it will only output valid VCFs. + +We've developed a separate tool that runs multiple validation programs and standard VCF parsing libraries in order to validate a VCF. + +Run [this program](https://github.com/acenglish/usable_vcf) over any VCFs that are giving Truvari trouble. + +Furthermore, Truvari expects 'resolved' SVs (e.g. DEL/INS) and will not interpret BND signals across SVTYPEs (e.g. combining two BND lines to match a DEL call). A brief description of Truvari bench methodology is linked below. 
+ +Finally, Truvari does not handle multi-allelic VCF entries and as of v4.0 will throw an error if multi-allelics are encountered. Please use `bcftools norm` to split multi-allelic entries. + +# Index + +- [[Updates|Updates]] +- [[Installation|Installation]] +- Truvari Commands: + - [[anno|anno]] + - [[bench|bench]] + - [[collapse|collapse]] + - [[consistency|consistency]] + - [[divide|divide]] + - [[phab|phab]] + - [[refine|refine]] + - [[segment|segment]] + - [[stratify|stratify]] + - [[vcf2df|vcf2df]] +- [[Development|Development]] +- [[Citations|Citations]] \ No newline at end of file diff --git a/docs/v4.2.1/Installation.md b/docs/v4.2.1/Installation.md new file mode 100644 index 00000000..a929d245 --- /dev/null +++ b/docs/v4.2.1/Installation.md @@ -0,0 +1,56 @@ +Recommended +=========== +For stable versions of Truvari, use pip +``` +python3 -m pip install truvari +``` +Specific versions can be installed via +``` +python3 -m pip install truvari==3.2.0 +``` +See [pypi](https://pypi.org/project/Truvari/#history) for a history of all distributed releases. + +Manual Installation +=================== +To build Truvari directly, clone the repository and switch to a specific tag. +``` +git clone https://github.com/spiralgenetics/truvari.git +git checkout tags/v3.0.0 +python3 -m pip install . +``` + +To see a list of all available tags, run: +``` +git tag -l +``` + +If you have an older clone of the repository and don't see the version you're looking for in tags, make sure to pull the latest changes: +``` +git pull +git fetch --all --tags +``` + +Mamba / Conda +============= +NOTE!! There is a very old version of Truvari on bioconda that - for unknown reasons - supersedes the newer, supported versions. Users may need to specify to conda which release to build. See [this ticket](https://github.com/ACEnglish/truvari/issues/130#issuecomment-1196607866) for details. + +Truvari releases are automatically deployed to bioconda. 
+Users can follow instructions here (https://mamba.readthedocs.io/en/latest/installation.html) to install mamba. (A faster alternative conda compatible package manager.) + +Creating an environment with Truvari and its dependencies. +``` +mamba create -c conda-forge -c bioconda -n truvari truvari +``` + +Alternatively, see the [conda page](https://anaconda.org/bioconda/truvari) for details +``` +conda install -c bioconda truvari +``` + +Building from develop +===================== +The default branch is `develop`, which holds in-development changes. This is for developers or those wishing to try experimental features and is not recommended for production. Development is versioned higher than the most recent stable release with an added suffix (e.g. Current stable release is `3.0.0`, develop holds `3.1.0-dev`). If you'd like to install develop, repeat the steps above but without `git checkout tags/v3.0.0`. See [wiki](https://github.com/spiralgenetics/truvari/wiki/Development#git-flow) for details on how branching is handled. + +Docker +====== +See [Development](https://github.com/spiralgenetics/truvari/wiki/Development#docker) for details on building a docker container. diff --git a/docs/v4.2.1/MatchIds.md b/docs/v4.2.1/MatchIds.md new file mode 100644 index 00000000..ca52076f --- /dev/null +++ b/docs/v4.2.1/MatchIds.md @@ -0,0 +1,74 @@ +MatchIds are used to tie base/comparison calls together in post-processing for debugging or other exploring. MatchIds have a structure of `{chunkid}.{callid}`. The chunkid is unique id per-chunk of calls. All calls sharing chunkid were within `--chunksize` distance and were compared. The callid is unique to a call in a chunk for each VCF. Because `bench` processes two VCFs (the base and comparison VCFs), the `MatchId` has two values: the first is the base variant's MatchId and the second the comparison variant's MatchId. + +For `--pick single`, the two MatchIds will be identical in the e.g. tp-base.vcf.gz and tp-comp.vcf.gz. 
However, for `--pick ac|multi`, it's possible to have cases such as one base variant matching to multiple comparison variants. That would give us MatchIds like: + +``` +# tp-base.vcf +MatchId=4.0,4.1 + +# tp-comp.vcf +MatchId=4.0,4.1 +MatchId=4.0,4.2 +``` + +This example tells us that the tp-comp variants are both pointing to `4.0` in tp-base. The tp-base variant has a higher match to the tp-comp `4.1` variant. + +One easy way to combine matched variants is to use `truvari vcf2df` to convert a benchmarking result to a pandas DataFrame and leverage pandas' merge operation. First, we convert the `truvari bench` result. + +```bash +truvari vcf2df --info --bench-dir bench_result/ data.jl +``` + +Next, we combine rows of matched variants: +```python +import joblib +import pandas as pd + +# Load the data +data = joblib.load("data.jl") + +# Separate out the variants from the base VCF and add new columns of the base/comp ids +base = data[data['state'].isin(['tpbase', 'fn'])].copy() +base['base_id'] = base['MatchId'].apply(lambda x: x[0]) +base['comp_id'] = base['MatchId'].apply(lambda x: x[1]) + +# Separate out the variants from the comparison VCF and add new columns of the base/comp ids +comp = data[data['state'].isin(['tp', 'fp'])].copy() +comp['base_id'] = comp['MatchId'].apply(lambda x: x[0]) +comp['comp_id'] = comp['MatchId'].apply(lambda x: x[1]) + +# Merge the base/comparison variants +combined = pd.merge(base, comp, left_on='base_id', right_on='comp_id', suffixes=('_base', '_comp')) + +# How many comp variants matched to multiple base variants? +counts1 = combined['base_id_comp'].value_counts() +print('multi-matched comp count', (counts1 != 1).sum()) + +# How many base variants matched to multiple comp variants? +counts2 = combined['comp_id_base'].value_counts() +print('multi-matched base count', (counts2 != 1).sum()) +``` + +The `MatchId` is also used by `truvari collapse`. However there are two differences. 
First, in the main `collapse` output, the relevant INFO field is named `CollapseId`. Second, because collapse only has a single input VCF, it is much easier to merge DataFrames. To merge the variants kept by collapse with those that were removed, we again need to convert the VCFs to DataFrames: + +```bash +truvari vcf2df -i kept.vcf.gz kept.jl +truvari vcf2df -i removed.vcf.gz remov.jl +``` + +Then we combine them: +```python +import joblib +import pandas as pd + +# Load the kept variants and set the index. +kept = joblib.load("kept.jl").set_index('CollapseId') + +# Load the removed variants and set the index. +remov = joblib.load("remov.jl") +remov['CollapseId'] = remov['MatchId'].apply(lambda x: x[0]) +remov.set_index('CollapseId', inplace=True) + +# Join the two sets of variants +result_df = kept.join(remov, how='right', rsuffix='_removed') +``` \ No newline at end of file diff --git a/docs/v4.2.1/Multi-allelic-VCFs.md b/docs/v4.2.1/Multi-allelic-VCFs.md new file mode 100644 index 00000000..fd0eb23e --- /dev/null +++ b/docs/v4.2.1/Multi-allelic-VCFs.md @@ -0,0 +1,11 @@ +Truvari only compares the first alternate allele in VCFs. If a VCF contains multi-allelic sites such as: + +``` +chr2 1948201 . T TACAACACGTACGATCAGTAGAC,TCAACACACAACACGTACGATCAGTAGAC .... +``` + +Then pre-process the VCFs with bcftools: + +```bash +bcftools norm -m-any base_calls.vcf.gz | bgzip > base_calls_split.vcf.gz +``` \ No newline at end of file diff --git a/docs/v4.2.1/Updates.md b/docs/v4.2.1/Updates.md new file mode 100644 index 00000000..9f3d2e4e --- /dev/null +++ b/docs/v4.2.1/Updates.md @@ -0,0 +1,256 @@ +# Truvari 4.2.1 +*February 6, 2024* +* `collapse` + * Faster handling of genotype data for `--gt` and `--keep common` +* general + * Fix to bed end position bug for including variants ([details](https://github.com/ACEnglish/truvari/issues/193)) + * Fix to Dockerfile +* `refine` + * Changes to `--recount` that accompany the fix to bed end positions.
+* New command `ga4gh` to convert Truvari results into GA4GH truth/query VCFs with intermediate tags + +# Truvari 4.2 +*January 12, 2024* +* `collapse` + * New parameter `--gt` disallows intra-sample events to collapse ([details](https://github.com/ACEnglish/truvari/wiki/collapse#--gt)) + * New parameter `--intra` for consolidating SAMPLE information during intra-sample collapsing ([details](https://github.com/ACEnglish/truvari/wiki/collapse#--intra)) + * Preserve phasing information when available + * Faster O(n-1) algorithm instead of O(n^2) + * Faster sub-chunking strategy makes smaller chunks of variants needing fewer comparisons + * Fixed rare non-determinism error in cases where multiple variants at the same position with equal qual/ac could be ordered differently. +* `phab` + * Correct sample handling with `--bSamples` `--cSamples` parameters + * Faster generation of consensus sequence + * Resolved 'overlapping' variant issue causing variants to be dropped + * New `poa` approach to harmonization. Faster than mafft but less accurate. Slower than wfa but more accurate. +* `bench` + * New, easier `MatchId` field to track which baseline/comparison variants match up [details](https://github.com/ACEnglish/truvari/wiki/MatchIds) + * `entry_is_present` method now considers partial missing variants (e.g. `./1`) as present + * Removed the 'weighted' metrics from `summary.json` +* `consistency` + * Fixed issue with counting duplicate records + * Added flag to optionally ignore duplicate records +* `anno svinfo` now overwrites existing SVLEN/SVTYPE info fields +* general + * Reduced fn matches for unroll sequence similarity by reporting maximum of multiple manipulations of variant sequence (roll up/down/none). Comes at a small, but reasonable, expense of some more fp matches.
+ * Bump pysam version + * Fixed bug in `unroll` sequence similarity that sometimes rolled from the wrong end + * Fixed bug for handling of None in ALT field + * `truvari.compress_index_vcf` forces overwriting of tabix index to prevent annoying crashes + + +# Truvari 4.1 +*August 7, 2023* + +* `bench` + * Creates `candidate.refine.bed` which hooks into `refine` on whole-genome VCFs [details](https://github.com/ACEnglish/truvari/wiki/bench#refining-bench-output) + * `--recount` for correctly assessing whole-genome refinement results + * experimental 'weighted' summary metrics [details](https://github.com/ACEnglish/truvari/wiki/bench#weighted-performance) + * Unresolved SVs (e.g. `ALT == <DEL>`) are filtered when `--pctseq != 0` +* `phab` + * ~2x faster via reduced IO from operating in stages instead of per-region + * Removed most external calls (e.g. samtools doesn't need to be in the environment anymore) + * new `--align wfa` allows much faster (but slightly less accurate) variant harmonization + * increased determinism of results [details](https://github.com/ACEnglish/truvari/commit/81a9ab85b91b0c530f9faeedfa4e7e0d68a5e8c2) +* `refine` + * Faster bed file intersection of `--includebed` and `--regions` + * Refine pre-flight check + * Correct refine.regions.txt end position from IntervalTree correction + * Better refine region selection with `--use-original` + * `--use-includebed` switched to `--use-region-coords` so that default behavior is to prefer the includebed's coordinates + * `--use-original-vcfs` to use the original pre-bench VCFs + * `refine.variant_summary.json` is cleaned of uninformative metrics +* `stratify` + * parallel parsing of truvari directory to make processing ~4x faster +* `msa2vcf` Fixed REPL decomposition bug to now preserve haplotypes +* `anno grpaf` - expanded annotation info fields +* `anno density` - new parameter `--stepsize` for sliding windows +* `collapse` + * New optional `--median-info` fields
[#146](https://github.com/ACEnglish/truvari/issues/146) +* Minor updates + * Fix some `anno` threading on macOS [#154](https://github.com/ACEnglish/truvari/issues/154) + * Monomorphic/multiallelic check fix in `bench` + * `PHAB_WRITE_MAFFT` environment variable to facilitate updating functional test answer key + * Slightly slimmer docker container + +# Truvari 4.0 +*March 13, 2023* + +As part of the GIAB TR effort, we have made many changes to Truvari's tooling to enable comparison of variants in TR regions down to 5bp. Additionally, in order to keep Truvari user friendly we have made changes to the UI. Namely, we've updated some default parameters, some command-line arguments, and some outputs. There are also a few new tools and how a couple of tools work has changed. Therefore, we decided to bump to a new major release. If you're using Truvari in any kind of production capacity, be sure to test your pipeline before moving to v4.0. + +* New `refine` command for refining benchmarking results. [Details](refine) +* `bench` + * [Unroll](bench#unroll) is now the default sequence comparison approach. + * New `--pick` parameter to control the number of matches a variant can participate in [details](bench#controlling-the-number-of-matches) + * The `summary.txt` is now named `summary.json` + * Outputs parameters to `params.json` + * Output VCFs are sorted, compressed, and indexed + * Ambiguous use of 'call' in outputs corrected to 'comp' (e.g. `tp-call.vcf.gz` is now `tp-comp.vcf.gz`) + * Renamed `--pctsim` parameter to `--pctseq` + * Fixed bug where FP/FN weren't getting the correct, highest scoring match reported + * Fixed bug where `INFO/Multi` wasn't being properly applied + * Fixed bug where variants spanning exactly one `--includebed` region were erroneously being counted. 
+ * Removed parameters: `--giabreport`, `--gtcomp`,`--multimatch`, `--use-lev`, `--prog`, `--unroll` +* `collapse` + * Renamed `--pctsim` parameter to `--pctseq` + * Runtime reduction by ~40% with short-circuiting during `Matcher.build_match` + * Better output sorting which may allow pipelines to be a little faster. +* `vcf2df` + * More granular sizebins for `[0,50)` including better handling of SNPs + * `--multisample` is removed. Now automatically add all samples with `--format` + * key index column removed and replaced by chrom, start, end. Makes rows easier to read and easier to work with e.g. pyranges +* `anno` + * Simplified ui. Commands that work on a single VCF and can stream (stdin/stdout) no longer use `--input` but a positional argument. + * Added `addid` +* `consistency` + * Slight speed improvement + * Better json output format +* `segment` + * Added `--passonly` flag + * Changed UI, including writing to stdout by default + * Fixed END and 1bp DEL bugs, now adds N to segmented variants' REF, and info fields SVTYPE/SVLEN +* API + * Began a focused effort on improving re-usability of Truvari code. + * Entry point to run benchmarking programmatically with [Bench object](https://truvari.readthedocs.io/en/latest/truvari.html#bench). + * Better development version tracking. [details](https://github.com/ACEnglish/truvari/commit/4bbf8d9a5be3b6a3f935afbd3a9b323811b676a0) + * Improved developer documentation. See [readthedocs](https://truvari.readthedocs.io/) +* general + * msa2vcf now left-trims and decomposes variants into indels + * Functional tests reorganization + * Fix for off-by-one errors when using pyintervaltree. See [ticket](https://github.com/ACEnglish/truvari/issues/137) + * Removed progressbar and Levenshtein dependencies as they are no longer used. 
+ +# Truvari 3.5 +*August 27, 2022* + +* `bench` + * `--dup-to-ins` flag automatically treats SVTYPE==DUP as INS, which helps compare some programs/benchmarks + * New `--unroll` sequence comparison method for `bench` and `collapse` ([details](bench#unroll)) +* Major `anno trf` refactor (TODO write docs) including: + * annotation of DEL is fixed (was reporting the ALT copy numbers, not the sample's copy numbers after incorporating the ALT) + * allow 'denovo' annotation by applying any TRF annotations found, not just those with corresponding annotations +* New `anno grpaf` annotates vcf with allele frequency info for groups of samples +* New `phab` for variant harmonization ([details](../phab)) +* backend + * `truvari.entry_size` returns the length of the event in the cases where len(REF) == len(ALT) (e.g. SNPs entry_size is 1) + * New key utility for `truvari.build_anno_trees` +* general + * Float metrics written to the VCF (e.g. PctSizeSimilarity) are rounded to precision of 4 + * Nice colors in some `--help` with [rich](https://github.com/Textualize/rich/) +* `divide` + * output shards are now more easily sorted (i.e. `ls divide_result/*.vcf.gz` will return the shards in the order they were made) + * compression/indexing of sub-VCFs in separate threads, reducing runtime +* user issues + * Monomorphic reference ALT alleles no longer throw an error in `bench` ([#131](https://github.com/ACEnglish/truvari/issues/131)) + * `SVLEN Number=A` fix ([#132](https://github.com/ACEnglish/truvari/issues/132)) + +# Truvari 3.4 +*July 7, 2022* + +* Improved performance of `consistency` (see [#127](https://github.com/ACEnglish/truvari/pull/127)) +* Added optional json output of `consistency` report +* Allow GT to be missing, which is allowed by VCF format specification +* TRF now uses `truvari.entry_variant_type` instead of trying to use `pysam.VariantRecord.info["SVLEN"]` +directly which allows greater flexibility. +* vcf2df now parses fields with `Number=\d` (e.g.
2+), which is a valid description +* `truvari.seqsim` is now case insensitive (see [#128](https://github.com/ACEnglish/truvari/issues/128)) +* Collapse option to skip consolidation of genotype information so kept alleles are unaltered +* `truvari anno dpcnt --present` will only count the depths of non ./. variants +* New collapse annotation `NumConsolidate` records how many FORMATs were consolidated +* Official [conda](https://anaconda.org/bioconda/truvari) support + +# Truvari 3.3 +*May 25, 2022* + +* New utilities `vcf_ranges` and `make_temp_filename` +* New annotations `dpcnt` and `lcr` +* Fixed a bug in `truvari collapse --keep` that prevented the `maxqual` or `common` options from working +* Increased determinism for `truvari collapse` so that in cases of tied variant position the longer allele is returned. If the alleles also have the same length, they are sorted alphabetically by the REF +* New `truvari bench --extend` functionality. See [discussion](https://github.com/ACEnglish/truvari/discussions/99) for details + +# Truvari 3.2 +*Apr 1, 2022* + +* Removed `truvari.copy_entry` for `pysam.VariantRecord.translate` a 10x faster operation +* Faster `truvari collapse` ([@c8b319b](https://github.com/ACEnglish/truvari/commit/c8b319b0e717a9e342f52e4a5e927f154eeb0e4a)) +* When building `MatchResult` between variants with shared start/end positions, we save processing work by skipping haplotype creation and just compare REFs/ALTs directly. +* Updated documentation to reference the paper https://doi.org/10.1101/2022.02.21.481353 +* New `truvari anno density` for identifying regions with 'sparse' and 'dense' overlapping SVs ([details](https://github.com/spiralgenetics/truvari/wiki/anno#truvari-anno-density)) +* Better `bench` genotype reporting with `summary.txt` having a `gt_matrix` of Base GT x Comp GT for all Base calls' best, TP match. 
+* New `truvari anno bpovl` for intersecting against tab-delimited files ([details](https://github.com/spiralgenetics/truvari/wiki/anno#truvari-anno-bpovl)) +* New `truvari divide` command to split VCFs into independent parts ([details](https://github.com/ACEnglish/truvari/wiki/divide)) +* Replaced `--buffer` parameter with `--minhaplen` for slightly better matching specificity +* Bugfix - `truvari anno trf` no longer duplicates entries spanning multiple parallelization regions +* Bugfix - `collapse` MatchId/CollapseId annotation wasn't working +* Bugfixes - from [wwliao](https://github.com/wwliao) ([@4dd9968](https://github.com/ACEnglish/truvari/commit/4dd99683912236f433166889bb0b5667e9fa936d) [@ef2cfb3](https://github.com/ACEnglish/truvari/commit/ef2cfb366b60a5af4671d65d3ed987b08da72227)) +* Bugfixes - Issues [#107](https://github.com/ACEnglish/truvari/issues/107), [#108](https://github.com/ACEnglish/truvari/issues/108) + +# Truvari 3.1 +*Dec 22, 2021* + +* `bench` now annotates FPs by working a little differently. See [[bench|bench#methodology]] for details. +* Recalibrated TruScore and new reciprocal overlap measurement for sequence resolved `INS` ([details](https://github.com/spiralgenetics/truvari/discussions/92)) +* Match objects are now usable via the SDK. See [#94](https://github.com/spiralgenetics/truvari/discussions/94) for an example of using Truvari programmatically +* `file_zipper` VCF iteration strategy (`GenomeTree` -> `RegionVCFIterator`) that improves speed, particularly when using `--includebed` +* `collapse` refactored to use Match object and for prettier code, cleaner output. +* `anno remap` now optionally adds `INFO` field of the location of the top N hits. +* An experimental tool `truvari segment` added to help SV association analysis. +* `vcf2df` now supports pulling `FORMAT` fields from multiple samples. +* `vcf2df` now adds `('_ref', '_alt')`, or `('_ref', '_het', '_hom')` for `INFO,Number=[R|G]` fields, respectively.
+* Improved documentation, including http://truvari.readthedocs.io/ for developers. +* Increasing/diversifying test coverage exposed minor bugs which were fixed. +* `bench --no-ref --cSample` bug fixes. +* Minor usability feature implemented in `help_unknown_cmd`. + +# Truvari 3.0 +*Sep 15, 2021* + +As Truvari's adoption and functionality grows, we decided to spend time working on sustainability and performance of the tool. Multiple [Actions](https://github.com/spiralgenetics/truvari/actions) for CI/CD have been added. Many components have been refactored for speed, and other 'cruft' code has been removed. Some of these changes (particularly the switch to using edlib for sequence similarity) affects the results. Therefore, we've bumped to a new major release version. + +* Working on speed improvements +* Added edlib as the default when calculating pctseq_sim, keeping Levenshtein as an option (`--use-lev`). +* `truvari bench` summary's gt_precision/gt_recall are replaced by gt_concordance, which is just the percent of TP-comp calls with a concordant genotype. `--no-ref` has better functionality. `--giabreport` is different. +* Added `--keep common` to `truvari collapse`, which allows one to choose to keep the allele with the highest MAC. +* `truvari collapse --hap` wasn't working correctly. The assumptions about the calls being phased weren't being +properly used (e.g. don't collapse 1|1) and the NumCollapsed was being populated before the single-best +match was chosen. The latter is a reporting problem, but the former had an effect on the results with +~3% of collapsed calls being mis-collapsed. +* `truvari anno trf` is now faster and simpler in its approach and what's reported... and hopefully more useful. +* `truvari anno grm` has min_size and regions arguments added. +* truv2df has become `truvari vcf2df` where the default is vcf conversion with options to run on a `truvari bench` output directory.
It also allows a specific sample to be parsed with `--format` and better Number=A handling. +* NeighId added to `truvari anno numneigh`, which works like bedtools cluster. +* The method af_calc now makes MAC/AC. +* Added 'partial' to `truvari anno remap`. +* Added `truvari anno svinfo`. +* Removed `truvari stats` as `truvari vcf2df` is better and began building [community-driven summaries](https://github.com/spiralgenetics/truvari/discussions/categories/vcf2df-recipes). +* Ubiquitous single version. +* Added a Dockerfile and instructions for making a Truvari docker container. +* Code and repository cleaning. +* Github actions for automated pylint, testing, and releases to pypi. +* Preserving per-version documentation from the wiki in `docs/`. + + +# Truvari 2.1 +*Jan 27, 2021* + +We've expanded and improved Truvari's [annotations](https://github.com/spiralgenetics/truvari/wiki/anno). We've added an [SV "collapsing" tool](https://github.com/spiralgenetics/truvari/wiki/collapse). And we've added a way to [turn VCFs into pandas DataFrames](https://github.com/spiralgenetics/truvari/wiki/truv2df) easily for downstream analysis/QC. + +# Truvari 2.0 +*May 14, 2020* + +After performing a drastic code refactor, we were able to create several helper methods from Truvari's core functionality around SV comparisons and VCF manipulations. This reusable code gave us an opportunity to create tools relevant for SV analysis. + +Truvari now contains multiple subcommands. In addition to the original benchmarking functionality (`truvari bench`), Truvari can generate SV relevant summary statistics, compute consistency of calls within VCFs, and we've begun to develop annotations for SVs. Details on these tools are on the [WIKI](https://github.com/spiralgenetics/truvari/wiki). + +We are committed to continually improving Truvari with the hopes of advancing the study and analysis of structural variation. + +# Truvari 1.3 +*September 25th, 2019* + +Truvari has some big changes. 
In order to keep up with the retirement of Python 2.7 https://pythonclock.org/ +We're now only supporting Python 3. + +Additionally, we now package Truvari so it and its dependencies can be installed directly. See Installation +below. This will enable us to refactor the code for easier maintenance and reusability. + +Finally, we now automatically report genotype comparisons in the summary stats. \ No newline at end of file diff --git a/docs/v4.2.1/anno.md b/docs/v4.2.1/anno.md new file mode 100644 index 00000000..55835902 --- /dev/null +++ b/docs/v4.2.1/anno.md @@ -0,0 +1,494 @@ + +Truvari annotations: +* [gcpct](anno#truvari-anno-gcpct) - GC Percent +* [gtcnt](anno#truvari-anno-gtcnt) - Genotype Counts +* [trf](anno#truvari-anno-trf) - Tandem Repeats +* [grm](anno#truvari-anno-grm) - Mappability +* [repmask](anno#truvari-anno-repmask) - Repeats +* [remap](anno#truvari-anno-remap) - Allele Remapping +* [hompct](anno#truvari-anno-hompct) - Homozygous Percent +* [numneigh](anno#truvari-anno-numneigh) - Number of Neighbors +* [svinfo](anno#truvari-anno-svinfo) - SVINFO Fields +* [bpovl](anno#truvari-anno-bpovl) - Annotation Intersection +* [density](anno#truvari-anno-density) - Call Density +* [dpcnt](anno#truvari-anno-dpcnt) - Depth (DP) and Alt-Depth (AD) Counts +* [lcr](anno#truvari-anno-lcr) - Low-complexity Regions +* [grpaf](anno#truvari-anno-grpaf) - Sample Group Allele-Frequency Annotations + +# truvari anno gcpct + +This will add an INFO tag `GCPCT` to each element in a VCF of the GC percent of the call's sequence. + +For deletions, this is the GC percent of the reference range of the call. For insertions, the ALT sequence is analyzed.
+``` +usage: gcpct [-h] [-o OUTPUT] -r REFERENCE [input] + +Annotates GC content of SVs + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output filename (stdout) + -r REFERENCE, --reference REFERENCE + Reference fasta +``` + +# truvari anno gtcnt +This will add an INFO tag `GTCNT` to each element in a VCF with the count of genotypes found across all samples. The value is a list of Counts of genotypes for the allele across all samples (UNK, REF, HET, HOM). This is most useful for pVCFs. + +``` +usage: gtcnt [-h] [-o OUTPUT] [input] + +Annotates GTCounts of alleles + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output filename (stdout) +``` + +# truvari anno trf +Adds a tandem repeat annotation to sequence resolved Insertion/Deletion variants in a VCF. + +### Annotations added +| Field Name | Description | +|------------|-------------------------------------------------------------| +| TRF | Entry hits a tandem repeat region | +| TRFdiff | ALT TR copy difference from reference | +| TRFrepeat | Repeat motif | +| TRFovl | Percent of ALT covered by TRF annotation | +| TRFstart | Start position of discovered repeat | +| TRFend | End position of discovered repeat | +| TRFperiod | Period size of the repeat | +| TRFcopies | Number of copies aligned with the consensus pattern | +| TRFscore | Alignment score | +| TRFentropy | Entropy measure | +| TRFsim | Similarity of ALT sequence to generated motif faux sequence | + +### Details +Given a set of tandem repeat regions and a VCF, this annotates the tandem repeat motif and copy number change of insertions and deletions as expansions/contraction. The expected input catalog of tandem repeats is from a subset of columns in the Adotto TR catalog ([link](https://github.com/ACEnglish/adotto/blob/main/regions/DataDescription.md)).
This file can be formatted for `truvari anno trf` via: +```bash +zcat adotto_TRregions_v1.1.bed.gz | cut -f1-3,18 | bgzip > anno.trf.bed.gz +tabix anno.trf.bed.gz +``` +For deletions, the tool simply finds the motif annotation with the highest overlap over the variant's boundaries. It then removes that sequence from the reference and calculates how many copies of the motif are removed with the formula `round(-(ovl_pct * svlen) / anno["period"], 1)`. If a deletion overlaps multiple motifs, the highest scoring motif is chosen based on higher reciprocal overlap percent first and TRF score second (see [code](https://github.com/ACEnglish/truvari/blob/2219f52850252c18dcd8c679da6644bb1cee5b68/truvari/annotations/trf.py#L29)). + +For insertions, by default the tool first tries to estimate which motif is contained in the alternate sequence. For each overlapping annotation, the copy number difference of the motif is calculated via `copy_diff = len(entry.alts[0][1:]) / anno["period"]`. Next, a 'faux sequence' is made from `copy_diff` number of the motif. If the sequence is above the `--motif-similarity` with the insertion sequence, that is considered the insertion's motif. If no estimate is above the `--motif-similarity`, the insertion is incorporated into the reference and TRF is run. If the discovered TRF hits match a motif in the tandem repeat regions file, that annotation is used. If the highest scoring TRF hit doesn't match the tandem repeats region file, the nominally de novo annotation is added to the insertion's vcf entry.
+ +``` +usage: trf [-h] -i INPUT [-o OUTPUT] [-e EXECUTABLE] [-T TRF_PARAMS] -r REPEATS -f REFERENCE [-s MOTIF_SIMILARITY] + [-m MIN_LENGTH] [-R] [--no-estimate] [-C CHUNK_SIZE] [-t THREADS] [--debug] + +Intersect vcf with reference tandem repeats and annotate +variants with the best fitting repeat motif and its copy number +relative to the reference + +options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + VCF to annotate + -o OUTPUT, --output OUTPUT + Output filename (stdout) + -e EXECUTABLE, --executable EXECUTABLE + Path to tandem repeat finder (trf409.linux64) + -T TRF_PARAMS, --trf-params TRF_PARAMS + Default parameters to send to trf (3 7 7 80 5 40 500 -h -ngs) + -r REPEATS, --repeats REPEATS + Reference repeat annotations + -f REFERENCE, --reference REFERENCE + Reference fasta file + -s MOTIF_SIMILARITY, --motif-similarity MOTIF_SIMILARITY + Motif similarity threshold (0.9) + -m MIN_LENGTH, --min-length MIN_LENGTH + Minimum size of entry to annotate (50) + -R, --regions-only Only write variants within --repeats regions (False) + --no-estimate Skip INS estimation procedure and run everything through TRF. (False) + -C CHUNK_SIZE, --chunk-size CHUNK_SIZE + Size (in mbs) of reference chunks for parallelization (5) + -t THREADS, --threads THREADS + Number of threads to use (1) + --debug Verbose logging +``` + +# truvari anno grm + +For every SV, we create a kmer over the upstream and downstream reference and alternate breakpoints. +We then remap that kmer to the reference genome and report alignment information. +This does not alter the VCF traditional annotations, but instead will create a pandas +DataFrame and save it to a joblib object. + +There are four queries made per-SV. For both reference (r), alternate (a) we create upstream (up) and downstream (dn) kmers. +So the columns are all prefixed with one of "rup_", "rdn_", "aup_", "adn_".
+ +In the alignment information per-query, there are three 'hit' counts: +- nhits : number of query hits +- dir_hits : direct strand hit count +- com_hits : complement strand hit count + +The rest of the alignment information is reported by average (avg), maximum (max), and minimum (min) + +The suffixes are: +- q : mapping quality score of the hits +- ed : edit distance of the hits +- mat : number of matches +- mis : number of mismatches + +For example, "aup_avg_q", is the alternate's upstream breakend kmer's average mapping quality score. + +``` +usage: grm [-h] -i INPUT -r REFERENCE [-R REGIONS] [-o OUTPUT] [-k KMERSIZE] [-m MIN_SIZE] [-t THREADS] [--debug] + +Maps graph edge kmers with BWA to assess Graph Reference Mappability + +options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + Input VCF + -r REFERENCE, --reference REFERENCE + BWA indexed reference + -R REGIONS, --regions REGIONS + Bed file of regions to parse (None) + -o OUTPUT, --output OUTPUT + Output dataframe (results.jl) + -k KMERSIZE, --kmersize KMERSIZE + Size of kmer to map (50) + -m MIN_SIZE, --min-size MIN_SIZE + Minimum size of variants to map (25) + -t THREADS, --threads THREADS + Number of threads (1) + --debug Verbose logging +``` + +# truvari anno repmask + +``` +usage: repmask [-h] -i INPUT [-o OUTPUT] [-e EXECUTABLE] [-m MIN_LENGTH] [-M MAX_LENGTH] [-t THRESHOLD] [-p PARAMS] [-T THREADS] + [--debug] + + Wrapper around RepeatMasker to annotate insertion sequences in a VCF + +options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + VCF to annotate (None) + -o OUTPUT, --output OUTPUT + Output filename (/dev/stdout) + -e EXECUTABLE, --executable EXECUTABLE + Path to RepeatMasker (RepeatMasker) + -m MIN_LENGTH, --min-length MIN_LENGTH + Minimum size of entry to annotate (50) + -M MAX_LENGTH, --max-length MAX_LENGTH + Maximum size of entry to annotate (50000) + -t THRESHOLD, --threshold THRESHOLD + Threshold for pct of allele covered (0.8) + 
-p PARAMS, --params PARAMS + Default parameter string to send to RepeatMasker (-pa {threads} -qq -e hmmer -species human -lcambig + -nocut -div 50 -no_id -s {fasta}) + -T THREADS, --threads THREADS + Number of threads to use (1) + --debug Verbose logging +``` + +# truvari anno remap + +Taking the Allele’s sequence, remap it to the reference and annotate based on the closest alignment. + +![](https://github.com/spiralgenetics/truvari/blob/develop/imgs/remap_example.png) + +``` +usage: remap [-h] -r REFERENCE [-o OUTPUT] [-m MINLENGTH] [-t THRESHOLD] [-d DIST] [-H HITS] [--debug] [input] + +Remap VCF'S alleles sequence to the reference to annotate REMAP + +- novel : Allele has no hits in reference +- tandem : Allele's closest hit is within len(allele) bp of the SV's position +- interspersed : Allele's closest hit is not tandem +- partial : Allele only has partial hit(s) less than --threshold + +Which alleles and alignments to consider can be altered with: +- --minlength : minimum SV length to considred (50) +- --dist : For deletion SVs, do not consider alignments that hit within Nbp of the SV's position +(a.k.a. 
alignments back to the source sequence) (10) +- --threshold : Minimum percent of allele's sequence used by alignment to be considered (.8) + +positional arguments: + input Input VCF (/dev/stdin) + +options: + -h, --help show this help message and exit + -r REFERENCE, --reference REFERENCE + BWA indexed reference + -o OUTPUT, --output OUTPUT + Output VCF (/dev/stdout) + -m MINLENGTH, --minlength MINLENGTH + Smallest length of allele to remap (50) + -t THRESHOLD, --threshold THRESHOLD + Threshold for pct of allele covered to consider hit (0.8) + -d DIST, --dist DIST Minimum distance an alignment must be from a DEL's position to be considered (10)) + -H HITS, --hits HITS Report top hits as chr:start-end.pct (max 0) + --debug Verbose logging +``` +# truvari anno hompct + +``` +usage: hompct [-h] -i INPUT [-o OUTPUT] [-b BUFFER] [-m MINANNO] [-M MAXGT] [-c MINCOUNT] [--debug] + +Calcluate the the Hom / (Het + Hom) of variants in the region of SVs +Requires the VCF to contain SVs beside SNPs/Indels + +options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + Compressed, indexed VCF to annotate + -o OUTPUT, --output OUTPUT + Output filename (stdout) + -b BUFFER, --buffer BUFFER + Number of base-pairs up/dn-stream to query (5000) + -m MINANNO, --minanno MINANNO + Minimum size of event to annotate (50) + -M MAXGT, --maxgt MAXGT + Largest event size to count for genotyping (1) + -c MINCOUNT, --mincount MINCOUNT + Minimum number of genotyping events to report HOMPCT (0) + --debug Verbose logging +``` + +# truvari anno numneigh + +``` +usage: numneigh [-h] [-o OUTPUT] [-r REFDIST] [-s SIZEMIN] [--passonly] [--debug] [input] + +For every call within size boundaries, +Add NumNeighbors info field of how many calls are within the distance +Add NeighId clustering field in the same chained neighborhood +For example, +:: + -- is a call, refdist is 2 + - - - - - - + nn: 1 2 1 0 1 1 + id: 0 0 0 1 2 2 + +positional arguments: + input VCF to annotate + +options: + 
-h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output vcf (stdout) + -r REFDIST, --refdist REFDIST + Max reference location distance (1000) + -s SIZEMIN, --sizemin SIZEMIN + Minimum variant size to consider for annotation (50) + --passonly Only count calls with FILTER == PASS + --debug Verbose logging +``` + +# truvari anno svinfo + +Uses `truvari.entry_size` and `truvari.entry_variant_type` on entries >= `args.minsize` to add 'SVLEN' and 'SVTYPE' annotations to a VCF's INFO. + +How SVLEN is determined: +- Starts by trying to use INFO/SVLEN +- If SVLEN is unavailable and ALT field is an SV (e.g. \<DEL\>, \<INS\>, etc), use abs(vcf.start - vcf.end). The INFO/END tag needs to be available, especially for INS. +- Otherwise, return the size difference of the sequence resolved call using abs(len(vcf.REF) - len(str(vcf.ALT[0]))) + +How SVTYPE is determined: +- Starts by trying to use INFO/SVTYPE +- If SVTYPE is unavailable, infer if entry is an insertion or deletion by looking at the REF/ALT sequence size differences +- If REF/ALT sequences are not available, try to parse the \<DEL\>, \<INS\>, etc from the ALT column. +- Otherwise, assume 'UNK' + +``` +usage: svinfo [-h] [-o OUTPUT] [-m MINSIZE] [input] + +Adds SVTYPE and SVLEN INFO fields + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output filename (stdout) + -m MINSIZE, --minsize MINSIZE + Minimum size of entry to annotate (50) +``` + +# truvari anno bpovl + +After turning a tab-delimited annotation file into an IntervalTree, intersect the start/end and overlap of SVs. +The output is a light-weight pandas DataFrame saved with joblib.
The columns in the output are: + +- vcf_key : Variant key from `truvari.entry_to_key` +- intersection : Type of intersection between the SV and the annotation + - start_bnd - SV's start breakpoint hits the annotation + - end_bnd - SV's end breakpoint hits the annotation + - overlaps - Annotation's start/end boundaries are completely within the SV + - contains - Annotation's start/end boundaries span the entire SV +- anno_key : Annotation file's line index + +The idea with this tool is to annotate variants against tab-delimited files, especially when there's a 1-to-N relationship between a variant and its annotations. This tool is useful when used in conjunction with `truvari vcf2df` and pandas DataFrames. + +For example, say we have a VCF of SVs and a GTF of genes/gene features from Ensembl. Any SV may intersect multiple features, which doesn't lend itself well to directly annotating the VCF's INFO. After using `bpovl`, we'll use Truvari to convert the SVs to a DataFrame. + +```bash +truvari anno bpovl -a genes.gtf.gz -o anno.jl -p gff variants.vcf.gz +truvari vcf2df variants.vcf.gz variants.jl +``` + +We can then treat the files similar to a database and do queries and joins to report which variants intersect which annotations. + +```python +import joblib +from gtfparse import read_gtf +variants = joblib.load("variants.jl") +genes = read_gtf("genes.gtf.gz") +annos = joblib.load("anno.jl") +to_check = annos.iloc[0] + +print(to_check) +# vcf_key chr20:958486-958487.A +# intersection start_bnd +# anno_key 11 + +print(variants.loc[to_check['vcf_key']]) +# id None +# svtype INS +# ... etc + +print(genes.loc[to_check['anno_key']]) +# seqname chr20 +# source ensembl_havana +# feature exon +# start 958452 +# ... etc +``` + +Similar to tabix, `bpovl` has presets for known file types like bed and gff. But any tab-delimited file with sequence/chromosome, start position, and end position can be parsed. Just set the "Annotation File Arguments" to the 0-based column indexes.
For example, a bed file +has arguments `-s 0 -b 1 -e 2 -c #`. + +``` +usage: bpovl [-h] -a ANNO -o OUTPUT [--sizemin SIZEMIN] [--spanmax SPANMAX] [-p {bed,gff}] [-c COMMENT] [-s SEQUENCE] [-b BEGIN] + [-e END] [-1] + [input] + +Creates intersection of features in an annotation file with SVs' breakpoints and overlap + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -a ANNO, --anno ANNO Tab-delimited annotation file + -o OUTPUT, --output OUTPUT + Output joblib DataFrame + --sizemin SIZEMIN Minimum size of variant to annotate (50) + --spanmax SPANMAX Maximum span of SVs to annotate (50000) + +Annotation File Arguments: + -p {bed,gff}, --preset {bed,gff} + Annotation format. This option overwrites -s, -b, -e, -c and -1 (None) + -c COMMENT, --comment COMMENT + Skip lines started with character. (#) + -s SEQUENCE, --sequence SEQUENCE + Column of sequence/chromosome name. (0) + -b BEGIN, --begin BEGIN + Column of start chromosomal position. (1) + -e END, --end END Column of end chromosomal position. (2) + -1, --one-based The position in the anno file is 1-based rather than 0-based. (False) +``` +# truvari anno density +Partitions a `--genome` into `--windowsize` regions and count how many variants overlap. Annotate +regions with no variants as 'sparse' and with greater than or equal to (mean + `--threshold` * standard +deviation) number of variants as 'dense'. Outputs a joblib DataFrame with columns +`chrom, start, end, count, anno`. 
+ +``` +usage: density [-h] -g GENOME -o OUTPUT [-m MASK] [-w WINDOWSIZE] [-s STEPSIZE] [-t THRESHOLD] [input] + +Identify 'dense' and 'sparse' variant windows of the genome + +positional arguments: + input Input VCF (/dev/stdin) + +optional arguments: + -h, --help show this help message and exit + -g GENOME, --genome GENOME + Genome bed file + -o OUTPUT, --output OUTPUT + Output joblib DataFrame + -m MASK, --mask MASK Mask bed file + -w WINDOWSIZE, --windowsize WINDOWSIZE + Window size (10000) + -s STEPSIZE, --stepsize STEPSIZE + Window step size (10000) + -t THRESHOLD, --threshold THRESHOLD + std for identifying 'dense' regions (3) +``` + +# truvari anno dpcnt + +For multi-sample VCFs, it is often useful to have summarized depth (DP) information across samples per-variant. This adds an `INFO/DPCNT` annotation with counts of how many samples have `FORMAT/DP` for each of the user-defined bins. Bins are incremented using `bisect` e.g. `pos = bisect.bisect(bins, dp); bins[pos] += 1;` + +``` +usage: dpcnt [-h] [-b BINS] [--no-ad] [-p] [-o OUTPUT] [input] + +Quick utility to count how many samples have >= Nx coverage per-variant + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -b BINS, --bins BINS Coverage bins to bisect left the counts (0,5,10,15) + --no-ad Skip adding ADCNT bins + -p, --present Only count sites with present (non ./.) genotypes + -o OUTPUT, --output OUTPUT + Output filename (stdout) +``` + +# truvari anno lcr + +``` +usage: lcr [-h] [-o OUTPUT] [input] + +Annotate low complexity region entropy score for variants +Credit: https://jszym.com/blog/dna_protein_complexity/ + +positional arguments: + input VCF to annotate (stdin) + +options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output filename (stdout) +``` + +# truvari anno grpaf + +Add INFO tags of allele frequency annotations for groups of samples.
For every group in `--labels` tab-delimited file, calculate the AF,MAF,ExcHet,HWE,MAC,AC for the samples in the group. Adds INFO tags with suffix of the group identifier (e.g. `AF_EAS`). `--strict` will hard fail if there are samples in the `--labels` not present in the vcf header. + +``` +usage: grpaf [-h] [-o OUTPUT] -l LABELS [-t TAGS] [--strict] [--debug] [input] + +Add allele frequency annotations for subsets of samples + +positional arguments: + input VCF to annotate + +options: + -h, --help show this help message and exit + -o OUTPUT, --output OUTPUT + Output filename (stdout) + -l LABELS, --labels LABELS + Tab-delimited file of sample and group + -t TAGS, --tags TAGS Comma-separated list of tags to add from AF,MAF,ExcHet,HWE,MAC,AC,AN (all) + --strict Exit if sample listed in labels is not present in VCF (False) + --debug Verbose logging +``` \ No newline at end of file diff --git a/docs/v4.2.1/bench.md b/docs/v4.2.1/bench.md new file mode 100644 index 00000000..0d027cb6 --- /dev/null +++ b/docs/v4.2.1/bench.md @@ -0,0 +1,293 @@ + +Quick start +=========== +Run this command where base is your 'truth set' SVs and comp is the comparison set of SVs. +```bash +truvari bench -b base_calls.vcf -c comp_calls.vcf -o output_dir/ +``` + +Matching Parameters +=================== +Picking matching parameters can be more of an art than a science. It really depends on the precision of your callers and the tolerance you wish to allow them such that it is a fair comparison. + +For example, depth of coverage callers (such as CNVnator) will have very 'fuzzy' boundaries, and don't report the exact deleted sequence but only varying regions. So thresholds of `pctseq=0`, `pctsize=.5`, `pctovl=.5`, `refdist=1000` may seem fair. + +[BioGraph](https://github.com/spiralgenetics/biograph) and many long-read callers report precise breakpoints and full alternate allele sequences. When benchmarking those results, we want to ensure our accuracy by using the stricter default thresholds. 
+ +If you're still having trouble picking thresholds, it may be beneficial to do a few runs of Truvari bench over different values. Start with the strict defaults and gradually increase the leniency. From there, you can look at the performance metrics and manually inspect differences between the runs to find out what level you find acceptable. Truvari is meant to be flexible for comparison. More importantly, Truvari helps one clearly report the thresholds used for reproducibility. + +Here is a rundown of each matching parameter. +| Parameter | Default | Definition | +|------------|---------|------------| +| refdist | 500 | Maximum distance comparison calls must be within from base call's start/end | +| pctseq | 0.7 | Edit distance ratio between the REF/ALT haplotype sequences of base and
comparison call. See "Comparing Sequences of Variants" below. | +| pctsize | 0.7 | Ratio of min(base_size, comp_size)/max(base_size, comp_size) | +| pctovl | 0.0 | Ratio of two calls' (overlapping bases)/(longest span) | +| typeignore | False | Types don't need to match to compare calls. | + +Below are matching parameter diagrams to illustrate (approximately) how they work. + +``` + █ = Deletion ^ = Insertion + +--refdist REFDIST (500) + Max reference location distance + + ACTGATCATGAT + |--████--| + █████ + + Calls are within reference distance of 2 + +--pctsize PCTSIZE (0.7) + Min pct allele size similarity + + ACTGATCATGA sizes + █████ -> 5bp + ████ -> 4bp + + variants have 0.8 size similarity + + +--pctovl PCTOVL (0.0) + Min pct reciprocal overlap + + ACTGATCATGA ranges + █████ [2,7) + ████ [4,8) + + variants have 0.6 reciprocal overlap + + +--pctseq PCTSEQ (0.7) + Min percent allele sequence similarity + + A-CTG-ACTG + ^ ^ haplotypes + | └ACTG -> CTGACTGA + └CTGA -> CTGACTGA + + haplotypes have 100% sequence similarity +``` + +Outputs +======= +Truvari bench writes the following files to the `--output` directory. +| File | Description | +|----------------------|---------------------------------------------| +| tp-base.vcf.gz | True positive calls from the base VCF | +| tp-comp.vcf.gz | True positive calls from the comparison VCF | +| fp.vcf.gz | False positive calls from comparison | +| fn.vcf.gz | False negative calls from base | +| summary.json | Json output of performance stats | +| params.json | Json output of parameters used | +| candidate.refine.bed | Bed file of regions for `refine` | +| log.txt | Run's log | + +summary.json +------------ +Stats generated by benchmarking are written to `summary.json`.
+ +| Metric | Definition | +|----------------|------------------------------------------------------------| +| TP-base | Number of matching calls from the base vcf | +| TP-comp | Number of matching calls from the comp vcf | +| FP | Number of non-matching calls from the comp vcf | +| FN | Number of non-matching calls from the base vcf | +| precision | TP-comp / (TP-comp + FP) | +| recall | TP-base / (TP-base + FN) | +| f1 | 2 * ((recall * precision) / (recall + precision)) | +| base cnt | Number of calls in the base vcf | +| comp cnt | Number of calls in the comp vcf | +| TP-comp_TP-gt | TP-comp with genotype match | +| TP-comp_FP-gt | TP-comp without genotype match | +| TP-base_TP-gt | TP-base with genotype match | +| TP-base_FP-gt | TP-base without genotype match | +| gt_concordance | TP-comp_TP-gt / (TP-comp_TP-gt + TP-comp_FP-gt) | +| gt_matrix | Base GT x Comp GT Matrix of all Base calls' best, TP match | +| weighted | Metrics weighed by variant sequence/size similarity | + +The `gt_matrix` is a table. For example: +```json +"gt_matrix": { + "(0, 1)": { + "(0, 1)": 500, + "(1, 1)": 10 + }, + "(1, 1)": { + "(1, 1)": 800, + "(0, 1)": 20 + } +} +``` +Represents -> +``` +comp (0,1) (1,1) +base +(0,1) 500 10 +(1,1) 20 800 +``` + +Added annotations +----------------- +The output vcfs are annotated with INFO fields and then sorted, compressed, and indexed inside of the output directory. + +| Anno | Definition | +|-------------------|-----------------------------------------------------------------------------------------------------------------| +| TruScore | Truvari score for similarity of match. 
`((pctseq + pctsize + pctovl) / 3 * 100)` | +| PctSeqSimilarity | Pct sequence similarity between this variant and its closest match | +| PctSizeSimilarity | Pct size similarity between this variant and its closest match | +| PctRecOverlap | Percent reciprocal overlap percent of the two calls | +| StartDistance | Distance of the base call's start from comparison call's start | +| EndDistance | Distance of the base call's end from comparison call's end | +| SizeDiff | Difference in size of base and comp calls | +| GTMatch | Base/comp calls' Genotypes match | +| MatchId | Id to help tie base/comp calls together {chunkid}.{baseid}.{compid} See [[MatchIds wiki\|MatchIds]] for details. | + + +Refining bench output +===================== +As described in the [[refine wiki|refine]], a limitation of Truvari bench is 1-to-1 variant comparison. However, `truvari refine` can harmonize the variants to give them more consistent representations. A bed file named `candidate.refine.bed` is created by `truvari bench` and holds a set of regions which may benefit from refinement. To use it, simply run +```bash +truvari bench -b base.vcf.gz -c comp.vcf.gz -o result/ +truvari refine --regions result/candidate.refine.bed \ + --reference reference.fasta \ + --recount --use-region-coords \ + result/ +``` +See [[refine wiki|refine]] for details. + +Comparing Sequences of Variants +=============================== + +Truvari has implemented two approaches to compare variant sequences. The default comparison is called 'unroll'. Optionally, a `--reference` can be provided and Truvari will use the reference context of a pair of variants for comparison. + +## Unroll +The method of giving a pair of calls the same reference context can be achieved using an 'unroll' method. For a formal description, see [this gist](https://gist.github.com/ACEnglish/1e7421c46ee10c71bee4c03982e5df6c). 
+ +The main idea is that in order to move variants upstream/downstream, the reference sequence flanking the variant will need to be moved downstream/upstream respectively. Or, to say this another way, we can think of the alternate sequences as being circular instead of linear. This means that in order to move the variant e.g. 1bp downstream for an INS, we could remove the first base from the ALT and append it to the end. So in the 'ab' example used to describe "Reference context" below, we only need to unroll the insertion at a position by the distance between it and another variant e.g. the INS `ab` at POS 2 becomes identical to the INS `ba` at POS 1 by rolling `2-1 = 1` bases from the start to the end. + +This unroll method has a number of benefits and a couple of considerations, including: +* not needing a `--reference` for comparison, which saves I/O time +* increasing the number of correctly matching SVs +* decreasing the number of 'suspect' matches in smaller size regimes +* providing a simpler pattern between PctSizeSimilarity and PctSeqSimilarity + +## Reference context +For the reference context method, consider a hypothetical tandem repeat expansion of the reference sequence 'AB'. Here, we'll represent the 'insertion' sequence as lower-case 'ab', though it should be considered equivalent to 'AB'. Three equally valid descriptions of this +variant would be: + +```text +#POS INS Haplotype + 0 ab abAB + 1 ba AbaB + 2 ab ABab +``` + +Therefore, to compare the sequence similarity, Truvari builds the haplotypes over the range of a pair of calls' +`min(starts):max(ends)` before making the sequence change introduced by the variants. In python, this line +looks like: + +``` python +hap1_seq = ref.get_seq(a1_chrom, start + 1, a1_start).seq + a1_seq + ref.get_seq(a1_chrom, a1_end + 1, end).seq +``` + +Where `a1_seq` is the longer of the REF or ALT allele. + +## SVs without sequences + +If the base or comp vcfs do not have sequence resolved calls (e.g.
`<DEL>`), simply set `--pctseq=0` to turn off +sequence comparison. The `--reference` does not need to be provided when not using sequence comparison. If +`--pctseq != 0` and an unresolved SV is encountered, a warning will be raised and the variant will not be compared. + +Controlling the number of matches +================================= + +How many matches a variant is allowed to participate in is controlled by the `--pick` parameter. The available pickers are `single`, `ac`, and `multi`. + +* `single` (the default option) allows each variant to participate in up to one match. +* `ac` uses the genotype allele count to control how many matches a variant can have. This means a homozygous alternate variant can participate in two matches (its GT is 1/1 so AC=2). A heterozygous variant can only participate in one match (GT 0/1, AC=1). And, a homozygous reference variant cannot be matched. Note that missing genotypes are considered reference alleles and do not add to the AC e.g. (GT ./1, AC=1). +* `multi` variants can participate in all matches available. + +As an example, imagine we have three variants in a pVCF with two samples we want to compare. + +``` +CHROM POS ID REF ALT base comp +chr20 17785968 ID1 A ACGCGCGCGCG 1/1 1/0 +chr20 17785968 ID2 A ACGCGCGCGCGCG 0/0 0/1 +chr20 17785969 ID3 C CGCGCGCGCGCGC 0/0 1/1 +``` + +To compare samples inside the same vcf, we would use the command: +```bash +truvari bench -b input.vcf.gz -c input.vcf.gz -o output/ --bSample base --cSample comp --no-ref a +``` + +This VCF produces different results depending on the `--pick` parameter + +| Parameter | ID1 State | ID2 State | ID3 State | +|-----------|-----------|-----------|-----------| +| single | TP | FP | FP | +| ac | TP | TP | FP | +| multi | TP | TP | TP | + +--dup-to-ins +============ + +Most SV benchmarks only report DEL and INS SVTYPEs. The flag `--dup-to-ins` will interpret SVs with SVTYPE == DUP to SVTYPE == INS. Note that DUPs generally aren't sequence resolved (i.e.
the ALT isn't a sequence) like INS. Therefore, `--dup-to-ins` typically should be used without sequence comparison via `--pctseq 0` + +--sizemin and --sizefilt +======================== + +`--sizemin` is the minimum size of a base call to be considered. + +`--sizefilt` is the minimum size of a comparison call that will be matched to base calls. It can +be less than `sizemin` for edge case variants. + +For example: Imagine `sizemin` is set at 50 and `sizefilt` at 30, and a 50bp base call is 98% similar to a 49bp comparison +call at the same position. + +These two calls could be considered matching. However, if we removed comparison calls less than `sizemin`, +we'd incorrectly classify the 50bp base call as a false negative. Instead, we allow comparison calls between `[sizefilt,sizemin)` to find matches. + +This has the side effect of artificially inflating specificity. For example, if that same 49bp call described +above were below the similarity threshold, it would not be classified as a FP since it is below the `sizemin` +threshold. So we're giving the call a better chance to be useful and less chance to be detrimental +to final statistics. + +Include Bed & VCF Header Contigs +================================ + +If an `--includebed` is provided, only base and comp calls contained within the defined regions are used +for comparison. This is similar to pre-filtering your base/comp calls using: + +```bash +(zgrep "#" my_calls.vcf.gz && bedtools intersect -u -a my_calls.vcf.gz -b include.bed) | bgzip > filtered.vcf.gz +``` + +with the exception that Truvari requires the start and the end to be contained in the same includebed region +whereas `bedtools intersect` does not. + +If an `--includebed` is not provided, the comparison is restricted to only the contigs present in the base VCF +header. Therefore, any comparison calls on contigs not in the base calls will not be counted toward summary +statistics and will not be present in any output vcfs. 
+ +Extending an Include Bed +------------------------ +The option `--extend` extends the regions of interest (set by the `--includebed` argument) by the given number of bases on each side, allowing base variants to match comparison variants that are just outside of the original region. If a comparison variant is in the extended regions it can potentially match a base variant that is in the original regions, turning it into a TP. Comparison variants in the extended regions that don't have a match are not counted as FP. This strategy is similar to the one implemented for size matching where only the base variants longer than sizemin (equal to 50 by default) are considered, but they are allowed to match shorter comparison variants sizefilt (30bp by default) or longer. + +See this [discussion](https://github.com/ACEnglish/truvari/discussions/99) for details. + +Methodology +=========== +Here is a high-level pseudocode description of the steps Truvari bench conducts to compare the two VCFs. +``` +* zip the Base and Comp calls together in sorted order +* create chunks of all calls overlapping within ±`--chunksize` basepairs +* make a |BaseCall| x |CompCall| match matrix for each chunk +* build a Match for each call pair in the chunk - annotate as TP if >= all thresholds +* if the chunk has no Base or Comp calls +** return them all as FNs/FPs +* use `--pick` method to sort and annotate variants with their best match +``` +![](https://github.com/acenglish/truvari/blob/develop/imgs/TruvariBenchMethod.png) \ No newline at end of file diff --git a/docs/v4.2.1/collapse.md b/docs/v4.2.1/collapse.md new file mode 100644 index 00000000..f660f340 --- /dev/null +++ b/docs/v4.2.1/collapse.md @@ -0,0 +1,163 @@ +`collapse` is Truvari's approach to SV merging. After leveraging `bcftools` to merge VCFs, `truvari collapse` can then iterate over the calls and create clusters of SVs that match over the [provided thresholds](https://github.com/spiralgenetics/truvari/wiki/bench#matching-parameters).
This is also useful when removing potentially redundant calls within a single sample. + +Example +======= +To start, we merge multiple VCFs (each with their own sample) and ensure there are no multi-allelic entries via: +```bash +bcftools merge -m none one.vcf.gz two.vcf.gz | bgzip > merge.vcf.gz +``` + +This will `paste` SAMPLE information between vcfs when calls have the exact same chrom, pos, ref, and alt. +For example, consider two vcfs: + + >> one.vcf: + chr1 1 ... GT 0/1 + chr1 5 ... GT 1/1 + >> two.vcf: + chr1 1 ... GT 1/1 + chr1 7 ... GT 0/1 + +`bcftools merge` creates: + + >> merge.vcf: + chr1 1 ... GT 0/1 1/1 + chr1 5 ... GT 1/1 ./. + chr1 7 ... GT ./. 0/1 + +This VCF can then be collapsed to allow 'fuzzier' matching than the exact merge just performed. + +```bash +truvari collapse -i merge.vcf.gz -o truvari_merge.vcf -c truvari_collapsed.vcf -f /path/to/reference.fa +``` + +For example, if we collapsed our example merge.vcf by matching any calls within 3bp, we'd create: + + >> truvari_merge.vcf + chr1 1 ... GT 0/1 1/1 + chr1 5 ... GT 1/1 0/1 + >> truvari_collapsed.vcf + chr1 7 ... GT ./. 0/1 + +--choose behavior +================= +When collapsing, the default `--choose` behavior is to take the first variant from a cluster to +be written to the output while the others will be placed in the collapsed output. +Other choosing options are `maxqual` (the call with the highest quality score) or `common` (the call with the highest minor allele count). + +Samples with no genotype information in the kept variant will be filled by the first +collapsed variant containing genotype information. + +--gt +==== +For some results, one may not want to collapse variants with conflicting genotypes from a single sample. With the `--gt all` parameter, variants which are present (non `0/0` or `./.`) in the same sample are not collapsed. With the `--gt het` parameter, only variants which are both heterozygous in a sample (e.g. `0/1` and `0/1`) are prevented from collapsing.
The `--gt het` is useful for some SV callers which will redundantly call variants and typically genotype them all as `1/1`. + +--intra +======= +When a single sample is run through multiple SV callers, one may wish to consolidate those results. After the `bcftools merge` of the VCFs, there will be one SAMPLE column per-input. With `--intra`, collapse will consolidate the sample information so that only a single sample column is present in the output. Since the multiple callers may have different genotypes or other FORMAT fields with conflicting information, `--intra` takes the first column from the VCF, then second, etc. For example, if we have an entry with: +``` +FORMAT RESULT1 RESULT2 +GT:GQ:AD ./.:.:3,0 1/1:20:0,30 +``` +The `--intra` output would be: +``` +FORMAT RESULT1 +GT:GQ:AD 1/1:20:3,0 +``` +As you can see in this example, 1) The first sample name is the only one preserved. 2) conflicting FORMAT fields can be consolidated in a non-useful way (here the AD of `3,0` isn't informative to a `1/1` genotype). We're working to provide an API to help users write custom intra-sample consolidation scripts. + +--hap mode +========== +When using `--hap`, we assume phased variants from a single individual. Only the +single best matching call from the other haplotype will be collapsed, +and the consolidated genotype will become 1/1 + +For example, if we collapse anything at the same position: + + chr1 1 .. GT 0|1 + chr1 1 .. GT 1|0 + chr1 2 .. GT 1|0 + +will become: + + chr1 1 .. GT 1/1 + chr1 2 .. GT 1|0 + +--chain mode +============ +Normally, every variant in a set of variants that are collapsed together matches every other variant in the set. However, when using `--chain` mode, we allow 'transitive matching'. This means that all variants match to only at least one other variant in the set. 
In situations where a 'middle' variant has two matches that don't match each other, without `--chain` the locus will produce two variants whereas using `--chain` will produce one. +For example, if we have + + chr1 5 .. + chr1 7 .. + chr1 9 .. + +When we collapse anything within 2bp of each other, without `--chain`, we output: + + chr1 5 .. + chr1 9 .. + +With `--chain`, we would collapse `chr1 9` as well, producing + + chr1 5 .. + +Annotations +=========== +`collapse` produces two files. The output file has kept variants along with unanalyzed (< sizemin) variants. The collapsed file contains the variants that were collapsed into the kept variants. + +The output file has three annotations added to the `INFO`. +- `CollapseId` - Identifier of the variant when comparing to the collapse outputs. +- `NumCollapsed` - Number of variants collapsed into this variant +- `NumConsolidated` - Number of samples' genotypes consolidated into this call's genotypes + +The collapsed file has all of the annotations added by [[bench|bench#definition-of-annotations-added-to-tp-vcfs]]. Note that `MatchId` is tied to the output file's `CollapseId`. See [MatchIds](https://github.com/spiralgenetics/truvari/wiki/MatchIds) for details.
+ +``` +usage: collapse [-h] -i INPUT [-o OUTPUT] [-c COLLAPSED_OUTPUT] [-f REFERENCE] [-k {first,maxqual,common}] [--debug] + [-r REFDIST] [-p PCTSIM] [-B MINHAPLEN] [-P PCTSIZE] [-O PCTOVL] [-t] [--use-lev] [--hap] [--chain] + [--no-consolidate] [--null-consolidate NULL_CONSOLIDATE] [-s SIZEMIN] [-S SIZEMAX] [--passonly] + +Structural variant collapser + +Will collapse all variants within sizemin/max that match over thresholds + +options: + -h, --help show this help message and exit + -i INPUT, --input INPUT + Comparison set of calls + -o OUTPUT, --output OUTPUT + Output vcf (stdout) + -c COLLAPSED_OUTPUT, --collapsed-output COLLAPSED_OUTPUT + Where collapsed variants are written (collapsed.vcf) + -f REFERENCE, --reference REFERENCE + Indexed fasta used to call variants + -k {first,maxqual,common}, --keep {first,maxqual,common} + When collapsing calls, which one to keep (first) + --debug Verbose logging + --hap Collapsing a single individual's haplotype resolved calls (False) + --chain Chain comparisons to extend possible collapsing (False) + --no-consolidate Skip consolidation of sample genotype fields (True) + --null-consolidate NULL_CONSOLIDATE + Comma separated list of FORMAT fields to consolidate into the kept entry by taking the first non-null + from all neighbors (None) + +Comparison Threshold Arguments: + -r REFDIST, --refdist REFDIST + Max reference location distance (500) + -p PCTSIM, --pctsim PCTSIM + Min percent allele sequence similarity. Set to 0 to ignore. 
(0.95) + -B MINHAPLEN, --minhaplen MINHAPLEN + Minimum haplotype sequence length to create (50) + -P PCTSIZE, --pctsize PCTSIZE + Min pct allele size similarity (minvarsize/maxvarsize) (0.95) + -O PCTOVL, --pctovl PCTOVL + Min pct reciprocal overlap (0.0) for DEL events + -t, --typeignore Variant types don't need to match to compare (False) + --use-lev Use the Levenshtein distance ratio instead of edlib editDistance ratio (False) + +Filtering Arguments: + -s SIZEMIN, --sizemin SIZEMIN + Minimum variant size to consider for comparison (50) + -S SIZEMAX, --sizemax SIZEMAX + Maximum variant size to consider for comparison (50000) + --passonly Only consider calls with FILTER == PASS +``` \ No newline at end of file diff --git a/docs/v4.2.1/consistency.md b/docs/v4.2.1/consistency.md new file mode 100644 index 00000000..17ba23b7 --- /dev/null +++ b/docs/v4.2.1/consistency.md @@ -0,0 +1,179 @@ + +In addition to looking at performance of a single set of variation against a baseline, one may wish to measure the consistency between multiple sets of variation. The tool `truvari consistency` can automatically create that result. + +Running +======= + +``` +usage: consistency [-h] [-d] [-j] [-o OUTPUT] VCFs [VCFs ...] + +Over multiple vcfs, calculate their intersection/consistency. + +Calls will match between VCFs if they have a matching key of: + CHROM POS ID REF ALT + +positional arguments: + VCFs VCFs to intersect + +optional arguments: + -h, --help show this help message and exit + -d, --no-dups Disallow duplicate SVs + -j, --json Output report in json format + -o OUTPUT, --output OUTPUT + Write tsv of variant keys and their flag +``` + +Example +======= + +```bash +truvari consistency fileA.vcf fileB.vcf fileC.vcf +``` + +Matching Entries +================ + +VCF entries will be considered matching if and only if they have an exact same key of `CHROM POS ID REF ALT`. 
Because of this stringency, it is recommended that you compare the tp-base.vcf or fn.vcf results from each individual VCF's Truvari output. The key and flags can be written with the `--output` option. + +Duplicates +========== + +If there are VCFs with duplicate keys, they are handled by appending a suffix (e.g. `.1`) to the key. If you'd like to ignore duplicates, just add `--no-dups`. + +Output Report +============= + +Below is an example report: + +```text +# +# Total 5534 calls across 3 VCFs +# +#File NumCalls +fileA.vcf 4706 +fileB.vcf 4827 +fileC.vcf 4882 +# +# Summary of consistency +# +#VCFs Calls Pct +3 3973 71.79% +2 935 16.90% +1 626 11.31% +# +# Breakdown of VCFs' consistency +# +#Group Total TotalPct PctOfFileCalls +111 3973 71.79% 84.42% 82.31% 81.38% +011 351 6.34% 7.27% 7.19% +101 308 5.57% 6.54% 6.31% +110 276 4.99% 5.86% 5.72% +001 250 4.52% 5.12% +010 227 4.10% 4.70% +100 149 2.69% 3.17% +``` + +At the top we see that we compared 5,534 unique variants between the 3 files, with fileC.vcf having the most calls at 4,882. + +The "Summary of consistency" shows us that 3,973 (71.79%) of all the calls are shared between the 3 VCFs, while 626 (11.31%) are only found in one of the VCFs. + +Reading the "Breakdown of VCFs' consistency", a `Group` is a unique key for presence (1) or absence (0) of a call within each of the listed `#Files`. For example: `Group 111` is calls present in all VCFs; `Group 011` is calls present in only the 2nd and 3rd VCFs (i.e. fileB.vcf and fileC.vcf). + +We see that `Group 101` has calls belonging to the 1st and 3rd `#Files` (i.e. fileA.vcf and fileC.vcf). This group has a total of 308 calls that intersect, or 5.57% of all calls in all VCFs. This 308 represents 6.54% of calls in fileA.vcf and 6.31% of calls in fileC.vcf. + +Finally, we see that fileA.vcf has the fewest calls unique to it on the `Group 100` line. + +Json +==== +Below is a consistency report in json format.
+```json +{ + "vcfs": [ + "repo_utils/test_files/variants/input1.vcf.gz", + "repo_utils/test_files/variants/input2.vcf.gz", + "repo_utils/test_files/variants/input3.vcf.gz" + ], + "total_calls": 3513, + "num_vcfs": 3, + "vcf_counts": { + "repo_utils/test_files/variants/input1.vcf.gz": 2151, + "repo_utils/test_files/variants/input2.vcf.gz": 1783, + "repo_utils/test_files/variants/input3.vcf.gz": 2065 + }, + "shared": [ + { + "vcf_count": 3, + "num_calls": 701, + "call_pct": 0.1995445488186735 + }, + { + "vcf_count": 2, + "num_calls": 1084, + "call_pct": 0.3085681753487048 + }, + { + "vcf_count": 1, + "num_calls": 1728, + "call_pct": 0.4918872758326217 + } + ], + "detailed": [ + { + "group": "111", + "total": 701, + "total_pct": 0.1995445488186735, + "repo_utils/test_files/variants/input1.vcf.gz": 0.32589493258949326, + "repo_utils/test_files/variants/input2.vcf.gz": 0.393157599551318, + "repo_utils/test_files/variants/input3.vcf.gz": 0.3394673123486683 + }, + { + "group": "001", + "total": 645, + "total_pct": 0.18360375747224594, + "repo_utils/test_files/variants/input1.vcf.gz": 0, + "repo_utils/test_files/variants/input2.vcf.gz": 0, + "repo_utils/test_files/variants/input3.vcf.gz": 0.31234866828087166 + }, + { + "group": "100", + "total": 598, + "total_pct": 0.17022487902077996, + "repo_utils/test_files/variants/input1.vcf.gz": 0.2780102278010228, + "repo_utils/test_files/variants/input2.vcf.gz": 0, + "repo_utils/test_files/variants/input3.vcf.gz": 0 + }, + { + "group": "101", + "total": 487, + "total_pct": 0.1386279533162539, + "repo_utils/test_files/variants/input1.vcf.gz": 0.22640632264063226, + "repo_utils/test_files/variants/input2.vcf.gz": 0, + "repo_utils/test_files/variants/input3.vcf.gz": 0.2358353510895884 + }, + { + "group": "010", + "total": 485, + "total_pct": 0.13805863933959578, + "repo_utils/test_files/variants/input1.vcf.gz": 0, + "repo_utils/test_files/variants/input2.vcf.gz": 0.27201346045989905, + "repo_utils/test_files/variants/input3.vcf.gz": 
0 + }, + { + "group": "110", + "total": 365, + "total_pct": 0.10389980074010817, + "repo_utils/test_files/variants/input1.vcf.gz": 0.1696885169688517, + "repo_utils/test_files/variants/input2.vcf.gz": 0.2047111609646663, + "repo_utils/test_files/variants/input3.vcf.gz": 0 + }, + { + "group": "011", + "total": 232, + "total_pct": 0.06604042129234272, + "repo_utils/test_files/variants/input1.vcf.gz": 0, + "repo_utils/test_files/variants/input2.vcf.gz": 0.13011777902411667, + "repo_utils/test_files/variants/input3.vcf.gz": 0.11234866828087167 + } + ] +} +``` \ No newline at end of file diff --git a/docs/v4.2.1/divide.md b/docs/v4.2.1/divide.md new file mode 100644 index 00000000..3bbad3cb --- /dev/null +++ b/docs/v4.2.1/divide.md @@ -0,0 +1,58 @@ +Divide a VCF into independent shards. + +Unfortunately, `pysam.VariantRecord` objects aren't pickle-able. This means that if we wanted to have Truvari leverage python's multiprocessing we'd need to make a custom VCF parser. However, the command `truvari divide` allows us to take an input VCF and divide it into multiple independent parts (or shards) which can then be processed over multiple processes. + +`truvari divide` works by parsing a VCF and splitting it into multiple, smaller sub-VCFs. If any variants are within `--buffer` base-pairs, they're output to the same sub-VCF. This allows variants in the same region which would need to be compared to one-another (see `--refdist`) to stay in the same sub-VCF. The `--min` parameter allows us to control the minimum number of variants per sub-VCF so that we don't make too many tiny VCFs. Once the sub-VCFs are created, we can process each independently through whatever truvari command. + +For example, let's say we want to run `truvari collapse` on a very large VCF with many variants and many samples. 
First, we divide the VCF: + +```bash +truvari divide big_input.vcf.gz sub_vcfs_directory/ +``` + +Inside of `sub_vcfs_directory/` we'll have multiple VCFs, which we can process with a simple bash script: + +```bash +NJOBS=$(nproc) # use all processors by default +mkdir -p output_vcfs/ +mkdir -p collap_vcfs/ +mkdir -p logs/ + +for in_vcf in sub_vcfs_directory/*.vcf.gz +do + # Setup file names + base_name=$(basename $in_vcf) + base_name=${base_name%.vcf.gz} + output_name=output_vcfs/${base_name}.vcf + collap_name=collap_vcfs/${base_name}.vcf + log_name=logs/${base_name}.log + # Run the command and send it to the background + truvari collapse -i $in_vcf -o $output_name -c $collap_name -f reference.fa &> ${log_name} & + # If too many jobs are running, wait for one to finish + while [ $( jobs | wc -l ) -ge ${NJOBS} ] + do + sleep 5 + done +done +``` + +Obviously the logs and `while` loop are tricks for running on a single machine. If you have access to a cluster, I'm sure you can imagine how to create/submit the commands. + +``` +usage: divide [-h] [-b BUFFER] [-m MIN] [--no-compress] [-T THREADS] VCF DIR + +Divide a VCF into independent parts + +positional arguments: + VCF VCF to split + DIR Output directory to save parts + +options: + -h, --help show this help message and exit + -b BUFFER, --buffer BUFFER + Buffer to make mini-clusters (1000) + -m MIN, --min MIN Minimum number of entries per-vcf (100) + --no-compress Don't attempt to compress/index sub-VCFs + -T THREADS, --threads THREADS + Number of threads for compressing/indexing sub-VCFs (1) +``` \ No newline at end of file diff --git a/docs/v4.2.1/phab.md b/docs/v4.2.1/phab.md new file mode 100644 index 00000000..bdcaec9e --- /dev/null +++ b/docs/v4.2.1/phab.md @@ -0,0 +1,157 @@ +Introduction +------------ + +Truvari's comparison engine can match variants using a wide range of thresholds. However, some alleles can produce radically different variant representations. 
We could dramatically lower our thresholds to identify the match, but this would cause variants from non-identical alleles to be falsely matched. + +This problem is easiest to conceptualize in the case of 'split' variants: imagine a pipeline calls a single 100bp DEL that can also be represented as two 50bp DELs. To match these variants, we would need to loosen our thresholds to `--pick multi --pctsim 0.50 --pctsize 0.50`. Plus, these thresholds leave no margin for error. If the variant caller erroneously deleted an extra base to make a 101bp DEL we would have to lower our thresholds even further. These thresholds are already too low because there's plenty of distinct alleles with >= 50% homology. + +So how do we deal with inconsistent representations? In an ideal world, we would simply get rid of them by harmonizing the variants. This is the aim of `truvari phab`. + +`truvari phab` is designed to remove variant representation inconsistencies through harmonization. By reconstructing haplotypes from variants, running multiple-sequence alignment of the haplotypes along with the reference, and then recalling variants, we expect to remove discordance between variant representations and simplify the work required to perform variant comparison. + +Requirements +------------ +Since `truvari phab` uses mafft v7.505 via a command-line call, it expects it to be in the environment path. Download [mafft](https://mafft.cbrc.jp/alignment/software/) and have its executable available in the `$PATH`. + +Alternatively, you can use the Truvari [Docker container](Development#docker) which already has mafft ready for use. + +Also, you can use wave front aligner (pyWFA) or partial order alignment (pyabpoa). While wfa is the fastest approach, it will independently align haplotypes and therefore may produce less parsimonious alignments. And while poa is more accurate than wfa and faster than mafft, it is less accurate than mafft. 
+ +Example +------- +As an example, we'll use Truvari's test files in `repo_utils/test_files/phab*` which were created from real data over a tandem repeat at GRCh38 chr1:26399065-26401053 and translated to a small test genome with coordinates chr1:1-1988. + +* `phab_base.vcf.gz` - an 86 sample squared-off pVCF +* `phab_comp.vcf.gz` - a single sample's VCF +* `phab_ref.fa` - a subset of the GRCh38 reference + +This dataset is interesting because the `HG002` sample in `phab_base.vcf.gz` uses the same sequencing experiment ([HPRC](https://github.com/human-pangenomics/HPP_Year1_Assemblies)) as the sample `syndip` in `phab_comp.vcf.gz`, but processed with a different pipeline. And as we will see, the pipeline can make all the difference. + +To start, let's use `truvari bench` to see how similar the variant calls are in this region. +```bash +truvari bench --base phab_base.vcf.gz \ + --comp phab_comp.vcf.gz \ + --sizemin 1 --sizefilt 1 \ + --bSample HG002 \ + --cSample syndip \ + --no-ref a \ + --output initial_bench +``` +This will compare all variants greater than 1bp ( `-S 1 -s 1` which includes SNPs) from the `HG002` sample to the `syndip` sample. We're also excluding any uncalled or reference homozygous sites with `--no-ref a`. The report in `initial_bench/summary.txt` shows: +```json +{ + "TP-base": 5, + "TP-comp": 5, + "FP": 2, + "FN": 22, + "precision": 0.7142857142857143, + "recall": 0.18518518518518517, + "f1": 0.2941176470588235, +} +``` + +These variants are pretty poorly matched, especially considering the `HG002` and `syndip` samples are using the same sequencing experiment. We can also inspect the `initial_bench/fn.vcf.gz` and see a lot of these discordant calls are concentrated in a 200bp window. Let's use `truvari phab` to harmonize the variants in this region. 
+```bash +truvari phab --base phab_base.vcf.gz \ + --comp phab_comp.vcf.gz \ + --bSample HG002 \ + --cSample syndip \ + --reference phab_ref.fa \ + --region chr1:700-900 \ + -o harmonized.vcf.gz +``` + +In our `harmonized.vcf.gz` we can see there are now only 9 variants. Let's run `truvari bench` again on the output to see how well the variants match after being harmonized. + +```bash +truvari bench -b harmonized.vcf.gz \ + -c harmonized.vcf.gz \ + -S 1 -s 1 \ + --no-ref a \ + --bSample HG002 \ + --cSample syndip \ + -o harmonized_bench/ +``` +Looking at `harmonized_bench/summary.txt` shows: +```json +{ + "TP-base": 8, + "TP-comp": 8, + "FP": 0, + "FN": 0, + "precision": 1.0, + "recall": 1.0, + "f1": 1.0, +} +``` +Now there is no difference between our two sets of variants in this region. + +For this variant call-set, `truvari phab` makes `truvari bench` overkill since the variants create identical haplotypes. In fact, we can benchmark simply by counting the genotypes. +```bash +$ bcftools query -f "[%GT ]\n" harmonized.vcf.gz | sort | uniq -c + 1 0/1 1/0 + 1 1/0 0/1 + 6 1/1 1/1 +``` +(We can ignore the phasing differences (`0/1` vs. `1/0`). These pipelines reported the parental alleles in a different order.) + +MSA +--- + +If you read the `truvari phab --help`, you may have noticed that the `--comp` VCF is optional. This is by design so that we can also harmonize the variants inside a single VCF. By performing a multiple-sequence alignment across samples, we can better represent variation across a population. 
To see this in action, let's run `phab` on all 86 samples in the `repo_utils/test_files/phab_base.vcf.gz` +```bash +truvari phab -b phab_base.vcf.gz \ + -f phab_ref.fa \ + -r chr1:700-900 \ + -o msa_example.vcf.gz +``` + +As a simple check, we can count the number of variants before/after `phab`: +```bash +bcftools view -r chr1:700-900 phab_base.vcf.gz | grep -vc "#" +bcftools view -r chr1:700-900 msa_example.vcf.gz | grep -vc "#" +``` +The `160` original variants given to `phab` became just `60`. + +Better yet, these fewer variants occur on fewer positions: +```bash + +bcftools query -r chr1:700-900 -f "%POS\n" phab_base.vcf.gz | sort | uniq | wc -l +bcftools query -r chr1:700-900 -f "%POS\n" msa_example.vcf.gz | sort | uniq | wc -l +``` +This returns that the variants were over `98` positions but now sit at just `16` + +We can also observe changes in the allele frequency after running `phab`: +```bash +bcftools +fill-tags -r chr1:700-900 phab_base.vcf.gz | bcftools query -f "%AC\n" | sort -n | uniq -c +bcftools +fill-tags -r chr1:700-900 msa_example.vcf.gz | bcftools query -f "%AC\n" | sort -n | uniq -c +``` +The allele-count (AC) shows a 15% reduction in singletons and removal of all variants with an AF > 0.50 which would have suggested the reference holds a minor allele. +```txt + original phab + # AC # AC + 39 1 33 1 + 18 2 4 2 + 3 3 2 3 + 3 4 2 4 + 2 5 1 5 + ... + 3 69 1 35 + 1 89 1 40 + 8 109 1 53 + 1 132 1 56 + 1 150 1 81 +``` + +(TODO: pull the adotto TR region annotations and run this example through `truvari anno trf`. I bet we'll get a nice spectrum of copy-diff of the same motif in the `phab` calls.) + +`--align` +========= +By default, `phab` will make the haplotypes and use an external call `mafft` to perform a multiple sequence alignment between them and the reference to harmonize the variants. While this is the most accurate alignment technique, it isn't fast. 
If you're willing to sacrifice some accuracy for a huge speed increase, you can use `--align wfa`, which also doesn't require an external tool. Another option is `--align poa` which performs a partial order alignment which is faster than mafft but less accurate and slower than wfa but more accurate. However, `poa` appears to be non-deterministic which is not ideal for some benchmarking purposes. + +Limitations +----------- +* Creating and aligning haplotypes is impractical for very long sequences and maybe practically impossible for entire human chromosomes. Therefore, `truvari phab` is recommended to only be run on sub-regions. +* By giving the variants new representations, variant counts will likely change. +* Early testing on `phab` is on phased variants. While it can run on unphased variants, we can't yet recommend it. If regions contain unphased Hets or overlapping variants, it becomes more difficult to build a consensus sequence. So you can try out unphased variants, but proceed with caution. + diff --git a/docs/v4.2.1/refine.md b/docs/v4.2.1/refine.md new file mode 100644 index 00000000..c43e0c0b --- /dev/null +++ b/docs/v4.2.1/refine.md @@ -0,0 +1,140 @@ +As described in the [[phab|phab]] documentation, a constraint on Truvari `bench` finding matches is that there needs to be some consistency in how the variants are represented. To help automate the process of running Truvari `phab` on a benchmarking result and recomputing benchmarking performance on harmonized variants, we present the tool `refine`. 
+ +Quick Start +=========== + +Basic +----- +After making a `bench` result: +```bash +truvari bench -b base.vcf.gz -c comp.vcf.gz -o result/ +``` +Use `refine` on the `result/` +```bash +truvari refine -r subset.bed -f ref.fa result/ +``` + +Whole genome +------------ +After making a `bench` result: +```bash +truvari bench -b base.vcf.gz -c comp.vcf.gz --includebed hc_regions.bed -o result/ +``` +Use `refine` on the `result/` analyzing only the regions with putative FP/FN that would benefit from harmonization +```bash +truvari refine -R -U -f ref.fa --regions result/candidate.refine.bed result/ +``` + +Tandem Repeats +-------------- +For benchmarks such as the [GIAB TR](https://www.biorxiv.org/content/10.1101/2023.10.29.564632v1), a TR caller may analyze a different subset of regions. In order to avoid unnecessarily penalizing the performance with FNs from unanalyzed regions: + +```bash +truvari bench -b base.vcf.gz -c comp.vcf.gz --includebed hc_regions.bed -o result/ +``` + +Use `refine` on the `result/` analyzing only the `hc_regions.bed` covered by the TR caller's `tool_regions.bed` +``` +truvari refine -f ref.fa --regions tool_regions.bed result/ +``` + +Output +====== +* `refine.variant_summary.json` - result of re-evaluating calls within the specified regions. 
Same structure as [[summary.json|bench#summaryjson]] +* `refine.regions.txt` - Tab-delimited file with per-region variant counts +* `refine.region_summary.json` - Per-region performance metrics +* `phab_bench/` - Bench results on the subset of variants harmonized + +To see an example output, look at [test data](https://github.com/ACEnglish/truvari/tree/develop/answer_key/refine/refine_output_one) + +Using `refine.regions.txt` +========================== +| Column | Description | +| ----------------- | --------------------------------------- | +| chrom | Region's chromosome | +| start | Region's start | +| end | Region's end | +| in_tpbase | Input's True Positive base count | +| in_tp | Input's True Positive comparison count | +| in_fp | Input's false positive count | +| in_fn | Input's false negative count | +| refined | Boolean for if region was re-evaluated | +| out_tpbase | Output's true positive base count | +| out_tp | Output's true positive comparison count | +| out_fn | Output's false negative count | +| out_fp | Output's false positive count | +| state | True/False state of the region | + + +Performance by Regions +====================== + +Because `truvari phab` can alter variant counts during harmonization, one may wish to assess the performance on a per-region basis rather than the per-variant basis. In the `refine.regions.txt`, a column `state` will have a TP/FN/FP value as defined by the following rules: + +```python +false_pos = (data['out_fp'] != 0) +false_neg = (data['out_fn'] != 0) +any_false = false_pos | false_neg + +true_positives = (data['out_tp'] != 0) & (data['out_tpbase'] != 0) & ~any_false + +true_negatives = (data[['out_tpbase', 'out_tp', 'out_fn', 'out_fp']] == 0).all(axis=1) + +baseP = (data['out_tpbase'] != 0) | (data['out_fn'] != 0) +compP = (data['out_tp'] != 0) | (data['out_fp'] != 0) +``` + +This logic has two edge cases to consider. 
1) a region with at least one false-positive and one false-negative will be counted as both a false-positive and a false-negative. 2) Regions within `--refdist` may experience 'variant bleed' where they e.g. have an out_tp, but no other variants because a neighboring region actually contains the corresponding `out_tpbase`. For the first case, we simply count the region twice and set its state in `refine.regions.txt` to "FN,FP". For the second case, we set the state to 'UNK' and ignore it when calculating the region summary. Future versions may figure out exactly how to handle (prevent?) 'UNK' regions. + +These by-region state counts are summarized and written to `refine.region_summary.json`. The definitions of the metrics inside this json are: +| Key | Definition | Formula | +|--------|----------------------------------------------|---------------------------------| +| TP | True Positive region count | | +| TN | True Negative region count | | +| FP | False Positive region count | | +| FN | False Negative region count | | +| base P | Regions with base variant(s) | | +| base N | Regions without base variant(s) | | +| comp P | Regions with comparison variant(s) | | +| comp N | Regions without comparison variant(s) | | +| PPV | Positive Predictive Value (a.k.a. precision) | TP / comp P | +| TPR | True Positive Rate (a.k.a. recall) | TP / base P | +| TNR | True Negative Rate (a.k.a. specificity) | TN / base N | +| NPV | Negative Predictive Value | TN / comp N | +| ACC | Accuracy | (TP + TN) / (base P + base N) | +| BA | Balanced Accuracy | (TPR + TNR) / 2 | +| F1 | f1 score | 2 * ((PPV * TPR) / (PPV + TPR)) | +| UND | Regions with an undetermined state | | + +Even though PPV is synonymous with precision, we use these abbreviated names when dealing with per-region performance in order to help users differentiate from the by-variant performance reports. 
+ +`--align` +========= +By default, Truvari will make the haplotypes and use an external call `mafft` to perform a multiple sequence alignment between them and the reference to harmonize the variants. While this is the most accurate alignment technique, it isn't fast. If you're willing to sacrifice some accuracy for a huge speed increase, you can use `--align wfa`, which also doesn't require an external tool. Another option is `--align poa` which performs a partial order alignment which is faster than mafft but less accurate and slower than wfa but more accurate. However, `poa` appears to be non-deterministic which is not ideal for some benchmarking purposes. + +`--use-original-vcfs` +===================== + +By default, `refine` will use the base/comparison variants from the `bench` results `tp-base.vcf.gz`, `fn.vcf.gz`, `tp-comp.vcf.gz`, and `fp.vcf.gz` as input for `phab`. However, this contains a filtered subset of variants originally provided to `bench` since it removes variants e.g. below `--sizemin` or not `--passonly`. + +With the `--use-original-vcfs` parameter, all of the original calls from the input vcfs are fetched. This parameter is useful in recovering matches in situations when variants in one call set are split into two variants which are smaller than the minimum size analyzed by `bench`. For example, imagine a base VCF with a 20bp DEL, a comp VCF with two 10bp DELs, and `bench --sizemin 20` was used. `--use-original-vcfs` will consider the two 10bp comp variants during phab harmonization with the 20bp base DEL. + + +`--regions` +=========== + +This parameter specifies which regions to re-evaluate. If this is not provided, the original `bench` result's `--includebed` is used. If both `--regions` and `--includebed` are provided, the `--includebed` is subset to only those intersecting `--regions`. + +This parameter is helpful for cases when the `--includebed` is not the same set of regions that a caller analyzes. 
For example, if a TR caller only discovers short tandem repeats (STR), but a benchmark has TRs of all lengths, it isn't useful to benchmark against the non-STR variants. Therefore, you can run `bench` on the full benchmark's regions (`--includebed`), and automatically subset to only the regions analyzed by the caller with `refine --regions`. + +Note that the larger these regions are, the slower MAFFT (used by `phab`) will run. Also, when performing the intersection as described above, there may be edge effects in the reported `refine.variant_summary.json`. For example, if a `--regions` region partially overlaps an `--includebed` region, you may not be analyzing a subset of calls looked at during the original `bench` run. Therefore, the `*summary.json` should be compared with caution. + +`--use-region-coords` +===================== + +When intersecting `--includebed` with `--regions`, use `--regions` coordinates. By default, `refine` will prefer the `--includebed` coordinates. However, the region's coordinates should be used when using the `candidate.refine.bed` to limit analysis to only the regions with putative FP/FN that would benefit from harmonization - for example, when performing whole genome benchmarking. + +`--reference` +============= + +By default, the reference is pulled from the original `bench` result's `params.json`. If a reference wasn't used with `bench`, it must be specified with `refine` as it's used by `phab` to realign variants. \ No newline at end of file diff --git a/docs/v4.2.1/segment.md b/docs/v4.2.1/segment.md new file mode 100644 index 00000000..953e5822 --- /dev/null +++ b/docs/v4.2.1/segment.md @@ -0,0 +1,18 @@ +Segmentation: Normalization of SVs into disjointed genomic regions + +For SVs with a span in the genome (currently only DELs), split the overlaps into disjointed regions. This is an experimental tool that explores the possibility of assisting SV association analysis. 
+ +This tool adds an INFO field `SEGCNT` which holds the number of original SVs that overlap the newly reported region. It also adds a FORMAT field `SEG`, which is the 'allele coverage' per-sample. For example, if a sample has two overlapping heterozygous deletions, the shared region will have `SEG=2`. If the two deletions were homozygous then `SEG=4`. + +In the below example, we have three deletions found across three samples. + +![](https://github.com/spiralgenetics/truvari/blob/develop/imgs/segment_example.png) + +The `segment` added annotations for the regions would then be: +| Region | INFO/SEGCNT | S1/SEG | S2/SEG | S3/SEG | +|--------|-------------|--------|--------|--------| +| A | 1 | 2 | 0 | 0 | +| B | 2 | 2 | 1 | 0 | +| C | 3 | 2 | 2 | 2 | +| D | 2 | 2 | 1 | 0 | +| E | 1 | 0 | 1 | 0 | \ No newline at end of file diff --git a/docs/v4.2.1/stratify.md b/docs/v4.2.1/stratify.md new file mode 100644 index 00000000..353dcab5 --- /dev/null +++ b/docs/v4.2.1/stratify.md @@ -0,0 +1,58 @@ +`stratify` is a helper utility for counting variants within bed regions which is essentially the same as running `bedtools intersect -c`. When working with benchmarking results, there are four vcfs to count (tp-base, tp-comp, fn, fp). Instead of running bedtools four times and collating the results, `stratify` can be given a single `bench` result directory to generate the counts. 
+ +For example: +```bash +$ truvari stratify input.bed bench/ +chrom start end tpbase tp fn fp +chr20 280300 280900 0 0 0 0 +chr20 100000 200000 1 1 0 0 +chr20 642000 642350 1 1 2 1 +``` + +The output from this can then be parsed to generate more details: + +```python +import pandas as pd +import truvari + +df = pd.read_csv("stratify.output.txt", sep='\t') + +# If the input.bed didn't have a header and so we couldn't use the `--header` parameter, we need to name columns +df.columns = ['chrom', 'start', 'end', 'tpbase', 'tp', 'fn', 'fp'] + +# Create the precision, recall, and f1 for each row +metrics = df[["tpbase", "tp", "fn", "fp"]].apply((lambda x: truvari.performance_metrics(*x)), axis=1) + +# metrics is now a DataFrame with a single column of tuples, lets separate them into columns +metrics = pd.DataFrame(metrics.to_list(), columns=["precision", "recall", "f1"]) + +# Extend the dataframe's columns +df = df.join(metrics) +df.head() +``` +Which gives the result: +``` + chrom start end tpbase tp fn fp precision recall f1 +0 chr20 135221 239308 1 1 0 0 1.00 1.00 1.000000 +1 chr20 260797 465632 3 3 3 1 0.75 0.50 0.600000 +2 chr20 465866 622410 1 1 0 0 1.00 1.00 1.000000 +3 chr20 623134 655257 1 1 3 1 0.50 0.25 0.333333 +4 chr20 708338 732041 1 1 1 0 1.00 0.50 0.666667 +``` + +``` +usage: stratify [-h] [-o OUT] [--header] [-w] [--debug] BED VCF + +Count variants per-region in vcf + +positional arguments: + BED Regions to process + VCF Truvari bench result directory or a single VCF + +optional arguments: + -h, --help show this help message and exit + -o OUT, --output OUT Output bed-like file + --header Input regions have header to preserve in output + -w, --within Only count variants contained completely within region boundaries + --debug Verbose logging +``` \ No newline at end of file diff --git a/docs/v4.2.1/vcf2df.md b/docs/v4.2.1/vcf2df.md new file mode 100644 index 00000000..14d9f06c --- /dev/null +++ b/docs/v4.2.1/vcf2df.md @@ -0,0 +1,81 @@ +We enjoy using 
[pandas](https://pandas.pydata.org/)/[seaborn](https://seaborn.pydata.org/) for python plotting, so we've made the command `truvari vcf2df`. This will turn a VCF into a pandas DataFrame and save it to a file using joblib. The resulting DataFrame will always have the columns: +* chrom: variant chromosome +* start: 0-based start from pysam.VariantRecord.start +* end: 0-based end from pysam.VariantRecord.stop +* id : VCF column ID +* svtype : SVTYPE as determined by `truvari.entry_variant_type` +* svlen : SVLEN as determined by `truvari.entry_size` +* szbin : SVLEN's size bin as determined by `truvari.get_sizebin` +* qual : VCF column QUAL +* filter : VCF column FILTER +* is_pass : boolean of if the filter is empty or PASS + +Optionally, `vcf2df` can attempt to pull `INFO` and `FORMAT` fields from the VCF and put each field into the DataFrame as a new column. For FORMAT fields, the VCF header definition's `Number` is considered and multiple columns may be added. For example, the `AD` field, typically holding Allele Depth, has `Number=R`, indicating that there will be one value for each allele. Truvari assumes that all VCFs hold one allele per-line, so there are only 2 alleles described per-line, the reference and alternate allele. Therefore, two columns are added to the DataFrame, `AD_ref` and `AD_alt`, corresponding to the 0th and 1st values from the AD field's list of values. Similarly, for PL (genotype likelihood) with `Number=G`, there are three values and columns are created named `PL_ref`, `PL_het`, `PL_hom`. + +After you've created your benchmarking results with `truvari bench`, you'll often want to plot different views of your results. 
`vcf2df --bench-dir` can parse a truvari output directory's multiple VCF files and add a 'state' column +* state : The truvari state assigned to the variant + * tpbase : Parsed from the tp-base.vcf + * tp : Parsed from the tp-comp.vcf + * fp : Parsed from the fp.vcf + * fn : Parsed from the fn.vcf + +The created DataFrame is saved into a joblib file, which can then be plotted as simply as: +```python +import joblib +import seaborn as sb +import matplotlib.pyplot as plt + +data = joblib.load("test.jl") +p = sb.countplot(data=data[data["state"] == 'tp'], x="szbin", hue="svtype", hue_order=["DEL", "INS"]) +plt.xticks(rotation=45, ha='right') +p.set(title="True Positives by svtype and szbin") +``` +![](https://github.com/spiralgenetics/truvari/blob/develop/imgs/truv2df_example.png) + +This enables concatenation of Truvari results across multiple benchmarking experiments for advanced comparison. For example, imagine there's multiple parameters used for SV discovery over multiple samples. After running `truvari bench` on each of the results with the output directories named to `params/sample/` and each converted to DataFrames with `truvari vcf2df`, we can expand/concatenate the saved joblib DataFrames with: + +```python +import glob +import joblib +import pandas as pd + +files = glob.glob("*/*/data.jl") +dfs = [] +for f in files: + params, sample, frame = f.split('/') + d = joblib.load(f) + d["params"] = params + d["sample"] = sample + dfs.append(d) +df = pd.concat(dfs) +joblib.dump(df, "results.jl") +``` + +To facilitate range queries, PyRanges is helpful. `vcf2df` results can be parsed quickly by pyranges with the command: +```python +result = pyranges.PyRanges(df.rename(columns={'chrom':"Chromosome", "start":"Start", "end":"End"})) +``` + +``` +usage: vcf2df [-h] [-b] [-i] [-f] [-s SAMPLE] [-n] [-S] [-c LVL] [--debug] VCF JL + +Takes a vcf and creates a data frame. 
Can parse a bench output directory + +positional arguments: + VCF VCF to parse + JL Output joblib to save + +optional arguments: + -h, --help show this help message and exit + -b, --bench-dir Input is a truvari bench directory + -i, --info Attempt to put the INFO fields into the dataframe + -f, --format Attempt to put the FORMAT fileds into the dataframe + -s SAMPLE, --sample SAMPLE + SAMPLE name to parse when building columns for --format + -n, --no-prefix Don't prepend sample name to format columns + -S, --skip-compression + Skip the attempt to optimize the dataframe's size + -c LVL, --compress LVL + Compression level for joblib 0-9 (3) + --debug Verbose logging +``` \ No newline at end of file diff --git a/repo_utils/answer_key/ga4gh/ga4gh_norefine_query.vcf.gz b/repo_utils/answer_key/ga4gh/ga4gh_norefine_query.vcf.gz new file mode 100644 index 00000000..9defc9d8 Binary files /dev/null and b/repo_utils/answer_key/ga4gh/ga4gh_norefine_query.vcf.gz differ diff --git a/repo_utils/answer_key/ga4gh/ga4gh_norefine_truth.vcf.gz b/repo_utils/answer_key/ga4gh/ga4gh_norefine_truth.vcf.gz new file mode 100644 index 00000000..906c1e6f Binary files /dev/null and b/repo_utils/answer_key/ga4gh/ga4gh_norefine_truth.vcf.gz differ diff --git a/repo_utils/answer_key/ga4gh/ga4gh_withrefine_query.vcf.gz b/repo_utils/answer_key/ga4gh/ga4gh_withrefine_query.vcf.gz new file mode 100644 index 00000000..ff20f193 Binary files /dev/null and b/repo_utils/answer_key/ga4gh/ga4gh_withrefine_query.vcf.gz differ diff --git a/repo_utils/answer_key/ga4gh/ga4gh_withrefine_truth.vcf.gz b/repo_utils/answer_key/ga4gh/ga4gh_withrefine_truth.vcf.gz new file mode 100644 index 00000000..9233696f Binary files /dev/null and b/repo_utils/answer_key/ga4gh/ga4gh_withrefine_truth.vcf.gz differ diff --git a/repo_utils/answer_key/help.txt b/repo_utils/answer_key/help.txt index 84a38216..468779cc 100644 --- a/repo_utils/answer_key/help.txt +++ b/repo_utils/answer_key/help.txt @@ -1,6 +1,6 @@ usage: truvari [-h] 
CMD ... -Truvari v4.2.0 Structural Variant Benchmarking and Annotation +Truvari v4.2.1 Structural Variant Benchmarking and Annotation Available commands: bench Performance metrics from comparison of two VCFs @@ -13,6 +13,7 @@ Available commands: divide Divide a VCF into independent shards phab Variant harmonization using MSA refine Automated bench result refinement with phab + ga4gh Convert Truvari result to GA4GH version Print the Truvari version and exit positional arguments: diff --git a/repo_utils/answer_key/refine/refine_output_one/refine.regions.txt b/repo_utils/answer_key/refine/refine_output_one/refine.regions.txt index 4c665b54..41bfa0b8 100644 --- a/repo_utils/answer_key/refine/refine_output_one/refine.regions.txt +++ b/repo_utils/answer_key/refine/refine_output_one/refine.regions.txt @@ -43,7 +43,7 @@ chr20 19663366 19663526 2 2 0 0 False 2 2 0 0 TP chr20 20295955 20296565 2 2 2 1 True 3 3 0 0 TP chr20 20320285 20320579 1 1 2 1 True 1 1 0 0 TP chr20 20337200 20337707 1 1 6 2 True 3 3 0 0 TP -chr20 20354570 20358323 7 7 9 1 True 14 14 0 0 TP +chr20 20354570 20358323 7 7 9 1 True 16 16 0 0 TP chr20 20458640 20458937 2 2 0 0 False 2 2 0 0 TP chr20 21120212 21120539 2 2 1 0 True 1 1 0 0 TP chr20 21721174 21721735 2 2 1 0 True 3 3 0 0 TP @@ -133,7 +133,7 @@ chr20 58694862 58695411 2 2 0 0 False 2 2 0 0 TP chr20 59383804 59385235 3 3 2 0 True 4 4 0 0 TP chr20 59442036 59442273 2 2 0 0 False 2 2 0 0 TP chr20 60087391 60087965 2 2 0 0 False 2 2 0 0 TP -chr20 60314330 60315020 2 2 2 0 True 6 6 0 0 TP +chr20 60314330 60315020 2 2 2 0 True 5 5 0 0 TP chr20 60600107 60600320 1 1 0 0 False 1 1 0 0 TP chr20 60702942 60703115 2 2 2 1 True 3 3 0 0 TP chr20 60764031 60764634 2 2 0 0 False 2 2 0 0 TP @@ -155,7 +155,7 @@ chr20 61723833 61724244 1 1 0 0 False 1 1 0 0 TP chr20 61744188 61744654 2 2 1 0 True 2 2 0 0 TP chr20 61783403 61784839 3 3 1 0 True 4 4 0 0 TP chr20 61919676 61921374 2 2 0 0 False 2 2 0 0 TP -chr20 62057573 62059139 1 1 3 1 True 8 8 0 0 TP +chr20 
62057573 62059139 1 1 3 1 True 7 7 0 0 TP chr20 62212844 62213874 4 4 0 0 False 4 4 0 0 TP chr20 62270279 62271094 1 1 3 1 True 4 4 0 0 TP chr20 62317837 62318459 2 2 0 0 False 2 2 0 0 TP diff --git a/repo_utils/answer_key/refine/refine_output_three/refine.region_summary.json b/repo_utils/answer_key/refine/refine_output_three/refine.region_summary.json index 6003cac7..a550c76d 100644 --- a/repo_utils/answer_key/refine/refine_output_three/refine.region_summary.json +++ b/repo_utils/answer_key/refine/refine_output_three/refine.region_summary.json @@ -1,18 +1,18 @@ { - "TP": 17, - "TN": 6, - "FP": 23, + "TP": 19, + "TN": 10, + "FP": 17, "FN": 58, - "base P": 76, - "base N": 16, - "comp P": 85, - "comp N": 7, - "PPV": 0.2, - "TPR": 0.2236842105263158, - "TNR": 0.375, - "NPV": 0.8571428571428571, - "ACC": 0.25, - "BA": 0.2993421052631579, - "F1": 0.2111801242236025, + "base P": 78, + "base N": 14, + "comp P": 81, + "comp N": 11, + "PPV": 0.2345679012345679, + "TPR": 0.24358974358974358, + "TNR": 0.7142857142857143, + "NPV": 0.9090909090909091, + "ACC": 0.31521739130434784, + "BA": 0.47893772893772896, + "F1": 0.2389937106918239, "UND": 0 } \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_three/refine.regions.txt b/repo_utils/answer_key/refine/refine_output_three/refine.regions.txt index ae958d94..8a5068b2 100644 --- a/repo_utils/answer_key/refine/refine_output_three/refine.regions.txt +++ b/repo_utils/answer_key/refine/refine_output_three/refine.regions.txt @@ -1,93 +1,93 @@ chrom start end in_tpbase in_tp in_fn in_fp refined out_tpbase out_tp out_fn out_fp state -chr20 278929 279069 3 3 1 0 False 3 3 1 0 FN -chr20 641912 642420 3 3 2 0 False 3 3 2 0 FN -chr20 2240960 2241290 1 1 5 1 True 1 1 4 1 FN,FP -chr20 4032357 4033228 2 2 2 1 True 1 1 2 1 FN,FP -chr20 5040476 5040477 0 0 0 2 False 0 0 0 2 FP -chr20 5041941 5042268 1 1 1 1 True 2 2 0 0 TP +chr20 278929 279069 2 3 1 0 False 2 3 1 0 FN +chr20 641912 642420 2 3 2 0 False 2 3 2 0 FN 
+chr20 2240960 2241290 0 1 5 1 True 1 1 4 1 FN,FP +chr20 4032357 4033228 2 1 2 1 True 0 0 2 1 FN,FP +chr20 5040476 5040477 0 0 0 0 False 0 0 0 0 TN +chr20 5041941 5042268 1 1 1 0 False 1 1 1 0 FN chr20 7720952 7720968 1 1 1 0 False 1 1 1 0 FN -chr20 8661944 8662119 1 1 3 1 True 2 2 0 0 TP +chr20 8661944 8662119 0 1 2 1 True 2 2 0 0 TP chr20 10802727 10802844 1 1 1 0 False 1 1 1 0 FN -chr20 13848272 13848544 3 3 1 0 False 3 3 1 0 FN -chr20 14862054 14862644 5 5 5 1 True 3 3 1 0 FN -chr20 16257854 16259205 2 2 2 0 False 2 2 2 0 FN +chr20 13848272 13848544 2 2 1 0 False 2 2 1 0 FN +chr20 14862054 14862644 4 4 5 1 True 3 3 1 0 FN +chr20 16257854 16259205 2 2 1 0 False 2 2 1 0 FN chr20 16395201 16395373 3 3 1 0 False 3 3 1 0 FN chr20 17081293 17081365 2 2 1 0 False 2 2 1 0 FN chr20 18209139 18210134 3 3 2 1 True 3 3 0 1 FP -chr20 20296014 20296330 2 2 2 1 True 5 5 0 0 TP -chr20 20320339 20320519 1 1 2 1 True 3 3 0 0 TP +chr20 20296014 20296330 1 1 2 1 True 4 4 0 0 TP +chr20 20320339 20320519 0 1 2 1 True 3 3 0 0 TP chr20 20337285 20337624 1 1 6 2 True 1 1 0 0 TP -chr20 20354912 20355435 3 3 1 0 False 3 3 1 0 FN -chr20 20356530 20357810 4 4 8 1 True 8 8 2 0 FN +chr20 20354912 20355435 2 2 1 0 False 2 2 1 0 FN +chr20 20356530 20357810 2 3 8 1 True 8 8 2 0 FN chr20 21120298 21120461 2 2 1 0 False 2 2 1 0 FN -chr20 21721451 21721646 2 2 1 0 False 2 2 1 0 FN -chr20 22082266 22083905 4 4 3 1 True 3 3 4 1 FN,FP -chr20 23155578 23155857 3 3 2 2 True 3 3 1 1 FN,FP +chr20 21721451 21721646 0 2 1 0 False 0 2 1 0 FN +chr20 22082266 22083905 3 3 3 1 True 3 3 4 1 FN,FP +chr20 23155578 23155857 3 3 1 2 True 3 3 1 1 FN,FP chr20 23560939 23561098 1 1 2 2 True 2 2 0 0 TP chr20 24408073 24408820 3 3 1 0 False 3 3 1 0 FN chr20 24682066 24682125 2 2 1 0 False 2 2 1 0 FN -chr20 25781790 25781791 0 0 0 1 False 0 0 0 1 FP -chr20 32723044 32723045 0 0 1 0 False 0 0 1 0 FN -chr20 34235898 34235981 0 0 2 1 True 2 2 0 0 TP +chr20 25781790 25781791 0 0 0 0 False 0 0 0 0 TN +chr20 32723044 32723045 
0 0 0 0 False 0 0 0 0 TN +chr20 34235898 34235981 0 0 1 1 True 1 1 0 0 TP chr20 35539212 35539582 4 4 1 0 False 4 4 1 0 FN chr20 35580686 35580756 1 1 2 1 True 1 1 2 1 FN,FP chr20 37361785 37361886 2 2 1 0 False 2 2 1 0 FN -chr20 38123799 38124003 2 2 1 4 True 1 1 2 3 FN,FP +chr20 38123799 38124003 0 1 1 4 True 1 1 2 3 FN,FP chr20 38463997 38464344 2 2 2 0 False 2 2 2 0 FN -chr20 41196370 41196495 3 3 1 0 False 3 3 1 0 FN +chr20 41196370 41196495 2 3 1 0 False 2 3 1 0 FN chr20 41257714 41258003 0 0 0 1 False 0 0 0 1 FP -chr20 44764150 44764203 2 2 1 0 False 2 2 1 0 FN -chr20 45600655 45600695 2 2 2 0 False 2 2 2 0 FN -chr20 48449794 48450385 4 4 2 0 False 4 4 2 0 FN +chr20 44764150 44764203 0 1 1 0 False 0 1 1 0 FN +chr20 45600655 45600695 0 2 2 0 False 0 2 2 0 FN +chr20 48449794 48450385 4 4 1 0 False 4 4 1 0 FN chr20 49834182 49834469 2 2 1 0 False 2 2 1 0 FN -chr20 50775646 50775832 1 1 2 1 True 0 0 0 0 TN -chr20 51953819 51953820 0 0 0 1 False 0 0 0 1 FP -chr20 53204099 53204252 2 2 2 1 True 0 0 0 0 TN +chr20 50775646 50775832 0 0 1 1 True 0 0 0 0 TN +chr20 51953819 51953820 0 0 0 0 False 0 0 0 0 TN +chr20 53204099 53204252 0 1 2 0 False 0 1 2 0 FN chr20 55624808 55625652 6 6 6 0 False 6 6 6 0 FN chr20 55627638 55628305 7 7 4 0 False 7 7 4 0 FN -chr20 55944272 55945175 2 2 2 1 True 1 1 2 0 FN -chr20 56280541 56281913 4 4 5 1 True 4 4 2 1 FN,FP +chr20 55944272 55945175 2 2 1 1 True 1 1 2 0 FN +chr20 56280541 56281913 4 4 4 1 True 5 5 2 1 FN,FP chr20 57090868 57091166 1 1 2 0 False 1 1 2 0 FN chr20 57110450 57110593 2 2 1 0 False 2 2 1 0 FN chr20 57190256 57190428 0 0 3 1 True 0 0 0 0 TN -chr20 57350856 57350920 1 1 1 0 False 1 1 1 0 FN +chr20 57350856 57350920 1 1 0 0 False 1 1 0 0 TP chr20 57949001 57949346 1 1 4 1 True 0 0 4 1 FN,FP -chr20 59384366 59384743 3 3 2 0 False 3 3 2 0 FN -chr20 60314443 60314711 2 2 2 0 False 2 2 2 0 FN -chr20 60703005 60703087 2 2 2 1 True 3 3 0 0 TP -chr20 61100921 61102405 1 1 4 1 True 4 4 0 0 TP -chr20 61201822 61202242 2 2 4 1 
True 1 1 0 0 TP -chr20 61282925 61283479 4 4 2 0 False 4 4 2 0 FN +chr20 59384366 59384743 2 2 2 0 False 2 2 2 0 FN +chr20 60314443 60314711 2 1 1 0 False 2 1 1 0 FN +chr20 60703005 60703087 2 2 1 1 True 3 3 0 0 TP +chr20 61100921 61102405 1 1 3 1 True 4 4 0 0 TP +chr20 61201822 61202242 1 2 4 1 True 1 1 0 0 TP +chr20 61282925 61283479 3 3 2 0 False 3 3 2 0 FN chr20 61289662 61290273 1 1 2 1 True 0 0 0 0 TN -chr20 61329345 61329441 0 0 0 2 False 0 0 0 2 FP -chr20 61562109 61562252 0 0 2 1 True 1 1 0 0 TP +chr20 61329345 61329441 0 0 0 1 False 0 0 0 1 FP +chr20 61562109 61562252 0 0 1 1 True 1 1 0 0 TP chr20 61744401 61744592 2 2 1 0 False 2 2 1 0 FN chr20 61783958 61784698 3 3 1 0 False 3 3 1 0 FN -chr20 62057602 62058768 1 1 3 1 True 1 1 0 0 TP +chr20 62057602 62058768 1 1 2 1 True 1 1 0 0 TP chr20 62270413 62270827 1 1 3 1 True 3 3 0 0 TP -chr20 62321396 62321730 2 2 3 0 False 2 2 3 0 FN -chr20 62349641 62349826 1 1 5 1 True 0 0 2 1 FN,FP -chr20 62360410 62360602 0 0 8 2 True 2 2 2 1 FN,FP +chr20 62321396 62321730 2 2 2 0 False 2 2 2 0 FN +chr20 62349641 62349826 1 1 4 1 True 0 0 2 1 FN,FP +chr20 62360410 62360602 0 0 6 2 True 1 1 1 1 FN,FP chr20 62830650 62830697 2 2 1 1 True 1 1 0 0 TP -chr20 62875241 62875404 2 2 3 0 False 2 2 3 0 FN +chr20 62875241 62875404 2 2 2 0 False 2 2 2 0 FN chr20 63028066 63029030 4 4 1 1 True 2 2 0 0 TP -chr20 63049093 63049159 3 3 1 0 False 3 3 1 0 FN +chr20 63049093 63049159 1 1 1 0 False 1 1 1 0 FN chr20 63154687 63154921 1 1 1 0 False 1 1 1 0 FN -chr20 63167473 63167564 2 2 1 0 False 2 2 1 0 FN -chr20 63221509 63221721 1 1 2 1 True 0 0 0 0 TN +chr20 63167473 63167564 2 2 0 0 False 2 2 0 0 TP +chr20 63221509 63221721 1 1 1 1 True 0 0 0 0 TN chr20 63372214 63372400 2 2 1 0 False 2 2 1 0 FN -chr20 63491957 63492390 1 1 3 1 True 2 2 1 0 FN +chr20 63491957 63492390 1 1 2 1 True 2 2 1 0 FN chr20 63535751 63536002 1 1 2 0 False 1 1 2 0 FN chr20 63559415 63559719 1 1 4 1 True 3 3 0 0 TP -chr20 63641847 63642015 1 1 2 1 True 0 0 0 0 TN 
-chr20 63693449 63693732 1 1 6 1 True 10 10 0 0 TP +chr20 63641847 63642015 0 0 1 0 False 0 0 1 0 FN +chr20 63693449 63693732 1 1 5 1 True 10 10 0 0 TP chr20 63770936 63771014 0 0 0 2 False 0 0 0 2 FP -chr20 63948594 63948653 2 2 1 1 True 0 0 0 1 FP +chr20 63948594 63948653 2 2 0 0 False 2 2 0 0 TP chr20 63964805 63966113 1 1 1 0 False 1 1 1 0 FN -chr20 64065882 64065883 0 0 0 1 False 0 0 0 1 FP +chr20 64065882 64065883 0 0 0 0 False 0 0 0 0 TN chr20 64090733 64091007 0 0 0 2 False 0 0 0 2 FP -chr20 64097039 64097040 0 0 0 2 False 0 0 0 2 FP +chr20 64097039 64097040 0 0 0 0 False 0 0 0 0 TN chr20 64125360 64127875 3 3 3 0 False 3 3 3 0 FN -chr20 64131913 64133856 5 5 8 1 True 5 5 6 5 FN,FP +chr20 64131913 64133856 4 5 8 1 True 5 5 6 5 FN,FP chr20 64134990 64136330 3 3 1 0 False 3 3 1 0 FN chr20 64173438 64176330 4 4 7 3 True 10 10 5 2 FN,FP diff --git a/repo_utils/answer_key/refine/refine_output_three/refine.variant_summary.json b/repo_utils/answer_key/refine/refine_output_three/refine.variant_summary.json index c4e13143..739c8c21 100644 --- a/repo_utils/answer_key/refine/refine_output_three/refine.variant_summary.json +++ b/repo_utils/answer_key/refine/refine_output_three/refine.variant_summary.json @@ -1,11 +1,11 @@ { - "TP-base": 408, - "TP-comp": 408, - "FP": 35, - "FN": 108, - "precision": 0.9209932279909706, - "recall": 0.7906976744186046, - "f1": 0.8508863399374348, - "base cnt": 516, - "comp cnt": 443 + "TP-base": 422, + "TP-comp": 416, + "FP": 38, + "FN": 129, + "precision": 0.9162995594713657, + "recall": 0.7658802177858439, + "f1": 0.8343646923508332, + "base cnt": 551, + "comp cnt": 454 } \ No newline at end of file diff --git a/repo_utils/answer_key/refine/refine_output_two/refine.regions.txt b/repo_utils/answer_key/refine/refine_output_two/refine.regions.txt index fc8fd4c9..da659223 100644 --- a/repo_utils/answer_key/refine/refine_output_two/refine.regions.txt +++ b/repo_utils/answer_key/refine/refine_output_two/refine.regions.txt @@ -24,7 +24,7 @@ 
chr20 18209097 18210727 3 3 2 1 True 5 5 0 0 TP chr20 18675650 18675915 2 2 0 0 False 2 2 0 0 TP chr20 19663366 19663526 2 2 0 0 False 2 2 0 0 TP chr20 20337200 20337707 1 1 6 2 True 3 3 0 0 TP -chr20 20354570 20358323 7 7 9 1 True 14 14 0 0 TP +chr20 20354570 20358323 7 7 9 1 True 16 16 0 0 TP chr20 20458640 20458937 2 2 0 0 False 2 2 0 0 TP chr20 21120212 21120539 2 2 1 0 True 1 1 0 0 TP chr20 21721174 21721735 2 2 1 0 True 3 3 0 0 TP @@ -73,7 +73,7 @@ chr20 58519902 58520372 2 2 0 0 False 2 2 0 0 TP chr20 58694862 58695411 2 2 0 0 False 2 2 0 0 TP chr20 59442036 59442273 2 2 0 0 False 2 2 0 0 TP chr20 60087391 60087965 2 2 0 0 False 2 2 0 0 TP -chr20 60314330 60315020 2 2 2 0 True 6 6 0 0 TP +chr20 60314330 60315020 2 2 2 0 True 5 5 0 0 TP chr20 60764031 60764634 2 2 0 0 False 2 2 0 0 TP chr20 61176725 61176974 2 2 0 0 False 2 2 0 0 TP chr20 61201683 61202474 2 2 4 1 True 7 7 0 0 TP @@ -85,7 +85,7 @@ chr20 61475406 61475726 2 2 0 0 False 2 2 0 0 TP chr20 61744188 61744654 2 2 1 0 True 2 2 0 0 TP chr20 61783403 61784839 3 3 1 0 True 4 4 0 0 TP chr20 61919676 61921374 2 2 0 0 False 2 2 0 0 TP -chr20 62057573 62059139 1 1 3 1 True 8 8 0 0 TP +chr20 62057573 62059139 1 1 3 1 True 7 7 0 0 TP chr20 62212844 62213874 4 4 0 0 False 4 4 0 0 TP chr20 62270279 62271094 1 1 3 1 True 4 4 0 0 TP chr20 62317837 62318459 2 2 0 0 False 2 2 0 0 TP diff --git a/repo_utils/run_unittest.py b/repo_utils/run_unittest.py new file mode 100644 index 00000000..9226f13d --- /dev/null +++ b/repo_utils/run_unittest.py @@ -0,0 +1,59 @@ +""" +One off unittests +""" +import os +import sys +import pysam +from collections import defaultdict +from intervaltree import IntervalTree + +# Use the current truvari, not any installed libraries +sys.path.insert(0, os.getcwd()) + +import truvari + +# Assume we're running in truvari root directory + +""" +Boundary issues +""" +vcf_fn = "repo_utils/test_files/variants/boundary.vcf.gz" +bed_fn = "repo_utils/test_files/beds/boundary.bed" +region_start = 10 
+region_end = 20 + +vcf = pysam.VariantFile(vcf_fn) +for entry in vcf: + state = entry.info['include'] == 'in' + assert state == truvari.entry_within(entry, region_start, region_end), f"Bad Boundary {str(entry)}" + +regions = truvari.RegionVCFIterator(vcf, includebed=bed_fn) +vcf.reset() +truv_ans = defaultdict(lambda: False) +for entry in regions.iterate(vcf): + truv_ans[truvari.entry_to_key(entry)] = True + +vcf.reset() +for entry in vcf: + state = entry.info['include'] == 'in' + assert state == truv_ans[truvari.entry_to_key(entry)], f"Bad Boundary {str(entry)}" + +""" +New Region Filtering +""" +vcf_fn = "repo_utils/test_files/variants/boundary_cpx.vcf.gz" +bed_fn = "repo_utils/test_files/beds/boundary_cpx.bed" + +tree = defaultdict(IntervalTree) +with open(bed_fn, 'r') as fh: + for line in fh: + data = line.strip().split() + tree[data[0]].addi(int(data[1]), int(data[2]) + 1) + +vcf = pysam.VariantFile(vcf_fn) +for entry in truvari.region_filter(vcf, tree, True, False): + assert entry.info['include'] == 'in', f"Bad in {str(entry)}" + +vcf.reset() +for entry in truvari.region_filter(vcf, tree, False, False): + assert entry.info['include'] == 'out', f"Bad out {str(entry)}" diff --git a/repo_utils/sub_tests/ga4gh.sh b/repo_utils/sub_tests/ga4gh.sh new file mode 100644 index 00000000..40acc802 --- /dev/null +++ b/repo_utils/sub_tests/ga4gh.sh @@ -0,0 +1,21 @@ +# ------------------------------------------------------------ +# ga4gh +# ------------------------------------------------------------ +run test_ga4gh $truv ga4gh -i $ANSDIR/refine/refine_output_three/ -o $OD/ga4gh_norefine +if [ $test_ga4gh ]; then + assert_exit_code 0 + assert_equal $(fn_md5 $ANSDIR/ga4gh/ga4gh_norefine_truth.vcf.gz) $(fn_md5 $OD/ga4gh_norefine_truth.vcf.gz) + assert_equal $(fn_md5 $ANSDIR/ga4gh/ga4gh_norefine_query.vcf.gz) $(fn_md5 $OD/ga4gh_norefine_query.vcf.gz) +fi + +run test_ga4gh_refine $truv ga4gh -w -i $ANSDIR/refine/refine_output_three/ -o $OD/ga4gh_withrefine +if [ 
$test_ga4gh_refine ]; then + assert_exit_code 0 + assert_equal $(fn_md5 $ANSDIR/ga4gh/ga4gh_withrefine_truth.vcf.gz) $(fn_md5 $OD/ga4gh_withrefine_truth.vcf.gz) + assert_equal $(fn_md5 $ANSDIR/ga4gh/ga4gh_withrefine_query.vcf.gz) $(fn_md5 $OD/ga4gh_withrefine_query.vcf.gz) +fi + +run test_ga4gh_badparam $truv ga4gh -w -i notreal.file1234 -o $ANSDIR/ga4gh/ga4gh_withrefine +if [ $test_ga4gh_badparam ]; then + assert_exit_code 1 +fi diff --git a/repo_utils/sub_tests/unittest.sh b/repo_utils/sub_tests/unittest.sh new file mode 100644 index 00000000..85d11064 --- /dev/null +++ b/repo_utils/sub_tests/unittest.sh @@ -0,0 +1,7 @@ +# ------------------------------------------------------------ +# unittest +# ------------------------------------------------------------ +run test_unittest coverage run --concurrency=multiprocessing -p repo_utils/run_unittest.py +if [ $test_unittest ]; then + assert_exit_code 0 +fi diff --git a/repo_utils/test_files/beds/boundary.bed b/repo_utils/test_files/beds/boundary.bed new file mode 100644 index 00000000..ecf93366 --- /dev/null +++ b/repo_utils/test_files/beds/boundary.bed @@ -0,0 +1 @@ +reference 10 20 diff --git a/repo_utils/test_files/beds/boundary_cpx.bed b/repo_utils/test_files/beds/boundary_cpx.bed new file mode 100644 index 00000000..c1af045b --- /dev/null +++ b/repo_utils/test_files/beds/boundary_cpx.bed @@ -0,0 +1,3 @@ +reference 10 20 +reference 110 120 +reference2 10 20 diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_ad2c051e81d70fcd18bbddaf87dc9140.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_06433b401328232c8c4a2f2278e2fb4c.msa similarity index 77% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_ad2c051e81d70fcd18bbddaf87dc9140.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_06433b401328232c8c4a2f2278e2fb4c.msa index 107a7831..ee0d8578 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_ad2c051e81d70fcd18bbddaf87dc9140.msa +++ 
b/repo_utils/test_files/external/fake_mafft/lookup/fm_06433b401328232c8c4a2f2278e2fb4c.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:56280441-56282014 +>syndip_1_chr20:56280441-56282013 aaaaaggtaacatccatatacagatataatatctataacatatatagatatatggatata acatccatatatagatatatctatatatctatctatagctac------------------ ----------------------------------------------atctacagatatct @@ -24,20 +24,20 @@ tagat--atatagatacatctatatagatatatagatatatagatacatctatatagata tattgatacat--atagatacatctatatagattagatatatatagatacatctatatag attagatatatatagatacatctatatagattagatatatatagatacatctatatatag atatatagatatatatagatacatctatatatagacatatagatatatatagatacatct -atatatagatatatagatgtatatagatacatctatat--atagatatatagatgtatat -agatacatctatatatagatatatagatgtatatagatacatctatatatagatatatag -atgtatatagatacatctatat--atagatatatagatgtatatagatacatctatatat -agatatatagatgtatatagatacatctatatatagatatatagatgtatatagatacat -ctatatatagatatatagatgtatatagatacatctatatatagatatatagatgtatat -agatacatctatatatagatatatagatgtatatagatacatagatatatatagatacat -ttatgtatatatatatgtatatatagatatatagatatatatagatacatctacacagag -agatatatagaaacatctacacagggagatatatagatacatctacatagagagatatat -agatacatctacatagagagatatatagatacatctacatagagagatatatagatacat -ctacatagagagatatatagatacatctacacagagagatatatagaaacatctacacag -ggagatatatagatacatctacacagagagatatatagatacatctacatagagagatat -atagatacatctacatagagagatatatagatacatctacatagagagatatatagatac -atctacatagagagatatatagatacatc ->syndip_2_chr20:56280441-56282014 +atatatagatatatagatgtatatagatacatctatatatagatatatagatgtatatag +atacatctatatatagatatatagatgtatatagatacatctatatatagatatatagat +gtatatagatacatctatatatagatatatagatgtatatagatacatctatatatagat +atatagatgtatatagatacatctatatatagatatatagatgtatatagatacatctat +atatagatatatagatgtatatagatacatctatatatagatatatagatgtatatagat +acatctatatatagatatatagatgtatatagatacatagatatatatagatacatttat +gtatatatatatgtatatatagatatatagatatatatagatacatctacacagagagat +atatagaaacatctacacagggagatatatagatacatctacatagagagatatatagat +acatctacatagagagatatatagatacatctacatagagagatatatagatacatctac 
+atagagagatatatagatacatctacacagagagatatatagaaacatctacacagggag +atatatagatacatctacacagagagatatatagatacatctacatagagagatatatag +atacatctacatagagagatatatagatacatctacatagagagatatatagatacatct +acatagagagatatatagatacat +>syndip_2_chr20:56280441-56282013 aaaaaggtaacatccatatacagatataatatctataacatatatagatatatggatata acatccatatatagatatatctatatatctatctatagctacatctacagatatctatag ctacatctacagatatctacagatatctatagctacatctacagatatctacagatatct @@ -63,11 +63,11 @@ ttgatacatatagatacatctatatagatatatagatatatagatacatctata------ ------------------------------------------------------------ -------------------------------tagatatatatagatacatctatatatag acatatagatatatatagatacatctatatatagacatatagatatatatagatacatct -atatatagatatatagatatatatagatacatctacacagagagatatatagaaacatct -acacagggagatatatagatacatctacacagagagatatatagatacatctacatagag -agatatatagatacatctatatagagagatatatagatacatctacatagagagatatat -agatacatctacatagagagatatatagatacatctacatagagagatatatagatacat -c----------------------------------------------------------- +atatatagatatatagatatatatagatacatctacacagag------agatatatagaa +acatctacacagggagatatatagatacatctacacagagagatatatagatacatctac +atagagagatatatagatacatctatatagagagatatatagatacatctacatagagag +a--------tatatagatacatctacatagagaga--------tatatagatacatctac +atagagagatatatagatacat-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -75,8 +75,8 @@ c----------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------ ->p:HG002_1_chr20:56280441-56282014 +------------------------ +>p:HG002_1_chr20:56280441-56282013 aaaaaggtaacatccatatacagatataatatctataacatatatagatatatggatata 
acatccatatatagatatatctatatatctatctatagctacatctacagatatctatag ctacatctacagatatctacagatatctatagctacatctacagatatctacagatatct @@ -102,10 +102,10 @@ ttgatacatatagatacatctatatagatatatagatatatagatacatctata------ ------------------------------------------------------------ -------------------------------tagatatatagatata----tagatacat ctatatagatatatatagatacatctatatatagacatatagatatatatagatacatct -atatatagatatatagatatatatagatacatctacacagagagatatatagaaacatct -acacagggagatatatagatacatctacacagagagatatatagatacatctacatagag -agatatatagatacatctacatagagagatatatagatacatctacatagagagatatat -agatacatctacatagagagatatatagatacatc------------------------- +atatatagatatatagatatatatagatacatctacacagag------agatatatagaa +acatctacacagggagatatatagatacatctacacagagagatatatagatacatctac +atagagagatatatagatacatctacatagagagatatatagatacatctacatagagag +a--------tatatagatacatctacatagagaga--------tatatagatacat---- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -114,8 +114,8 @@ agatacatctacatagagagatatatagatacatc------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------ ->p:HG002_2_chr20:56280441-56282014 +------------------------ +>p:HG002_2_chr20:56280441-56282013 aaaaaggtaacatccatatacagatataatatctataacatatatagatatatggatata acatccatatatagatatatctatatatctatctatagctac------------------ ----------------------------------------------atctacagatatct @@ -141,20 +141,20 @@ ttgatacatatagatacatctatatagatatatagatatatagatacatctatatagata tatagatatatagatacatacatctatatagattagatatatatagatacatctatatag attagatatatatagatacatctatatagattagatatatatagatacatctatatatag atatatagatatatatagatacatctatatatagatatatagatatatatagatacatct -atatatagatatatagatgtatatagatacatctatat--atagatatatagatgtatat 
-agatacatctatatatagatatatagatgtatatagatacatctatatatagatatatag -atgtatatagatacatctatat--atagatatatagatgtatatagatacatctatatat -agatatatagatgtatatagatacatctatatatagatatatagatgtatatagatacat -ctatatatagatatatagatgtatatagatacatctatatatagatatatagatgtatat -agatacatctatatatagatatatagatgtatatagatacatagatatatatagatacat -ttatgtatatatatatgtatatatagatatatagatatatatagatacatctacacagag -agatatatagaaacatctacacagggagatatatagatacatctacatagagagctatat -agatatatatagatacatctatatatagacatatagatatatatagatacatctatatat -agatatatagatatatatagatacatctacacagagagatatatagaaacatctacacag -ggagatatatagatacatctacacagagagatatatagatacatctacatagagagatat -atagatacatctacatagagagatatatagatacatctacatagagagatatatagatac -atctacatagagagatatatagatacatc ->ref_chr20:56280441-56282014 +atatatagatatatagatgtatatagatacatctatatatagatatatagatgtatatag +atacatctatatatagatatatagatgtatatagatacatctatatatagatatatagat +gtatatagatacatctatatatagatatatagatgtatatagatacatctatatatagat +atatagatgtatatagatacatctatatatagatatatagatgtatatagatacatctat +atatagatatatagatgtatatagatacatctatatatagatatatagatgtatatagat +acatctatatatagatatatagatgtatatagatacatagatatatatagatacatttat +gtatatatatatgtatatatagatatatagatatatatagatacatctacacagagagat +atatagaaacatctacacagggagatatatagatacatctacatagagagctatatagat +atatatagatacatctatatatagacatatagatatatatagatacatctatatatagat +atatagatatatatagatacatctacacagagagatatatagaaacatctacacagggag +atatatagatacatctacacagagagatatatagatacatctacatagagagatatatag +atacatctacatagagagatatatagatacatctacatagagagatatatagatacatct +acatagagagatatatagatacat +>ref_chr20:56280441-56282013 aaaaaggtaacatccatatacagatataatatctataacatatatagatatatggatata acatccatatatagatatatctatatatctatctatagctac------------------ --------------atctacagatatctatagctacatctacagatatctacagatatct @@ -180,10 +180,10 @@ ttgatacatatagatacatctatatagatatatagatatatagatacatctata------ ------------------------------------------------------------ -------------------------------tagatatatagatata----tagatacat ctatatagatatatatagatacatctatatatagacatatagatatatatagatacatct 
-atatatagatatatagatatatatagatacatctacacagagagatatatagaaacatct -acacagggagatatatagatacatctacacagagagatatatagatacatctacatagag -agatatatagatacatctacatagagagatatatagatacatctacatagagagatatat -agatacatctacatagagagatatatagatacatc------------------------- +atatatagatatatagatatatatagatacatctacacagag------agatatatagaa +acatctacacagggagatatatagatacatctacacagagagatatatagatacatctac +atagagagatatatagatacatctacatagagagatatatagatacatctacatagagag +a--------tatatagatacatctacatagagaga--------tatatagatacat---- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -192,4 +192,4 @@ agatacatctacatagagagatatatagatacatc------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------ +------------------------ diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_36dc8f9607000ef7378ba372de940929.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_09e5224f3ed49d0a0d63118c15970e8f.msa similarity index 84% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_36dc8f9607000ef7378ba372de940929.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_09e5224f3ed49d0a0d63118c15970e8f.msa index 63bf4003..dd108ec0 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_36dc8f9607000ef7378ba372de940929.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_09e5224f3ed49d0a0d63118c15970e8f.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:50775546-50775933 +>syndip_1_chr20:50775546-50775932 tttctcattctgtctctccaaagatttccatatatatatatatatatatatatatatata tatatatatataaatataaatatattctatatattcatatatatatattctatatattca tatatataaatatattctatatattcatatataaatatattccatatatataaatatatt @@ -6,8 +6,8 @@ ccatatatt---tatatataaatatatatcatatatttatatataaatatatatcatata 
tttatatataaatatatatcatatatttatatat-aatatatatcatatatttatatata aatatatatcatatatttatatataaatatatatcatatatttatatataatatatatca tata-ttatatataaatatatatcatatatttatatataaatatatatcatatattaata -tatatcatatatttatatattaatatatatcatatatttatatatt ->syndip_2_chr20:50775546-50775933 +tatatcatatatttatatattaatatatatcatatatttatatat +>syndip_2_chr20:50775546-50775932 tttctcattctgtctctccaaagatttccatatatatatatatatatatatatatatata tatatatatataaatataaatatattctatatattcatatatatatattctatatattca tatatataaatatattctatatattcatatatataaatatattctatatattcatata-t @@ -15,8 +15,8 @@ aaatatattccatatatataaatatattccatatatttatatataaatatatatcatata tttatatataaatatatatcatatatttatatataaatatatatcatatatttatatat- aatatatatcatatatttatatataaatatatatcatatatttatatataatatatatca tata-ttatatataaatatatatcatatatttatatataaatatatatcatatattaata -tatatcatatatttatatattaatatatatcatatatttatatatt ->p:HG002_1_chr20:50775546-50775933 +tatatcatatatttatatattaatatatatcatatatttatatat +>p:HG002_1_chr20:50775546-50775932 tttctcattctgtctctccaaagatttccatatatatatatatatatatatatatatata tatatatatataaatataaatatattctatatattcatatatatatattctatatattca tatatataaatatattctatatattcatatataaatatatattctatatattcatata-t @@ -24,8 +24,8 @@ aaatatattccatatatataaatatattccatatatttatatataaatatatatcatatc catatatataaatatattccatatatttatatataaatatatatcatatatttatatata aatatatatcatatatttatatataaatatatatcatatatttatatataatatatatca tatatttatatataaatatatatcatatatttatatataaatatatatcatatattaata -tatatcatatatttatatattaatatatatcatatatttatatatt ->p:HG002_2_chr20:50775546-50775933 +tatatcatatatttatatattaatatatatcatatatttatatat +>p:HG002_2_chr20:50775546-50775932 tttctcattctgtctctccaaagatttccatatatatatatatatatatatatatatata tatatatatataaatataaatatattctatatattcatatatatatattctatatattca tatatataaatatattctatatattcatatataaatatattccatatatataaatatatt @@ -33,8 +33,8 @@ ccatatatt---tatatataaatatatatcatatatttatatataaatatatatcatata tttatatataaatatatatcatatatttatatat-aatatatatcatatatttatatata aatatatatcatatatttatatataaatatatatcatatatttatatataatatatatca 
tata-ttatatataaatatatatcatatatttatatataaatatatatcatatattaata -tatatcatatatttatatattaatatatatcatatatttatatatt ->ref_chr20:50775546-50775933 +tatatcatatatttatatattaatatatatcatatatttatatat +>ref_chr20:50775546-50775932 tttctcattctgtctctccaaagatttccatatatatatatatatatatatatatatata tatatatatataaatataaatatattctatatattcatatatatatattctatatattca tatatataaatatattctatatattcatatataaatatattccatatatataaatatatt @@ -42,4 +42,4 @@ ccatatatt---tatatataaatatatatcatatatttatatataaatatatatcatata tttatatataaatatatatcatatatttatatat-aatatatatcatatatttatatata aatatatatcatatatttatatataaatatatatcatatattaatatatatcatatat-- -----ttatatattaatatatatcatatatttatatat---------------------- ----------------------------------------------t +--------------------------------------------- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_c42f829bb3fad0eeedf846ea3be67cc8.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_0a842177b0a019f7a4fb813c41b08d1e.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_c42f829bb3fad0eeedf846ea3be67cc8.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_0a842177b0a019f7a4fb813c41b08d1e.msa index 8f675fc1..d4cd5f85 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_c42f829bb3fad0eeedf846ea3be67cc8.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_0a842177b0a019f7a4fb813c41b08d1e.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63491857-63492491 +>syndip_1_chr20:63491857-63492490 gatggggaggtggatggatagatggatggatggatggatggatggatggatggatggggg gatagatggatggatggatggatggacggacggatggggagg--------tggatggatg gatggatggatggatggatggatggatgaggagatgaacggatggatgagatggatggat @@ -31,8 +31,8 @@ ggatggggaaatggatggatggaaggatggatggatagggaggtgggtggatggatggat ggatggatgtacagaaggatggatgggtggggaggtgggtgagtggatgaatggatggat ggatagatagaaggatggatgggtgggacaatagatggatggatagagagaaggatggat gggtggggagatagatggatggatggatggatggatggatagagagaaggatggatggtt -gagacaatggatggatggatggatggatggagagatggatggatggatggat 
->syndip_2_chr20:63491857-63492491 +gagacaatggatggatggatggatggatggagagatggatggatggatgga +>syndip_2_chr20:63491857-63492490 gatggggaggtggatggatagatggatggatggatggatggatggatggatggatggggg gatagatggatggatggatggatggacggacggatggggaggtggatggatggatggatg gatggatggatggatggatggatggatgaggagatgaacggatggatgagatggatggat @@ -65,8 +65,8 @@ ggatggataaatggatggatggaaggatggatggataggtaggtgggtggatggatggat ggatggatgtacagaaggatggatgggtgggtaggtgggtgagtggatgaatggatggat ggatagatagaaggatggatgggtgggtcaatagatggatggatagagagaaggatggat gggtggg----tagatggatggatggatggatggatggatagagagaaggatggatggtt -gagacaatggatggatggatggatggatggagagatggatggatggatggat ->p:HG002_1_chr20:63491857-63492491 +gagacaatggatggatggatggatggatggagagatggatggatggatgga +>p:HG002_1_chr20:63491857-63492490 gatggggaggtggatggatagatggatggatggatggatggatggatggatggatggggg gatagatggatggatggatggatggacggacggatggggaggtggatggatggatggatg gatggatggatggatggatggatggatgaggagatgaacggatggatgagatggatggat @@ -99,8 +99,8 @@ ggatggataaatggatggatggaaggatggatggataggtaggtgggtggatggatggat ggatggatgtacagaaggatggatgggtgggtaggtgggtgagtggatgaatggatggat ggatagatagaaggatggatgggtgggtcaatagatggatggatagagagaaggatggat gggtggg----tagatggatggatggatggatggatggatagagagaaggatggatggtt -gagacaatggatggatggatggatggatggagagatggatggatggatggat ->p:HG002_2_chr20:63491857-63492491 +gagacaatggatggatggatggatggatggagagatggatggatggatgga +>p:HG002_2_chr20:63491857-63492490 gatggggaggtggatggatagatggatggatggatggatggatggatggatggatggggg gatagatggatggatggatggatggacggacggatggggaggtggatggatggatggatg gatggatggatggatggatggatggatgaggagatgaacggatggatgagatggatggat @@ -133,8 +133,8 @@ ggatggataaatggatggatggaaggatggatggataggtaggtgggtggatggatggat ggatggatgtacagaaggatggatgggtgggtaggtgggtgagtggatgaatggatggat ggatagatagaaggatggatgggtgggtcaatagatggatggatagagagaaggatggat gggtggg----tagatggatggatggatggatggatggatagagagaaggatggatggtt -gagacaatggatggatggatggatggatggagagatggatggatggatggat ->ref_chr20:63491857-63492491 +gagacaatggatggatggatggatggatggagagatggatggatggatgga +>ref_chr20:63491857-63492490 
gatggggaggtggatggatagatggatggatggatggatggatggatggatggatggggg gatagatggatggatggatggatggacggacggatggggaggtggatggatggatggatg gatggatggatggatggatggatggatgaggagatgaacggatggatgagatggatggat @@ -167,4 +167,4 @@ ggatggataaatggatggatggaaggatggatggataggtaggtgggtggatggatggat ggatggatgtacagaaggatggatgggtgggtaggtgggtgagtggatgaatggatggat ggatagatagaaggatggatgggtgggtcaatagatggatggatagagagaaggatggat gggtggg----tagatggatggatggatggatggatggatagagagaaggatggatggtt -gagacaatggatggatggatggatggatggagagatggatggatggatggat +gagacaatggatggatggatggatggatggagagatggatggatggatgga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_9be1536e40a58a7128ddc75d12940a3e.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_0c8572a4abbd43dfe7727d1bbe104e58.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_9be1536e40a58a7128ddc75d12940a3e.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_0c8572a4abbd43dfe7727d1bbe104e58.msa index 79c1ab78..68732c67 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_9be1536e40a58a7128ddc75d12940a3e.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_0c8572a4abbd43dfe7727d1bbe104e58.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:16257641-16259446 +>syndip_1_chr20:16257641-16259445 ctgagattcctgccacctttgccccttacttttctcaccccgttcactgagggccctcaa gtctgtttctaagcagcagcctgagggatccatttttataatctgaaatcagatatcata cccacaccccaaaacacacacactccttccccatcccaaagagacacaccccttccccat @@ -42,8 +42,8 @@ tccccatcccaaacacacacaca-------------------------ccttccccatcc caaacacacccacccttactccatctcaaacatgtacaccccttcccctatcccaaaata cacacatccatccctacacaccctcacccatttccccatcccagaacctatgcaccaact cagtttatttgaacatgccaggcactgaactaagagaaccaaagtagcaggaggaatacg -gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaag ->syndip_2_chr20:16257641-16259446 +gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaa +>syndip_2_chr20:16257641-16259445 ctgagattcctgccacctttgccccttacttttctcaccccgttcactgagggccctcaa 
gtctgtttctaagcagcagcctgagggatccatttttataatctgaaatcagatatcata cccacaccccaaaacacacacactccttccccatcccaaagagacacaccccttccccat @@ -87,8 +87,8 @@ tccccatcccaaacacacacacaccttcccatcccaaacacacacaccccttccccatcc caaacacacccacccttactccatctcaaacatgtacaccccttcccctatcccaaaata cacacatccatccctacacaccctcacccatttccccatcccagaacctatgcaccaact cagtttatttgaacatgccaggcactgaactaagagaaccaaagtagcaggaggaatacg -gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaag ->p:HG002_1_chr20:16257641-16259446 +gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaa +>p:HG002_1_chr20:16257641-16259445 ctgagattcctgccacctttgccccttacttttctcaccccgttcactgagggccctcaa gtctgtttctaagcagcagcctgatggatccatttttataatctgaaatcagatatcata cccacaccccaaaacacacacactccttccccatcccaaagagacacaccccttccccat @@ -123,7 +123,7 @@ ttccctaattccacacacacacaccccttccccatcccaaacacacataccccttcccat cccaaagacacacactcattctgcatcccaaacacacacaccccttcccatcccaaacac acacactacctccgcatcccaaacacatataccccttccccatcccaaacacatacaccc cttccccatcgccaacacacacaccccttccccatcccaaacacacacacccctttccat -cccaaacacacacccattccccatcccagacacacccacccgttccccatcc-------- +cccaaacacacacccattccccatcccagacacacccacccgttccccatc--------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -132,8 +132,8 @@ cccaaacacacacccattccccatcccagacacacccacccgttccccatcc-------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------------------------- ->p:HG002_2_chr20:16257641-16259446 +------------------------------------------------ +>p:HG002_2_chr20:16257641-16259445 ctgagattcctgccacctttgccccttacttttctcaccccgttcactgagggccctcaa gtctgtttctaagcagcagcctgagggatccatttttataatctgaaatcagatatcata cccacaccccaaaacacacacactccttccccatcccaaagagacacaccccttccccat @@ -177,8 +177,8 @@ 
tccccatcccaaacacacacaca-------------------------ccttccccatcc caaacacacccacccttactccatctcaaacatgtacaccccttcccctatcccaaaata cacacatccatccctacacaccctcacccatttccccatcccagaacctatgcaccaact cagtttatttgaacatgccaggcactgaactaagagaaccaaagtagcaggaggaatacg -gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaag ->ref_chr20:16257641-16259446 +gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaa +>ref_chr20:16257641-16259445 ctgagattcctgccacctttgccccttacttttctcaccccgttcactgagggccctcaa gtctgtttctaagcagcagcctgagggatccatttttataatctgaaatcagatatcata cccacaccccaaaacacacacactccttccccatcccaaagagacacaccccttccccat @@ -222,4 +222,4 @@ tccccatcccaaacacacacaca-------------------------ccttccccatcc caaacacacccacccttactccatctcaaacatgtacaccccttcccctatcccaaaata cacacatccatccctacacaccctcacccatttccccatcccagaacctatgcaccaact cagtttatttgaacatgccaggcactgaactaagagaaccaaagtagcaggaggaatacg -gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaag +gtgttgtcctcccagaacttaccccctgcacgggagtcagccagggaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_2f584d342cd0122481374518f9e81904.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_0d2e507e60453c89bd248c945dc7f54d.msa similarity index 93% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_2f584d342cd0122481374518f9e81904.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_0d2e507e60453c89bd248c945dc7f54d.msa index 2221e0dc..948c549e 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_2f584d342cd0122481374518f9e81904.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_0d2e507e60453c89bd248c945dc7f54d.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:32722324-32723313 +>syndip_1_chr20:32722324-32723312 acagaggtctcctgaaaattagtgcccataacaggtgtttggtagacaggtctcagccct cagacaaacccatttccaaagatgaaggccagagaatgagaataatgatttatcgttctg gccgggtgtgatggctcatgcctgtaatcccagcgctgtgggaggcgtggccggcagatc @@ -15,8 +15,8 @@ ta-------aataataataataataataaagacttgtgatcctctccctctccctctccc tctccgtctccctctccctctccctctccctccctctccgtctccgtctccgtctccgtc 
tccctctccctctccccacggtctccctctcatgcggagccgaagctggactgtactgct gccatctcggctcactgcaacctccctgcctgattctcctgcctcagcctgccgagtgcc -tgcgattgcaggcacgcgccgccacgcctgactggtt ->syndip_2_chr20:32722324-32723313 +tgcgattgcaggcacgcgccgccacgcctgactggt +>syndip_2_chr20:32722324-32723312 acagaggtctcctgaaaattagtgcccataacaggtgtttggtagacaggtctcagccct cagacaaacccatttccaaagatgaaggccagagaatgagaataatgatttatcgttctg gccgggtgtgatggctcatgcctgtaatcccagcgctgtgggaggcgtggccggcagatc @@ -33,8 +33,8 @@ taaataaataataataataataataataaagacttgtgatcctctccctctccctctccc tctccgtctccctctccctctccctctccctccctctccgtctccgtctccgtctccgtc tccctctccctctccccacggtctccctctcatgcggagccgaagctggactgtactgct gccatctcggctcactgcaacctccctgcctgattctcctgcctcagcctgccgagtgcc -tgcgattgcaggcacgcgccgccacgcctgactggtt ->p:HG002_1_chr20:32722324-32723313 +tgcgattgcaggcacgcgccgccacgcctgactggt +>p:HG002_1_chr20:32722324-32723312 acagaggtctcctgaaaattagtgcccataacaggtgtttggtagacaggtctcagccct cagacaaacccatttccaaagatgaaggccagagaatgagaataatgatttatcgttctg gccgggtgtgatggctcatgcctgtaatcccagcgctgtgggaggcgtggccggcagatc @@ -51,8 +51,8 @@ ta-------aataataataataataataaagacttgtgatcctctccctctccctctccc tctccgtctccctctccctctccctctccctccctctccgtctccgtctccgtctccgtc tccctctccctctccccacggtctccctctcatgcggagccgaagctggactgtactgct gccatctcggctcactgcaacctccctgcctgattctcctgcctcagcctgccgagtgcc -tgcgattgcaggcacgcgccgccacgcctgactggtt ->p:HG002_2_chr20:32722324-32723313 +tgcgattgcaggcacgcgccgccacgcctgactggt +>p:HG002_2_chr20:32722324-32723312 acagaggtctcctgaaaattagtgcccataacaggtgtttggtagacaggtctcagccct cagacaaacccatttccaaagatgaaggccagagaatgagaataatgatttatcgttctg gccgggtgtgatggctcatgcctgtaatcccagcgctgtgggaggcgtggccggcagatc @@ -69,8 +69,8 @@ ta-------aataataataataataataaagacttgtgatcctctccctctccctctccc tctccgtctccctctccctctccctctccctccctctccgtctccgtctccgtctccgtc tccctctccctctccccacggtctccctctcatgcggagccgaagctggactgtactgct gccatctcggctcactgcaacctccctgcctgattctcctgcctcagcctgccgagtgcc -tgcgattgcaggcacgcgccgccacgcctgactggtt ->ref_chr20:32722324-32723313 
+tgcgattgcaggcacgcgccgccacgcctgactggt +>ref_chr20:32722324-32723312 acagaggtctcctgaaaattagtgcccataacaggtgtttggtagacaggtctcagccct cagacaaacccatttccaaagatgaaggccagagaatgagaataatgatttatcgttctg gccgggtgtgatggctcatgcctgtaatcccagcgctgtgggaggcgtggccggcagatc @@ -87,4 +87,4 @@ ta-------aataataataataataataaagacttgtgatcctctccctctccctctccc tctccgtctccctctccctctccctctccctccctctccgtctccgtctccgtctccgtc tccctctccctctccccacggtctccctctcatgcggagccgaagctggactgtactgct gccatctcggctcactgcaacctccctgcctgattctcctgcctcagcctgccgagtgcc -tgcgattgcaggcacgcgccgccacgcctgactggtt +tgcgattgcaggcacgcgccgccacgcctgactggt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_be4bf563ccc78c76f28205e487e0c322.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_0dd22c519e7416f2e879bd43c26d9682.msa similarity index 92% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_be4bf563ccc78c76f28205e487e0c322.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_0dd22c519e7416f2e879bd43c26d9682.msa index ed4a8e48..3d9e7894 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_be4bf563ccc78c76f28205e487e0c322.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_0dd22c519e7416f2e879bd43c26d9682.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:34235650-34236314 +>syndip_1_chr20:34235650-34236313 agcgcatgcctgtggcccctactactcaggaggcaggggtgggaggattgctagagccca ggaagtcgtggctgaagtgggctcatgataatgccactgcactccagcctgtgtgacaga gtgagactctgagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag @@ -11,8 +11,8 @@ ggaaggaaggagagagagaaagaaagagaaagagaaggaaggaaggagaaagaaaggaaa aggaaaggaaaggagtggaaaggaaaggagaggaaaggagaaaggagggagggagggaaa agaaagaaagaaagaaaaggctaaacctgactatgatattgaatttactgctagctctag gtggtttaaatgattcaagaatcatggccatggctgggcgcagtggctcacgcctataat -cccagcactttgggaggccgaagcgggc ->syndip_2_chr20:34235650-34236314 +cccagcactttgggaggccgaagcggg +>syndip_2_chr20:34235650-34236313 agcgcatgcctgtggcccctactactcaggaggcaggggtgggaggattgctagagccca 
ggaagtcgtggctgaagtgggctcatgataatgccactgcactccagcctgtgtgacaga gtgagactctgagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag @@ -25,8 +25,8 @@ ggaaggaaggagagagagaaagaaagagaaagagaaggaaggaaggagaaagaaaggaaa aggaaaggaaaggagtggaaaggaaaggagaggaaaggagaaaggagggagggagggaaa agaaagaaagaaagaaaaggctaaacctgactatgatattgaatttactgctagctctag gtggtttaaatgattcaagaatcatggccatggctgggcgcagtggctcacgcctataat -cccagcactttgggaggccgaagcgggc ->p:HG002_1_chr20:34235650-34236314 +cccagcactttgggaggccgaagcggg +>p:HG002_1_chr20:34235650-34236313 agcgcatgcctgtggcccctactactcaggaggcaggggtgggaggattgctagagccca ggaagtcgtggctgaagtgggctcatgataatgccactgcactccagcctgtgtgacaga gtgagactctgagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag @@ -39,8 +39,8 @@ ggaaggaaggagagagagaaagaaagagaaagagaaggaaggaaggagaaagaaaggaaa aggaaaggaaaggagtggaaaggaaaggagaggaaaggagaaaggagggagggagggaaa agaaagaaagaaagaaaaggctaaacctgactatgatattgaatttactgctagctctag gtggtttaaatgattcaagaatcatggccatggctgggcgcagtggctcacgcctataat -cccagcactttgggaggccgaagcaggc ->p:HG002_2_chr20:34235650-34236314 +cccagcactttgggaggccgaagcagg +>p:HG002_2_chr20:34235650-34236313 agcgcatgcctgtggcccctactactcaggaggcaggggtgggaggattgctagagccca ggaagtcgtggctgaagtgggctcatgataatgccactgcactccagcctgtgtgacaga gtgagactctgagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag @@ -53,8 +53,8 @@ ggaaggaaggagagagagaaagaaagagaaagagaaggaaggaaggagaaagaaaggaaa aggaaaggaaaggagtggaaaggaaaggagaggaaaggagaaaggagggagggagggaaa agaaagaaagaaagaaaaggctaaacctgactatgatattgaatttactgctagctctag gtggtttaaatgattcaagaatcatggccatggctgggcgcagtggctcacgcctataat -cccagcactttgggaggccgaagcaggc ->ref_chr20:34235650-34236314 +cccagcactttgggaggccgaagcagg +>ref_chr20:34235650-34236313 agcgcatgcctgtggcccctactactcaggaggcaggggtgggaggattgctagagccca ggaagtcgtggctgaagtgggctcatgataatgccactgcactccagcctgtgtgacaga gtgagactctgagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag @@ -67,4 +67,4 @@ ggaaggaaggagagagagaaagaaagagaaagagaaggaaggaaggaaggagaaag--aa aggaaaggaaaggagtggaaaggaaaggagaggaaaggagaaaggagggagggagggaaa 
agaaagaaagaaagaaaaggctaaacctgactatgatattgaatttactgctagctctag gtggtttaaatgattcaagaatcatggccatggctgggcgcagtggctcacgcctataat -cccagcactttgggaggccgaagcgggc +cccagcactttgggaggccgaagcggg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_ae2bc435a754945d8db9c5a5fad60896.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_157340f8623d37786583821b8bd25147.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_ae2bc435a754945d8db9c5a5fad60896.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_157340f8623d37786583821b8bd25147.msa index dd71635f..fa589a59 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_ae2bc435a754945d8db9c5a5fad60896.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_157340f8623d37786583821b8bd25147.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:61100821-61102506 +>syndip_1_chr20:61100821-61102505 ctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatcctgtgccac atctggagagacccctgtgagggcagagagctacctcctcttcttcctgtgtcacacctg gagagaccccgagtgagggcggatggccacgtcttctacttcctgtgtcacacctggaga @@ -29,8 +29,8 @@ tcctgtgtcacacctggagaggccccgagtgagggcagacggccacctcctctacttcct gtgtcacacctggagagaccccgagtgagggcagacggccacatcctctatttcctgtgt cacacctggagagaccccgagtgagggcagacggccacatcctctaattcctgtgtcaca catggagagaccccgagtgagggcagaaggccacatcctctacttcctgtgtcacaccta -gagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcacacct ->syndip_2_chr20:61100821-61102506 +gagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcacacc +>syndip_2_chr20:61100821-61102505 ctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatcctgtgccac atctggagagacccctgtgagggcagagagctacctcctcttcttcctgtgtcacacctg gagagaccccgagtgagggcggatggccacgtcttctacttcctgtgtcacacctggaga @@ -61,8 +61,8 @@ tcctgtgtcacacctgcagagaccccaagtgagggcagatggccacctcttctacttcct gtgtcacacctggagaggccccgagtgagggcagacggccacctcctctacttcctgtgt cacacctggagagaccccgagtgagggcagacggccacatcctctaattcctgtgtcaca catggagagaccctgagtgagggcagaaggccacatcctctacttcctgtgtcacaccta 
-gagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcacacct ->p:HG002_1_chr20:61100821-61102506 +gagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcacacc +>p:HG002_1_chr20:61100821-61102505 ctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatcctgtgccac atctggagagacccctgtgagggcagagagctacctcctcttcttcctgtgtcacacctg gagagaccccgagtgagggcggatggccacgtcttctacttcctgtgtcacacctggaga @@ -93,8 +93,8 @@ tcctgtgtcacacctggagaggccccgagtgagggcagacggccacctcctctacttcct gtgtcacacctggagagaccccgagtgagggcagacggccacatcctctatttcctgtgt cacacctggagagaccccgagtgagggcagacggccacatcctctaattcctgtgtcaca catggagagaccccgagtgagggcagaaggccacatcctctacttcctgtgtcacaccta -gagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcacacct ->p:HG002_2_chr20:61100821-61102506 +gagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcacacc +>p:HG002_2_chr20:61100821-61102505 ctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatcctgtgccac atctggagagacccctgtgagggcagagagctacctcctcttcttcctgtgtcacacctg gagagaccccgagtgagggcggatggccacgtcttctacttcctgtgtcacacctggaga @@ -125,8 +125,8 @@ tcctgtgtcacacctggagaggccccgagtgagggcagacggccacctcctctacttcct gtgtcacacctggagagaccccgagtgagggcagacggccacatcctctatttcctgtgt cacacctggagagaccccgagtgagggcagacggccacatcctctaattcctgtgtcaca catggagagaccccgagtgagggcagaaggccacatcctctacttcctgtgtcacaccta -gagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcacacct ->ref_chr20:61100821-61102506 +gagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcacacc +>ref_chr20:61100821-61102505 ctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatcctgtgccac atctggagagacccctgtgagggcagagagctacctcctcttcttcctgtgtcacacctg gagagaccccgagtgagggcggatggccacgtcttctacttcctgtgtcacacctggaga @@ -157,4 +157,4 @@ tcctgtgtcacacctggagaggccccgagtgagggcagacggccacctcctctacttcct gtgtcacacctggagagaccccgagtgagggcagacggccacatcctctatttcctgtgt cacacctggagagaccccgagtgagggcagacggccacatcctctaattcctgtgtcaca catggagagaccccgagtgagggcagaaggccacatcctctacttcctgtgtcacaccta -gagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcacacct +gagagaccccgagtgagggcggatggccacatcctctaattcctgtgtcacacc diff 
--git a/repo_utils/test_files/external/fake_mafft/lookup/fm_34829e853039a0b83260fb740af39cda.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_1c44d23cb30292b3d1f847e04365e588.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_34829e853039a0b83260fb740af39cda.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_1c44d23cb30292b3d1f847e04365e588.msa index 5365dadd..a94a3423 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_34829e853039a0b83260fb740af39cda.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_1c44d23cb30292b3d1f847e04365e588.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:24407883-24409098 +>syndip_1_chr20:24407883-24409097 ctcagtttattcatctataaaacagatgtggcagtggtagtgccacaattcacagcccag ttccaccttcctcacaggctcgtgataaagctccatttcctacccttcctctcagttatg ttggagccacaggaggtgatatagggtgcttccggccctggccatagggtacttctggga @@ -19,8 +19,8 @@ gaggtgatataggatacttccagccctggccatagggtacttctgggaggtgatatagga tacttccagccctggccatagggtacttctgggaggtgatataggatacttccagccttg gccatctcctctcctttaagatacctacacttgttctttcccacccacaactgtatcagt cagcttttactgaataacaaagcacccctaacctcagtgcatgatgaaaccctgtctcta -ctaaaagcacaaaaat ->syndip_2_chr20:24407883-24409098 +ctaaaagcacaaaaa +>syndip_2_chr20:24407883-24409097 ctcagtttattcatctataaaacagatgtggcagtggtagtgccacaattcacagcccag ttccaccttcctcacaggctcgtgataaagctccatttcctacccttcctctcagttatg ttggagccacaggaggtgatatagggtgcttccagccctggccatagggtacttctggga @@ -41,8 +41,8 @@ gaggtgatataggatacttccagccctggccatagggtacttctgggaggtgatatagga tacttccagccctggccatagggtacttctgggaggtgatataggatacttccagccttg gccatctcctctcctttaagatacctacacttgttctttcccacccacaactgtatcagt cagcttttactgaataacaaagcacccctaacctcagtgcatgatgaaaccctgtctcta -ctaaaagcacaaaaat ->p:HG002_1_chr20:24407883-24409098 +ctaaaagcacaaaaa +>p:HG002_1_chr20:24407883-24409097 ctcagtttattcatctataaaacagatgtggcagtggtagtgccacaattcacagcccag ttccaccttcctcacaggctcgtgataaagctccatttcctacccttcctctcagttatg 
ttggagccacaggaggtgatatagggtgcttccagccctggccatagggtacttctggga @@ -63,8 +63,8 @@ gaggtgatataggatacttccagccctggccatagggtacttctgggaggtgatatagga tacttccagccctggccatagggtacttctgggaggtgatataggatacttccagccttg gccatctcctctcctttaagatacctacacttgttctttcccacccacaactgtatcagt cagcttttactgaataacaaagcacccctaacctcagtgcatgatgaaaccctgtctcta -ctaaaagcacaaaaat ->p:HG002_2_chr20:24407883-24409098 +ctaaaagcacaaaaa +>p:HG002_2_chr20:24407883-24409097 ctcagtttattcatctataaaacagatgtggcagtggtagtgccacaattcacagcccag ttccaccttcctcacaggctcgtgataaagctccatttcctacccttcctctcagttatg ttggagccacaggaggtgatatagggtgcttccggccctggccatagggtacttctggga @@ -85,8 +85,8 @@ gaggtgatataggatacttccagccctggccatagggtacttctgggaggtgatatagga tacttccagccctggccatagggtacttctgggaggtgatataggatacttccagccttg gccatctcctctcctttaagatacctacacttgttctttcccacccacaactgtatcagt cagcttttactgaataacaaagcacccctaacctcagtgcatgatgaaaccctgtctcta -ctaaaagcacaaaaat ->ref_chr20:24407883-24409098 +ctaaaagcacaaaaa +>ref_chr20:24407883-24409097 ctcagtttattcatctataaaacagatgtggcagtggtagtgccacaattcacagcccag ttccaccttcctcacaggctcgtgataaagctccatttcctacccttcctctcagttatg ttggagccacaggaggtgatatagggtgcttccagccctggccatagggtacttctggga @@ -107,4 +107,4 @@ gaggtgatataggatacttccagccctggccatagggtacttctgggaggtgatatagga tacttccagccctggccatagggtacttctgggaggtgatataggatacttccagccttg gccatctcctctcctttaagatacctacacttgttctttcccacccacaactgtatcagt cagcttttactgaataacaaagcacccctaacctcagtgcatgatgaaaccctgtctcta -ctaaaagcacaaaaat +ctaaaagcacaaaaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_afe59e2c01695d7d2653b661248a80ef.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_1d20ababbd3c82b9d4be9155373214aa.msa similarity index 86% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_afe59e2c01695d7d2653b661248a80ef.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_1d20ababbd3c82b9d4be9155373214aa.msa index 7fd8c4ee..a0be7d9f 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_afe59e2c01695d7d2653b661248a80ef.msa +++ 
b/repo_utils/test_files/external/fake_mafft/lookup/fm_1d20ababbd3c82b9d4be9155373214aa.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:23560783-23561292 +>syndip_1_chr20:23560783-23561291 taaaatcttcatacagaagatataagagatttgcaaattaccgttatagtattagagtat tctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttcttca gcattcctttctctctctttctttctttctttctttctttccttccttccttccttcctt @@ -7,8 +7,8 @@ ccttccttccttccttccttccttccttcct----------------------------- ----------------------ttctttctttctttctttctttctttctttctttcttt ttttctttct-------ttctttctttctttttcttttacagagtcttactcttgttgcc caggctgtagtgcagtggtgtgatctcggctcactgcaacctccacctctgcctcctgca -ttcaagaaattctcctgcctcagcctccagagtagctgggat ->syndip_2_chr20:23560783-23561292 +ttcaagaaattctcctgcctcagcctccagagtagctggga +>syndip_2_chr20:23560783-23561291 taaaatcttcatacagaagatataagagatttgcaaattaccgttatagtattagagtat tctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttcttca gcattcctttctctctctttctttctttctttctttctttctttctttctttctttcttt @@ -17,8 +17,8 @@ ctttccttctttccttctttccttctttccttctttccttctttccttctttccttcttt ctttctttctttctttctttctttctttctttctttctttctttctttctttctttcttt ctttctttctttctttcttctttctttctttttcttttacagagtcttactcttgttgcc caggctgtagtgcagtggtgtgatctcggctcactgcaacctccacctctgcctcctgca -ttcaagaaattctcctgcctcagcctccagagtagctgggat ->p:HG002_1_chr20:23560783-23561292 +ttcaagaaattctcctgcctcagcctccagagtagctggga +>p:HG002_1_chr20:23560783-23561291 taaaatcttcatacagaagatataagagatttgcaaattaccgttatagtattagagtat tctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttcttca gcattcctttctctctctttctttctttctttctttctttctttctttctttctttcttt @@ -27,8 +27,8 @@ ctttccttctttccttctttccttctttccttctttccttctttccttctttccttcttt ctttctttctttctttctttctttctttctttctttctttctttctttctttctttcttt ctttctttctttctttcttctttctttctttttcttttacagagtcttactcttgttgcc caggctgtagtgcagtggtgtgatctcggctgactgcaacctccacctctgcctcctgca -ttcaagaaattctcctgcctcagcctccagagtagctgggat ->p:HG002_2_chr20:23560783-23561292 +ttcaagaaattctcctgcctcagcctccagagtagctggga +>p:HG002_2_chr20:23560783-23561291 
taaaatcttcatacagaagatataagagatttgcaaattaccgttatagtattagagtat tctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttcttca gcattcctttctctctctttctttctttctttctttctttccttccttccttccttcctt @@ -37,8 +37,8 @@ ccttccttccttccttccttccttccttcct----------------------------- ----------------------ttctttctttctttctttctttctttctttctttcttt ttttctttct-------ttctttctttctttttcttttacagagtcttactcttgttgcc caggctgtagtgcagtggtgtgatctcggctcactgcaacctccacctctgcctcctgca -ttcaagaaattctcctgcctcagcctccagagtagctgggat ->ref_chr20:23560783-23561292 +ttcaagaaattctcctgcctcagcctccagagtagctggga +>ref_chr20:23560783-23561291 taaaatcttcatacagaagatataagagatttgcaaattaccgttatagtattagagtat tctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttcttca gcattcctttctctttctttctttctttctttctttctttctttctttctttctttcctt @@ -47,4 +47,4 @@ ctttccttctttccttctttccttctttccttctttccttctttccttctttcc------ ------ttctttccttctttccttctttctttctttctttctttctttctttctttcttt ctttctttctttctttcttctttctttctttttcttttacagagtcttactcttgttgcc caggctgtagtgcagtggtgtgatctcggctcactgcaacctccacctctgcctcctgca -ttcaagaaattctcctgcctcagcctccagagtagctgggat +ttcaagaaattctcctgcctcagcctccagagtagctggga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_7051e6b8ca568da2ec784100ee158134.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_1d7c9e595e1b2ef479098d45351d36d3.msa similarity index 75% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_7051e6b8ca568da2ec784100ee158134.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_1d7c9e595e1b2ef479098d45351d36d3.msa index 98015e47..e2dd4670 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_7051e6b8ca568da2ec784100ee158134.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_1d7c9e595e1b2ef479098d45351d36d3.msa @@ -1,44 +1,44 @@ ->syndip_1_chr20:63559315-63559820 +>syndip_1_chr20:63559315-63559819 gcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggtctcctgt gagggtgggagtcagatgggagtcagtcagggtcaggtgggaggagtcagggtcaggtgg 
gag--------------------ggagtcagggtcaggtgggaggagtcagggtcagatg ggagtcagtcagagtcaggtgggag-gagtcagggtcaggtgggaggagtcagggtcagg -tgggagg--------------------agtcagggtcaggtgggaggagtcagggtcaga +tgggag--------------------gagtcagggtcaggtgggaggagtcagggtcaga tgggagtcagtcagagtcaggtgggag-gagtcagggtcaggtggga------------- ----ggagtcagggtcaggtgggaggagtcagggtcagatgggagtcagtcagagtcagg -tgggaggagtcagggtcaggtgggagg--------------------------------- ----------------------------- ->syndip_2_chr20:63559315-63559820 +tgggaggagtcagggtcaggtgggag---------------------------------- +--------------------------- +>syndip_2_chr20:63559315-63559819 gcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggtctcctgt gagggtgggagtcagatgggagtcagtcagggtcaggtgggaggagtcagggtcaggtgg gaggagtcagggtcaggtgggagtcagtcagggtcaggtgggaggagtcagggtcaggtg ggag-gagtcagggtcagatgggagtcagtcagagtcaggtgggaggagtcagggtcagg -tgggagg----------------------------------------------------- +tgggag------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------- ->p:HG002_1_chr20:63559315-63559820 +--------------------------- +>p:HG002_1_chr20:63559315-63559819 gcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggtctcctgt gagggtgggagtcagatgggagtcagtcagggtcaggtgggaggagtcagggtcaggtgg gaggagtcagggtcaggtgggagtcagtcagggtcaggtgggaggagtcagggtcaggtg ggag-gagtcagggtcagatgggagtcagtcagagtcaggtgggaggagtcagggtcagg -tgggagg----------------------------------------------------- +tgggag------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------- ->p:HG002_2_chr20:63559315-63559820 +--------------------------- +>p:HG002_2_chr20:63559315-63559819 
gcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggtctcctgt gagggtgggagtcagatgggagtcagtcagggtcaggtgggaggagtcagggtcaggtgg gaggg--------------------agtcagggtcaggtgggaggagtcagggtcaggtg ggag-gagtcagggtcagatgggagtcagtcagagtcaggtgggaggagtcagggtcagg -tgggagg--------------------agtcagggtcaggt---cggagtcagggtcagg +tgggag--------------------gagtcagggtcaggt---cggagtcagggtcagg tgggag-gagtcagggtcaggtgggagtcagtcagggtcaggtggga------------- ----ggagtcagggtcaggtgggaggagtcagggtcagatgggagtcagtcagagtcagg -tgggaggagtcagggtcaggtgggagg--------------------------------- ----------------------------- ->ref_chr20:63559315-63559820 +tgggaggagtcagggtcaggtgggag---------------------------------- +--------------------------- +>ref_chr20:63559315-63559819 gcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggtctcctgt gagggtgggagtcagatgggagtcagtcagggtcaggtgggaggagtcagggtcaggtgg gaggagtcagggtcaggtgggagtcagtcagggtcaggtgggaggagtcagggtcaggtg @@ -47,4 +47,4 @@ tgggaggagtcagggtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaga tgggagtcagtcagagtcaggtgggag-gagtcagggtcaggtgggaggagtcagggtca ggtcggagtcagggtcaggtgggaggagtcagggtcaggtgggagtcagtcagggtcagg tgggaggagtcagggtcaggtgggaggagtcagggtcagatgggagtcagtcagagtcag -gtgggaggagtcagggtcaggtgggagg +gtgggaggagtcagggtcaggtgggag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_94d9fc221f90d8b60251faa405fdfde1.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_20bd20e862e124e808d69a4270d4895a.msa similarity index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_94d9fc221f90d8b60251faa405fdfde1.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_20bd20e862e124e808d69a4270d4895a.msa index 38be0070..3b13513e 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_94d9fc221f90d8b60251faa405fdfde1.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_20bd20e862e124e808d69a4270d4895a.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:5041757-5042810 +>syndip_1_chr20:5041757-5042809 
cagtgttggagcctgcagatgctgacagtgcctgcagctttcctgcctattttaggactt gctagaagcttggcatgtcctaaaaaccctttttcaagagtctctccccagctgctgcta gcggga-ttttttttttaactctctatcctatttaacctcctcactgatgaccttttaat @@ -19,8 +19,8 @@ gactcccaaagtgctgggattacaggtgtgagccaccgtgcctggcctaattttgtattt ttagtagagacggggtttctccatgttggtcaggctggtctcgaactcctgacctcaggt gatctgctcgtcttggcctcccaaagtgttgggattataggcataagccaccacaaccga cctgactgctcatttttaaactcttgatctgacactgggggactttaagctataatcttg -ttctgttttttttctttttctcttgccttctatgctttccatt ->syndip_2_chr20:5041757-5042810 +ttctgttttttttctttttctcttgccttctatgctttccat +>syndip_2_chr20:5041757-5042809 cagtgttggagcctgcagatgctgacagtgcctgcagctttcctgcctattttaggactt gctagaagcttggcatgtcctaaaaaccctttttcaagagtctctccccagctgctgcta gcggga-ttttttttttaactctctatcctatttaacctcctcactgatgaccttttaat @@ -41,8 +41,8 @@ gactcccaaagtgctgggattacaggtgtgagccaccgtgcctggcctaattttgtattt ttagtagagacggggtttctccatgttggtcaggctggtctcgaactcctgacctcaggt gatctgctcgtcttggcctcccaaagtgttgggattataggcataagccaccacaaccga cctgactgctcatttttaaactcttgatctgacactgggggactttaagctataatcttg -ttctgttttttttctttttctcttgccttctatgctttccatt ->p:HG002_1_chr20:5041757-5042810 +ttctgttttttttctttttctcttgccttctatgctttccat +>p:HG002_1_chr20:5041757-5042809 cagtgttggagcctgcagatgctgacagtgcctgcagctttcctgcctattttaggactt gctagaagcttggcatgtcctaaaaaccctttttcaagagtctctccccagctgctgcta gcgggatttttttttttaactctctatcctatttaacctcctcactgatgaccttttaat @@ -63,8 +63,8 @@ gactcccaaagtgctgggattacaggtgtgagccaccgtgcctggcctaattttgtattt ttagtagagacggggtttctccatgttggtcaggctggtctcgaactcctgacctcaggt gatctgctcgtcttggcctcccaaagtgttgggattataggcatgagccaccacaaccga cctgactgctcatttttaaactcttgatctgacactgggggactttaagctataatcttg -ttctgttttttttctttttctcttgccttctatgctttccatt ->p:HG002_2_chr20:5041757-5042810 +ttctgttttttttctttttctcttgccttctatgctttccat +>p:HG002_2_chr20:5041757-5042809 cagtgttggagcctgcagatgctgacagtgcctgcagctttcctgcctattttaggactt gctagaagcttggcatgtcctaaaaaccctttttcaagagtctctccccagctgctgcta gcggga-ttttttttttaactctctatcctatttaacctcctcactgatgaccttttaat @@ -85,8 
+85,8 @@ gactcccaaagtgctgggattacaggtgtgagccaccgtgcccggcctaattttgtattt ttagtagagacggggtttctccatgttggtcaggctggtctcgaactcccgatctcaggt gatccgctcgtcttggcctcccaaagtgttgggattacaggcatgagccaccacacccag cctgactgctcatttttaaactcttgatctgacactgggggactttaagctataatcttg -ttctgttttttttctttttctcttgccttctatgctttccatt ->ref_chr20:5041757-5042810 +ttctgttttttttctttttctcttgccttctatgctttccat +>ref_chr20:5041757-5042809 cagtgttggagcctgcagatgctgacagtgcctgcagctttcctgcctattttaggactt gctagaagcttggcatgtcctaaaaaccctttttcaagagtctctccccagctgctgcta gcggga-ttttttttttaactctctatcctatttaacctcctcactgatgaccttttaat @@ -107,4 +107,4 @@ gactcccaaagtgctgggattacaggtgtgagccaccgtgcctggcctaattttgtattt ttagtagagacggggtttctccatgttggtcaggctggtctcgaactcctgacctcaggt gatctgctcgtcttggcctcccaaagtgttgggattataggcataagccaccacaaccga cctgactgctcatttttaaactcttgatctgacactgggggactttaagctataatcttg -ttctgttttttttctttttctcttgccttctatgctttccatt +ttctgttttttttctttttctcttgccttctatgctttccat diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_f949c02c5b00935cf4bd4b3c660a5ded.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_2404771658108744496a59df4a81aa48.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_f949c02c5b00935cf4bd4b3c660a5ded.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_2404771658108744496a59df4a81aa48.msa index 88b78cdc..10546aa5 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_f949c02c5b00935cf4bd4b3c660a5ded.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_2404771658108744496a59df4a81aa48.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:20337185-20337725 +>syndip_1_chr20:20337185-20337724 tggaaataaggaaacagggtttaggaaggcctcactagacctgttaggatggatgcatgg atggatggatggatggacagaatgggtgggtggatggatggatggatggatggatggatg gatagatgggtgggtggatggatggatggatagatgggtggatggatagatgggtggatg @@ -32,8 +32,8 @@ gatggatggatggatagatgggtgggtggatggatggatggatagatgggtgggtggatg gatggatggatagatggatagatgggtgggtggatggatggatggatagatgggtgggtg 
gatggatggatggatagatggatagatgggtgggtggatggatggatggatagatgggtg ggtggatggatggatggatagatgggtgggtgggtggatggatggatggatggatagaat -aaaaaagaaagctaggacatggttctagtgttcttagcagacg ->syndip_2_chr20:20337185-20337725 +aaaaaagaaagctaggacatggttctagtgttcttagcagac +>syndip_2_chr20:20337185-20337724 tggaaataaggaaacagggtttaggaaggcctcactagacctgttaggatggatgcatgg atggatggatggatggacagaatgggtgggtggatggatggatggatggatggatggatg gatggatagatgggtgggtggatggatggatggatagatgggtggatggataaatggatg @@ -67,8 +67,8 @@ gatggatagatgggtgggtgggtggatggatggatggatggataga-------------- ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------------------------------------at -aaaaaagaaagctaggacatggttctagtgttcttagcagacg ->p:HG002_1_chr20:20337185-20337725 +aaaaaagaaagctaggacatggttctagtgttcttagcagac +>p:HG002_1_chr20:20337185-20337724 tggaaataaggaaacagggtttaggaaggcctcactagacctgttaggatggatgcatgg atggatggatggatggacagaatgggtgggtggatggatggatggatagatggatggatg gatggatggatggatggatggatggatggatggatagatgggtgggtgggtggatggatg @@ -102,8 +102,8 @@ gatggatagatgggtgggtgggtggatggatggatggatggataga-------------- ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------------------------------------at -aaaaaagaaagctaggacatggttctagtgttcttagcagacg ->p:HG002_2_chr20:20337185-20337725 +aaaaaagaaagctaggacatggttctagtgttcttagcagac +>p:HG002_2_chr20:20337185-20337724 tggaaataaggaaacagggtttaggaaggcctcactagacctgttaggatggatgcatgg atggatggatggatggacagaatgggtgggtggatggatggatggatggatggatggatg gatagatgggtgggtggatggatggatggatagatgggtggatggatagatgggtggatg @@ -137,8 +137,8 @@ gatggatggatggatagatgggtgggtggatggatggatggatagatgggtggatggata aatggatggatggatggatggatagatgggtgggtggatggatggatggatggatggatg gatggatggatagatagatgggtgggtggatggatggatggatagatgggtgggtgggtg gatggatggatggatggatagatgggtgggtgggtggatggatggatggatggatagaat -aaaaaagaaagctaggacatggttctagtgttcttagcagacg 
->ref_chr20:20337185-20337725 +aaaaaagaaagctaggacatggttctagtgttcttagcagac +>ref_chr20:20337185-20337724 tggaaataaggaaacagggtttaggaaggcctcactagacctgttaggatggatgcatgg atggatggatggatggacagaatgggtgggtggatggatggatggatagatggatggatg gatggatggatggatggatggatggatggatggatagatgggtgggtgggtggatggatg @@ -172,4 +172,4 @@ gatagatgggtgggtgggtggatggatggatggatggataga------------------ ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------------------------------------at -aaaaaagaaagctaggacatggttctagtgttcttagcagacg +aaaaaagaaagctaggacatggttctagtgttcttagcagac diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_717cdf145eb3b53da7ad74d00c183277.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_241ebaea2bd4d12cce6bff185f492856.msa similarity index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_717cdf145eb3b53da7ad74d00c183277.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_241ebaea2bd4d12cce6bff185f492856.msa index be26405d..683e67f5 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_717cdf145eb3b53da7ad74d00c183277.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_241ebaea2bd4d12cce6bff185f492856.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:22082166-22084006 +>syndip_1_chr20:22082166-22084005 tatgtatatataatatatgtatatactatacatgtatatattatattatacatgtatata ttataagtatatactatacatgcatgtattatatgtatatactatacatgcatgtattat atgtatatattatacatgcatgtattatatgtatatattatacatgcatgtattatatgt @@ -36,8 +36,8 @@ gccttaaactctaaggtcttctgtgcattcattcctgcttcctattacatatatattata tagatacatatataatatatacatatatactacataatatatgcatatataatatataca tatgttgtatatgtatatataatatatacatatgttgtatatgtatatataatatataca tatgttgtatatgtatatataatatatacatatgttgtatatgtatatataatatataca -tatgttgtatatgtatatataatatatacatatgttgtatatgtatatataatat ->syndip_2_chr20:22082166-22084006 +tatgttgtatatgtatatataatatatacatatgttgtatatgtatatataata +>syndip_2_chr20:22082166-22084005 
tatgtatatataatatatgtatatactatacatgtatatattatattatacatgtatata ttataagtatatactatacatgcatgtattatatgtatatac------------------ ----------tatacatgcatgtattatatgtatatattatacatgcatgtattatatgt @@ -74,9 +74,9 @@ atatattatatatacgtatatattaatatacatatatatgtgtctcattataccaataat gccttaaactctaaggtcttctgtgcattcattcctgcttcctattacatatatattata tagatacatatataatatatacatatatactacataatatatgcatatataatatataca tatgttgtatatgtatatataatatatacatatgttgtatatgtatatataatatataca -tatgttgtatatgtatatataatat----------------------------------- -------------------------------------------------------- ->p:HG002_1_chr20:22082166-22084006 +tatgttgtatatgtatatataata------------------------------------ +------------------------------------------------------ +>p:HG002_1_chr20:22082166-22084005 tatgtatatataatatatgtatatactatacatgtatatattatattatacatgtatata ttataagtatatactatacatgcatgtattatatgtatatac------------------ ----------tatacatgcatgtattatatgtatatattatacatgcatgtattatatgt @@ -113,9 +113,9 @@ atatattatatatacgtatatattaatatacatatatatgtgtctcattataccaataat gccttaaactctaaggtcttctgtgcattcattcctgcttcctattacatatatattata tagatacatatataatatatacatatatactacataatatatgcatatataatatataca tatgttgtatatgtatatataatatatacatatgttgtatatgtatatataatatataca -tatgttgtatatgtatatataatat----------------------------------- -------------------------------------------------------- ->p:HG002_2_chr20:22082166-22084006 +tatgttgtatatgtatatataata------------------------------------ +------------------------------------------------------ +>p:HG002_2_chr20:22082166-22084005 tatgtatatataatatatgtatatactatacatgtatatattatattatacatgtatata ttataagtatatactatacatgcatgtattatatgtatatactatacatgcatgtattat atgtatatattatacatgcatgtattatatgtatatattatacatgcatgtattatatgt @@ -153,8 +153,8 @@ gccttaaactctaaggtcttctgtgcattcattcctgcttcctattacatatatattata tagatacatatataatatatacatatatactacataatatatgcatatataatatataca tatgttgtatatgtatatataatatatacatatgttgtatatgtatatataatatataca tatgttgtatatgtatatataatatatacatatgttgtatatgtatatataatatataca 
-tatgttgtatatgtatatataatatatacatatgttgtatatgtatatataatat ->ref_chr20:22082166-22084006 +tatgttgtatatgtatatataatatatacatatgttgtatatgtatatataata +>ref_chr20:22082166-22084005 tatgtatatataatatatgtatatactatacatgtatatattatattatacatgtatata ttataagtatatactatacatgcatgtattatatgtatatactatacatgcatgtattat atgtatatattatacatgcatgtattatatgtatatattatacatgcatgtattatatgt @@ -191,5 +191,5 @@ atatattatatatacgtatatattaatatacatatatatgtgtctcattataccaataat gccttaaactctaaggtcttctgtgcattcattcctgcttcctattacatatatattata tagatacatatataatatatacatatatactacataatatatgcatatataatatataca tatgttgtatatgtatatataatatatacatatgttgtatatgtatatataatatataca -tatgttgtatatgtatatataatat----------------------------------- -------------------------------------------------------- +tatgttgtatatgtatatataata------------------------------------ +------------------------------------------------------ diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_e19b79eecb589d0faff3fd8eba62aeef.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_24fd8ff0eec7e23320c9f4385933e3f5.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_e19b79eecb589d0faff3fd8eba62aeef.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_24fd8ff0eec7e23320c9f4385933e3f5.msa index 65babcd3..b3b44011 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_e19b79eecb589d0faff3fd8eba62aeef.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_24fd8ff0eec7e23320c9f4385933e3f5.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:55627244-55628551 +>syndip_1_chr20:55627244-55628550 gtatcagctatccactacatacttagaaatttggctaattatcataacataaattaacat tttatggctgtgaaaagagaatagaaatggattttagaatgtcaagtatttatcactagt atgtgttaatataaatagcttaataaaaatagggcatttatatgtatatatgtgggtgtt @@ -21,8 +21,8 @@ tgtatacattatatattatgtatacatatatgtata------------------------ tgtgcacacacacacgagatatgttggttttgtgtaatgcaaccaaataaattaagtata cataggagagagtcactgccctgactcttaaatgtaacccttattaaagaaaatatttta 
gttcattttttatctatcaaccttttttatctatcaactctgaaatctgcaatcaatcat -atttggttat ->syndip_2_chr20:55627244-55628551 +atttggtta +>syndip_2_chr20:55627244-55628550 gtatcagctatccactacatacttagaaatttggctaattatcataacataaattaacat tttatggctgtgaaaagagaatagaaatggattttagaatgtcaagtatttatcactagt atgtgttaatataaatagcttaataaaaatagggcatttatatgtatatatgtgggtgtt @@ -45,8 +45,8 @@ tacgtatatacgtatatacacgtatatattaaatgataaatttatatgtatatatgcgtg tgtgcacacacacacgagatatgttggttttgtgtaatgcaaccaaataaattaagtata cataggagagagtcactgccctgactcttaaatgtaacccttattaaagaaaatatttta gttcattttttatctatcaaccttttttatctatcaactctgaaatctgcaatcaatcat -atttggttat ->p:HG002_1_chr20:55627244-55628551 +atttggtta +>p:HG002_1_chr20:55627244-55628550 gtatcagctatccactacatacttagaaatttggctaattatcataacataaattaacat tttatggctgtgaaaagagaatagaaatggattttagaatgtcaagtatttatcactagt atgtgttaatataaatagcttaataaaaatagggcatttatatgtatatatgtgggtgtt @@ -69,8 +69,8 @@ tacgtatatacgtatatacacgtatatattaaatgataaatttatatgtatatatgcgtg tgtgcacacacacacgagatatgttggttttgtgtaatgcaaccaaataaattaagtata cataggagagagtcactgccctgactcttaaatgtaacccttattaaagaaaatatttta gttcattttttatctatcaaccttttttatctatcaactctgaaatctgcaatcaatcat -atttggttat ->p:HG002_2_chr20:55627244-55628551 +atttggtta +>p:HG002_2_chr20:55627244-55628550 gtatcagctatccactacatacttagaaatttggctaattatcataacataaattaacat tttatggctgtgaaaagagaatagaaatggattttagaatgtcaagtatttatcactagt atgtgttaatataaatagcttaataaaaatagggcatttatatgtatatatgtgggtgtt @@ -93,8 +93,8 @@ tgtatacattatatattatgtatacatatatgtata------------------------ tgtgcacacacacacgagatatgttggttttgtgtaatgcaaccaaataaattaagtata cataggagagagtcactgccctgagtcttaaatgtaacccttattaaagaaaatatttta gttcattttttatctatcaaccttttttatctatcaactctgaaatctgcaatcaatcat -atttggttat ->ref_chr20:55627244-55628551 +atttggtta +>ref_chr20:55627244-55628550 gtatcagctatccactacatacttagaaatttggctaattatcataacataaattaacat tttatggctgtgaaaagagaatagaaatggattttagaatgtcaagtatttatcactagt atgtgttaatataaatagcttaataaaaatagggcatttatatgtatatatgtgggtgtt @@ -117,4 +117,4 @@ 
tacatatatacgtatatacacgtatatattaaatgataaatttatatgtatatatgcgtg tgtgcacacacacacgagatatgttggttttgtgtaatgcaaccaaataaattaagtata cataggagagagtcactgccctgactcttaaatgtaacccttattaaagaaaatatttta gttcattttttatctatcaaccttttttatctatcaactctgaaatctgcaatcaatcat -atttggttat +atttggtta diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_c35059068940421b229c17e87e720ab8.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_29711408027d779b5a2977378e72a9e5.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_c35059068940421b229c17e87e720ab8.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_29711408027d779b5a2977378e72a9e5.msa index 0c0de308..605e7ec2 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_c35059068940421b229c17e87e720ab8.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_29711408027d779b5a2977378e72a9e5.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:61783303-61784940 +>syndip_1_chr20:61783303-61784939 ctccaaaggcacattctatatgatttttgcccacaacctttccttaatatgtacatgttt tctagataaaaacattaaatacagctaaagtgacatcaaggtctccctcccttcctttcc tccaatacgtgagagaaatgtaatcccagttccttgggacagttctcgaggccctcagat @@ -26,8 +26,8 @@ aggagaatgtaag----------------------------------------------- cccccaggagaatgtaagcccagttcctcggaacagttctccaggccctcagatgtcctg tgcccccggggcctcatcagacattaagcatgcagtccctttgcagctctggatggcgcg agtttcccacgtcccatccccttcctggctatgggcaagtgcaggctggatcctggcctc -ctccttcctggcctccag ->syndip_2_chr20:61783303-61784940 +ctccttcctggcctcca +>syndip_2_chr20:61783303-61784939 ctccaaaggcacattctatatgatttttgcccacaacctttccttaatatgtacatgttt tctagataaaaacattaaatacagctaaagtgacatcaaggtctccctcccttcctttcc tccaatacgtgagagaaatgtaatcccagttccttgggacagttctcgaggccctcagat @@ -55,8 +55,8 @@ a----------------------------------------------------------- cccccaggagaatgtaagcccagttcctcggaacagttctccaggccctcagatgtcctg tgcccccggggcctcatcagacattaagcatgcagtccctttgcagctctggatggcgcg agtttcccacgtcccatccccttcctggctatgggcaagtgcaggctggatcctggcctc 
-ctccttcctggcctccag ->p:HG002_1_chr20:61783303-61784940 +ctccttcctggcctcca +>p:HG002_1_chr20:61783303-61784939 ctccaaaggcacattctatatgatttttgcccacaacctttccttaatatgtacatgttt tctagataaaaacattaaatacagctaaagtgacatcaaggtctccctcccttcctttcc tccaatacgtgagagaaatgtaatcccagttccttgggacagttctcgaggccctcagat @@ -84,8 +84,8 @@ a----------------------------------------------------------- cccccaggagaatgtaagcccagttcctcggaacagttctccaggccctcagatgtcctg tgcccccggggcctcatcagacattaagcatgcagtccctttgcagctctggatggcgcg agtttcccacgtcccatccccttcctggctatgggcaagtgcaggctggatcctggcctc -ctccttcctggcctccag ->p:HG002_2_chr20:61783303-61784940 +ctccttcctggcctcca +>p:HG002_2_chr20:61783303-61784939 ctccaaaggcacattctatatgatttttgcccacaacctttccttaatatgtacatgttt tctagataaaaacattaaatacagctaaagtgacatcaaggtctccctcccttcctttcc tccaatacgtgagagaaatgtaatcccagttccttgggacagttctcgaggccctcagat @@ -113,8 +113,8 @@ aggagaatgtaag----------------------------------------------- cccccaggagaatgtaagcccagttcctcggaacagttctccaggccctcagatgtcctg tgcccccggggcctcatcagacattaagcatgcagtccctttgcagctctggatggcgcg agtttcccacgtcccatccccttcctggctatgggcaagtgcaggctggatcctggcctc -ctccttcctggcctccag ->ref_chr20:61783303-61784940 +ctccttcctggcctcca +>ref_chr20:61783303-61784939 ctccaaaggcacattctatatgatttttgcccacaacctttccttaatatgtacatgttt tctagataaaaacattaaatacagctaaagtgacatcaaggtctccctcccttcctttcc tccaatacgtgagagaaatgtaatcccagttccttgggacagttctcgaggccctcagat @@ -142,4 +142,4 @@ ccaagagaaatgtaatcccagttcctcgggacagttctcaaggccctccgatatcctgtg cccccaggagaatgtaagcccagttcctcggaacagttctccaggccctcagatgtcctg tgcccccggggcctcatcagacattaagcatgcagtccctttgcagctctggatggcgcg agtttcccacgtcccatccccttcctggctatgggcaagtgcaggctggatcctggcctc -ctccttcctggcctccag +ctccttcctggcctcca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_9b6ec2bc5fb37634cbfbb07c76684bcf.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_2acb877c85cd2b044d5bd5f0537d9b2e.msa similarity index 95% rename from 
repo_utils/test_files/external/fake_mafft/lookup/fm_9b6ec2bc5fb37634cbfbb07c76684bcf.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_2acb877c85cd2b044d5bd5f0537d9b2e.msa index 62de31e0..a58724da 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_9b6ec2bc5fb37634cbfbb07c76684bcf.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_2acb877c85cd2b044d5bd5f0537d9b2e.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:38123699-38124104 +>syndip_1_chr20:38123699-38124103 taataataatatataatatttataataataatatatataatatgtatatataatatacat atacctattatatataatatacctattatatataatatacatatacctattatatataat atacctattatatataagatacctattatatataatatacatatacctattatatatata @@ -19,8 +19,8 @@ attatatattatatataatacacattatatattatatataatacacattatatattatat aatacacattatatattatatataatacacattatatatataatacacattatatattat atataatacataatacacattatatattatatataatacacattatatataatatataac acacattatatataatatataacacacattatatattatatataatacacattatatatt -atata ->syndip_2_chr20:38123699-38124104 +atat +>syndip_2_chr20:38123699-38124103 taataataatatataatatttataataataatatatataatatgtatatataatatacat atacctattatatataatatacctattatatataatatacatatacctattatatataat atacctattatatataagatacctattatatataatatacatatacctattatatatata @@ -41,8 +41,8 @@ attatatattatatataatacacattatatattatatataatacacattatatattatat aatacacattatatattatatataatacacattatatatataatacacattatatattat atataatacataatacacattatatattatatataatacacattatatataatatataac acacattatatataatatataacacacattatatattatatataatacacattatatatt -atata ->p:HG002_1_chr20:38123699-38124104 +atat +>p:HG002_1_chr20:38123699-38124103 taataataatatataatatttataataataatatatataatatgtatatataatatacat atacctattatatataatatacctattatatataatatacataaacctattatatataat atacctattatatataagatacctattatatataatatacctatacctattatatatata @@ -63,8 +63,8 @@ attatatattatatataatacacattatatattatatataatacacattatatattatat aatacacattatatattatatataatacacattatatatataatacacattatatattat atataatacataatacacattatatattatatataatacacattatatataatatataac acacattatatataatatataacacacattatatattatatataatacacattatatatt 
-atata ->p:HG002_2_chr20:38123699-38124104 +atat +>p:HG002_2_chr20:38123699-38124103 taataataatatataatatttataataataatatatataatatgtatatataatatacat atacctattatatataatatacctattatatataatatacataaacctattatatataat atacctattatatataagatacctattatatataatatacctatacctattatatatata @@ -85,8 +85,8 @@ attatatattatatataatacacattatatattatatataatacacattatatattatat aatacacattatatattatatataatacacattatatatataatacacattatatattat atataatacataatacacattacctattatatataatacacattatatataatatataac acacattatatataatatataacacacattatatattatatataatacacattatatatt -atata ->ref_chr20:38123699-38124104 +atat +>ref_chr20:38123699-38124103 taataataatatataatatttataataataatatatataatatgtatatataatatacat atacctattatatataatatacctattatatataatatacataaacctattatatataat atacctattatatataagatacctattatatataatatacctat--------tatatata @@ -95,7 +95,7 @@ atataatatacctattatatatatataatatacctattatatatataatatacctattat atatataatatacctattatatatataatatacctattatatataa-----------tac acattatatataata----tataacaca---------------------------catta tatataata----tataacacacattatatattata----tataatacacattatatatt -atata------------------------------------------------------- +atat-------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -107,4 +107,4 @@ atata------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------ +---- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_141ad6632a3577b19e9a4616ecdf6bbe.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_2b01ba4d781cd0dddb30db4277c8ca9b.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_141ad6632a3577b19e9a4616ecdf6bbe.msa rename to 
repo_utils/test_files/external/fake_mafft/lookup/fm_2b01ba4d781cd0dddb30db4277c8ca9b.msa index ed52f700..4bc15bf6 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_141ad6632a3577b19e9a4616ecdf6bbe.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_2b01ba4d781cd0dddb30db4277c8ca9b.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:55624382-55625908 +>syndip_1_chr20:55624382-55625907 atcccagctacttgggaggctgaggcaggagaatcacttcaatcagtgaggtggaggctg cagtgggccaagatcatgcgattgcaccccagcctgggtaagaaagtgaggtcctgtctc cctgtctctcatatatatacataactaaaataatatatagtaatatatgttatatattat @@ -35,8 +35,8 @@ tatagtatactatatactatactatactatagtatactctatagtatataaagacatata tatagtcttcaaaatgaagatcctgtatcctatatatatatataacttcaaaagtgagat acatatatatgatgtagaaactttagaatatacaaaattgtcttcttcaggataagaaaa tcctgatattcagcctaacctccactgactgtaaaggaagaatgggtttcactttcaggg -aaggtcaccttcatcc ->syndip_2_chr20:55624382-55625908 +aaggtcaccttcatc +>syndip_2_chr20:55624382-55625907 atcccagctacttgggaggctgaggcaggagaatcacttcaatcagtgaggtggaggctg cagtgggccaagatcatgcgattgcaccccagcctgggtaagaaagtgaggtcctgtctc cctgtctctcatatatatacataactaaaataatatatagtaatatatgttatatattat @@ -73,8 +73,8 @@ tatagtatactatatactatactatactatagtatactctatagtatataaagacatata tatagtcttcaaaatgaagatcctgtatcctatatatatatataacttcaaaagtgagat acatatatatgatgtagaaactttagaatatacaaaattgtcttcttcaggataagaaaa tcctgatattcagcctaacctccactgactgtaaaggaagaatgggtttcactttcaggg -aaggtcaccttcatcc ->p:HG002_1_chr20:55624382-55625908 +aaggtcaccttcatc +>p:HG002_1_chr20:55624382-55625907 atcccagctacttgggaggctgaggcaggataatcacttcaatcagtgaggtggaggctg cagtgggccaagatcatgcgattgcaccccagcctgggtaagaaagtgaggtcctgtctc cctgtctctcatatatatacataactaaaataatatatagtaatatatgttatatattat @@ -111,8 +111,8 @@ tatagtatactatatactatactatactatagtatactctatagtatataaagacatata tatagtcttcaaaatgaagatcctgtatcctatatatatatataacttcaaaagtgagat acatatatatgatgtagaaactttagaatatacaaaattgtcttcttcaggataagaaaa tcctgatattcagcctaacctccactgactgtaaaggaagaatgggtttcactttcaggg -aaggtcaccttcatcc ->p:HG002_2_chr20:55624382-55625908 
+aaggtcaccttcatc +>p:HG002_2_chr20:55624382-55625907 atcccagctacttgggaggctgaggcaggagaatcacttcaatcagtgaggtggaggctg cagtgggccaagatcatgcgattgcaccccagcctgggtaagaaagtgaggtcctgtctc cctgtctctcatatatatacataactaaaataatatatagtaatatatgttatatattat @@ -149,8 +149,8 @@ tatagtatactatatactatactatactatagtatactctatagtatataaagacatata tatagtcttcaaaatgaagatcctgtatcctatatatatatataacttcaaaagtgagat acatatatatgatgtagaaactttagaatatacaaaattgtcttcttcaggataagaaaa tcctgatattcagcctaacctccactgactgtaaaggaagaatgggtttcactttcaggg -aaggtcaccttcatcc ->ref_chr20:55624382-55625908 +aaggtcaccttcatc +>ref_chr20:55624382-55625907 atcccagctacttgggaggctgaggcaggagaatcacttcaatcagtgaggtggaggctg cagtgggccaagatcatgcgattgcaccccagcctgggtaagaaagtgaggtcctgtctc cctgtctctcatatatatacataactaaaataatatatagtaatatatgttatatattat @@ -187,4 +187,4 @@ tatagtatactatatactatactatactatagtatactctatagtatataaagacatata tatagtcttcaaaatgaagatcctgtatcctatatatatatataacttcaaaagtgagat acatatatatgatgtagaaactttagaatatacaaaattgtcttcttcaggataagaaaa tcctgatattcagcctaacctccactgactgtaaaggaagaatgggtttcactttcaggg -aaggtcaccttcatcc +aaggtcaccttcatc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_e0a1b90b66779bb5b8c7086145d6f100.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_2bcd8e5fdfaa68491615ae8e8a2e7e21.msa similarity index 83% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_e0a1b90b66779bb5b8c7086145d6f100.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_2bcd8e5fdfaa68491615ae8e8a2e7e21.msa index e9579aa1..27c9e329 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_e0a1b90b66779bb5b8c7086145d6f100.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_2bcd8e5fdfaa68491615ae8e8a2e7e21.msa @@ -1,40 +1,40 @@ ->syndip_1_chr20:17081116-17081511 +>syndip_1_chr20:17081116-17081510 tgaatattcttttatgtagtaaaagtataaaactatggctggtaaaaatgcatgccacat tgtggatcatgactacctctgggtcagaaggaaatgtgatgggtaaaggaatcaactcca tgtggaattttattttattttattttattttattttattttattttattttattttattt 
tatt-------------------------------------------------------- ----------ttattttattttattacgacaaggtctggctctgtcgcctaggctggagt gcattgtggtaatcttggcttactgcaacctctgcctcccagactcaaccagtcctccca -cctcagcctcccaagtagctgggactacaagtacgt ->syndip_2_chr20:17081116-17081511 +cctcagcctcccaagtagctgggactacaagtacg +>syndip_2_chr20:17081116-17081510 tgaatattcttttatgtagtaaaagtataaaactatggctggtaaaaatgcatgccacat tgtggatcatgactacctctgggtcagaaggaaatgtgatgggtaaaggaatcaactcca tgtgga-----attttattttattttattttattttattttattttattttattttattt tatt-------------------------------------------------------- ----------ttattttattttattacgacaaggtctggctctgtcgcctaggctggagt gcattgtggtaatcttggcttactgcaacctctgcctcccagactcaaccagtcctccca -cctcagcctcccaagtagctgggactacaagtacgt ->p:HG002_1_chr20:17081116-17081511 +cctcagcctcccaagtagctgggactacaagtacg +>p:HG002_1_chr20:17081116-17081510 tgaatattcttttatgtagtaaaagtataaaactatggctggtaaaaatgcatgccacat tgtggatcatgactacctctgggtcagaaggaaatgtgatgggtaaaggaatcaactcca tgtgga-----attttattttattttattttattttattttattttattttattttattt tatt-------------------------------------------------------- ----------ttattttattttattacgacaaggtctggctctgtcgcctaggctggagt gcattgtggtaatcttggcttactgcaacctctgcctcccagactcaaccagtcctccca -cctcagcctcccaagtagctgggactacaagtacgt ->p:HG002_2_chr20:17081116-17081511 +cctcagcctcccaagtagctgggactacaagtacg +>p:HG002_2_chr20:17081116-17081510 tgaatattcttttatgtagtaaaagtataaaactatggctggtaaaaatgcatgccacat tgtggatcatgactacctctgggtcagaaggaaatgtgatgggtaaaggaatcaactcca tgtggaattttattttattttattttattttattttattttattttattttattttattt tatt-------------------------------------------------------- ----------ttattttattttattacgacaaggtctggctctgtcgcctaggctggagt gcattgtggtaatcttggcttactgcaacctctgcctcccagactcaaccagtcctccca -cctcagcctcccaagtagctgggactacaagtacgt ->ref_chr20:17081116-17081511 +cctcagcctcccaagtagctgggactacaagtacg +>ref_chr20:17081116-17081510 tgaatattcttttatgtagtaaaagtataaaactatggctggtaaaaatgcatgccacat tgtggatcatgactacctctgggtcagaaggaaatgtgatgggtaaaggaatcaactcca 
tgtggaattttattttattttattttattttattttattttattttattttattttattt tattattttattttattttattttattttattttattttattttattttattttatttta ttttattttattattttattttattacgacaaggtctggctctgtcgcctaggctggagt gcattgtggtaatcttggcttactgcaacctctgcctcccagactcaaccagtcctccca -cctcagcctcccaagtagctgggactacaagtacgt +cctcagcctcccaagtagctgggactacaagtacg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_dd8b054c3bcd4f4a22fdac0fc01760de.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_2c05a87978c7b4aa656e86b640a18690.msa similarity index 88% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_dd8b054c3bcd4f4a22fdac0fc01760de.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_2c05a87978c7b4aa656e86b640a18690.msa index f35203e4..6bac5dcb 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_dd8b054c3bcd4f4a22fdac0fc01760de.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_2c05a87978c7b4aa656e86b640a18690.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63027897-63028267 +>syndip_1_chr20:63027897-63028266 acggaccagcatctatcatggctgttttgcgtttcttgatttcacgatgtctgttgtgtg ctcaggcccctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaa gaaccaacgcgggcggggagtggaccggtggggagagggccggtggggagagggccggtg @@ -10,8 +10,8 @@ agggggccggtggggagggggcccgcggggaggggccggtggggagggggcctgcgggga gggggccggcggggaggggccggtggggaggggccggtggggtggggccggtggggaggg ggccggtgggggggggcctgcagggagggggccggtggggaggaggctggtgaagtagcg tcaggtgcgggcctggcgtcggcgagtcaagtcgagctgatccagggccttctttctgca -gcttcatcaggttctcaattagcggagggcgctggcggtggaggagggc ->syndip_2_chr20:63027897-63028267 +gcttcatcaggttctcaattagcggagggcgctggcggtggaggaggg +>syndip_2_chr20:63027897-63028266 acggaccagcatctatcatggctgttttgcgtttcttgatttcacgatgtctgttgtgtg ctcaggcccctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaa gaaccaacgcgggcggggagtggaccggtggggagagggccggtggggagaggaccggtg @@ -23,8 +23,8 @@ ag-----------------------------ggggccggtggggagggggccggcgggga gggggccggcggggaggggccggtggggagtggtctg----------------------- 
-------------------gtggggagggggccggtggggaggaggctggtgaagtagcg tcaggtgcgggcctggcgtcggcgagtcaagtcgagctgatccagggccttctttctgca -gcttcatcaggttctcaattagcggagggcgctggcggtggaggagggc ->p:HG002_1_chr20:63027897-63028267 +gcttcatcaggttctcaattagcggagggcgctggcggtggaggaggg +>p:HG002_1_chr20:63027897-63028266 acggaccagcatctatcatggctgttttgcgtttcttgatttcacgatgtctgttgtgtg ctcaggcccctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaa gaaccaacgcgggcggggagtggaccggtggggagagggccggtggggagaggaccggtg @@ -36,8 +36,8 @@ ag-----------------------------ggggccggtggggagggggccggcgggga gggggccggcggggaggggccggtggggagtggtctg----------------------- -------------------gtggggagggggccggtggggaggaggctggtgaagtagcg tcaggtgcgggcctggcgtcggcgagtcaagtcgagctgatccagggccttctttctgca -gcttcatcaggttctcaattagcggagggcgctggcggtggaggagggc ->p:HG002_2_chr20:63027897-63028267 +gcttcatcaggttctcaattagcggagggcgctggcggtggaggaggg +>p:HG002_2_chr20:63027897-63028266 acggaccagcatctatcatggctgttttgcgtttcttgatttcacgatgtctgttgtgtg ctcaggcccctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaa gaaccaacgcgggcggggagtggaccggtggggagagggccggtggggagagggccggtg @@ -49,8 +49,8 @@ agggggccggtggggagggggcccgcggggaggggccggtggggagggggcctgcgggga gggggccggcggggaggggccggtggggaggggccggtggggtggggccggtggggaggg ggccggtgggggggggcctgcagggagggggccggtggggaggaggctggtgaagtagcg tcaggtgcgggcctggcgtcggcgagtcaagtcgagctgatccagggccttctttctgca -gcttcatcaggttctcaattagcggagggcgctggcggtggaggagggc ->ref_chr20:63027897-63028267 +gcttcatcaggttctcaattagcggagggcgctggcggtggaggaggg +>ref_chr20:63027897-63028266 acggaccagcatctatcatggctgttttgcgtttcttgatttcacgatgtctgttgtgtg ctcaggcccctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaa gaaccaacgcgggcggggagtggaccggtggggagagggccggtggggagaggaccggtg @@ -62,4 +62,4 @@ gggag--------------tggttt----------------------------------- ------------------------------------------------------------ ---------------------------------ggtggggaggaggctggtgaagtagcg tcaggtgcgggcctggcgtcggcgagtcaagtcgagctgatccagggccttctttctgca 
-gcttcatcaggttctcaattagcggagggcgctggcggtggaggagggc +gcttcatcaggttctcaattagcggagggcgctggcggtggaggaggg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_0b1f7552f96468523661603cd4269fe6.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_2e395a6e1327c3cee2e68d6b7c844cc7.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_0b1f7552f96468523661603cd4269fe6.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_2e395a6e1327c3cee2e68d6b7c844cc7.msa index 23695bd4..13fdbb64 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_0b1f7552f96468523661603cd4269fe6.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_2e395a6e1327c3cee2e68d6b7c844cc7.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:5039849-5040600 +>syndip_1_chr20:5039849-5040599 agacacagtgtccaggattgcattatgtggccaccccagatgaaggggagatgagaatgt gagtgttccgttttctaatatgtataatacaggtaggctgggcagaaagagttagacagg tattgggttagccagcctacagtgtctaacacaccaacaaaatcaagcattggagagcat @@ -16,8 +16,8 @@ gggcaacatagggagaacttgtctctattt------------------------------ ------------------------------------------------------------ --------------aaaaaaaaaaatgctcatacgtggttggcaggagtgtaaatagcct caaccaatttggaaagcaatttgccaatactagttaatataacgatctagatagtgggca -acccggattagcctttt ->syndip_2_chr20:5039849-5040600 +acccggattagccttt +>syndip_2_chr20:5039849-5040599 agacacagtgtccaggattgcattatgtggccaccccagatgaaggggagatgagaatgt gagtgttccgttttctaatatgtataatacaggtaggctgggcagaaagagttagacagg tattgggttagccagcctacagtgtctaacacaccaacaaaatcaagcattggagagcat @@ -35,8 +35,8 @@ gggcaacatagggagaacttgtctctattt------------------------------ ------------------------------------------------------------ --------------aaaaaaaaaaatgctcatacgtggttggcaggagtgtaaatagcct caaccaatttggaaagcaatttgccaatactagttaatataacgatctagatagtgggca -acccggattagcctttt ->p:HG002_1_chr20:5039849-5040600 +acccggattagccttt +>p:HG002_1_chr20:5039849-5040599 agacacagtgtccaggattgcattatgtggccaccccagatgaaggggagatgagaatgt 
gagtgttccgttttctaatatgtataatacaggtaggctgggcagaaagagttagacagg tattgggttagccagcctacagtgtctaacacaccaacaaaatcaagcattggagagcat @@ -54,8 +54,8 @@ cgcggtggcgggcgcctaggcaggagaatggcgtgaacccgggaagcggagcttgcagtg agccgagattgcgccactgcagtccgcagtccgacctgggcgacagagagagactccgtc tcaaaaaaaaaaaaaaaaaaaaaaatgctcatacgtggttggcaggagtgtaaatagcct caaccaatttggaaagcaatttgccaatactagttaatataacgatctagatagtgggca -acccggattagcctttt ->p:HG002_2_chr20:5039849-5040600 +acccggattagccttt +>p:HG002_2_chr20:5039849-5040599 agacacagtgtccaggattgcattatgtggccaccccagatgaaggggagatgagaatgt gagtgttccgttttctaatatgtataatacaggtaggctgggcagaaagagttagacagg tattgggttagccagcctacagtgtctaacacaccaacaaaatcaagcattggagagcat @@ -73,8 +73,8 @@ cgcggtggcgggcgcctaggcaggagaatggcgtgaacccgggaagcggagcttgcagtg agccgagattgcgccactgcagtccgcagtccgacctgggcgacagagagagactccgtc tc---aaaaaaaaaaaaaaaaaaaatgctcatacgtggttggcaggagtgtaaatagcct caaccaatttggaaagcaatttgccaatactagttaatataacgatctagatagtgggca -acccggattagcctttt ->ref_chr20:5039849-5040600 +acccggattagccttt +>ref_chr20:5039849-5040599 agacacagtgtccaggattgcattatgtggccaccccagatgaaggggagatgagaatgt gagtgttccgttttctaatatgtataatacaggtaggctgggcagaaagagttagacagg tattgggttagccagcctacagtgtctaacacaccaacaaaatcaagcattggagagcat @@ -92,4 +92,4 @@ gggcaacatagggagaacttgtctctattt------------------------------ ------------------------------------------------------------ --------------aaaaaaaaaaatgctcatacgtggttggcaggagtgtaaatagcct caaccaatttggaaagcaatttgccaatactagttaatataacgatctagatagtgggca -acccggattagcctttt +acccggattagccttt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_a9fdcb29d10f9c256c49525ef4fab80f.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_32f6f2dff38622b1e9f06443baeb622f.msa similarity index 88% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_a9fdcb29d10f9c256c49525ef4fab80f.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_32f6f2dff38622b1e9f06443baeb622f.msa index a1bc1c22..d5ef8898 100644 --- 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_a9fdcb29d10f9c256c49525ef4fab80f.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_32f6f2dff38622b1e9f06443baeb622f.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:62270313-62270928 +>syndip_1_chr20:62270313-62270927 ctcctggcctctctgggtcctctccttgtccctcctggcctctctgggtcctctccttgt ccctccctgacctctctgggtcctctccttgtccctctctggcctctctgggtcctctcc ttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctccttgtc @@ -9,8 +9,8 @@ ttgtccctctctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtc cctctctggcctctctgggtcctctccttgtccctctctgggtcctctccttgtccctct ctggcctctctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtcc ctctctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctc -tggcctctctgggtcct ->syndip_2_chr20:62270313-62270928 +tggcctctctgggtcc +>syndip_2_chr20:62270313-62270927 ctcctggcctctctgggtcctctccttgtccctcctggcctctctgggtcctctccttgt ccctccctgacctctctgggtcctctccttgtccctctctggcctctctgggtcctctcc ttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctccttgtc @@ -19,10 +19,10 @@ ctggcctctctgggtcctctccttgtccctctctgggtcctctccttgtccctctctga- ------------------------------------------cctctctgggtcctctcc ttgtccctctctgggtcctctccttgtc-------------------------------- ---------cctctctgggtcctctccttgtccctctctgggtcctctccttgtccctct -ctggcctctctgggtcctctccttgtccctctctggcctctctgggtcct---------- +ctggcctctctgggtcctctccttgtccctctctggcctctctgggtcc----------- ------------------------------------------------------------ ------------------ ->p:HG002_1_chr20:62270313-62270928 +---------------- +>p:HG002_1_chr20:62270313-62270927 ctcctggcctctctgggtcctctccttgtccctcctggcctctctgggtcctctccttgt ccctccctgacctctctgggtcctctccttgtccctctctggcctctctgggtcctctcc ttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctccttgtc @@ -31,10 +31,10 @@ ctggcctctctgggtcctctccttgtccctctctgggtcctctccttgtccctctctgg- ------------------------------------------cctctctgggtcctctc- ----------------------------------------ctctgggtcctctccttgtc 
cctctctggcctctctgggtcctctccttgtccctctctgggtcctctccttgtccctct -ctggcctctctgggtcctctccttgtccctctctggcctctctgggtcct---------- +ctggcctctctgggtcctctccttgtccctctctggcctctctgggtcc----------- ------------------------------------------------------------ ------------------ ->p:HG002_2_chr20:62270313-62270928 +---------------- +>p:HG002_2_chr20:62270313-62270927 ctcctggcctctctgggtcctctccttgtccctcctggcctctctgggtcctctccttgt ccctccctgacctctctgggtcctctccttgtccctctctggcctctctgggtcctctcc ttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctccttgtc @@ -45,8 +45,8 @@ ttgtccctctctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtc cctctctggcctctctgggtcctctccttgtccctctctgggtcctctccttgtccctct ctggcctctctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtcc ctctctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctc -tggcctctctgggtcct ->ref_chr20:62270313-62270928 +tggcctctctgggtcc +>ref_chr20:62270313-62270927 ctcctggcctctctgggtcctctccttgtccctcctggcctctctgggtcctctccttgt ccctccctgacctctctgggtcctctccttgtccctctctggcctctctgggtcctctcc ttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctccttgtc @@ -57,4 +57,4 @@ ttgtccctctctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtc cctctctggcctctctgggtcctctccttgtccctctctgggtcctctccttgtccctct ctggcctctctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtcc ctctctgggtcctctccttgtccctctctggcctctctgggtcctctccttgtccctctc -tggcctctctgggtcct +tggcctctctgggtcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_6147414e8642b5c9ece9fffa3cd6290f.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_3309323b7a8f49f7d21f616b3f162df2.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_6147414e8642b5c9ece9fffa3cd6290f.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_3309323b7a8f49f7d21f616b3f162df2.msa index 85418d0f..d02c4b68 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_6147414e8642b5c9ece9fffa3cd6290f.msa +++ 
b/repo_utils/test_files/external/fake_mafft/lookup/fm_3309323b7a8f49f7d21f616b3f162df2.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:64134783-64136487 +>syndip_1_chr20:64134783-64136486 agcattaggtgtatctcctaatgctatccctcccctctccccgcaccccacaaaagtccc cggtgtgtgatgttccccttgaaaaggaggcgtttctgcttctcctctttccggtccttg acagtgattccgtgcatcacaaacacacacgctccctcagttacacgaatcacaaacaca @@ -27,8 +27,8 @@ tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctc cctcagttttgtgtg---------------------------------tcacaaacacac acgctccctcagttgtcaggggactttgcactgtgcctttgcatgaaacaccatggcttg aagaggtcccgcatcgttagggcccccttgcccatggtgtggcgtcccctgaccgattta -gccgttattgagggacatcgaggtt ->syndip_2_chr20:64134783-64136487 +gccgttattgagggacatcgaggt +>syndip_2_chr20:64134783-64136486 agcattaggtgtatctcctaatgctatccctcccctctccccgcaccccacaaaagtccc cggtgtgtgatgttccccttgaaaaggaggcgtttctgcttctcctctttccggtccttg acagtgattccgtgcatcacaaacacacacgctccctcagttacacgaatcacaaacaca @@ -57,8 +57,8 @@ tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctc cctcagttttgtgtgtcacaaacacacacgctccctcagttttgcgcatcacaaacacac acgctccctcagttgtcaggggactttgcactgtgcctttgcatgaaacaccatggcttg aagaggtcccgcatcgttagggcccccttgcccatggtgtggcgtcccctgaccgattta -gccgttattgagggacatcgaggtt ->p:HG002_1_chr20:64134783-64136487 +gccgttattgagggacatcgaggt +>p:HG002_1_chr20:64134783-64136486 agcattaggtgtatctcctaatgctatccctcccctctccccgcaccccacaaaagtccc cggtgtgtgatgttccccttgaaaaggaagcgtttctgcttctcctctttccggtccttg acagtgattccgtgcatcacaaacacacacgctccctcagttacacgaatcacaaacaca @@ -87,8 +87,8 @@ tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctc cctcagttttgtgtgtcacaaacacacacgctccctcagttttgcgcatcacaaacacac acgctccctcagttgtcaggggactttgcactgtgcctttgcatgaaacaccatggcttg aagaggtcccgcatcgttagggcccccttgcccatggtgtggcgtcccctgaccgattta -gccgttattgagggacatcgaggtt ->p:HG002_2_chr20:64134783-64136487 +gccgttattgagggacatcgaggt +>p:HG002_2_chr20:64134783-64136486 agcattaggtgtatctcctaatgctatccctcccctctccccgcaccccacaaaagtccc 
cggtgtgtgatgttccccttgaaaaggaagcgtttctgcttctcctctttccggtccttg acagtgattccgtgcatcacaaacacacacgctccctcagttacacgaatcacaaacaca @@ -117,8 +117,8 @@ tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctc cctcagttttgtgtg---------------------------------tcacaaacacac acgctccctcagttgtcaggggactttgcactgtgcctttgcatgaaacaccatggcttg aagaggtcccgcatcgttagggcccccttgcccatggtgtggcgtcccctgaccgattta -gccgttattgagggacatcgaggtt ->ref_chr20:64134783-64136487 +gccgttattgagggacatcgaggt +>ref_chr20:64134783-64136486 agcattaggtgtatctcctaatgctatccctcccctctccccgcaccccacaaaagtccc cggtgtgtgatgttccccttgaaaaggaggcgtttctgcttctcctctttccggtccttg acagtgattccgtgcatcacaaacacacacgctccctcagttacacgaatcacaaacaca @@ -147,4 +147,4 @@ tcagttatgtgcgtcacaaacacagacgctccctcagttttgcgcatcacacacacgctc cctcagttttgtgtgtcacaaacacacacgctccctcagttttgcgcatcacaaacacac acgctccctcagttgtcaggggactttgcactgtgcctttgcatgaaacaccatggcttg aagaggtcccgcatcgttagggcccccttgcccatggtgtggcgtcccctgaccgattta -gccgttattgagggacatcgaggtt +gccgttattgagggacatcgaggt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_02b73a4f2fb9c7243d2ae74002dc5b27.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_351515eae575f51a5c58a89edbda4b09.msa similarity index 92% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_02b73a4f2fb9c7243d2ae74002dc5b27.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_351515eae575f51a5c58a89edbda4b09.msa index b67a5751..9e58793b 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_02b73a4f2fb9c7243d2ae74002dc5b27.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_351515eae575f51a5c58a89edbda4b09.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:16395049-16395522 +>syndip_1_chr20:16395049-16395521 acagtcgtggacagcaaagctaaggctcagtacaacaaagggcatacccctgtcatcggt gtcttcttactctctccctaatgtgatgtattcatcctcccagtttagtcaaggcagaaa ggagatgaggaaggggagaggaggggagaggagggaagaggaggggaggggaggggagag @@ -8,8 +8,8 @@ gaggggaggccaagggaggggaggggaggagaggggaggccaagggaggggaggggaggg 
gaggagaggggaggccaagggaggggaggggaggggaggagaggggaggccaagggagaa gatatgaaagaaatagggaaggagtgggacacagggaggaaaacaaaactatttcctttc atataaatgttgtcatctggggagaagccctcactatctccttggtgcctttggcttttg -gtattcaaa ->syndip_2_chr20:16395049-16395522 +gtattcaa +>syndip_2_chr20:16395049-16395521 acagtcgtggacagcaaagctaaggctcagtacaacaaagggcatacccctgtcatcggt gtcttcttactctctccctaatgtgatgtattcatcctcccagtttagtccaggcagaaa ggagatgaggaaggggagaggaggggagaggaggggag---------------------- @@ -19,8 +19,8 @@ ggagatgaggaaggggagaggaggggagaggaggggag---------------------- -----------------------gggaggggaggggagaggaggggaggccaagggagaa gatatgaaagaaatagggaaggagtgggacacagggaggaaaacaaaactatttcctttc atataaatgttgtcatctggggagaagccctcactatctccttggtgcctttggcttttg -gtattcaaa ->p:HG002_1_chr20:16395049-16395522 +gtattcaa +>p:HG002_1_chr20:16395049-16395521 acagtcatggacagcaaagctaaggctcagtacaacaaagggcatacccctgtcattggt gtcttcttactctctccctaatgtgatgtattcatcctcccagtttagtccaggcagaaa ggagatgaggaaggggagaggaggggagaggaggggag---------------------- @@ -30,8 +30,8 @@ ggagatgaggaaggggagaggaggggagaggaggggag---------------------- -----------------------gggaggggaggggagaggaggggaggccaagggagaa gatatgaaagaaatagggaaggagtgggacacagggaggaaaacaaaactatttcctttc atataaatgttgtcatctggggagaagccctcactatctccttggtgcctttggcttttg -gtattcaaa ->p:HG002_2_chr20:16395049-16395522 +gtattcaa +>p:HG002_2_chr20:16395049-16395521 acagtcgtggacagcaaagctaaggctcagtacaacaaagggcatacccctgtcatcggt gtcttcttactctctccctaatgtgatgtattcatcctcccagtttagtcaaggcagaaa ggagatgaggaaggggagaggaggggagaggagggaagaggaggggaggggaggggagag @@ -41,8 +41,8 @@ gaggggaggccaagggaggggaggggaggagaggggaggccaagggaggggaggggaggg gaggagaggggaggccaagggaggggaggggaggggaggagaggggaggccaagggagaa gatatgaaagaaatagggaaggagtgggacacagggaggaaaacaaaactatttcctttc atataaatgttgtcatctggggagaagccctcactatctccttggtgcctttggcttttg -gtattcaaa ->ref_chr20:16395049-16395522 +gtattcaa +>ref_chr20:16395049-16395521 acagtcgtggacagcaaagctaaggctcagtacaacaaagggcatacccctgtcatcggt gtcttcttactctctccctaatgtgatgtattcatcctcccagtttagtcaaggcagaaa 
ggagatgaggaaggggagaggaggggagaggagggaagaggaggggaggggaggggagag @@ -52,4 +52,4 @@ gaggggaggccaa----------------------------------------------- -----------------------gggaggggaggggaggagaggggaggccaagggagaa gatatgaaagaaatagggaaggagtgggacacagggaggaaaacaaaactatttcctttc atataaatgttgtcatctggggagaagccctcactatctccttggtgcctttggcttttg -gtattcaaa +gtattcaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_bd2f885276994e61366fc4ae806bc3c9.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_35335f6151c790b9a47a9350279b1936.msa similarity index 93% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_bd2f885276994e61366fc4ae806bc3c9.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_35335f6151c790b9a47a9350279b1936.msa index 4e63868f..5194c214 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_bd2f885276994e61366fc4ae806bc3c9.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_35335f6151c790b9a47a9350279b1936.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:20295914-20296431 +>syndip_1_chr20:20295914-20296430 cagaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacc aacgcctgcatgcttccctccttccttccttccttccctcctttccttccttctttcttt tcttcctccctcccttcct---tcccttccttcccttccttccctccttttcttccttct @@ -13,8 +13,8 @@ ccttccttccctcctttccttccttcacctccttcccttctctcctcccttcatcccttc cttcctttcttccttccctccctcttccttcctttcttcacccctcccccttccctcatc ccttccctccttcccttccttccttccttccctccttccttcccttccttccttccttgc ttccttccttctttcctttcttcatccctccttccctccctccctccttcctgccttcct -tctttctttcttttctttcttcctccc ->syndip_2_chr20:20295914-20296431 +tctttctttcttttctttcttcctcc +>syndip_2_chr20:20295914-20296430 cagaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacc aacgcctgcatgcttccctccttccttccttccttccctcc-ttccttccttccttcctt ccttcctgcctgcctgcctgccttcctgccttcccttcctccctccc--tccctacctcc @@ -29,8 +29,8 @@ ccttcctttcttcacccctcccccttccctcatcccttccctccttcccttccttccttc cttccctccttccttccct---tccttccttccttccctccttccttcccttccttcctt 
gcttccttccttccttcccttccttccttgcttccttccttccttcccttccttccttgc ttccttccttctttcctttcttcatccctccttccctccctccctccttcctgccttcct -tctttctttcttttctttcttcctccc ->p:HG002_1_chr20:20295914-20296431 +tctttctttcttttctttcttcctcc +>p:HG002_1_chr20:20295914-20296430 cagaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacc aacgcctgcatgcttccctccttccttccttccttccctcc-ttccttccttccttcctt ccttcctgcctgcctgcctgccttcctgccttcccttcctccctccc--tccctacctcc @@ -45,8 +45,8 @@ ccttcctttcttcacccctcccccttccctcatcccttccctccttcccttccttccttc cttccctccttccttccct---tccttccttccttccctccttccttcccttccttcctt gcttccttccttccttcccttccttccttgcttccttccttccttcccttccttccttgc ttccttccttctttcctttcttcatccctccttccctccctccctccttcctgccttcct -tctttctttcttttctttcttcctccc ->p:HG002_2_chr20:20295914-20296431 +tctttctttcttttctttcttcctcc +>p:HG002_2_chr20:20295914-20296430 cagaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacc aacgcctgcatgcttccctccttccttccttccttccctcctttccttccttctttcttt tcttcctccctcccttcct---tcccttccttcccttccttccctccttttcttccttct @@ -61,8 +61,8 @@ ccttccttccctcctttccttccttcacctccttcccttctctcctcccttcatcccttc cttcctttcttccttccctccctcttccttcctttcttcacccctcccccttccctcatc ccttccctccttcccttccttccttccttccctccttccttcccttccttccttccttgc ttccttccttctttcctttcttcatccctccttccctccctccctccttcctgccttcct -tctttctttcttttctttcttcctccc ->ref_chr20:20295914-20296431 +tctttctttcttttctttcttcctcc +>ref_chr20:20295914-20296430 cagaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacc aacgcctgcatgcttccctccttccttccttccttccctcctttccttccttctttcttt tcttcctccctcccttcct---tcccttccttc--------------------------- @@ -77,4 +77,4 @@ ccttcctttcttcacccctcccccttccctcatcccttccctccttcccttccttccttc cttccctccttccttccct----------------------------------------- ---------------------------------------------tccttccttccttgc ttccttccttctttcctttcttcatccctccttccctccctccctccttcctgccttcct -tctttctttcttttctttcttcctccc +tctttctttcttttctttcttcctcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_b6f4c42dfe3dd25d3029d99aa8472281.msa 
b/repo_utils/test_files/external/fake_mafft/lookup/fm_373e1d1ae04c109544621b85b6e2768d.msa similarity index 93% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_b6f4c42dfe3dd25d3029d99aa8472281.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_373e1d1ae04c109544621b85b6e2768d.msa index 070e9101..ed703593 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_b6f4c42dfe3dd25d3029d99aa8472281.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_373e1d1ae04c109544621b85b6e2768d.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:62320931-62321974 +>syndip_1_chr20:62320931-62321973 aggccaggggagaatgatcagctggggccctggggtcattagggtggggagggctcagcc agaggtcatgaggtcagcttagggacacaggacctgtggggaggtcccagagttctggca gagtcatagggtcaggtaggagtcagatgggagtgaagggcatggggtcacccagggaga @@ -18,8 +18,8 @@ gccagcagaggggtggggccagt-----------------ggaggggtggggccagcgga ggggtggggtcagtgggggaggaagggccagcagatggggtggggccagggcacacagga aggctagggacagtagaggggtgcagccagcagtggagctgcagcgctcctcagcgtggt tggagttggacaggcatgggtctctcgggaaatggaaggcagctgggaccactctgggag -gtcgacgttaacactggggtcaccaggctgtgtgggacttggat ->syndip_2_chr20:62320931-62321974 +gtcgacgttaacactggggtcaccaggctgtgtgggacttgga +>syndip_2_chr20:62320931-62321973 aggccaggggagaatgatcagctggggccctggggtcattagggtggggagggctcagcc agaggtcatgaggtcagcttagggacacaggacctgtggggaggtcccagagttctggca gagtcatagggtcaggtaggagtcagatgggagtgaagggcatggggtcacccagggaga @@ -39,8 +39,8 @@ gccagcagaggggtggggccagtggaggggtggggccagcggaggggtggggccagcgga ggggtggggtcagtgggggaggaagggccagcagatggggtggggccagggcacacagga aggctagggacagtagaggggtgcagccagcagtggagctgcagcgctcctcagcgtggt tggagttggacaggcatgggtctctcgggaaatggaaggcagctgggaccactctgggag -gtcgacgttaacactggggtcaccaggctgtgtgggacttggat ->p:HG002_1_chr20:62320931-62321974 +gtcgacgttaacactggggtcaccaggctgtgtgggacttgga +>p:HG002_1_chr20:62320931-62321973 aggccaggggagaatgatcagctggggccctggggtcattagggtggggagggctcagcc agaggtcatgaggtcagcttagggacacaggacctgtggggaggtcccagagttctggca 
gagtcatagggtcaggtaggagtcagatgggagtgaagggcatggggtcacccagggaga @@ -60,8 +60,8 @@ gccagcagaggggtggggccagtggaggggtggggccagcggaggggtggggccagcgga ggggtggggtcagtgggggaggaagggccagcagatggggtggggccagggcacacagga aggctagggacagtagaggggtgcagccagcagtggagctgcagcgctcctcagcgtggt tggagttggacaggcatgggtctctcgggaaatggaaggcagctgggaccactctgggag -gtcgacgttaacactggggtcaccaggctgtgtgggacttggat ->p:HG002_2_chr20:62320931-62321974 +gtcgacgttaacactggggtcaccaggctgtgtgggacttgga +>p:HG002_2_chr20:62320931-62321973 aggccaggggagaatgatcagctggggccctggggtcattagggtggggagggctcagcc agaggtcatgaggtcagcttagggacacaggacctgtggggaggtcccagagttctggca gagtcatagggtcaggtaggagtcagatgggagtgaagggcatggggtcacccagggaga @@ -81,8 +81,8 @@ gccagcagaggggtggggccagt-----------------ggaggggtggggccagcgga ggggtggggtcagtgggggaggaagggccagcagatggggtggggccagggcacacagga aggctagggacagtagaggggtgcagccagcagtggagctgcagcgctcctcagcgtggt tggagttggacaggcatgggtctctcgggaaatggaaggcagctgggaccactctgggag -gtcgacgttaacactggggtcaccaggctgtgtgggacttggat ->ref_chr20:62320931-62321974 +gtcgacgttaacactggggtcaccaggctgtgtgggacttgga +>ref_chr20:62320931-62321973 aggccaggggagaatgatcagctggggccctggggtcattagggtggggagggctcagcc agaggtcatgaggtcagcttagggacacaggacctgtggggaggtcccagagttctggca gagtcatagggtcaggtaggagtcagatgggagtgaagggcatggggtcacccagggaga @@ -102,4 +102,4 @@ gccagcagaggggtggggtcagt-----------------ggaggggtggggccagtgga ggggtggggtcagtgggggaggaagggccagcagatggggtggggccagggcacacagga aggctagggacagtagaggggtgcagccagcagtggagctgcagcgctcctcagcgtggt tggagttggacaggcatgggtctctcgggaaatggaaggcagctgggaccactctgggag -gtcgacgttaacactggggtcaccaggctgtgtgggacttggat +gtcgacgttaacactggggtcaccaggctgtgtgggacttgga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_376415b15834e0e44f2cec357e45842b.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_376415b15834e0e44f2cec357e45842b.msa deleted file mode 100644 index f0fd442d..00000000 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_376415b15834e0e44f2cec357e45842b.msa +++ /dev/null @@ -1,295 +0,0 @@ 
->syndip_1_chr20:62057473-62059240 -tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag -atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga -cacaaagggcc------------------------------------------------- ------------------------------------------------------------- ----------------------------------------cagcaagacctcaggaaggac -ggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaag -acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggc -caacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaat -ggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacacc -aggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacac -aggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctca -ggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacct -cagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggct -cggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggcca -cagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtct -acacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaagga -cggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaa -gacctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcggggg -ccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaa -tggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacac -caggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggac -acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc -aggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacc -tcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggc -tcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggcc -acagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggc -ctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaag -gacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcagca -agacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggg -gccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagaca -atggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggcctacacac 
-caggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggac -acaggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacct -caggaaggacggacacaggtctacacaccaggccacagacagtggggctcggggggccaa -cctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggg -gctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggc -cacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacagg -cctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcagga -aggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcag -caagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgg -ggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacag -acaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctac -acaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacg -gacacaggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaaga -cctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcgggggcc -aacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatg -gggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccag -gccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacag -gtctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcagg -aaggacggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctc -agcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctc -gggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccaca -gacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctac -acaccaggccacagacaatggggctcggggggccaacctctccagtccccaaacacagtc -cccaacagacaacttctcctgtccccacatgcggctcccaacaaacaatttctcctgccc -tcacacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaag -ctgggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaagg -caggcctggagaagagcctgggaccaccgcggactgacc ->syndip_2_chr20:62057473-62059240 -tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag -atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga -cacaaagggcc------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- 
------------------------------------------------------cagcaag -acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggc -caacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaat -ggggctc----------------------------------------------------- ------------------------------------------------------------- ------------------------------------gggggccaacctcagcaagacctca -ggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacct -cagcaagacctcaggaagga---------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -cggacacaggtctacacaccaggccacagacaatggggctc------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------gggggccaacctcagcaagacctcaggaaggacggac -acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc -aggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacc -tcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggc -tc---------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- 
------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ----------------------------gggggccaacctctccagtccccaaacacagtc -cccaacagacaacttctcctgtcctcacatgcggctcccaacaaacaatttctcctgccc -tcacacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaag -ctgggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaagg -caggcctggagaagagcctgggaccaccgcggactgacc ->p:HG002_1_chr20:62057473-62059240 -tccaagatccatgtaccccaacaccaccctacctggccacgctcctctcacaactttgag -atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga -cacaaagggcc------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------cagcaag -acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggc -caacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaat -ggggctc----------------------------------------------------- ------------------------------------------------------------- ------------------------------------gggggccaacctcagcaagacctca 
-ggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacct -cagcaagacctcaggaagga---------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -cggacacaggtctacacaccaggccacagacaatggggctc------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------gggggccaacctcagcaagacctcaggaaggacggac -acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc -aggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacc -tcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggc -tc---------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- 
------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ----------------------------gggggccaacctctccagtccccaaacacagtc -cccaacagacaacttctcctgtcctcacatgcggctcccaacaaacaatttctcctgccc -tcacacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaag -ctgggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaagg -caggcctggagaagagcctgggaccaccgcggactgacc ->p:HG002_2_chr20:62057473-62059240 -tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag -atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga -cacaaagggcc------------------------------------------------- ------------------------------------------------------------- ----------------------------------------cagcaagacctcaggaaggac -ggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaag -acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggc -caacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaat -ggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacacc -aggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacac -aggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctca -ggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacct -cagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggct -cggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggcca -cagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtct -acacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaagga -cggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaa 
-gacctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcggggg -ccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaa -tggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacac -caggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggac -acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc -aggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacc -tcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggc -tcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggcc -acagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggc -ctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaag -gacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcagca -agacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggg -gccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagaca -atggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggcctacacac -caggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggac -acaggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacct -caggaaggacggacacaggtctacacaccaggccacagacagtggggctcggggggccaa -cctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggg -gctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggc -cacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacagg -cctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcagga -aggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcag -caagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgg -ggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacag -acaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctac -acaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacg -gacacaggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaaga -cctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcgggggcc -aacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatg -gggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccag -gccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacag -gtctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcagg 
-aaggacggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctc -agcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctc -gggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccaca -gacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctac -acaccaggccacagacaatggggctcggggggccaacctctccagtccccaaacacagtc -cccaacagacaacttctcctgtccccacatgcggctcccaacaaacaatttctcctgccc -tcacacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaag -ctgggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaagg -caggcctggagaagagcctgggaccaccgcggactgacc ->ref_chr20:62057473-62059240 -tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag -atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga -cacaaagggcccagcaagacctcaggaaggacggacacaggcctacacaccaggccacag -acaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctac -acaccaggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggac -ggacacaggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaag -acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggc -caacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaat -ggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacacc -aggccacagacaatggggctc--------------------------------------- ------------------------------------gggggccaacctcagcaagacctca -ggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacct -cagcaagacctcaggaagga---------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -cggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaa -gacctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcggggg -ccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaa -tggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacac -caggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggac -acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc -aggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacc 
-tcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggc -tcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggcc -acagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggc -ctacacaccaggccacagacaatggggctc------------------------------ ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------gggggccaacctc -agcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctc -gggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccaca -gacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctac -acaccaggccacagacaatggggctcggggggccaacctctccagtccccaaacacagtc -cccaacagacaacttctcctgtccccacatgcggctcccaacaaacaatttctcctgccc 
-tcacacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaag -ctgggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaagg -caggcctggagaagagcctgggaccaccgcggactgacc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_d27af66810a53f2070224212487e9431.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_37addba43ca0bef896cc3f8296ac4789.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_d27af66810a53f2070224212487e9431.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_37addba43ca0bef896cc3f8296ac4789.msa index 3c5fc164..9d06ac16 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_d27af66810a53f2070224212487e9431.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_37addba43ca0bef896cc3f8296ac4789.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:64131704-64134056 +>syndip_1_chr20:64131704-64134055 aggctgttgtaggtgggccaggaggggctgggcagcccatgagggtgaggctgagactcc acactgagcccaggtttaggggacctggtgggggtcacagcctcccctgtcccagacttc tcctggcctctctcatgtcctatgagaccccctcactgtccactctcatggcctgtagag @@ -66,8 +66,8 @@ tatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccc tcatgtcctatagagaccccctcactgtccactctcatggcctgtagagaccccctcact gtccacccatatggcctgtagagaccccctcactgtccaccctcaggaccctctgcattt gctccttgggttcccggaacttgagcttgggtcttggtccacagcagctttgggaagggc -cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggccc ->syndip_2_chr20:64131704-64134056 +cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggcc +>syndip_2_chr20:64131704-64134055 aggctgttgtaggtgggccaggaggggctgggcagcccatgagggtgaggctgagactcc acactgagcccaggtttaggggacctggtgggggtcacagcctcccctgtcccagacttc tcctggcctctctcatgtcctatgagaccccctcactgtccactctcatggcctgtagag @@ -135,8 +135,8 @@ tatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccc tcgtgtcctatagagaccccctcactgtccacccatatggcctgtagagaccccctcact gtccaccctcatggcctgtagagaccccctcactgtccaccctcaggaccctctgcattt gctccttgggttcccggaacttgagcttgggtcttggtccacagcagctttgggaagggc 
-cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggccc ->p:HG002_1_chr20:64131704-64134056 +cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggcc +>p:HG002_1_chr20:64131704-64134055 aggctgttgtaggtgggccaggaggggctgggcagcccatgagggtgaggctgagactcc acactgagcccaggtttaggggacctggtgggggtcacagcctcccctgtcccagacttc tcctggcctctctcatgtcctatgagaccccctcactgtccactctcatggcctgtagag @@ -204,8 +204,8 @@ tatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccc tcgtgtcctatagagaccccctcactgtccacccatatggcctgtagagaccccctcact gtccaccctcatggcctgtagagaccccctcactgtccaccctcaggaccctctgcattt gctccttgggttcccggaacttgagcttgggtcttggtccacagcagctttgggaagggc -cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggccc ->p:HG002_2_chr20:64131704-64134056 +cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggcc +>p:HG002_2_chr20:64131704-64134055 aggctgttgtaggtgggccaggaggggctgggcagcccatgagggtgaggctgagactcc acactgagcccaggtttaggggacctggtgggggtcacagcctcccctgtcccagacttc tcctggcctctctcatgtcctatgagaccccctcactgtccactctcatggcctgtagag @@ -273,8 +273,8 @@ tatagagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccc tcatgtcctatagagaccccctcactgtccactctcatggcctgtagagaccccctcact gtccacccatatggcctgtagagaccccctcactgtccaccctcaggaccctctgcattt gctccttgggttcccggaacttgagcttgggtcttggtccacagcagctttgggaagggc -cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggccc ->ref_chr20:64131704-64134056 +cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggcc +>ref_chr20:64131704-64134055 aggctgttgtaggtgggccaggaggggctgggcagcccatgagggtgaggctgagactcc acactgagcccaggtttaggggacctggtgggggtcacagcctcccctgtcccagacttc tcctggcctctctcatgtcctatgagaccccctcactgtccaccctcatgtcctgtagag @@ -342,4 +342,4 @@ tgtagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccc tcatggcctgtagagaccccctcactgtccaccctcatggcctgtagagaccccctcact gtccacccatatggcctgtagagaccccctcactgtccaccctcaggaccctctgcattt gctccttgggttcccggaacttgagcttgggtcttggtccacagcagctttgggaagggc -cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggccc 
+cctcggctgctttttgcttcagcagctggggagcagttaactttcagacccggcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_19907622767ec08f4f2a547410d2ecfb.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_37dc1e3da979ee027d03cd84019b4904.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_19907622767ec08f4f2a547410d2ecfb.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_37dc1e3da979ee027d03cd84019b4904.msa index 446dd9db..ec43eebc 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_19907622767ec08f4f2a547410d2ecfb.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_37dc1e3da979ee027d03cd84019b4904.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:13848064-13848750 +>syndip_1_chr20:13848064-13848749 tctgtaatcccagttattcaggaggctgaggcaggagaatcacctgaacccgggaggcag aggttccagtgagtcgagatcttgccactgcactccagcctgggggacagagggaaattg cgtctcaaaaaaaaaaaagaa-aagaaaagtagaagacgaagatgaagaagaataagaag @@ -12,8 +12,8 @@ aagaagaggaagaggaagaggaagaggaagaagaagaagaagaagaagaagaagaagaag aagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagagaaga caaatatctggagccgggatttgagcctggcccttgacatatgtcctaggcctcttacac tctccttaccagcccctgtcccttaggcctgggcccctgatttgcctgtcagagaagcca -gagaatc ->syndip_2_chr20:13848064-13848750 +gagaat +>syndip_2_chr20:13848064-13848749 tctgtaatcccagttattcaggaggctgaggcaggagaatcacctgaacccgggaggcag aggttccagtgagtcgagatcttgccactgcactccagcctgggggacagagggaaattg cgtctcaaaaaaaaaaaagaa-aagaaaagtagaagacgaagatgaagaagaataagaag @@ -27,8 +27,8 @@ aagaagaggaagaggaagaggaagag---------------gaagaagaagaagaagaag aagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagagaaga caaatatctggagccgggatttgagcctggcccttgacatatgtcctaggcctcttacac tctccttaccagcccctgtcccttaggcctgggcccctgatttgcctgtcagagaagcca -gagaatc ->p:HG002_1_chr20:13848064-13848750 +gagaat +>p:HG002_1_chr20:13848064-13848749 tctgtaatcccagttattcaggaggctgaggcaggagaatcacctgaacccgggaggcag aggttccagtgagtcgagatcttgccactgcactccagcctgggggacagagggaaattg 
cgtctcaaaaaaaaaaaagaa-aagaaaagtagaagacgaagatgaagaagaataagaag @@ -42,8 +42,8 @@ aagaagaggaagaggaagaggaagag---------------gaagaagaagaagaagaag aagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagagaaga caaatatctggagccgggatttgagcctggcccttgacatatgtcctaggcctcttacac tctccttaccagcccctgtcccttaggcctgggcccctgatttgcctgtcagagaagcca -gagaatc ->p:HG002_2_chr20:13848064-13848750 +gagaat +>p:HG002_2_chr20:13848064-13848749 tctgtaatcccagttattcaggaggctgaggcaggagaatcacctgaacccgggaggcag aggttccagtgagtcgagatcttgccactgcactccagcctgggggacagagggaaattg cgtctcaaaaaaaaaaaagaa-aagaaaagtagaagacgaagatgaagaagaataagaag @@ -57,8 +57,8 @@ aagaagaggaagaggaagaggaagaggaagaagaagaagaagaagaagaagaagaagaag aagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagagaaga caaatatctggagccgggatttgagcctggcccttgacatatgtcctaggcctcttacac tctccttaccagcccctgtcccttaggcctgggcccctgatttgcctgtcagagaagcca -gagaatc ->ref_chr20:13848064-13848750 +gagaat +>ref_chr20:13848064-13848749 tctgtaatcccagttattcaggaggctgaggcaggagaatcacctgaacccgggaggcag aggttccagtgagtcgagatcttgccactgcactccagcctgggggacagagggaaattg cgtctcaaaaaaaaaaaagaagaagaaaagtagaagacgaagatgaagaagaataagaag @@ -72,4 +72,4 @@ aaggagaaggagaaggagaaggagaagga------------------------------- aagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaagaaaata caaatatctggagccgggatttgagcctggcccttgacatatgtcctaggcctcttacac tctccttaccagcccctgtcccttaggcctgggcccctgatttgcctgtcagagaagcca -gagaatc +gagaat diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_392581e26c6c6574757f793b72e768f0.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_3a3d4bda444ca93397f3574b58e71451.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_392581e26c6c6574757f793b72e768f0.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_3a3d4bda444ca93397f3574b58e71451.msa index f270ff15..681ffc5d 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_392581e26c6c6574757f793b72e768f0.msa +++ 
b/repo_utils/test_files/external/fake_mafft/lookup/fm_3a3d4bda444ca93397f3574b58e71451.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:57090572-57091512 +>syndip_1_chr20:57090572-57091511 agcacagtgaggcccacttcagacttccgagctccagactatgagatcaagtgttgtttt aagccaccaagtttgggaaaatttgccacagcagcaagaggaaacgaacgcactcaggtg ctcctgccacacacactcacagttgcacaagctggtgcagctccagacccacacacctcc @@ -17,8 +17,8 @@ acacctccactggcacacaccacccatgcacacacctccactggcacgcactactccacc tgcgtgcgcctcccacctgcgttcaccctccacctgtgttcgctcccacccactcatgcc tctccacccgcaaaagtgtgagtgctgaggctgggactcaacctgagccgctggctctgg attactgggtcctgccgggctggccaggcctggtcacaggccgggtcccagtcagcagct -ctttacctcaacaaggag ->syndip_2_chr20:57090572-57091512 +ctttacctcaacaagga +>syndip_2_chr20:57090572-57091511 agcacagtgaggcccacttcagacttccgagctccagactatgagatcaagtgttgtttt aagccaccaagtttgggaaaatttgccacagcagcaagaggaaacgaacgcactcaggtg ctcctgccacacacactcacagttgcacaagctggtgcagctccagacccacacacctcc @@ -37,8 +37,8 @@ acacctccactggcacacaccacccatgcacacacctccactggcacgcactactccacc tgcgtgcgcctcccacctgcgttcaccctccacctgtgttcgctcccacccactcatgcc tctccacccgcaaaagtgtgagtgctgaggctgggactcaacctgagccgctggctctgg attactgggtcctgccgggctggccaggcctggtcacaggccgggtcccagtcagcagct -ctttacctcaacaaggag ->p:HG002_1_chr20:57090572-57091512 +ctttacctcaacaagga +>p:HG002_1_chr20:57090572-57091511 agcacagtgaggcccacttcagacttccgagctccagactatgagatcaagtgttgtttt aagccaccaagtttgggaaaatttgccacagcagcaagaggaaacgaacgcactcaggtg ctcctgccacacacactcacagttgcacaagctggtgcagctccagacccacacacctcc @@ -57,8 +57,8 @@ acacctccactggcacacaccacccatgcacacacctccactggcacgcactactccacc tgcgtgcgcctcccacctgcgttcaccctccacctgtgttcgctcccacccactcatgcc tctccacccgcaaaagtgtgagtgctgaggctgggactcaacctgagccgctggctctgg attactgggtcctgccgggctggccaggcctggtcacaggccgggtcccagtcagcagct -ctttacctcaacaaggag ->p:HG002_2_chr20:57090572-57091512 +ctttacctcaacaagga +>p:HG002_2_chr20:57090572-57091511 agcacagtgaggcccacttcagacttccgagctccagactatgagatcaagtgttgtttt aagccacgaagtttgggaaaatttgccacagcagcaagaggaaacgaacgcactcaggtg 
ctcctgccacacacactcacagttgcacaagctggtgcagctccagacccacacacctcc @@ -77,8 +77,8 @@ acacctccactggcacacaccacccatgcacacacctccactggcacgcactactccacc tgcgtgcgcctcccacctgcgttcaccctccacctgtgttcgctcccacccactcatgcc tctccacccgcaaaagtgtgagtgctgaggctgggactcaacctgagccgctggctctgg attactgggtcctgccggtctggccaggcctggtcacaggccgggtcccagtcagcagct -ctttacctcaacaaggag ->ref_chr20:57090572-57091512 +ctttacctcaacaagga +>ref_chr20:57090572-57091511 agcacagtgaggcccacttcagacttccgagctccagactatgagatcaagtgttgtttt aagccaccaagtttgggaaaatttgccacagcagcaagaggaaacgaacgcactcaggtg ctcctgccacacacactcacagttgcacaagctggtgcagctccagacccacacacctcc @@ -97,4 +97,4 @@ acacctccactggcacacaccacccatgcacacacctccactggcacgcactactccacc tgcgtgcgcctcccacctgcgttcaccctccacctgtgttcgctcccacccactcatgcc tctccacccgcaaaagtgtgagtgctgaggctgggactcaacctgagccgctggctctgg attactgggtcctgccgggctggccaggcctggtcacaggccgggtcccagtcagcagct -ctttacctcaacaaggag +ctttacctcaacaagga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_3a73de403e528810fa08a7607a17d7f5.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_3a73de403e528810fa08a7607a17d7f5.msa deleted file mode 100644 index 2453346e..00000000 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_3a73de403e528810fa08a7607a17d7f5.msa +++ /dev/null @@ -1,115 +0,0 @@ ->syndip_1_chr20:53203999-53204353 -aatatctccttcatttttataatatgatactattatatcataattttataatatactatt -atatcatattatatgatactatttatatcatcatatgatgatatactat--tatatcatc -atatgatgatata--ctattatatcatcatatgatgatatg--atactatatcatcatat -gatgatatgatactatatcatcatatgatgat---atactatatcatcatatgatgatat -gatatactatatcatcatatgatgatatgatactatatcatcat-----atgatgatatg -atactatatcatcat-----atgatgatatgatactatatcatcatatgatgatatgata -ctatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatca -tcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatga -tgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatga -tactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatat -catcatatgatgatatgatactatatcatcacatgatgatatgatactatatcatcacat 
-gatgatatgatactatatcatcacatgatgatatgatactatatcatcacatgatgatat -gatactatatcatcacatgatgatatgatactatatcatcacatgatgatatgatactat -atcatcacatgatgatatgatactatatcatcacatgatgatatgatactatatcatcat ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------------ataacatgatgatatgatactatatcatcata -taacatgatgatatgatactatatcatcatataacatgatgatatgatactatatcatca -tataacatgatgat---------------------------------------------- ------------------------------------------------------------- ------------------- ->syndip_2_chr20:53203999-53204353 -aatatctccttcatttttataatatgatactattatatcataattttataatatactatt -atatcatattatatgatactatttatatcatcatatgatgatatgatat--tatatcatc -atatgatgatatg--atactatatcatcatatgatgatatgatatactatatcatcatat -gatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatat -g--atactatatcatcatatgatgatatgatactatatcatcat-----atgatgatatg -atactatatcatcat-----atgatgatatgatactatatcatcatatgatgatatgata -ctatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatca -tcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatga -tgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatga -tactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatat -catcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatat -gatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatat -gatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactat -atcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcat -atgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgat -atgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatact -atatcatcatatgatgatatgatactatatcataacatgatgatatgatactatatcata -acatgatgatatgatactatatcatcatataacatgatgatatgatactatatcatcata -taacatgatgatatgatactatatcatcatataacatgatgatatgatactatatcatca -tataacatgatgatatgatactatatcatcatataacatgatgatatgatactatatcat -catataacatgatgatatgatactatatcatcatataacatgatgatatgatactatatc -atcatataacatgatgat 
->p:HG002_1_chr20:53203999-53204353 -aatatctccttcatttttataatatgatactattatatcataattttataatatactatt -atatcatattatatgatactatttatatcatcatatgatgatatgatatactatatcatc -atatgatgatatgatatactatatcatcatatgatgatatg--atactatatcatcatat -gatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatat -g--atactatatcatcatatgatgatatgatactatatcatcat-----atgatgatatg -atactatatcatcat-----atgatgatatgatactatatcatcatatgatgatatgata -ctatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatca -tcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatga -tgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatga -tactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatat -catcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatat -gatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatat -gatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactat -atcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcat -atgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgat -atgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatact -atatcatcatatgatgatatgatactatatcataacatgatgatatgatactatatcata -acatgatgatatgatactatatcatcatataacatgatgatatgatactatatcatcata -taacatgatgatatgatactatatcatcatataacatgatgatatgatactatatcatca -tataacatgatgatatgatactatatcatcatataacatgatgatatgatactatatcat -catataacatgatgatatgatactatatcatcatataacatgatgatatgatactatatc -atcatataacatgatgat ->p:HG002_2_chr20:53203999-53204353 -aatatctccttcatttttataatatgatactattatatcataattttataatatactatt -atatcatattatatgatactatttatatcatcatatgatgatatgatatactatatcatc -atatgatgatatg----attatatcatcatatgatgatatg--atactatatcatcatat -gatgatatgatactatatcatcatatgatgat---atactatatcatcatatgatgatat -g--atactatatcatcatatgatgatatgatactatatcatcat-----atgatgatatg -atactatatcatcat-----atgatgatatgatactatatcatcatatgatgatatgata -ctatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatca -tcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcatatga -tgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatatga 
-tactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactatat -catcatatgatgatatgatactatatcatcacatgatgatatgatactatatcatcacat -gatgatatgatactatatcatcacatgatgatatgatactatatcatcacatgatgatat -gatactatatcatcatatgatgatatgatactatatcatcatatgatgatatgatactat -atcatcatatgatgatatgatactatatcatcatatgatgatatgatactatatcatcat ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------------ataacatgatgatatgatactatatcatcata -taacatgatgatatgatactatatcatcatataacatgatgatatgatactatatcatca -tataacatgatgat---------------------------------------------- ------------------------------------------------------------- ------------------- ->ref_chr20:53203999-53204353 -aatatctccttcatttttataatatgatactattatatcataattttataatatactatt -atatcatattatatgatactatttatatcatcatatgatgatatgatatactatatcatc -atatgatgatatgatatactatatcatcatatgatgatatg--atactatatcatcatat -gatgatatgatactatatcatcatatgatgatatgatactatatcatcatatgatgatat -g--atactatatcatcatatgatgatatgatactatatcatcatataacatgatgatatg -atactatatcatcatataacatgatgatatgatactatatcatcatataacatgatgat- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- 
------------------------------------------------------------- ------------------------------------------------------------- ------------------- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_946b797ceaa32d6c1b711e085ad7903e.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_3ba0420a0f6db81c0d76f7ba53ba5226.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_946b797ceaa32d6c1b711e085ad7903e.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_3ba0420a0f6db81c0d76f7ba53ba5226.msa index eac01e91..b000d0b7 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_946b797ceaa32d6c1b711e085ad7903e.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_3ba0420a0f6db81c0d76f7ba53ba5226.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:59383704-59385336 +>syndip_1_chr20:59383704-59385335 ggttttgtgcttccaccaagccaggggactgttctgtggccctggaaccaccgtgaaagc aaacaaccctcggctgtgttgggagacagctccagctcctccatcctcaaatgtgagagc tgatgattggggtggaggaggaggtgagtggggagggggtgggggagaaggaggtgatgg @@ -33,8 +33,8 @@ gaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggagg aggtgagaggggaggagctgaggggggaaggaggtaatgaggacaaggtgaggggaaagg aggtaagggaggggaaggggagggaaggagctgagcggggcagagcatgtctcctctctt taagcatcctctgtattctgcagaggccaagcagtctggatgttggccttcctcagtggc -ccatgaaataactttgctccccagattctgagcatgggt ->syndip_2_chr20:59383704-59385336 +ccatgaaataactttgctccccagattctgagcatggg +>syndip_2_chr20:59383704-59385335 ggttttgtgcttccaccaagccaggggactgttctgtggccctggaaccaccgtgaaagc aaacaaccctcggctgtgttgggagacagctccagctcctccatcctcaaatgtgagagc tgatgattggggtggaggaggaggtgagtggggagggggtgggggagaaggaggtgatgg @@ -69,8 +69,8 @@ gaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggagg aggtgagaggggaggagctgaggggggaaggaggtaatgaggacaaggtgaggggaaagg aggtaagggaggggaaggggagggaaggagctgagcggggcagagcatgtctcctctctt taagcatcctctgtattctgcagaggccaagcagtctggatgttggccttcctcagtggc -ccatgaaataactttgctccccagattctgagcatgggt ->p:HG002_1_chr20:59383704-59385336 
+ccatgaaataactttgctccccagattctgagcatggg +>p:HG002_1_chr20:59383704-59385335 ggttttgtgcttccaccaagccaggggactgttctgtggccctggaaccaccgtgaaagc aaacaaccctcggctgtgttgggagacagctccagctcctccatcctcaaatgtgagagc tgatgattggggtggaggaggaggtgagtggggagggggtgggggagaaggaggtgatgg @@ -105,8 +105,8 @@ gaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggagg aggtgagaggggaggagctgaggggggaaggaggtaatgaggacaaggtgaggggaaagg aggtaagggaggggaaggggagggaaggagctgagcggggcagagcatgtctcctctctt taagcatcctctgtattctgcagaggccaagcagtctggatgttggccttcctcagtggc -ccatgaaataactttgctccccagattctgagcatgggt ->p:HG002_2_chr20:59383704-59385336 +ccatgaaataactttgctccccagattctgagcatggg +>p:HG002_2_chr20:59383704-59385335 ggttttgtgcttccaccaagccaggggactgttctgtggccctggaaccaccgtgaaagc aaacaaccctcggctgtgttgggagacagctccagctcctccatcctcaaatgtgagagc tgatgattggggtggaggaggaggtgagtggggagggggtgggggagaaggaggtgatgg @@ -141,8 +141,8 @@ gaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggagg aggtgagaggggaggagctgaggggggaaggaggtaatgaggacaaggtgaggggaaagg aggtaagggaggggaaggggagggaaggagctgagcggggcagagcatgtctcctctctt taagcatcctctgtattctgcagaggccaagcagtctggatgttggccttcctcagtggc -ccatgaaataactttgctccccagattctgagcatgggt ->ref_chr20:59383704-59385336 +ccatgaaataactttgctccccagattctgagcatggg +>ref_chr20:59383704-59385335 ggttttgtgcttccaccaagccaggggactgttctgtggccctggaaccaccgtgaaagc aaacaaccctcggctgtgttgggagacagctccagctcctccatcctcaaatgtgagagc tgatgattggggtggaggaggaggtgagtggggagggggtgggggagaaggaggtgatgg @@ -177,4 +177,4 @@ gaggtgagtgtgtaggaggtaagcagggaggaagtgggggggaaggaggtgatggggagg aggtgagaggggaggagctgagtggggaaggaggtaatgaggacaaggtgaggggaaagg aggtaagggaggggaaggggagggaaggagctgagcggggcagagcatgtctcctctctt taagcatcctctgtattctgcagaggccaagcagtctggatgttggccttcctcagtggc -ccatgaaataactttgctccccagattctgagcatgggt +ccatgaaataactttgctccccagattctgagcatggg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_4dff1cfc21c902e0e7568be207ccbd40.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_3d73cc7182161a9c5e90b812c17b9c4a.msa similarity 
index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_4dff1cfc21c902e0e7568be207ccbd40.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_3d73cc7182161a9c5e90b812c17b9c4a.msa index 46a6aa62..55858c55 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_4dff1cfc21c902e0e7568be207ccbd40.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_3d73cc7182161a9c5e90b812c17b9c4a.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:278800-279287 +>syndip_1_chr20:278800-279286 gatgaaacccatctcggtcctcttgcaaagcccaggtgaggggagtgaagggtgaagaaa tgcgtcttccaggtggaggaagcaccacgtgcagt------------------------- ------------------------------------------------------------ @@ -17,8 +17,8 @@ gggagggagggcgggacggagggtgggagggagggagggatggaggga----gggaggga gggacggagggcgggacggagggacggagggagggagggagggacggaggttgggacgga gggacggagggcgggcggatgccgcgggtctcaccgccaaggtgtcctgctgctgcagca gggcccgcacgtgcagggtcgtgctgttctccatctcctggatggtctcgatcagctccc -ggttgagcttgctgaggaagttctcacggc ->syndip_2_chr20:278800-279287 +ggttgagcttgctgaggaagttctcacgg +>syndip_2_chr20:278800-279286 gatgaaacccatctcggtcctcttgcaaagcccaggtgaggggagtgaagggtgaagaaa tgcgtcttccaggtggaggaagcaccacgtgcagtgggcggggaaaggcaggagagggcg cgcgagtgcgcggagggagggcgggacggagggagggagggcgggacggagggagggagg @@ -37,8 +37,8 @@ gggagggagggcgggagggatggagggagggagggcgggacggagggagggcgggaggga gggacggagggcgggacggagggacggagggagggagggagggacggaggttgggacgga gggacggagggcgggcggatgccgcgggtctcaccgccaaggtgtcctgctgctgcagca gggcccgcacgtgcagggtcgtgctgttctccatctcctggatggtctcgatcagctccc -ggttgagcttgctgaggaagttctcacggc ->p:HG002_1_chr20:278800-279287 +ggttgagcttgctgaggaagttctcacgg +>p:HG002_1_chr20:278800-279286 gatgaaacccatctcggtcctcttgcaaagcccaggtgaggggagtgaagggtgaagaaa tgcgtcttccaggtggaggaagcaccacgtgcagtgggcggggaaaggcaggagagggcg cgcgagtgcgcggagggagggcgggacggagggagggagggcgggacggagggagggagg @@ -57,8 +57,8 @@ gggagggagggcgggagggatggagggagggagggcgggacggagggagggcgggaggga gggacggagggcgggacggagggacggagggagggagggagggacggaggttgggacgga 
gggacggagggcgggcggatgccgcgggtctcaccgccaaggtgtcctgctgctgcagca gggcccgcacgtgcagggtcgtgctgttctccatctcctggatggtctcgatcagctccc -ggttgagcttgctgaggaagttctcacggc ->p:HG002_2_chr20:278800-279287 +ggttgagcttgctgaggaagttctcacgg +>p:HG002_2_chr20:278800-279286 gatgaaacccatctcggtcctcttgcaaagcccaggtgaggggagtgaagggtgaagaaa tgcgtcttccaggtggaggaagcaccacgtgcagt------------------------- ------------------------------------------------------------ @@ -77,8 +77,8 @@ gggagggagggcgggacggagggtgggagggagggagggatggaggga----gggaggga gggacggagggcgggacggagggacggagggagggagggagggacggaggttgggacgga gggacggagggcgggcggatgccgcgggtctcaccgccaaggtgtcctgctgctgcagca gggcccgcacgtgcagggtcgtgctgttctccatctcctggatggtctcgatcagctccc -ggttgagcttgctgaggaagttctcacggc ->ref_chr20:278800-279287 +ggttgagcttgctgaggaagttctcacgg +>ref_chr20:278800-279286 gatgaaacccatctcggtcctcttgcaaagcccaggtgaggggagtgaagggtgaagaaa tgcgtcttccaggtggaggaagcaccacgtgcagt------------------------- ------------------------------------------------------------ @@ -97,4 +97,4 @@ gcgggagggcgggacggag----------------------------------------- gggacggagggcgggacggagggacggagggagggagggagggacggaggttgggacgga gggacggagggcgggcggatgccgcgggtctcaccgccagggtgtcctgctgctgcagca gggcccgcacgtgcagggtcgtgctgttctccatctcctggatggtctcgatcagctccc -ggttgagcttgctgaggaagttctcacggc +ggttgagcttgctgaggaagttctcacgg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_3570b2394a58b5622e26496d8c6f1a42.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_3f90779dcd8c05402fb060d819c516f1.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_3570b2394a58b5622e26496d8c6f1a42.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_3f90779dcd8c05402fb060d819c516f1.msa index d9ca2b8d..4170dc40 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_3570b2394a58b5622e26496d8c6f1a42.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_3f90779dcd8c05402fb060d819c516f1.msa @@ -1,4 +1,4 @@ 
->syndip_1_chr20:4032257-4033329 +>syndip_1_chr20:4032257-4033328 cccggccgccaccccgtctgggaagtgaggagcgtctctgcttggccacccatcgtctgg gatatgaggagcccctctgcctggctgcccagtctggaaagt------------------ ----------------------gaggagcgtctctgcccggccgccatcccatctaggaa @@ -24,8 +24,8 @@ gcctctgcccggccacccctactgggaagtgaggagcccctctgcccggccagccgcccc gtccgggagggaggcgggggggggggtcggccagccgccccgtccgggagggaggtgggg gggtcagccccctttccggcc--gccgcctgtccgggaggtgaggggcgcctctgcctgg ccgcccctactgggaagtgaggacccctctgcccggccagccgccccgtccgggagggag -gtgggggg ->syndip_2_chr20:4032257-4033329 +gtggggg +>syndip_2_chr20:4032257-4033328 cccggccgccaccccgtctgggaagtgaggagcgtctctgcttggccacccatcgtctgg gatatgaggagcccctctgcctggctgcccagtctggaaagtgaggagcgtctctgcccg gccgccatcccatctaggaagcgaggagcgtctctgcccggccgccatcccatctaggaa @@ -51,8 +51,8 @@ gcctctgcccggccacccctactgggaagtgaggagcccctctgcccggccagccgcccc gtccgggagggaggcgggggggggggtcggccagccgccccgtccgggagggaggtgggg gggtcagccccctttccggcc--gccgcctgtccgggaggtgaggggcgcctctgcctgg ccgcccctactgggaagtgaggacccctctgcccggccagccgccccgtccgggagggag -gtgggggg ->p:HG002_1_chr20:4032257-4033329 +gtggggg +>p:HG002_1_chr20:4032257-4033328 cccggccgccaccccgtctgggaagtgaggagcgtctctgcttggccacccatcgtctgg gatatgaggagcccctctgcctggctgcccagtctggaaagtgaggagcgtctctgcccg gccgccatcccatctaggaagcgaggagcgtctctgcccggccgccatcccatctaggaa @@ -78,8 +78,8 @@ gcctctgcccggccacccctactgggaagtgaggagcccctctgcccggccagccgcccc gtccgggagggaggcgggggggggggtcggccagccgccccgtccgggagggaggtgggg gggtcagccccccttccggccgggccgcctgtccgggaggtgaggggcgcctctgcctgg ccgcccctactgggaagtgaggacccctctgcccggccagccgccccgtccgggagggag -gtgggggg ->p:HG002_2_chr20:4032257-4033329 +gtggggg +>p:HG002_2_chr20:4032257-4033328 cccggccgccaccccgtctgggaagtgaggagcgtctctgcttggccacccatcgtctgg gatatgaggagcccctctgcctggctgcccagtctggaaagtgaggagcgtctctgcccg gccgccatcccatctaggaagcgaggagcgtctctgcccggccgccatcccatctaggaa @@ -105,8 +105,8 @@ gcctctgcccggccacccctactgggaagtgaggagcccctctgcccggccagccgcccc gtccgggagggaggcgggggggggggtcggccagccgccccgtccgggagggaggtgggg 
gggtcagccccccttccggccggccgccctgtccgggaggtgaggggcgcctctgcctgg ccgcccctactgggaagtgaggacccctctgcccggccagccgccccgtccgggagggag -gtgggggg ->ref_chr20:4032257-4033329 +gtggggg +>ref_chr20:4032257-4033328 cccggccgccaccccgtctgggaagtgaggagcgtctctgcttggccacccatcgtctgg gatatgaggagcccctctgcctggctgcccagtctggaaagtgaggagcgtctctgcccg gccgccatcccatctaggaagcgaggagcgtctctgcccggccgccatcccatctaggaa @@ -129,7 +129,7 @@ ccagccgccccgtccgggaggggggagggggggtcagccccctgcccggccagccgcccc gtccgggagggaggt-ggggggggggtcagccccctttccggcc-gccg----------- ---------------------------------------cctgtccgggaggtgaggggc gcctctgcctggccgcccctactgggaagtgagga-cccctctgcccggccagccgcccc -gtccgggagggaggtgggggg--------------------------------------- +gtccgggagggaggtggggg---------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ --------- +------- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_5c074fb37f70d5e11502be5c009c13e2.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_405fa3f356dead7f68ddb49001c0e603.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_5c074fb37f70d5e11502be5c009c13e2.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_405fa3f356dead7f68ddb49001c0e603.msa index 053f59de..22915bfb 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_5c074fb37f70d5e11502be5c009c13e2.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_405fa3f356dead7f68ddb49001c0e603.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:20295855-20296666 +>syndip_1_chr20:20295855-20296665 gatgcctgtggcaggagctgagggcaagggatgagcagaggagaccaggcccctgcagcc agaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacca acgcctgcatgcttccctccttccttccttccttccctcctttccttccttctttctttt @@ -18,8 +18,8 @@ tttctttcttttctttcttcctccctccctccttccttccctccttcctgccttccttcc ttcctttctttttcctccctccctccttccttctttactcccttccttccttccttcctc 
ccctcactcaatttgttcatcaaaaaatctagttccttgccatgccacagcctgagtatt taaaaataacttttcctcattttagaagtaatgatcattttttaaaaattagaagctgca -gatagtcaaataactgtaag ->syndip_2_chr20:20295855-20296666 +gatagtcaaataactgtaa +>syndip_2_chr20:20295855-20296665 gatgcctgtggcaggagctgagggcaagggatgagcagaggagaccaggcccctgcagcc agaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacca acgcctgcatgcttccctccttccttccttccttccttcctt-ccttccttccttccttc @@ -39,8 +39,8 @@ tttctttcttttctttcttcctccctccctccttccttccctccttcctgccttccttcc ttcctttctttttcctccctccctccttccttctttactcccttccttccttccttcctc ccctcactcaatttgttcatcaaaaaatctagttccttgccatgccacagcctgagtatt taaaaataacttttcctcattttagaagtaatgatcattttttaaaaattagaagctgca -gatagtcaaataactgtaag ->p:HG002_1_chr20:20295855-20296666 +gatagtcaaataactgtaa +>p:HG002_1_chr20:20295855-20296665 gatgcctgtggcaggagctgagggcaagggatgagcagaggagaccaggcccctgcagcc agaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacca acgcctgcatgcttccctccttccttccttccttccttcctt-ccttccttccttccttc @@ -60,8 +60,8 @@ tttctttcttttctttcttcctccctccctccttccttccctccttcctgccttccttcc ttcctttctttttcctccctccctccttccttctttactcccttccttccttccttcctc ccctcactcaatttgttcatcaaaaaatctagttccttgccatgccacagcctgagtatt taaaaataacttttcctcattttagaagtaatgatcattttttaaaaattagaaactgca -gatagtcaaataactgtaag ->p:HG002_2_chr20:20295855-20296666 +gatagtcaaataactgtaa +>p:HG002_2_chr20:20295855-20296665 gatgcctgtggcaggagctgagggcaagggatgagcagaggagaccaggcccctgcagcc agaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacca acgcctgcatgcttccctccttccttccttccttccctcctttccttccttctttctttt @@ -81,8 +81,8 @@ tttctttcttttctttcttcctccctccctccttccttccctccttcctgccttccttcc ttcctttctttttcctccctccctccttccttctttactcccttccttccttccttcctc ccctcactcaatttgttcatcaaaaaatctagttccttgccatgccacagcctgagtatt taaaaataacttttcctcattttagaagtaatgatcattttttaaaaattagaagctgca -gatagtcaaataactgtaag ->ref_chr20:20295855-20296666 +gatagtcaaataactgtaa +>ref_chr20:20295855-20296665 gatgcctgtggcaggagctgagggcaagggatgagcagaggagaccaggcccctgcagcc 
agaggctcagtcctggctttttccaaactcttctccagaggagcccgcccttgcacacca acgcctgcatgcttccctccttccttccttccttccctcctttccttccttctttctttt @@ -102,4 +102,4 @@ tttctttcttttctttcttcctccctccctccttccttccctccttcctgccttccttcc ttcctttctttttcctccctccctccttccttctttactcccttccttccttccttcctc ccctcactcaatttgttcatcaaaaaatctagttccttgccatgccacagcctgagtatt taaaaataacttttcctcattttagaagtaatgatcattttttaaaaattagaagctgca -gatagtcaaataactgtaag +gatagtcaaataactgtaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8d91e35548435d3392527d244ebe8371.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_4248c4ccdcf22249a1ca92c0fca9a551.msa similarity index 98% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_8d91e35548435d3392527d244ebe8371.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_4248c4ccdcf22249a1ca92c0fca9a551.msa index 3696203d..999ea0d2 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_8d91e35548435d3392527d244ebe8371.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_4248c4ccdcf22249a1ca92c0fca9a551.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63491489-63493165 +>syndip_1_chr20:63491489-63493164 gtgagcatgatgcccccacagtgcacacagagatgcctgcaacatctttggctgcagccc actggctgggggtctcctatcagtgcccttctctagggtctcagatttgtttatggtttc tcttcttggtgggatggatggatggatggacggatggatggggaggtgggtggatggatg @@ -49,8 +49,8 @@ agatagatggatggagaggtggatagatggatgggtggatggatggacagaggatggatg ggtggggaggtgggtggatggatggatggatgtgggatggactgtcccacagaaagtgtg tggtaaggggagagattggagacagcccagtcttgagatgccttgtaggggcccctggtc tagggcaggcagagctggccaggcaggagctccagcacagcgcccttgctcaccgccaat -cttgtacacgtcctgcagcggc ->syndip_2_chr20:63491489-63493165 +cttgtacacgtcctgcagcgg +>syndip_2_chr20:63491489-63493164 gtgagcatgatgcccccacagtgcacacagagatgcctgcaacatctttggctgcagccc actggctgggggtctcctatcagtgcccttctctagggtctcagatttgtttatggtttc tcttcttggtgggatggatggatggatggacggatggatggggaggtgggtggatggatg @@ -101,8 +101,8 @@ agatagatggatggagaggtggatagatggatgggtggatggatggacagaggatggatg 
ggtggggaggtgggtggatggatggatggatgtgggatggactgtcccacagaaagtgtg tggtaaggggagagattggagacagcccagtcttgagatgccttgtaggggcccctggtc tagggcaggcagagctggccaggcaggagctccagcacagcgcccttgctcaccgccaat -cttgtacacgtcctgcagcggc ->p:HG002_1_chr20:63491489-63493165 +cttgtacacgtcctgcagcgg +>p:HG002_1_chr20:63491489-63493164 gtgagcatgatgcccccacagtgcacacagagatgcctgcaacatctttggctgcagccc actggctgggggtctcctatcagtgcccttctctggggtctcagatttgtttatggtttc tcttcttggtgggatggatggatggatggacggatggatggggaggtgggtggatggatg @@ -153,8 +153,8 @@ agatagatggatggagaggtggatagatggatgggtggatggatggacagaggatggatg ggtggggaggtgggtggatggatggatggatgtgggatggactgtcccacagaaagtgtg tggtaaggggagagattggagacagcccagtcttgagatgccttgtaggggcccctggtc tagggcaggcagagctggccaggcaggagctccagcacagcgcccttgctcaccgccaat -cttgtacacgtcctgcagcggc ->p:HG002_2_chr20:63491489-63493165 +cttgtacacgtcctgcagcgg +>p:HG002_2_chr20:63491489-63493164 gtgagcatgatgcccccacagtgcacacagagatgcctgcaacatctttggctgcagccc actggctgggggtctcctatcagtgcccttctctggggtctcagatttgtttatggtttc tcttcttggtgggatggatggatggatggacggatggatggggaggtgggtggatggatg @@ -205,8 +205,8 @@ agatagatggatggagaggtggatagatggatgggtggatggatggacagaggatggatg ggtggggaggtgggtggatggatggatggatgtgggatggactgtcccacagaaagtgtg tggtaaggggagagattggagacagcccagtcttgagatgccttgtaggggcccctggtc tagggcaggcagagctggccaggcaggagctccagcacagcgcccttgctcaccgccaat -cttgtacacgtcctgcagcggc ->ref_chr20:63491489-63493165 +cttgtacacgtcctgcagcgg +>ref_chr20:63491489-63493164 gtgagcatgatgcccccacagtgcacacagagatgcctgcaacatctttggctgcagccc actggctgggggtctcctatcagtgcccttctctagggtctcagatttgtttatggtttc tcttcttggtgggatggatggatggatggacggatggatggggaggtgggtggatggatg @@ -257,4 +257,4 @@ agatagatggatggagaggtggatagatggatgggtggatggatggacagaggatggatg ggtggggaggtgggtggatggatggatggatgtgggatggactgtcccacagaaagtgtg tggtaaggggagagattggagacagcccagtcttgagatgccttgtaggggcccctggtc tagggcaggcagagctggccaggcaggagctccagcacagcgcccttgctcaccgccaat -cttgtacacgtcctgcagcggc +cttgtacacgtcctgcagcgg diff --git 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_ff308abc6d0e72bcf1670b2c17001984.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_452bf94642317f3790592594df0a74e2.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_ff308abc6d0e72bcf1670b2c17001984.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_452bf94642317f3790592594df0a74e2.msa index 41aa4b4e..d9ae6133 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_ff308abc6d0e72bcf1670b2c17001984.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_452bf94642317f3790592594df0a74e2.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63221380-63221977 +>syndip_1_chr20:63221380-63221976 tcccactgcagagaactgtcatctgcctgtaagtaccagtgccaggtgctctggggccga tgtctccgttagccccaaggtggaggctcagggagcagatgtgagcgtgccttgctcatc gcagtagcttc------------------------------------------------- @@ -16,8 +16,7 @@ gattgcagggactgcagggactgcattgctcttgcagcaccacccgcctccgtggacacc ttcagcagtgagacaatgcaggctctgctacaagaggtgcttactgggctctgctctggt gaggagaggcctcgcctggcagccagacagagtcccagcagggcaagatgaaaattcggg gccctcgttcaaacagcaagagaaggctctttccttccttccagcgcccctccctgcacc -t ->syndip_2_chr20:63221380-63221977 +>syndip_2_chr20:63221380-63221976 tcccactgcagagaactgtcatctgcctgtaagtaccagtgccaggtgctctggggccga tgtctccgttagccccaaggtggaggctcagggagcagatgtgagcgtgccttgctcatc gcagtagcttcggactgcagggattgcagggactgcagggattgcagggattgcagggag @@ -35,8 +34,7 @@ gattgcagggactgcagggcctgcattgctcttgcagcaccacccgcctccgtggacacc ttcagcagtgagacaatgcaggctctgctacaagaggtgcttactgggctctgctctggt gaggagaggcctcgcctggcagccagacagagtcccagcagggcaagatgaaaattcggg gccctcgttcaaacagcaagagaaggctctttccttccttccagcgcccctccctgcacc -t ->p:HG002_1_chr20:63221380-63221977 +>p:HG002_1_chr20:63221380-63221976 tcccactgcagagaactgtcatctgcctgtaagtaccagtgccaggtgctctggggccga tgtctccgttagccccaaggtggaggctcagggagcagatgtgagcgtgccttgctcatc gcagtagcttcggactgcagggattgcagggactgcagggattgcagggattgcagggag @@ -54,8 +52,7 @@ 
gattgcagggactgcagggcctgcattgctcttgcagcaccacccgcctccgtggccacc ttcagcagtgagacaatgcaggctctgctacaagaggtgcttactgggctctgctctggt gaggagaggcctcgcctggcagccagacagagtcccagcagggcaagatgaaaattcggg gccctcgttcaaacagcaagagaaggctctttccttccttccagcgcccctccctgcacc -t ->p:HG002_2_chr20:63221380-63221977 +>p:HG002_2_chr20:63221380-63221976 tcccactgcagagaactgtcatctgcctgtaagtaccagtgccaggtgctctggggccga tgtctccgttagccccaaggtggaggctcagggagcagatgtgagcgtgccttgctcatc gcagtagcttc------------------------------------------------- @@ -73,8 +70,7 @@ gattgcagggactgcagggactgcattgctcttgcagcaccacccgcctccgtggccacc ttcagcagtgagacaatgcaggctctgctacaagaggtgcttactgggctctgctctggt gaggagaggcctcgcctggcagccagacagagtcccagcagggcaagatgaaaattcggg gccctcgttcaaacagcaagagaaggctctttccttccttccagcgcccctccctgcacc -t ->ref_chr20:63221380-63221977 +>ref_chr20:63221380-63221976 tcccactgcagagaactgtcatctgcctgtaagtaccagtgccaggtgctctggggccga tgtctccgttagccccaaggtggaggctcagggagcagatgtgagcgtgccttgctcatc gcagtagcttc------------------------------------------------- @@ -92,4 +88,3 @@ gattgcagggactgcagggactgcattgctcttgcagcaccacccgcctccgtggacacc ttcagcagtgagacaatgcaggctctgctacaagaggtgcttactgggctctgctctggt gaggagaggcctcgcctggcagccagacagagtcccagcagggcaagatgaaaattcggg gccctcgttcaaacagcaagagaaggctctttccttccttccagcgcccctccctgcacc -t diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_2783f53858aad63825027bab32e6c3bb.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_45f1dc1c7a33a6a5fbb511967c1352d7.msa similarity index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_2783f53858aad63825027bab32e6c3bb.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_45f1dc1c7a33a6a5fbb511967c1352d7.msa index b3aae3d8..867d45f9 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_2783f53858aad63825027bab32e6c3bb.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_45f1dc1c7a33a6a5fbb511967c1352d7.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:8661844-8662220 +>syndip_1_chr20:8661844-8662219 
atttaataaatcttcagtgaaaggcaagaggctacacacagtgacagacccagagataaa ccctgtgcccctagagatttgcactcctcttaaaagtgtatatatatataattatataat tatttattatataattatatataattatataattatttattatataattatatataatta @@ -22,8 +22,8 @@ tttattacataagtatatatcacttattatataattgcatataattatttattatataat tgcatataa----ttattatataattgtatatagttatttattatataattg-------- -----------------tatatagttatttattatataattctatata-------attat ttattatctaattctatattaaatataattatttattatctaattctatattatatataa -ttatttattatataattctatattatatataattat ->syndip_2_chr20:8661844-8662220 +ttatttattatataattctatattatatataatta +>syndip_2_chr20:8661844-8662219 atttaataaatcttcagtgaaaggcaagaggctacacacagtgacagacccagagataaa ccctgtgcccctagagatttgcactcctcttaaaagtgtatatatatataattatataat tatttattatataattatatataattatat------------------------------ @@ -47,8 +47,8 @@ tttattacataagtatatatcacttattatataattgtatataattatttattatataat tgcatataattatttattatataattgtatatagttatttattatataattctatatagt tatttattatataattctatataattatttattatctaattctatattatatataattat ttattatctaattctatattaaatataattatttattatctaattctatattatatataa -ttatttattatataattctatattatatataattat ->p:HG002_1_chr20:8661844-8662220 +ttatttattatataattctatattatatataatta +>p:HG002_1_chr20:8661844-8662219 atttaataaatcttcagtgaaaggcaagaggctacacacagtgacagacccagagataaa ccctgtgcccctagagatttgcactcctcttaaaagtgtatatatatataattatataat tatttattatataattatatataattatat------------------------------ @@ -72,8 +72,8 @@ tttattacataagtatatatcacttattatataattgtatataattatttattatataat tgcatataattatttattatataattgtatatagttatttattatataattctatatagt tatatttattatacagttatataataaataattatataattatatataattatatattat ttattatctaattctatattaaatataattatttattatctaattctatattatatataa -ttatttattatataattctatattatatataattat ->p:HG002_2_chr20:8661844-8662220 +ttatttattatataattctatattatatataatta +>p:HG002_2_chr20:8661844-8662219 atttaataaatcttcagtgaaaggcaagaggctacacacagtgacagacccagagataaa ccctgtgcccctagagatttgcactcctcttaaaagtgtatatatatataattatataat tatttattatataattatatataattatataattatttattatataattatatataa-ta @@ -97,8 +97,8 @@ 
tttattacataagtatatatcacttattatataattgcatataattatttattatataat tgcatataa----ttattatatatttat-----tatatttattatacagt---------- -----------------tatataataaataattatataattatatataattatatattat ttattatctaattctatattaaatataattatttattatctaattctatattatatataa -ttatttattatataattctatattatatataattat ->ref_chr20:8661844-8662220 +ttatttattatataattctatattatatataatta +>ref_chr20:8661844-8662219 atttaataaatcttcagtgaaaggcaagaggctacacacagtgacagacccagagataaa ccctgtgcccctagagatttgcactcctcttaaaagtgtatatatatataattatataat tatttattatataattatatataattatat------------------------------ @@ -106,7 +106,7 @@ tatttattatataattatatataattatat------------------------------ taattatatatttattatatttat-----tatacagttatataataaataattatataat tatatataattatat-attatttattatctaattctat---------------------- -atta-aatataattatttattatctaattctatatta-tatataattatttattatata -attctatatta-tatataattat------------------------------------- +attctatatta-tatataatta-------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -122,4 +122,4 @@ attctatatta-tatataattat------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- +----------------------------------- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_4857d0eafa15c85843680e3bfaf22f43.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_4857d0eafa15c85843680e3bfaf22f43.msa deleted file mode 100644 index ca25e333..00000000 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_4857d0eafa15c85843680e3bfaf22f43.msa +++ /dev/null @@ -1,165 +0,0 @@ ->syndip_1_chr20:60314230-60315121 -acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac -acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt 
-cctcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc -catttgtccttccttccttccttccttccttccttccttccttccttccttccttccttc -cacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaaccatc -ctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatccatc -catccatcctcccatccatcctcccatccattatcccattcatccatcctcccatccatc -ctcctatccatccatccatccatccaccctcccatccatccatcctcccatccatccact -cttccatccatccatccacccaccctt----------------ccatccatccatcctcc -catccatccatcctcccattcatctatccatccatccctcgatcctcccatccatccatc -ctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcctcc -catccatctaccctcccatccatccatcctcccacccaccctcccatccatccatcctcc -catcctcccatccatccatccatcctcccatccgtccatccatccttccatccac----- ------------cctcccatccatccatcctcccatccatctaccctcccatccatccatc -ctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatctatc -catccatcctcccatcctcccatccatccatcatccatccatccatccatccatcctccc -ataatctatcttcccatccatttgtccttccttccttccttccttccttccttccttcgt -tccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaacca -tcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctatcca -t--------------------ccatcctcccatccatcctcccatcctcccatccattct -cccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctccca -tccatccatcctcccatccatccacccttccatccatccatccacccacccttccatcca -tccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctccca -tccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccatcca -tcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccatcca -ccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccatcct -cccatccatccatccatccatccatccatcctcccatcctcccatcctcccatccatcgt -cccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccatcca -tcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccatcca -tccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgtgag -ttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaaaat -gaaaaaaagaatatattgatttcaggccagttacataga ->syndip_2_chr20:60314230-60315121 -acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac -acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt 
-cgtcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc -catttg--------------------tccttccttccttccttccttccttccttccttc -cacccatccatcctcccatgcatcctcccatgcatcctcccatccatccacccaaccatc -ctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatccatc -catccatcctcccatccatcctcccatccattatcccattcatccatcctcccatccatc -ctcctatccatccatccatccatccaccctctcatccatccatcctcccatccatccacc -cttccatccatccatccacccacccttccatccatccatcctcccatccatccatcctcc -catccatccatcctcccattcatctatccatccatccctcgatcctcccatccatccatc -ctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcctcc -catccatctaccctcccatccatccatcctcccatccaccctcccatccatccatccatc -catccaccctctcatccatccatcctcccatccgtccatccatccttccatccaccctcc -catccatccatcctcccatccatccatcctcccatccatctaccctcccatccatccatc -ctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatctatc -catccatcctcccatcctcccatccatccatcatccatccatccatccatccatcctccc -ataatctatcttcccatccatttgtccttccttccttccttccttccttcc--------t -tccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaacca -tcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctatcca -tccatcctcccatccatcctcccatcctcccatccatcctcccatcctcccatccattct -cccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctccca -tccatccatcctcccatccatccacccttccatccatccatccacccacccttccatcca -tccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctccca -tccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccatcca -tcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccatcca -ccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccatcct -c----ccatccatccatccatccatccatcctcccatcctcccatcctcccatccatcgt -cccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccatcca -tcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccatcca -tccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgtgag -ttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaaaat -gaaaaaaagaatatattgatttcaggccagttacataga ->p:HG002_1_chr20:60314230-60315121 -acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac -acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt 
-cgtcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc -catttg--------------------tccttccttccttccttccttccttccttccttc -cacccatccatcctcccatgcatcctcccatgcatcctcccatccatccacccaaccatc -ctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatccatc -catccatcctcccatccatcctcccatccattatcccattcatccatcctcccatccatc -ctcctatccatccatccatccatccaccctctcatccatccatcctcccatccatccacc -cttccatccatccatccacccacccttccatccatccatcctcccatccatccatcctcc -catccatccatcctcccattcatctatccatccatccctcgatcctcccatccatccatc -ctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcctcc -catccatctaccctcccatccatccatcctcccatccaccctcccatccatccatccatc -catccaccctctcatccatccatcctcccatccgtccatccatccttccatccaccctcc -catccatccatcctcccatccatccatcctcccatccatctaccctcccatccatccatc -ctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatctatc -catccatcctcccatcctcccatccatccatcatccatccatccatccatccatcctccc -ataatctatcttcccatccatttgtccttccttccttccttccttccttcc--------t -tccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaacca -tcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctatcca -tccatcctcccatccatcctcccatcctcccatccatcctcccatcctcccatccattct -cccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctccca -tccatccatcctcccatccatccacccttccatccatccatccacccacccttccatcca -tccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctccca -tccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccatcca -tcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccatcca -ccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccatcct -c----ccatccatccatccatccatccatcctcccatcctcccatcctcccatccatcgt -cccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccatcca -tcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccatcca -tccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgtgag -ttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaaaat -gaaaaaaagaatatattgatttcaggccagttacataga ->p:HG002_2_chr20:60314230-60315121 -acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac -acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt 
-cctcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc -catttgtccttccttccttccttccttccttccttccttccttccttccttccttccttc -cacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaaccatc -ctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatccatc -catccatcctcccatccatcctcccatccattatcccattcatccatcctcccatccatc -ctcctatccatccatccatccatccaccctcccatccatccatcctcccatccatccact -cttccatccatccatccacccaccctt----------------ccatccatccatcctcc -catccatccatcctcccattcatctatccatccatccctcgatcctcccatccatccatc -ctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcctcc -catccatctaccctcccatccatccatcctcccacccaccctcccatccatccatcctcc -catcctcccatccatccatccatcctcccatccgtccatccatccttccatccac----- ------------cctcccatccatccatcctcccatccatctaccctcccatccatccatc -ctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatctatc -catccatcctcccatcctcccatccatccatcatccatccatccatccatccatcctccc -ataatctatcttcccatccatttgtccttccttccttccttccttccttccttccttcgt -tccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaacca -tcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctatcca -t--------------------ccatcctcccatccatcctcccatcctcccatccattct -cccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctccca -tccatccatcctcccatccatccacccttccatccatccatccacccacccttccatcca -tccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctccca -tccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccatcca -tcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccatcca -ccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccatcct -cccatccatccatccatccatccatccatcctcccatcctcccatcctcccatccatcgt -cccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccatcca -tcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccatcca -tccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgtgag -ttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaaaat -gaaaaaaagaatatattgatttcaggccagttacataga ->ref_chr20:60314230-60315121 -acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac -acctgtacttccagctactcaggaggctgaggtggaaggattgaaa-------------- 
------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --atccatccatctgtcctcctacccatc---ctgctatccatccatccatccatcctccc -ataatctatcttcccatccatttgtccttccttccttccttccttccttcc--------t -tct---------------------tccttccttccttccttccttccttccacccatcca -tcctcccattcatcctcccatgcatcctc----ccatccatccacccaaccatcctccca -tccatcctcacatcgatcttcgcatccatccatcgatcgatctatccatccatccattct -cccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctccca -tccatccatcctcccatccatccacccttccatccatccatccacccacccttccatcca -tccaccctcccatccatccatcctcccat------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------------------ccatctatccatccatcctcccatcctcccatcct -cccatccatccatccatccatccatccatcctcccatcctcccatcctcccatccatcgt -cccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccatcca -tcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccatcca -tccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgtgag -ttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaaaat -gaaaaaaagaatatattgatttcaggccagttacataga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_69e3cd763dbc4a7d135dbecb492d8b86.msa 
b/repo_utils/test_files/external/fake_mafft/lookup/fm_499b9b66e7735f3d7b3af193f2b77110.msa similarity index 99% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_69e3cd763dbc4a7d135dbecb492d8b86.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_499b9b66e7735f3d7b3af193f2b77110.msa index dbd71055..c3fc335d 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_69e3cd763dbc4a7d135dbecb492d8b86.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_499b9b66e7735f3d7b3af193f2b77110.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:64131813-64133957 +>syndip_1_chr20:64131813-64133956 tcccagacttctcctggcctctctcatgtcctatgagaccccctcactgtccaccctcat gtcctgtagagaccccctcactgtccactctcatggcctatagagaccccctcactgtca actctcatgtcctatg-agaccccctcactgtccactctcatggcctgtagagaccccct @@ -79,8 +79,8 @@ ctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcatggcctgta gagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtccaccctcat ggcctgtagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtcc acccatatggcctgtagagaccccctcactgtccaccctcaggaccctctgcatttgctc -cttgggttcccgg ->syndip_2_chr20:64131813-64133957 +cttgggttcccg +>syndip_2_chr20:64131813-64133956 tcccagacttctcctggcctctctcatgtcctatgagaccccctcactgtccaccctcat gtcctgtagagaccccctcactgtccactctcatggcctatagagaccccctcactgtcc accctcatggcctgtagagaccccctcactgtccaccctcatgtcctatagagaccccct @@ -161,8 +161,8 @@ ctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctata gagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcgt gtcctatagagaccccctcactgtccacccatatggcctgtagagaccccctcactgtcc acccatatggcctgtagagaccccctcactgtccaccctcaggaccctctgcatttgctc -cttgggttcccgg ->p:HG002_1_chr20:64131813-64133957 +cttgggttcccg +>p:HG002_1_chr20:64131813-64133956 tcccagacttctcctggcctctctcatgtcctatgagaccccctcactgtccaccctcat gtcctgtagagaccccctcactgtccactctcatggcctatagagaccccctcactgtcc accctcatggcctgtagagaccccctcactgtccaccctcatgtcctatagagaccccct @@ -243,8 +243,8 @@ ctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcatgtcctata 
gagaccccctcactgtccaccctcatgtcctatagagaccccctcactgtccaccctcgt ggcctgtagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtcc acccatatggcctgtagagaccccctcactgtccaccctcaggaccctctgcatttgctc -cttgggttcccgg ->p:HG002_2_chr20:64131813-64133957 +cttgggttcccg +>p:HG002_2_chr20:64131813-64133956 tcccagacttctcctggcctctctcatgtcctatgagaccccctcactgtccaccctcat gtcctgtagagaccccctcactgtccactctcatggcctatagagaccccctcactgtcc accctcatggcctgta-------------------------------------------- @@ -325,8 +325,8 @@ ctcactgtccaccctcatgtcctat-gagaccccctcactgtccaccctcatgtcctata gagaccccctcactgtccaccctcatgtcctgtagagaccccctcactgtccaccctcat ggcctgtagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtcc acccatatggcctgtagagaccccctcactgtccaccctcaggaccctctgcatttgctc -cttgggttcccgg ->ref_chr20:64131813-64133957 +cttgggttcccg +>ref_chr20:64131813-64133956 tcccagacttctcctggcctctctcatgtcctatgagaccccctcactgtccaccctcat gtcctgtagagaccccctcactgtccactctcatggcctatagagaccccctcactgtcc accctcatggcctgtagagaccccctcactgtccaccctcatgtcctatagagaccccct @@ -407,4 +407,4 @@ ctca-------------------------------------------------------- -----------------------------------------------------------t ggcctgtagagaccccctcactgtccaccctcatggcctgtagagaccccctcactgtcc acccatatggcctgtagagaccccctcactgtccaccctcaggaccctctgcatttgctc -cttgggttcccgg +cttgggttcccg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_686d29fdc617f6ce5cb3c1f76ee016c0.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_4cdf28703764613af970b6a20a9d76f8.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_686d29fdc617f6ce5cb3c1f76ee016c0.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_4cdf28703764613af970b6a20a9d76f8.msa index 61525439..7d784ca0 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_686d29fdc617f6ce5cb3c1f76ee016c0.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_4cdf28703764613af970b6a20a9d76f8.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:61201722-61202343 
+>syndip_1_chr20:61201722-61202342 caggaccctctgcggatgtcgcctccatcctcatcaggaccctccatggtgtcacctcca tcctcactcaggacactccatggtgtcacctccatccttactcaggaccctccatggtgt caccgccatcctcactcaggaccctccatgagtgccacctccatcctcactcaggatccc @@ -24,8 +24,8 @@ accctccatgagtgtcacctccatc----ctcaggaccctccatg--------------- ctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctccat g-gtgtcacctccatcctcactcaggaccctccatgggtgtcacctccatcctcactcag gaccctccatggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcc -tcactcaggaccctccat ->syndip_2_chr20:61201722-61202343 +tcactcaggaccctcca +>syndip_2_chr20:61201722-61202342 caggaccctctgcggatgtcgcctccatcctcatcaggaccctccatggtgtcacctcca tcctcactcaggacactccatggtgtcacctccatccttactcaggaccctccatggtgt caccgccatcctcactcaggaccctccatgagtgccacctccatcctcac-caggatccc @@ -51,8 +51,8 @@ tcactcaggaccctccatggtgtcacctccatcctcactcaggacactccatggtgtcac ctccatccttactcaggaccctccatggtgtcaccgccatcctcactcaggaccctccat gagtgccacctccatcctcac-caggatcccctgtaagtgtcacctccatcctcactcag gaccctccatggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcc -tcactcaggaccctccat ->p:HG002_1_chr20:61201722-61202343 +tcactcaggaccctcca +>p:HG002_1_chr20:61201722-61202342 caggaccctctgcggatgtcgcctccatcctcatcaggaccctccatggtgtcacctcca tcctcactcaggacactccatggtgtcacctccatccttactcaggaccctccatggtgt caccgccatcctcactcaggaccctccatgagtgccacctccatcctcac-caggatccc @@ -78,8 +78,8 @@ tcactcaggaccctccatggtgtcacctccatcctcactcaggacactccatagtgtcac ctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctccat g-gtgtcacctccatcctcactcaggaccctccatgggtgtcacctccatcctcactcag gaccctccatggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcc -tcactcaggaccctccat ->p:HG002_2_chr20:61201722-61202343 +tcactcaggaccctcca +>p:HG002_2_chr20:61201722-61202342 caggaccctctgcggatgtcgcctccatcctcatcaggaccctccatggtgtcacctcca tcctcactcaggacactccatggtgtcacctccatccttactcaggaccctccatggtgt caccgccatcctcactcaggaccctccatgagtgccacctccatcctcac-caggatccc @@ -105,8 +105,8 @@ accctccatgagtgtcacctccatc----ctcaggaccctccatg--------------- 
ctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctccat g-gtgtcacctccatcctcactcaggaccctccatgggtgtcacctccatcctcactcag gaccctccatggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcc -tcactcaggaccctccat ->ref_chr20:61201722-61202343 +tcactcaggaccctcca +>ref_chr20:61201722-61202342 caggaccctctgcggatgtcgcctccatcctcatcaggaccctccatg------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -132,4 +132,4 @@ accctccatgagtgtcacctccatc----ctcaggaccctccatg--------------- ctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctccat g-gtgtcacctccatcctcactcaggaccctccatgggtgtcacctccatcctcactcag gaccctccatggtgtcacctccatcctcactcaggaccctccatggtgtcacctccatcc -tcactcaggaccctccat +tcactcaggaccctcca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_9f052ef8e75238e8dd45906b8a291987.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_4eee85d5b4dd5f6e65e1aa2a72f439df.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_9f052ef8e75238e8dd45906b8a291987.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_4eee85d5b4dd5f6e65e1aa2a72f439df.msa index a00f7abb..25be5099 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_9f052ef8e75238e8dd45906b8a291987.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_4eee85d5b4dd5f6e65e1aa2a72f439df.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63948414-63948859 +>syndip_1_chr20:63948414-63948858 acagaacacaggtgcggacagggaacggcatcctcagagcatggtcaggggagaaaagca agctggaaggagagatgattccatctgtataaaatccaggaaacgaaggacgctcaccgc catgcgcctcggtgtgaggagggaggcgtgtgtgagagggaggggcgtgtgtgagaggga @@ -38,8 +38,8 @@ agagggagggggcgtgtgt--gagggaggggcgtgtgtgagaggga-ggggggcgtgtgt gagagggagggggcgtgtgtgagagggagggggtgtgtgtgagaccgagggggcgtgtaa gggagggggtgtgtgtgagagggaagcgtgtgcacacaccagccctaaccatgatcactc tctggccaccaagatttggaacagcaacttctgtcacatttgaatgtttttaaagcaggt -gccactttgttgccataaaccaaaccgtagtctgt 
->syndip_2_chr20:63948414-63948859 +gccactttgttgccataaaccaaaccgtagtctg +>syndip_2_chr20:63948414-63948858 acagaacacaggtgcggacagggaacggcatcctcagagcatggtcaggggagaaaagca agctggaaggagagatgattccatctgtataaaatccaggaaacgaaggacgctcaccgc catgcgcctcggtgtgaggagggaggcgtgtgtgagagggaggggcgtgtgtgagaggga @@ -79,8 +79,8 @@ agagggaaggggcgtgtga--gagggaggggcgtgtgtgagaggga-ggggcgtgtgtga ga---gggaggggcgtgtgtgagagggagggggtgtgtgtgagaccgagggggcgtgtaa gggagggggtgtgtgtgagagggaagcgtgtgcacacaccagccctaaccatgatcactc tctggccaccaagatttggaacagcaacttctgtcacatttgaatgtttttaaagcaggt -gccactttgttgccataaaccaaaccgtagtctgt ->p:HG002_1_chr20:63948414-63948859 +gccactttgttgccataaaccaaaccgtagtctg +>p:HG002_1_chr20:63948414-63948858 acagaacacaggtgcggacagggaacggcatcctcagagcatggtcaggggagaaaagca agctggaaggagagatgattccatctgtataaaatccaggaaacgaaggacgctcaccgc catgcgcctcggtgtgaggagggaggcgtgtgtgagagggaggggcgtgtgtgagaggga @@ -120,8 +120,8 @@ agagggaaggggcgtgtga--gagggaggggcgtgtgtgagaggga-ggggcgtgtgtga ga---gggaggggcgtgtgtgagagggagggggtgtgtgtgagaccgagggggcgtgtaa gggagggggtgtgtgtgagagggaagcgtgtgcacacaccagccctaaccatgatcactc tctggccaccaagatttggaacagcaacttctgtcacatttgaatgtttttaaagcaggt -gccactttgttgccataaaccaaaccgtagtctgt ->p:HG002_2_chr20:63948414-63948859 +gccactttgttgccataaaccaaaccgtagtctg +>p:HG002_2_chr20:63948414-63948858 acagaacacaggtgcggacagggaacggcatcctcagagcatggtcaggggagaaaagca agctggaaggagagatgattccatctgtataaaatccaggaaacgaaggacgctcaccgc catgcgcctcggtgtgaggagggaggcgtgtgtgagagggaggggcgtgtgtgagaggga @@ -161,8 +161,8 @@ agagggagggggcgtgtgt--gagggaggggcgtgtgtgagaggga-ggggggcgtgtgt gagagggagggggcgtgtgtgagagggagggggtgtgtgtgagaccgagggggcgtgtaa gggagggggtgtgtgtgagagggaagcgtgtgcacacaccagccctaaccatgatcactc tctggccaccaagatttggaacagcaacttctgtcacatttgaatgtttttaaagcaggt -gccactttgttgccataaaccaaaccgtagtctgt ->ref_chr20:63948414-63948859 +gccactttgttgccataaaccaaaccgtagtctg +>ref_chr20:63948414-63948858 acagaacacaggtgcggacagggaacggcatcctcagagcatggtcaggggagaaaagca agctggaaggagagatgattccatctgtataaaatccaggaaacgaaggacgctcaccgc 
catgcgcctcg------------------------------------------------- @@ -202,4 +202,4 @@ agagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggaaggggcgtgtgtga ga---gggaggggcgtgtgtgagagggagggggtgtgtgtgagaccgagggggcgtgtaa gggagggggtgtgtgtgagagggaagcgtgtgcacacaccagccctaaccatgatcactc tctggccaccaagatttggaacagcaacttctgtcacatttgaatgtttttaaagcaggt -gccactttgttgccataaaccaaaccgtagtctgt +gccactttgttgccataaaccaaaccgtagtctg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_69a6cc9af825b6514565ccdd59336cd0.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_50a9c4a3245e4f0c0cab0597a64af000.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_69a6cc9af825b6514565ccdd59336cd0.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_50a9c4a3245e4f0c0cab0597a64af000.msa index 088b90c7..b4fced49 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_69a6cc9af825b6514565ccdd59336cd0.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_50a9c4a3245e4f0c0cab0597a64af000.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:62875072-62875657 +>syndip_1_chr20:62875072-62875656 aactttcctggcacactggagaggacggcagaacgctgggatggatttatcgtgcaagac cagcttgatgtccttagggcttggaggtcaccccttcacgaggattgtgaggagtgaatt catcggggagccccgcaccctcacctgcaccctcacccccacccccacccccacccccac @@ -18,8 +18,8 @@ ccgcacccccaccctcacccccaccctcacccccaccctcacccccacccccaccctcac ccgcaccctcactcgcaccgcaccctcactggcaccctcacccacaccgcacccacaccc acatcggggaagagctccatgatcattcttttctgtaggcgaaaaactgcggcgggaact gctgccatgcagctgggctcccaatgcagtggggatggtggcgctctgggctggcgggag -ctatcctgcc ->syndip_2_chr20:62875072-62875657 +ctatcctgc +>syndip_2_chr20:62875072-62875656 aactttcctggcacactggagaggacggcagaacgctgggatggatttatcgtgcaagac cagcttgatgtccttagggcttggaggtcaccccttcacgaggattgtgaggagtgaatt catcggggagccccgcaccctcacctgcaccctcacccccacccccacccccacccccac @@ -39,8 +39,8 @@ ccgcacccccaccctcacccccaccctcacccccaccctcacccccacccccaccctcac ccgcaccctcactcgcaccgcaccctcactggcaccctcacccacaccgcacccacaccc 
acatcggggaagagctccatgatcattcttttctgtaggcgaaaaactgcggcgggaact gctgccatgcagctgggctcccaatgcagtggggatggtggcgctctgggctggcgggag -ctatcctgcc ->p:HG002_1_chr20:62875072-62875657 +ctatcctgc +>p:HG002_1_chr20:62875072-62875656 aactttcctggcacactggagaggacggcagaacgctgggatggatttatcgtgcaagac cagcttgatgtccttagggcttggaggtcaccccttcacgaggattgtgaggagtgaatt catcggggagccccgcaccctcacctgcaccctcacccccacccccacccccacccccac @@ -60,8 +60,8 @@ ccgcacccccaccctcacccccaccctcacccccaccctcacccccacccccaccctcac ccgcaccctcactcgcaccgcaccctcactggcaccctcacccacaccgcacccacaccc acatcggggaagagctccatgatcattcttttctgtaggcgaaaaactgcggcgggaact gctgccatgcagctgggctcccaatgcagtggggatggtggcgctctgggctggcgggag -ctatcctgcc ->p:HG002_2_chr20:62875072-62875657 +ctatcctgc +>p:HG002_2_chr20:62875072-62875656 aactttcctggcacactggagaggacggcagaacgctggaatggatttatcgtgcaagac cagcttgatgtccttagggcttggaggtcaccccttcacgaggattgtgaggagtgaatt catcggggagccccgcaccctcacctgcaccctcacccccacccccacccccacccccac @@ -81,8 +81,8 @@ ccgcacccccaccctcacccccaccctcacccccaccctcacccccacccccaccctcac ccgcaccctcactcgcaccgcaccctcactggcaccctcacccacaccgcacccacaccc acatcggggaagagctccatgatcattcttttctgtaggcgaaaaactgcggcgggaact gctgccatgcagctgggctcccaatgcagtggggatggtggcgctctgggctggcgggag -ctatcctgcc ->ref_chr20:62875072-62875657 +ctatcctgc +>ref_chr20:62875072-62875656 aactttcctggcacactggagaggacggcagaacgctgggatggatttatcgtgcaagac cagcttgatgtccttagggcttggaggtcaccccttcacgaggattgtgaggagtgaatt catcggggagc------------------------------------------------- @@ -102,4 +102,4 @@ ccgcacccccaccctcacccccaccctcacccccaccctcacccccacccccaccctcac ccgcaccctcactcgcaccgcaccctcactggcaccctcacccacaccgcacccacaccc acatcggggaagagctccatgatcattcttttctgtaggcgaaaaactgcggcgggaact gctgccatgcagctgggctcccaatgcagtggggatggtggcgctctgggctggcgggag -ctatcctgcc +ctatcctgc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_bd5862045dd8751a6c83a3c3d6321d88.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_513fe7d43fe32248d0e906427411d74f.msa similarity index 90% rename from 
repo_utils/test_files/external/fake_mafft/lookup/fm_bd5862045dd8751a6c83a3c3d6321d88.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_513fe7d43fe32248d0e906427411d74f.msa index 4d33da27..6406e721 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_bd5862045dd8751a6c83a3c3d6321d88.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_513fe7d43fe32248d0e906427411d74f.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63371965-63372792 +>syndip_1_chr20:63371965-63372791 ggtcaggtgaagactctattcctggctcaggaaaaccaccaggacaggtgagcccagcag catctcgtggagggaggctctgagtcccgtgaggacaaggacggaacaccggggctgcca caggtcgcagagaccctgtgtcaacacccttgttttgaggtgcagagaccaaagttgaga @@ -12,8 +12,8 @@ attaaacgcccagggccactggcctgtccccagccccattaaacgcccagggccactgtc ctgtccccagccccattaaacgcccagggccactggcctgtccccagccccattaaatgc ccagggccactgtcctgtccccagtctcattaaacgcagcaaacgtcttctggacgtgcc gccagtcccacagaaaaccaaagcaaggcagactcaccgctcctgcatcccggagctcag -gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcct ->syndip_2_chr20:63371965-63372792 +gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcc +>syndip_2_chr20:63371965-63372791 ggtcaggtgaagactctattcctggctcaggaaaaccaccaggacaggtgagcccagcag catctcgtggagggaggctctgagtcccgtgaggacaaggacggaacaccggggctgcca caggtcgcagagaccctgtgtcaacacccttgttttgaggtgcagagaccaaagttgaga @@ -27,8 +27,8 @@ gccactgtcctgtccccagccccattaaacgcccagggccactgt--------------- ctgtccccagccccattaaacgcccagggccactggcctgtccccagccccattaaatgc ccagggccactgtcctgtccccagtctcattaaacgcagcaaacgtcttctggacgtgcc gccagtcccacagaaaaccaaagcaaggcagactcaccgctcctgcatcccggagctcag -gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcct ->p:HG002_1_chr20:63371965-63372792 +gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcc +>p:HG002_1_chr20:63371965-63372791 ggtcaggtgaagactctattcctggctcaggaaaaccaccaggacaggtgagcccagcag catctcgtggagggaggctctgagtcccgtgaggacaaggacggaacaccggggctgcca caggtcgcagagaccctgtgtcaacacccttgttttgaggtgcagagaccaaagttgaga @@ -42,8 +42,8 @@ gccactgtcctgtccccagccccattaaacgcccagggccactgt--------------- 
ctgtccccagccccattaaacgcccagggccactggcctgtccccagccccattaaatgc ccagggccactgtcctgtccccagtctcattaaacgcagcaaacgtcttctggacgtgcc gccagtcccacagaaaaccaaagcaaggcagactcaccgctcctgcatcccggagctcag -gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcct ->p:HG002_2_chr20:63371965-63372792 +gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcc +>p:HG002_2_chr20:63371965-63372791 ggtcaggtgaagactctattcctggctcaggaaaaccaccaggacaggtgagcccagcag catctcgtggagggaggctctgagtcccgtgaggacaaggacggaacaccggggctgcca caggtcgcagagaccctgtgtcaacacccttgttttgaggtgcagagaccaaagttgaga @@ -57,8 +57,8 @@ attaaacgcccagggccactggcctgtccccagccccattaaacgcccagggccactgtc ctgtccccagccccattaaacgcccagggccactggcctgtccccagccccattaaatgc ccagggccactgtcctgtccccagtctcattaaacgcagcaaacgtcttctggacgtgcc gccagtcccacagaaaaccaaagcaaggcagactcaccgctcctgcatcccggagctcag -gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcct ->ref_chr20:63371965-63372792 +gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcc +>ref_chr20:63371965-63372791 ggtcaggtgaagactctattcctggctcaggaaaaccaccaggacaggtgagcccagcag catctcgtggagggaggctctgagtcccgtgaggacaaggacggaacaccggggctgcca caggtcgcagagaccctgtgtcaacacccttgttttgaggtgcagagaccaaagttgaga @@ -72,4 +72,4 @@ attaaacgcccagggccactgtcctgtccccagccccattaaacgcccagggccactgtc ctgtccccagccccattaaacgcccagggccactggcctgtccccagccccattaaatgc ccagggccactgtcctgtccccagtctcattaaacgcagcaaacgtcttctggacgtgcc gccagtcccacagaaaaccaaagcaaggcagactcaccgctcctgcatcccggagctcag -gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcct +gtccaggagggtctcgggaaaggatgggggcaactggagtaccatcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_14df5b1a78900153927390d7bafee41c.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_5179b77bf332da7c07500c14adccd838.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_14df5b1a78900153927390d7bafee41c.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_5179b77bf332da7c07500c14adccd838.msa index d154fe9a..f9079649 100644 --- 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_14df5b1a78900153927390d7bafee41c.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_5179b77bf332da7c07500c14adccd838.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:38123490-38124806 +>syndip_1_chr20:38123490-38124805 ttttaagtccacagcctgtgaccaatggcaacagagcagaaggttagagcaattactgtg gatggagcttgcccagtaggcctcctagctccattttctagctttgtgatctcaggcctg tcctgtaacctctctgggcctcagaaacctctctctctctc--tatatatatatatataa @@ -33,8 +33,8 @@ tatatatgtattatattattatagatataatatacatattatatctatatataaaatata tatgtagagagagagagacagggtctcattttgtctcccaggctggagtgcagtggtgca atcttggctcactacaacctccacctcccaggctcaagggatcctccagcttcagccccc cgagtacctgggactacaggaacgcgtcaccatgcctggctaatttttttttattttttg -tagagatggggtttcgccatgttgccgaggctggtgtcaa ->syndip_2_chr20:38123490-38124806 +tagagatggggtttcgccatgttgccgaggctggtgtca +>syndip_2_chr20:38123490-38124805 ttttaagtccacagcctgtgaccaatggcaacagagcagaaggttagagcaattactgtg gatggagcttgcccagtaggcctcctagctccattttctagctttgtgatctcaggcctg tcctgtaacctctctgggcctcagaaacctctctctctctc--tatatatatatatataa @@ -69,8 +69,8 @@ tatatatgtattatattattatagatataatatacatattatatctatatataaaatata tatgtagagagagagagacagggtctcattttgtctcccaggctggagtgcagtggtgca atcttggctcactacaacctccacctcccaggctcaagggatcctccagcttcagccccc cgagtacctgggactacaggaacgcgtcaccatgcctggctaatttttttttattttttg -tagagatggggtttcgccatgttgccgaggctggtgtcaa ->p:HG002_1_chr20:38123490-38124806 +tagagatggggtttcgccatgttgccgaggctggtgtca +>p:HG002_1_chr20:38123490-38124805 ttttaagtccacagcctgtgaccaatggcaacagagcagaaggttagagcaattactgtg gatggagcttgcccagtaggcctcctagctccattttctagctttgtgatctcaggcctg tcctgtaacctctctgggcctcagaaacctctctctctctc--tatatatatatatataa @@ -105,8 +105,8 @@ tatatatgtattatattattatagatataatatacatattatatctatatataaaatata tatgtagagagagagagacagggtctcattttgtctcccaggctggagtgcagtggtgca atcttggctcactacaacctccacctcccaggctcaagggatcctccagcttcagccccc cgagtacctgggactacaggaacgcgtcaccatgcctggctaatttttttttattttttg -tagagatggggtttcgccatgttgccgaggctggtgtcaa ->p:HG002_2_chr20:38123490-38124806 
+tagagatggggtttcgccatgttgccgaggctggtgtca +>p:HG002_2_chr20:38123490-38124805 ttttaagtccacagcctgtgaccaatggcaacagagcagaaggttagagcaattactgtg gatggagcttgcccagtaggcctcctagctccattttctagctttgtgatctcaggcctg tcctgtaacctctctgggcctcagaaacctctctctctctc--tatatatatatatataa @@ -141,8 +141,8 @@ tatatatgtattatattattatagatataatatacatattatatctatatataaaatata tatgtagagagagagagacagggtctcattttgtctcccaggctggagtgcagtggtgca atcttggctcactacaacctccacctcccaggctcaagggatcctccagcttcagccccc cgagtacctgggactacaggaacgcgtcaccatgcctggctaatttttttttattttttg -tagagatggggtttcgccatgttgccgaggctggtgtcaa ->ref_chr20:38123490-38124806 +tagagatggggtttcgccatgttgccgaggctggtgtca +>ref_chr20:38123490-38124805 ttttaagtccacagcctgtgaccaatggcaacagagcagaaggttagagcaattactgtg gatggagcttgcccagtaggcctcctagctccattttctagctttgtgatctcaggcctg tcctgtaacctctctgggcctcagaaacctctctctctctctatatatatatatatataa @@ -177,4 +177,4 @@ tatatatgtattatattattatagatataatatacatattatatctatatataaaatata tatgtagagagagagagacagggtctcattttgtctcccaggctggagtgcagtggtgca atcttggctcactacaacctccacctcccaggctcaagggatcctccagcttcagccccc cgagtacctgggactacaggaacgcgtcaccatgcctggctaatttttttttattttttg -tagagatggggtttcgccatgttgccgaggctggtgtcaa +tagagatggggtttcgccatgttgccgaggctggtgtca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_c95ab5f466d7144d537fb37124016439.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_541535ac45889f61485b69c44137fd45.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_c95ab5f466d7144d537fb37124016439.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_541535ac45889f61485b69c44137fd45.msa index 4f85bc90..a244f140 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_c95ab5f466d7144d537fb37124016439.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_541535ac45889f61485b69c44137fd45.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:14861954-14862745 +>syndip_1_chr20:14861954-14862744 gacattggaggttttatatataaatatatatatatatatttatatatatatttatatata 
tatatttatatatatttatatatatttatatatatatttatatatatat--ttatatata tatttatatatatatttatatatatatttatatatatatttatatatatatttatatata @@ -25,8 +25,8 @@ aaatatatataaatatatatataaatataaatatatataaatata----taaaaatatat atataaatataaatatatataaatatatatgtataaatatatatataa----------at ataaatatatatatataaatatatatataaatatatatatataaatatatatataaatat atataaatatatatataaatatatataaatatatatatatatatttttttttaatagaga -ggatgaagaccaa ->syndip_2_chr20:14861954-14862745 +ggatgaagacca +>syndip_2_chr20:14861954-14862744 gacattggaggtttt--------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ @@ -53,8 +53,8 @@ aaatatataaaaatatatatataaatataaatatatataaatatatatgtataaatatat atataaatataaatatatataaatatatatatataaatatatatataa----------at atatataaatatatataaaaatatatataa--atatatatataaatatatatataaatat atataaatatatatataaatatatataaatatatatatatatatttttttttaatagaga -ggatgaagaccaa ->p:HG002_1_chr20:14861954-14862745 +ggatgaagacca +>p:HG002_1_chr20:14861954-14862744 gacattggaggtttt--------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ @@ -81,8 +81,8 @@ aaatatataaaaatatatatataaatataaatatatataaatatatatgtataaatatat atataaatataaatatatataaatatatatatataaatatatatataa----------at atatataaatatatataaaaatatatataa--atatatatataaatatatatataaatat atataaatatatatataaatatatataaatatatatatatatatttttttttaatagaga -ggatgaagaccaa ->p:HG002_2_chr20:14861954-14862745 +ggatgaagacca +>p:HG002_2_chr20:14861954-14862744 gacattggaggttttatatataaatatatatatatatatttatatatatatttatatata tatatttatatatatttatatatatttatatatatatttatatatatataaatatatata aatatataaatatatataaatatatatttatacataaatatataaatatatttatatata @@ -109,8 +109,8 @@ aaatatatataaatatatatataaatataaatatatataaatata----taaaaatatat atataaatataaatatatataaatatatatgtataaatatatatataa----------at ataaatatatatatataaatatatatataaatatatatatataaatatatatataaatat 
atataaatatatatataaatatatataaatatatatatatatatttttttttaatagaga -ggatgaagaccaa ->ref_chr20:14861954-14862745 +ggatgaagacca +>ref_chr20:14861954-14862744 gacattggaggttt---------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ @@ -137,4 +137,4 @@ aaatatatataaatatatatataaatataaatatatataaatata----taaaaatatat atataaatataaatatatataaatatatatgtataaatatatatataaatataaatatat ataaatatatatatataaatatatatataaatatatatatataaatatatatataaatat atataaatatatatataaatatatataaatatatatatatatatttttttttaatagaga -ggatgaagaccaa +ggatgaagacca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_efa1c6acccfa7bcd26ee40e0acbbc623.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_558c505e413a6398b429691dca26a4ea.msa similarity index 79% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_efa1c6acccfa7bcd26ee40e0acbbc623.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_558c505e413a6398b429691dca26a4ea.msa index 4480a3ba..0e524eae 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_efa1c6acccfa7bcd26ee40e0acbbc623.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_558c505e413a6398b429691dca26a4ea.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:62830550-62830798 +>syndip_1_chr20:62830550-62830797 ccaggcccccagggcctccgaggtgacgtgggcgaccgggtaagtggccctctcagcagg aagctcccctgcaccccctctacccatgtaccacagtcccccaccccccaccgtgctcca ccccccaccacagtcccccaccccccaccccagtccccaaacccctaccacagtccccca @@ -6,9 +6,9 @@ accccaccacagtcccccaccccctaccacagtcccccaccccctaccacagtcccccaa cccccaccacagtcccccacccccaccacagtccgccaacccccatgacagtccctcaac ccccaccacagtcccccaacccccatgacagtcccccaacccccaccacagtcccccaac ccccaccacagt-ccccaccccctaccacagtcccccaacccccaccacagtcccccacc -ccctaccacaatcccccaccccccac---------------------------------- ---------------------------------------------- ->syndip_2_chr20:62830550-62830798 +ccctaccacaatcccccacccccca----------------------------------- 
+-------------------------------------------- +>syndip_2_chr20:62830550-62830797 ccaggcccccagggcctccgaggtgacgtgggcgaccgggtaagtggccctctcagcagg aagctcccctgcaccccctctacccatgtaccacagtccccca----------------- ccccccaccacagtcccccaccccccaccccagtccccaaacccctaccacagtccccca @@ -17,8 +17,8 @@ cccccaccacagtcccccacccccaccacagtccgccaacccccatgacagtcccccaac ccccaccacagtccccc-acccccaccacagtccgccaacccccatgacagtcccccaac ccccaccacagtcccccaacccccatgacagtcccccaacccccaccacagtcccccaac ccctaccacagtcccccaacccccaccacagtccccaccccctaccacagtcccccaacc -cccaccacagtcccccaccccctaccacaatcccccaccccccac ->p:HG002_1_chr20:62830550-62830798 +cccaccacagtcccccaccccctaccacaatcccccacccccca +>p:HG002_1_chr20:62830550-62830797 ccaggcccccagggcctccgaggtgacgtgggcgaccgggtaagtggccctctcagcagg aagctcccctgcaccccctctacccatgtaccacagtccccca----------------- ccccccaccacagtcccccaccccccaccccagtccccaaacccctaccacagtccccca @@ -27,8 +27,8 @@ cccccaccacagtcccccacccccaccacagtccgccaacccccatgacagtcccccaac ccccaccacagtccccc-acccccaccacagtccgccaacccccatgacagtcccccaac ccccaccacagtcccccaacccccatgacagtcccccaacccccaccacagtcccccaac ccctaccacagtcccccaacccccaccacagtccccaccccctaccacagtcccccaacc -cccaccacagtcccccaccccctaccacaatcccccaccccccac ->p:HG002_2_chr20:62830550-62830798 +cccaccacagtcccccaccccctaccacaatcccccacccccca +>p:HG002_2_chr20:62830550-62830797 ccaggcccccagggcctccgaggtgacgtgggcgaccgggtaagtggccctctcagcagg aagctcccctgcaccccctctacccatgtaccacagtcccccaccccccaccgtgctcca ccccccaccacagtcccccaccccccaccccagtccccaaacccctaccacagtccccca @@ -36,15 +36,15 @@ accccaccacagtcccccaccccctaccacagtcccccaccccctaccacagtcccccaa cccccaccacagtcccccacccccaccacagtccgccaacccccatgacagtcccccaac ccccaccacagtcccccaacccccatgacagtcccccaacccctaccacagtcccccaac ccccaccacagt-ccccaccccctaccacagtcccccaacccccaccacagtcccccacc -ccctaccacaatcccccaccccccac---------------------------------- ---------------------------------------------- ->ref_chr20:62830550-62830798 +ccctaccacaatcccccacccccca----------------------------------- +-------------------------------------------- 
+>ref_chr20:62830550-62830797 ccaggcccccagggcctccgaggtgacgtgggcgaccgggtaagtggccctctcagcagg aagctcccctgcaccccctctacccatgtaccacagtccccca----------------- -cccccatgacagtcccccaacccccaccacagtcccccaacccctaccacagtccccca acccccaccacagtccccaccccctaccacagtcccccaacccccaccacagtcccccac -cccctaccacaatcccccaccccccac--------------------------------- +cccctaccacaatcccccacccccca---------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ---------------------------------------------- +-------------------------------------------- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_b9d356b09aad1b84d7626c820eace838.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_5b9c7d30d7119b8ee01d9d2df56da7df.msa similarity index 93% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_b9d356b09aad1b84d7626c820eace838.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_5b9c7d30d7119b8ee01d9d2df56da7df.msa index 320047f1..b1b3db87 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_b9d356b09aad1b84d7626c820eace838.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_5b9c7d30d7119b8ee01d9d2df56da7df.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:35579913-35580987 +>syndip_1_chr20:35579913-35580986 aaaactccatctcaaaaaaaaaaaaaaaaaaaaaagggacacaatcctggattcaaaaga ctgttgtaaatatcaaatgacataatagaaacataagtactttggagcctacaaagtgct gtcagaagctgggcatggtggctcatgcctgtaaccccagcactttgggaggctgaggca @@ -21,8 +21,8 @@ ataaaatatatatatataaaatatatatataaaatatatatatataaaatatatatatat aaaatatatatatataaaatatatatatataaaatatatatataaaatatatatatataa aatatatatatataaaatatatatatataaaatatatatatggaattccatatctatcaa tggatcctaagggaaacaattttactactgtattggtgggtttattttatgtatttgtag -atatatacatatatacacatatatatgacaattttagcacaaaagagaggaa ->syndip_2_chr20:35579913-35580987 +atatatacatatatacacatatatatgacaattttagcacaaaagagagga 
+>syndip_2_chr20:35579913-35580986 aaaactccatctcaaaaaaaaaaaaaaaaaaaaaagggacacaatcctggattcaaaaga ctgttgtaaatatcaaatgacataatagaaacataagtactttggagcctacaaagtgct gtcagaagctgggcatggtggctcatgcctgtaaccccagcactttgggaggctgaggca @@ -45,8 +45,8 @@ atat---------------aaaatatatataaaatatatatatataaaatatatatatat aaaatatatatatataaaatat-------------------------------------- ----------------atatatatatataaaatatatatatggaattccatatctatcaa tggatcctaagggaaacaattttactactgtattggtgggtttattttatgtatttgtag -atatatacatatatacacatatatatgacaattttagcacaaaagagaggaa ->p:HG002_1_chr20:35579913-35580987 +atatatacatatatacacatatatatgacaattttagcacaaaagagagga +>p:HG002_1_chr20:35579913-35580986 aaaactccatctcaaaaaaaaaaaaaaaaaaaaaagggacacaatcctggattcaaaaga ctgttgtaaatatcaaatgacataatagaaacataagtactttggagcctacaaagtgct gtcagaagctgggcatggtggctcatgcctgtaaccccagcactttgggaggctgaggca @@ -69,8 +69,8 @@ atat---------------aaaatatatataaaatatatatatataaaatatatatatat aaaatatatatatataaaatat-------------------------------------- ----------------atatatatatataaaatatatatatggaattccatatctatcaa tggatcctaagggaaacaattttactactgtattggtgggtttattttatgtatttgtag -atatatacatatatacacatatatatgacaattttagcacaaaagagaggaa ->p:HG002_2_chr20:35579913-35580987 +atatatacatatatacacatatatatgacaattttagcacaaaagagagga +>p:HG002_2_chr20:35579913-35580986 aaaactccatctcaaaaaaaaaaaaaaaaaaaaaagggacacaatcctggattcaaaaga ctgttgtaaatatcaaatgacataatagaaacataagtactttggagcctacaaagtgct gtcagaagctgggcatggtggctcatgcctgtaaccccagcactttgggaggctgaggca @@ -93,8 +93,8 @@ ataaaatatatatatataaaatatatatataaaatatatatatataaaatatatatatat aaaatatatatatataaaatatatatatataaaatatatatataaaatatatatatataa aatatatatatataaaatatatatatataaaatatatatatggaattccatatctatcaa tggatcctaagggaaacaattttactactgtattggtgggtttattttatgtatttgtag -atatatacatatatacacatatatatgacaattttagcacaaaagagaggaa ->ref_chr20:35579913-35580987 +atatatacatatatacacatatatatgacaattttagcacaaaagagagga +>ref_chr20:35579913-35580986 aaaactccatctc-aaaaaaaaaaaaaaaaaaaaagggacacaatcctggattcaaaaga 
ctgttgtaaatatcaaatgacataatagaaacataagtactttggagcctacaaagtgct gtcagaagctgggcatggtggctcatgcctgtaaccccagcactttgggaggctgaggca @@ -117,4 +117,4 @@ ataa---------------aatatatatatataatatatatatataaaatatatatatat aaaatatatatatatataaaat-------------------------------------- ----------------atatatatatataaaatatatatatggaattccatatctatcaa tggatcctaagggaaacaattttactactgtattggtgggtttattttatgtatttgtag -atatatacatatatacacatatatatgacaattttagcacaaaagagaggaa +atatatacatatatacacatatatatgacaattttagcacaaaagagagga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_918ac60720a8b1e5a939e5ef71c3dd30.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_5f56f463de2082f8d7163ff2aada8742.msa similarity index 89% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_918ac60720a8b1e5a939e5ef71c3dd30.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_5f56f463de2082f8d7163ff2aada8742.msa index 7ce5f8b5..ad321691 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_918ac60720a8b1e5a939e5ef71c3dd30.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_5f56f463de2082f8d7163ff2aada8742.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:57110112-57110886 +>syndip_1_chr20:57110112-57110885 agaaaaatcctccagaagatgacagttccagttgatgggactgacaagccccccacagga aggcagtctccagctgaccactgttccgtgaggctgtctgagcaggcttcgctatgcacg cataatagcgacagcgcaaacagcagcagcagcagcagcctcagctggctcggcggggct @@ -11,8 +11,8 @@ tgggcgtgtgcggggc-------------------------------------------- cgtgtgcggggcgttatgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtct tccggttcggcgtgtgcggggcgctgtgctaagtcttccggctgcattcgcactctttcc ctccgactcaaccatggcagtgaggcactgttttgttttgctttgcttctcaattttcct -gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttca ->syndip_2_chr20:57110112-57110886 +gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttc +>syndip_2_chr20:57110112-57110885 agaaaaatcctccagaagatgacagttccagttgatgggactgacaagccccccacagga aggcagtctccagctgaccactgttccgtgaggctgtctgagcaggcttcgctatgcacg 
cataatagcgacagcgcaaacagcagcagcagcagcagcctcagctggctcggcggggct @@ -25,8 +25,8 @@ tgggcgtgtgcggggc-------------------------------------------- cgtgtgcggggcgttatgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtct tccggttcggcgtgtgcggggcgctgtgctaagtcttccggctgcattcgcactctttcc ctccgactcaaccatggcagtgaggcactgttttgttttgctttgcttctcaattttcct -gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttca ->p:HG002_1_chr20:57110112-57110886 +gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttc +>p:HG002_1_chr20:57110112-57110885 agaaaaatcctccagaagatgacagttccagttgatgggactgacaagccccccacagga aggcagtctccagctgaccactgttccgtgaggctgtctgagcaggcttcgctatgcacg cataatagcgacagcgcaaacagcagcagcagcagcagcctcagctggctcggcggggct @@ -39,8 +39,8 @@ tgggcgtgtgcggggc-------------------------------------------- cgtgtgcggggcgttatgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtct tccggttcggcgtgtgcggggcgctgtgctaagtcttccggctgcattcgcactctttcc ctccgactcaaccatggcagtgaggcactgttttgttttgctttgcttctcaattttcct -gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttca ->p:HG002_2_chr20:57110112-57110886 +gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttc +>p:HG002_2_chr20:57110112-57110885 agaaaaatcctccagaagatgacagttccagttgatgggactgacaagccccccacagga aggcagtctccagctgaccactgttccgtgaggctgtctgagcaggcttcgctatgcacg cataatagcgacagcgcaaacagcagcagcagcagcagcctcagctggctcggcggggct @@ -53,8 +53,8 @@ tgggcgtgtgcggggc-------------------------------------------- cgtgtgcggggcgttatgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtct tccggttcggcgtgtgcggggcgctgtgctaagtcttccggctgcattcgcactctttcc ctccgactcaaccatggcagtgaggcactgttttgttttgctttgcttctcaattttcct -gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttca ->ref_chr20:57110112-57110886 +gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttc +>ref_chr20:57110112-57110885 agaaaaatcctccagaagatgacagttccagttgatgggactgacaagccccccacagga aggcagtctccagctgaccactgttccgtgaggctgtctgagcaggcttcgctatgcacg cataatagcgacagcgcaaacagcagcagcagcagcagcctcagctggctcggcggggct @@ -67,4 +67,4 @@ 
gtgctgtgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtcttccggttcgg cgtgtgcggggcgttatgctaagtcttccggctgggcgtgtgcggggcgctgtgctgtct tccggttcggcgtgtgcggggcgctgtgctaagtcttccggctgcattcgcactctttcc ctccgactcaaccatggcagtgaggcactgttttgttttgctttgcttctcaattttcct -gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttca +gtggtaaaatacacctaatattcaccatcttaaccgtttttaagtgtacagttc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_d00295e693e29721bd316e2a98d6b49d.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_6125f7292d70d0bf730d50e79d9e5f9d.msa similarity index 92% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_d00295e693e29721bd316e2a98d6b49d.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_6125f7292d70d0bf730d50e79d9e5f9d.msa index ac205f8c..a524e701 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_d00295e693e29721bd316e2a98d6b49d.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_6125f7292d70d0bf730d50e79d9e5f9d.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63559188-63560064 +>syndip_1_chr20:63559188-63560063 tgggggggccctggactttccctcccagtcctggcaccttgcaggtggagagggctcttc aggaaggcatagttggcctcctgcagacgcgcacctggccggcaggcacgagggtctgct gagcctcgcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggt @@ -13,8 +13,8 @@ gtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaggtcggagtcagggtc aggcaggagtcggcagctccctagcccagccctggcctcacctgtgttcccaccctggaa gagcccagccccgccctcaccgatcaggcagagcccctcctgggcccgcgtgacagccac attcacttggttggggtccacaacgaagcccagaaacttcttgagccagctcttggtggg -ccgctggtccaggtcgctcttggcacaggtgcggacg ->syndip_2_chr20:63559188-63560064 +ccgctggtccaggtcgctcttggcacaggtgcggac +>syndip_2_chr20:63559188-63560063 tgggggggccctggactttccctcccagtcctggcaccttgcaggtggagagggctcttc aggaaggcatagttggcctcctgcagacgcgcacctggccggcaggcacgagggtctgct gagcctcgcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggt @@ -29,8 +29,8 @@ gtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaggtcggagtcagggtc aggcaggagtcggcagctccctagcccagccctggcctcacctgtgttcccaccctggaa 
gagcccagccccgccctcaccgatcaggcagagcccctcctgggcccgcgtgacagccac attcacttggttggggtccacaacgaagcccagaaacttcttgagccagctcttggtggg -ccgctggtccaggtcgctcttggcacaggtgcggacg ->p:HG002_1_chr20:63559188-63560064 +ccgctggtccaggtcgctcttggcacaggtgcggac +>p:HG002_1_chr20:63559188-63560063 tgggggggccctggactttccctcccagtcctggcaccttgcaggtggagagggctcttc aggaaggcatagttggcctcctgcagacgcgcacctggccggcaggcacgagggtctgct gagcctcgcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggt @@ -45,8 +45,8 @@ gtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaggtcggagtcagggtc aggcaggagtcggcagctccctagcccagccctggcctcacctgtgttcccaccctggaa gagcccagccccgccctcaccgatcaggcagagcccctcctgggcccgcgtgacagccac gttcacttggttggggtccacaacgaagcccagaaacttcttgagccagctcttggtggg -ccgctggtccaggtcgctcttggcacaggtgcggacg ->p:HG002_2_chr20:63559188-63560064 +ccgctggtccaggtcgctcttggcacaggtgcggac +>p:HG002_2_chr20:63559188-63560063 tgggggggccctggactttccctcccagtcctggcaccttgcaggtggagagggctcttc aggaaggcatagttggcctcctgcagacgcgcacctggccggcaggcacgagggtctgct gagcctcgcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggt @@ -61,8 +61,8 @@ gtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaggtcggagtcagggtc aggcaggagtcggcagctccctagcccagccctggcctcacctgtgttcccaccctggaa gagcccagccccgccctcaccgatcaggcagagcccctcctgggcccgcgtgacagccac gttcacttggttggggtccacaacgaagcccagaaacttcttgagccagctcttggtggg -ccgctggtccaggtcgctcttggcacaggtgcggacg ->ref_chr20:63559188-63560064 +ccgctggtccaggtcgctcttggcacaggtgcggac +>ref_chr20:63559188-63560063 tgggggggccctggactttccctcccagtcctggcaccttgcaggtggagagggctcttc aggaaggcatagttggcctcctgcagacgcgcacctggccggcaggcacgagggtctgct gagcctcgcagaagtccaggaggctacgccagagggggcagcagcgcagaaggaggtggt @@ -77,4 +77,4 @@ gtcaggtgggaggagtcagggtcaggtgggaggagtcagggtcaggtcggagtcagggtc aggcaggagtcggcagctccctagcccagccctggcctcacctgtgttcccaccctggaa gagcccagccccgccctcaccgatcaggcagagcccctcctgggcccgcgtgacagccac attcacttggttggggtccacaacgaagcccagaaacttcttgagccagctcttggtggg -ccgctggtccaggtcgctcttggcacaggtgcggacg +ccgctggtccaggtcgctcttggcacaggtgcggac diff --git 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_04f59078ccc583035a295f8ad3526dbf.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_6272ac8c11659a343276a77a33bea482.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_04f59078ccc583035a295f8ad3526dbf.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_6272ac8c11659a343276a77a33bea482.msa index 60256db1..080f1729 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_04f59078ccc583035a295f8ad3526dbf.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_6272ac8c11659a343276a77a33bea482.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:21721074-21721836 +>syndip_1_chr20:21721074-21721835 agtagggacttgctttgggtcacagtcagcttttattagtgttattattgttgctgtttt cttctttttctttgtacatcaacgttttatggtgaattaaatcgtgttttaaccttgtag accacattggctattacatatatatatataatatatatgtaatatatatatgtaatgtat @@ -20,8 +20,8 @@ acatatataac--------------------------atgtattacatatatgtaataca tatataacatgtattacatatatgtaatacacatataatatatatgttatatatatatat atatataaaattttttaaaaaattgtttcttgagatagggtctcactctgtcacccaggc tggagtataatggtttgatcatagttcactgcagccttgaacttctagactcaggtgatc -ctcccacctcaacctcct ->syndip_2_chr20:21721074-21721836 +ctcccacctcaacctcc +>syndip_2_chr20:21721074-21721835 agtagggacttgctttgggtcacagtcagcttttattagtgttattattgttgctgtttt cttctttttctttgtacatcaacgttttatggtgaattaaatcgtgttttaaccttgtag accacattggctattacatatatatatataatatatatgtaatatatatatgtaatgtat @@ -43,8 +43,8 @@ acatatataacatatatgtaatacatatataacatatatgtattacatatatgtaataca tatataacatgtattacatatatgtaatacacatataatatatatgttatatatatatat atatataaaattttttaaaaaattgtttcttgagatagggtctcactctgtcacccaggc tggagtataatggtttgatcatagttcactgcagccttgaacttctagactcaggtgatc -ctcccacctcaacctcct ->p:HG002_1_chr20:21721074-21721836 +ctcccacctcaacctcc +>p:HG002_1_chr20:21721074-21721835 agtagggacttgctttgggtcacagtcagcttttattagtgttattattgttgctgtttt cttctttttctttgtacatcaacgttttatggtgaattaaatcgtgttttaaccttgtag 
accacattggctattacatatatatatataatatatatgtaatatatatatgtaatgtat @@ -66,8 +66,8 @@ acatatataacatatatgtaatacatatataacatatatgtattacatatatgtaataca tatataacatgtattacatatatgtaatacacatataatatatatgttatatatatatat atatataaaattttttaaaaaattgtttcttgagatagggtctcactctgtcacccaggc tggagtataatggtttgatcatagttcactgcagccttgaacttctagactcaggtgatc -ctcccacctcaacctcct ->p:HG002_2_chr20:21721074-21721836 +ctcccacctcaacctcc +>p:HG002_2_chr20:21721074-21721835 agtagggacttgctttgggtcacagtcagcttttattagtgttattattgttgctgtttt cttctttttctttgtacatcaacgttttatggtgaattaaatcgtgttttaaccttgtag accacattggctattacatatatatatataatatatatgtaatatatatatgtaatgtat @@ -89,8 +89,8 @@ acatatataac--------------------------atgtattacatatatgtaataca tatataacatgtattacatatatgtaatacacatataatatatatgttatatatatatat atatataaaattttttaaaaaattgtttcttgagatagggtctcactctgtcacccaggc tggagtataatggtttgatcatagttcactgcagccttgaacttctagactcaggtgatc -ctcccacctcaacctcct ->ref_chr20:21721074-21721836 +ctcccacctcaacctcc +>ref_chr20:21721074-21721835 agtagggacttgctttgggtcacagtcagcttttattagtgttattattgttgctgtttt cttctttttctttgtacatcaacgttttatggtgaattaaatcgtgttttaaccttgtag accacattggctattacatatatatatataatatatatgtaatatatatatgtaatgtat @@ -112,4 +112,4 @@ tatatatat--------------------------------------------------- ------------------------------------------------------------ --atataaaattttttaaaaaattgtttcttgagatagggtctcactctgtcacccaggc tggagtataatggtttgatcatagttcactgcagccttgaacttctagactcaggtgatc -ctcccacctcaacctcct +ctcccacctcaacctcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_be2ece84b9803ee6d8b5da47e0f1328a.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_649eba2ef8750104fb27abd65692f3e5.msa similarity index 90% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_be2ece84b9803ee6d8b5da47e0f1328a.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_649eba2ef8750104fb27abd65692f3e5.msa index f88555c4..f47c531d 100644 --- 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_be2ece84b9803ee6d8b5da47e0f1328a.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_649eba2ef8750104fb27abd65692f3e5.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:62360310-62360703 +>syndip_1_chr20:62360310-62360702 gtaggggggtgggtgggtggatggatgggtaggggtgggtgggtggatacgttgggggtg ggtgggtgggtgggtcagcaggcagagggatgggataggataggtgggtgggtgggtgga tagggagggggtggggtgggtgcataggtaggggggtggatgggtcagcaggcagcggga @@ -9,8 +9,8 @@ gtgggtgggtgggtggagggatagatgggtgggtgggtgggtggagggatagatgggtgg tgggtgggtggagggatagatgggtggatgggtgggtagaaggataggta---ggtgggt gggtgggtggatggataggtgggtgggtgggtggaggaata--------gatgggtgggt ggatgggtgggtggagggatagatgggtgggtgggtgggtggagggatagatgggtgggt -gggtggagggaccacttcccaggtggagtgga ->syndip_2_chr20:62360310-62360703 +gggtggagggaccacttcccaggtggagtgg +>syndip_2_chr20:62360310-62360702 gtaggggggtgggtgggtggatggatgggtaggggtgggtgggtggatacgttgggggtg ggtgggtgggtgggtcagcaggcagagggatgggataggataggtgggtgggtggaggga tagatgggt--------------------------------------------------- @@ -21,8 +21,8 @@ gtgggtgggtgggtggaggaataga--------tgggtgggtgga--------------- tgggtgggtggagggatagatgggtgggtgggtgggtggagggatagatg---ggtgggt gggtggaggga------------------------------------------------- ------------------------------------------------------------ ------------ccacttcccaggtggagtgga ->p:HG002_1_chr20:62360310-62360703 +-----------ccacttcccaggtggagtgg +>p:HG002_1_chr20:62360310-62360702 gtaggggggtgggtgggtggatggatgggtaggggtgggtgggtggatacgttgggggtg ggtgggtgggtgggtcagcaggcagagggatgggataggataggtgggtgggtggaggga tgggtg------------------------------------------------------ @@ -33,8 +33,8 @@ gtggg----------------------------tgggtgggtgga--------------- tgggtgggtggagggatagatgggtgggtgggtgggtggagggatagatg---ggtgggt gggtggaggga------------------------------------------------- ------------------------------------------------------------ ------------ccacttcccaggtggagtgga ->p:HG002_2_chr20:62360310-62360703 +-----------ccacttcccaggtggagtgg 
+>p:HG002_2_chr20:62360310-62360702 gtaggggggtgggtgggtggatggatgggtaggggtgggtgggtggatacgttgggggtg ggtgggtgggtgggtcagcaggcagagggatgggataggataggtgggtgggtggaggga tgggtgagtgggtgggtggagggatagatgggtgggtggatgggtcagcaggcagcggga @@ -45,8 +45,8 @@ gtgggtgggtgggtggagggataga--------tgggtgggtggagggatagatgggtgg tgggtgggtggagggatagatgggtgggtgggtgggtggagggatagatgggtggtgggt gggtggagggatagatgggtggatgggtgggtagaaggataggtaggtgggtgggtgggt ggatgggtgggtggagggatagatgggtgggtgggtgggtggagggatagatgggtgggt -gggtggagggaccacttcccaggtggagtgga ->ref_chr20:62360310-62360703 +gggtggagggaccacttcccaggtggagtgg +>ref_chr20:62360310-62360702 gtaggggggtgggtgggtggatggatgggtaggggtgggtgggtggatacgttgggggtg ggtgggtgggtgggtcagcaggcagagggatgggataggataggtgggtgggtggaggga tgggtg------------------------------------------------------ @@ -57,4 +57,4 @@ gtggg----------------------------tgggtgggtgga--------------- tgggtgggtggagggatagatgggtgggtgggtgggtggagggatagatg---ggtgggt gggtggaggga------------------------------------------------- ------------------------------------------------------------ ------------ccacttcccaggtggagtgga +-----------ccacttcccaggtggagtgg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_709f39dc3727958f3af64bf428a7aa8c.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_66112df94acfa8f42760b311b37ff111.msa similarity index 88% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_709f39dc3727958f3af64bf428a7aa8c.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_66112df94acfa8f42760b311b37ff111.msa index 3b5e22b1..4d2cea0e 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_709f39dc3727958f3af64bf428a7aa8c.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_66112df94acfa8f42760b311b37ff111.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:57350708-57351088 +>syndip_1_chr20:57350708-57351087 aatggcatggtctcggctcactgcaacctccacctcctgggttcaagcgattctcctgcc ttagcctcccaagcagctgggattacaggcacccgccaccatgcccggctaatttttgta 
tttttagtactgacggggggacggggggacgggggacgggggacggggggcgggggggac @@ -7,8 +7,8 @@ ggggcggggggacgggggcggggggccggggggggcggggggacgggggcgggggggcgg ggggggcggggggacgggggcggggggcgcgggggggcggggggggcggcgcgggggggc gggggggggcggggggcggggcggttcacgatgttggccagacttgtcttgaactcctga ccccaggtgatcagcccgcctcggcctcccaaagtgctgggattacaggcgtgagccacc -gcgcctggccagttgcggatttataaa ->syndip_2_chr20:57350708-57351088 +gcgcctggccagttgcggatttataa +>syndip_2_chr20:57350708-57351087 aatggcatggtctcggctcactgcaacctccacctcctgggttcaagcgattctcctgcc ttagcctcccaagcagctgggattacaggcacccgccaccatgcccggctaatttttgta tttttagtactgacggggggacgggggggc------------------------------ @@ -17,8 +17,8 @@ ggggcggggggacgggggcggggggcgggggggggc------------------------ -------------------------ggcgcgggggggc---------------------- -ggggggggcggggggcggggcggttcacgatgttggccagacttgtcttgaactcctga ccccaggtgatcagcccgcctcggcctcccaaagtgctgggattacaggcgtgagccacc -gcgcctggccagttgcggatttataaa ->p:HG002_1_chr20:57350708-57351088 +gcgcctggccagttgcggatttataa +>p:HG002_1_chr20:57350708-57351087 aatggcatggtctcggctcactgcaacctccacctcctgggttcaagcgattctcctgcc ttagcctcccaagcagctgggattacaggcacccgccaccatgcccggctaatttttgta tttttagtactgacggggggacgggggggc------------------------------ @@ -27,8 +27,8 @@ ggggcggggggacgggggcggggggcgggggggggc------------------------ -------------------------ggcgcgggggggc---------------------- -ggggggggcggggggcggggcggttcacgatgttggccagacttgtcttgaactcctga ccccaggtgatcagcccgcctcggcctcccaaagtgctgggattacaggcgtgagccacc -gcgcctggccagttgcggatttataaa ->p:HG002_2_chr20:57350708-57351088 +gcgcctggccagttgcggatttataa +>p:HG002_2_chr20:57350708-57351087 aatggcatggtctcggctcactgcaacctccacctcctgggttcaagcgattctcctgcc ttagcctcccaagcagctgggattacaggcacccgccaccatgcccggctaatttttgta tttttagtactgacggggggacggggggacgggggacgggggacggggggcgggggggac @@ -37,8 +37,8 @@ ggggcggggggacgggggcggggggccggggggggcggggggacgggggcgggggggcgg ggggggcggggggacgggggcggggggcgcgggggggcggggggggcggcgcgggggggc gggggggggcggggggcggggcggttcacgatgttggccagacttgtcttgaactcctga 
ccccaggtgatcagcccgcctcggcctcccaaagtgctgggattacaggcgtgagccacc -gcgcctggccagttgcggatttataaa ->ref_chr20:57350708-57351088 +gcgcctggccagttgcggatttataa +>ref_chr20:57350708-57351087 aatggcatggtctcggctcactgcaacctccacctcctgggttcaagcgattctcctgcc ttagcctcccaagcagctgggattacaggcacccgccaccatgcccggctaatttttgta tttttagtactgacggggggacgggggggc------------------------------ @@ -47,4 +47,4 @@ ggggcggggggacgggggcggggggcgggggggggc------------------------ -------------------------ggcgcgggggggc---------------------- -ggggggggcggggggcggggcggttcacgatgttggccagacttgtcttgaactcctga ccccaggtgatcagcccgcctcggcctcccaaagtgctgggattacaggcgtgagccacc -gcgcctggccagttgcggatttataaa +gcgcctggccagttgcggatttataa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_16a9570a0bae9b86665a101c47ea2a7e.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_69e7c4dc7f053eda2403927861e33459.msa similarity index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_16a9570a0bae9b86665a101c47ea2a7e.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_69e7c4dc7f053eda2403927861e33459.msa index 56d3b9e2..56caa352 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_16a9570a0bae9b86665a101c47ea2a7e.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_69e7c4dc7f053eda2403927861e33459.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63048933-63049513 +>syndip_1_chr20:63048933-63049512 gcttgtttggacctgaagaccagcctgagcgaggagagtcttgtgcagacggtttatcca ggaggagcccggggagaggggggtgggaaaagccacagttttggaggtcagcaaggtcat gtgggggcagactcctcctcctcctcctcttcctcctcc--------------------- @@ -20,8 +20,8 @@ tcctcctcctcctcttcctcccctcctcctcctcctcttcctcctcctcctcctcttcct cccctcctcctcctcttcctcctcgtcctcttcctccttctgccaggacccttcttccag gacccctgagaagcgcatagaatagcgtccaggactgtctgccccaggcacgctgaccac tggctccactcccctgggcaagggtcgcccctgggctgctgcctcccagcgacacaggca -ggacccagaagcctggtagaaagtgaccatgtgtggcaggt ->syndip_2_chr20:63048933-63049513 +ggacccagaagcctggtagaaagtgaccatgtgtggcagg 
+>syndip_2_chr20:63048933-63049512 gcttgtttggacctgaagaccagcctgagcgaggagagtcttgtgcagacggtttatcca ggaggagcccggggagaggggggtgggaaaagccacagttttggaggtcagcaaggtcat gtgggggcagactcctcctcctcctcctcttcctcctcctcctcttcctcctcctcctct @@ -43,8 +43,8 @@ tcctcctcctcctcttcctcccctcctcctcctcctcttcctcctcctcctcctcttcct cccctcctcctcctcttcctcctcgtcctcttcctccttctgccaggacccttcttccag gacccctgagaagcgcatagaatagcgtccaggactgtctgccccaggcacgctgaccac tggctccactcccctgggcaagggtcgcccctgggctgctgcctcccagcgacacaggca -ggacccagaagcctggtagaaagtgaccatgtgtggcaggt ->p:HG002_1_chr20:63048933-63049513 +ggacccagaagcctggtagaaagtgaccatgtgtggcagg +>p:HG002_1_chr20:63048933-63049512 gcttgtttggacctgaagaccagcctgagcgaggagagtcttgtgcagacggtttatcca ggaggagcccggggagaggggggtgggaaaagccacagttttggaggtcagcaaggtcat gtgggggcagactcctcctcctcctcctcttcctcctcctcctcttcctcctcctcctct @@ -66,8 +66,8 @@ tcctcctcctcctcttcctcccctcctcctcctcctcttcctcctcctcctcctcttcct cccctcctcctcctcttcctcctcgtcctcttcctccttctgccaggacccttcttccag gacccctgagaagcgcatagaatagcgtccaggactgtctgccccaggcacgctgaccac tggctccactcccctgggcaagggtcgcccctgggctgctgcctcccagcgacacaggca -ggacccagaagcctggtagaaagtgaccatgtgtggcaggt ->p:HG002_2_chr20:63048933-63049513 +ggacccagaagcctggtagaaagtgaccatgtgtggcagg +>p:HG002_2_chr20:63048933-63049512 gcttgtttggacctgaagaccagcctgagcgaggagagtcttgtgcagacggtttatcca ggaggagcccggggagaggggggtgggaaaagccacagttttggaggtcagcaaggtcat gtgggggcagactcctcctcctcctcctcttcctcctcc--------------------- @@ -89,8 +89,8 @@ tcctcctcctcctcttcctcccctcctcctcctcctcttcctcctcctcctcctcttcct cccctcctcctcctcttcctcctcgtcctcttcctccttctgccaggacccttcttccag gacccctgagaagcgcatagaatagcgtccaggactgtctgccccaggcacgctgaccac tggctccactcccctgggcaagggtcgcccctgggctgctgcctcccagcgacacaggca -ggacccagaagcctggtagaaagtgaccatgtgtggcaggt ->ref_chr20:63048933-63049513 +ggacccagaagcctggtagaaagtgaccatgtgtggcagg +>ref_chr20:63048933-63049512 gcttgtttggacctgaagaccagcctgagcgaggagagtcttgtgcagacggtttatcca ggaggagcccggggagaggggggtgggaaaagccacagttttggaggtcagcaaggtcat 
gtgggggcaga------------------------------------------------- @@ -112,4 +112,4 @@ tcctcctcctcctcttcctctcctcctcctcctcctcttcctcctcctcctcctcttcct cccctcctcctcctcttcctcctcgtcctcttcctccttctgccaggacccttcttccag gacccctgagaagcgcatagaatagcgtccaggactgtctgccccaggcacgctgaccac tggctccactcccctgggcaagggtcgcccctgggctgctgcctcccagcgacacaggca -ggacccagaagcctggtagaaagtgaccatgtgtggcaggt +ggacccagaagcctggtagaaagtgaccatgtgtggcagg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_b4b5ccc01de0310c6839be1682c17fae.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_6ab622358b0189242a15e470f601cb6b.msa similarity index 89% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_b4b5ccc01de0310c6839be1682c17fae.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_6ab622358b0189242a15e470f601cb6b.msa index 57e2e070..d87897ff 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_b4b5ccc01de0310c6839be1682c17fae.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_6ab622358b0189242a15e470f601cb6b.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63221409-63221822 +>syndip_1_chr20:63221409-63221821 taagtaccagtgccaggtgctctggggccgatgtctccgttagccccaaggtggaggctc agggagcagatgtgagcgtgccttgctcatcgcagtagcttcggactgcagggattgcag ggactgcagggactgcagggattgcagggactgcagggactgcagggactgcagggattg @@ -12,8 +12,8 @@ cagggattgcagggactgcagggactgca------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ --------------------------------------------------------ttgc -tcttgcagcaccacccgcctccgtggacaccttcagcagtgagacaatgcaggctct ->syndip_2_chr20:63221409-63221822 +tcttgcagcaccacccgcctccgtggacaccttcagcagtgagacaatgcaggctc +>syndip_2_chr20:63221409-63221821 taagtaccagtgccaggtgctctggggccgatgtctccgttagccccaaggtggaggctc agggagcagatgtgagcgtgccttgctcatcgcagtagcttcggactgcagggattgcag ggactgcagggattgcagggattgcagggagtgcagggactgcagggactgcagggattg @@ -27,8 +27,8 @@ 
cagggattgcagggattgcagggattgcagggactgcagggactgcagggattgcaggga ttgcagggattgcagggattgcagggattgcagggattgcagggactgcagggactgcag ggactgcagggattgcagggattgcagggattgcagggattgcagggactgcagggactg cagggattgcagggattgcagggattgcagggattgcagggactgcagggactgcattgc -tcttgcagcaccacccgcctccgtggacaccttcagcagtgagacaatgcaggctct ->p:HG002_1_chr20:63221409-63221822 +tcttgcagcaccacccgcctccgtggacaccttcagcagtgagacaatgcaggctc +>p:HG002_1_chr20:63221409-63221821 taagtaccagtgccaggtgctctggggccgatgtctccgttagccccaaggtggaggctc agggagcagatgtgagcgtgccttgctcatcgcagtagcttcggactgcagggattgcag ggactgcagggattgcagggattgcagggattgcagggactgcagggattgcagggactg @@ -42,8 +42,8 @@ cagggattgcagggattgcagggattgcagggactgcagggactgcagggattgcaggga ttgcagggattgcagggattgcagggattgcagggattgcagggactgcagggactgcag ggactgcagggattgcagggattgcagggactgcagggactgcagggattgcagggactg cagggactgcagggactgcagggattgcagggattgcagggactgcagggactgcattgc -tcttgcagcaccacccgcctccgtggacaccttcagcagtgagacaatgcaggctct ->p:HG002_2_chr20:63221409-63221822 +tcttgcagcaccacccgcctccgtggacaccttcagcagtgagacaatgcaggctc +>p:HG002_2_chr20:63221409-63221821 taagtaccagtgccaggtgctctggggccgatgtctccgttagccccaaggtggaggctc agggagcagatgtgagcgtgccttgctcatcgcagtagcttcggactgcagggattgcag ggactgcagggactgcagggattgcagggactgcagggactgcagggactgcagggattg @@ -57,8 +57,8 @@ cagggattgcagggactgcagggactgca------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ --------------------------------------------------------ttgc -tcttgcagcaccacccgcctccgtggacaccttcagcagtgagacaatgcaggctct ->ref_chr20:63221409-63221822 +tcttgcagcaccacccgcctccgtggacaccttcagcagtgagacaatgcaggctc +>ref_chr20:63221409-63221821 taagtaccagtgccaggtgctctggggccgatgtctccgttagccccaaggtggaggctc agggagcagatgtgagcgtgccttgctcatcgcagtagcttcggactgcagggattgcag ggactgcagggattgcagggattgcagggattgcagggactgcagggattgcagggactg @@ -72,4 +72,4 @@ ggactgcagggactgcagggattgcagggattgcagggactgcagggactgca------- 
------------------------------------------------------------ ------------------------------------------------------------ --------------------------------------------------------ttgc -tcttgcagcaccacccgcctccgtggacaccttcagcagtgagacaatgcaggctct +tcttgcagcaccacccgcctccgtggacaccttcagcagtgagacaatgcaggctc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_6977cc03bff3fe04a12c3f85e1bf95f6.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_6c3ae6985541ddd1146ec32d22021912.msa similarity index 87% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_6977cc03bff3fe04a12c3f85e1bf95f6.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_6c3ae6985541ddd1146ec32d22021912.msa index 5c8f13d0..662f3074 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_6977cc03bff3fe04a12c3f85e1bf95f6.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_6c3ae6985541ddd1146ec32d22021912.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:23155478-23155958 +>syndip_1_chr20:23155478-23155957 tcatgattgatgcaggcagcacccttctgcagaagtaaattttgccctgcagcacaaaag aggaaggaaagcaagggaagggaaggggaggggaggaagggaaggggaggggaggaaggg aaggggaggggaggaaggcgaggtgaggggaggagaggtgaggggagggggggaggggag @@ -10,8 +10,8 @@ gaaagaaagaaaaaagaaagaaaggaaggagagaaagaaatgaaagagaaagaaagaaag aaagaaagaaagaaagaaaggagagaaaaaggaaaaacaaagaaagagaaaagaaggaga gaaggaaaggaaaaagaaagaaagaaggaaagaaagaaaaaagaaggaaagaaagaagaa agaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaa -agaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaaggtcttaacaccttg ->syndip_2_chr20:23155478-23155958 +agaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaaggtcttaacacctt +>syndip_2_chr20:23155478-23155957 tcatgattgatgcaggcagcacccttctgcagaagtaaattttgccctgcagcacaaaag aggaaggaaagcaag-------------------ggaagggaaggggaggggaggaaggg aaggggaggggaggaaggcgaggtgaggggaggagaggtgaggggagggggggaggggag @@ -23,8 +23,8 @@ gaaagaaagaaaaaagaaagaaaggaaggagagaaagaaatgaaag--------agaaag aaagaaagaaagaaagaaaggagagaaaaaggaaaaacaaagaaagagaaaagaaggaga 
gaaggaaaggaaaaagaaagaaagaaggaaagaaagaaaaaagaaggaaagaaag----- -------------------aagaaagaaagaaagaaagaaagaaagaaagaaagaaagaa -agaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaaggtcttaacaccttg ->p:HG002_1_chr20:23155478-23155958 +agaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaaggtcttaacacctt +>p:HG002_1_chr20:23155478-23155957 tcatgattgatgcaggcagcacccttctgcagaagtaaattttgccctgcagcacaaaag aggaaggaaagcaag-------------------ggaagggaaggggaggggaggaaggg aaggggaggggaggaaggcgaggtgaggggaggagaggtgaggggagggggggaggggag @@ -36,8 +36,8 @@ gaaagaaagaaaaaagaaagaaaggaaggagagaaagaaatgaaag--------agaaag aaagaaagaaagaaagaaaggagagaaaaaggaaaaacaaagaaagagaaaagaaggaga gaaggaaaggaaaaagaaagaaagaaggaaagaaagaaaaaagaaggaaagaaag----- ---------------aagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaa -agaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaaggtcttaacaccttg ->p:HG002_2_chr20:23155478-23155958 +agaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaaggtcttaacacctt +>p:HG002_2_chr20:23155478-23155957 tcatgattgatgcaggcagcacccttctgcagaagtaaattttgccctgcagcacaaaag aggaaggaaagcaagggaagggaaggggaggggaggaagggaaggggaggggaggaaggg aaggggaggggaggaaggcgaggtgaggggaggagaggtgaggggagggggggaggggag @@ -49,8 +49,8 @@ gaaagaaagaaaaaagaaagaaaggaaggagagaaagaaatgaaagagaaagaaagaaag aaagaaagaaagaaagaaaggagagaaaaaggaaaaacaaagaaagagaaaagaaggaga gaaggaaaggaaaaagaaagaaagaaggaaagaaagaaaaaagaaggaaagaaagaagaa agaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaa -agaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaaggtcttaacaccttg ->ref_chr20:23155478-23155958 +agaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaaggtcttaacacctt +>ref_chr20:23155478-23155957 tcatgattgatgcaggcagcacccttctgcagaagtaaattttgccctgcagcacaaaag aggaaggaaagcaag-------------------ggaagggaaggggaggggaggaaggg a----------------------------------------------------------- @@ -62,4 +62,4 @@ gaaagaaagaaaaaagaaagaaaggaaggagagaaagaaatgaaag--------agaaag aaagaaagaaagaaagaaaggagagaaaaaggaaaaacaaagaaagagaaaagaaggaga gaaggaaaggaaaaagaaagaaagaaggaaagaaagaaaaaagaaggaaagaaag----- 
-------aagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaa -agaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaaggtcttaacaccttg +agaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaaggtcttaacacctt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8df6edffbb5bd8f31a6e7d88b05e6592.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_6fecb38e42c1fb0e27f26e0c7db37b3a.msa similarity index 92% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_8df6edffbb5bd8f31a6e7d88b05e6592.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_6fecb38e42c1fb0e27f26e0c7db37b3a.msa index 461d372c..12b6b8ba 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_8df6edffbb5bd8f31a6e7d88b05e6592.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_6fecb38e42c1fb0e27f26e0c7db37b3a.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:10802576-10803059 +>syndip_1_chr20:10802576-10803058 gcccaacttaatccagtgtgactccattttaaatcaattatatctgcaaagatctaattt tcaaatgaggtcacagtcacaagtattgggggttaggacttttgggggacataacccaca atgtcctctgaatatatatatatatatcctccc----------atatatatatatatcct @@ -7,8 +7,8 @@ ccc--atatatatatatatat--------------------------------------- tatatcctcccatatatatatctatcctccc----atatatatatatatatattttcaat gtatccctttattcaagtgattttaagcaatgaaagcaagacagctgttataggatttac ctttctagatggaaaaggacaagtaaataaggtcttaagctccatgtcaagaaaggggca -caac ->syndip_2_chr20:10802576-10803059 +caa +>syndip_2_chr20:10802576-10803058 gcccaacttaatccagtgtgactccattttaaatcaattatatctgcaaagatctaattt tcaaatgaggtcacagtcacaagtattgggggttaggacttttgggggacataacccaca atgtcctctgaatatatatatatatatcctcccatatatatatatatatatatatatcct @@ -17,8 +17,8 @@ cccatatatatatatatatatgtcctcccatatatatatatatatcctcccatatatata tatatcctcccatatatatatctatcctcccatatatatatatatatatatattttcaat gtatccctttattcaagtgattttaagcaatgaaagcaagacagctgttataggatttac ctttctagatggaaaaggacaagtaaataaggtcttaagctccatgtcaagaaaggggca -caac ->p:HG002_1_chr20:10802576-10803059 +caa +>p:HG002_1_chr20:10802576-10803058 
gcccaacttaatccagtgtgactccattttaaatcaattatatctgcaaagatctaattt tcaaatgaggtcacagtcacaagtattgggggttaggacttttgggggacataacccaca atgtcctctgaatatatatatatatatcctcccatatatatatatatatatatatatcct @@ -27,8 +27,8 @@ cccatatatatatatatatatgtcctcccatatatatatatatatcctcccatatatata tatatcctcccatatatatatctatcctcccatatatatatatatatatatattttcaat gtatccctttattcaagtgattttaagcaatgaaagcaagacagctgttataggatttac ctttctagatggaaaaggacaagtaaataaggtcttaagctccatgtcaagaaaggggca -caac ->p:HG002_2_chr20:10802576-10803059 +caa +>p:HG002_2_chr20:10802576-10803058 gcccaacttaatccagtgtgactccattttaaatcaattatatctgcaaagatctaattt tcaaatgaggtcacagtcacaagtattgggggttaggacttttgggggacataacccaca atgtcctctgaatatatatatatatatcctccc----------atatatatatatatcct @@ -37,8 +37,8 @@ ccc--atatatatatatatat--------------------------------------- tatatcctcccatatatatatctatcctccc----atatatatatatatatattttcaat gtatccctttattcaagtgattttaagcaatgaaagcaagacagctgttataggatttac ctttctagatggaaaaggacaagtaaataaggtcttaagctccatgtcaagaaaggggca -caac ->ref_chr20:10802576-10803059 +caa +>ref_chr20:10802576-10803058 gcccaacttaatccagtgtgactccattttaaatcaattatatctgcaaagatctaattt tcaaatgaggtcacagtcacaagtattgggggttaggacttttgggggacataacccaca atgtcctctgaatatatatatatatatcctcccatatatatatatatatatatatatcct @@ -47,4 +47,4 @@ cccatatatatatatatatatgtcctcccatatatatatatatatcctcccatatatata tatatcctcccatatatatatctatcctcccatatatatatatatatatatattttcaat gtatccctttattcaagtgattttaagcaatgaaagcaagacagctgttataggatttac ctttctagatggaaaaggacaagtaaataaggtcttaagctccatgtcaagaaaggggca -caac +caa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_05d37467dfbfe82df764ccf557e66822.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_71d3a3cbc7c7089cca081c650f00a2a6.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_05d37467dfbfe82df764ccf557e66822.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_71d3a3cbc7c7089cca081c650f00a2a6.msa index 36b5fec8..ac4698d9 100644 --- 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_05d37467dfbfe82df764ccf557e66822.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_71d3a3cbc7c7089cca081c650f00a2a6.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:50775444-50776124 +>syndip_1_chr20:50775444-50776123 tgaacaaagtgaactaatagtagttcacgaaatctactggtttataatgtcacataccaa aggcttgtgtctttaaatctatctctagacttggtattctgctttctcattctgtctctc caaagatttcc--------------------atatatatatatatatatatataaatata @@ -12,8 +12,8 @@ taatatatatcatatatttatatattaatatatatcatatatttatatattaatatatat catatatttatatataaatatatatcatatatttatatataaatatatatctacattttt tgagacagagtctggctctgtcaccccagctggagtgcagtggtacgatctcagctcatt gcaacctccacctcccaggttcaagcgattctcgtgcctcagcctccatagtagctggaa -tt ->syndip_2_chr20:50775444-50776124 +t +>syndip_2_chr20:50775444-50776123 tgaacaaagtgaactaatagtagttcacgaaatctactggtttataatgtcacataccaa aggcttgtgtctttaaatctatctctagacttggtattctgctttctcattctgtctctc caaagatttccatatatatatatatatatatatatatatatatatatatatataaatata @@ -27,8 +27,8 @@ taatatatatcatatatttatatattaatatatatcatatatttatatattaatatatat catatatttatatataaatatatatcatatatttatatataaatatatatctacattttt tgagacagagtctggctctgtcaccccagctggagtgcagtggtacgatctcagctcatt gcaacctccacctcccaggttcaagcgattctcgtgcctcagcctccatagtagctggaa -tt ->p:HG002_1_chr20:50775444-50776124 +t +>p:HG002_1_chr20:50775444-50776123 tgaacaaagtgaactaatagtagttcacgaaatctactggtttataatgtcacataccaa aggcttgtgtctttaaatctatctctagacttggtattctgctttctcattctgtctctc caaagatttccatatatatatatatatatatatatatatatatatatatatataaatata @@ -42,8 +42,8 @@ taatatatatcatatatttatatattaatatatatcatatatttatatattaatatatat catatatttatatataaatatatatcatatatttatatataaatatatatctacattttt tgagacagagtctggctctgtcaccccagctggagtgcagtggtacgatctcagctcatt gcaacctccacctcccaggttcaagcgattctcgtgcctcagcctccatagtagctggaa -tt ->p:HG002_2_chr20:50775444-50776124 +t +>p:HG002_2_chr20:50775444-50776123 tgaacaaagtgaactaatagtagttcacgaaatctactggtttataatgtcacataccaa aggcttgtgtctttaaatctatctctagacttggtattctgctttctcattctgtctctc 
caaagatttcc--------------------atatatatatatatatatatataaatata @@ -57,8 +57,8 @@ taatatatatcatatatttatatattaatatatatcatatatttatatattaatatatat catatatttatatataaatatatatcatatatttatatataaatatatatctacattttt tgagacagagtctggctctgtcaccccagctggagtgcagtggtacgatctcagctcatt gcaacctccacctcccaggttcaagcgattctcgtgcctcagcctccatagtagctggaa -tt ->ref_chr20:50775444-50776124 +t +>ref_chr20:50775444-50776123 tgaacaaagtgaactaatagtagttcacgaaatctactggtttataatgtcacataccaa aggcttgtgtctttaaatctatctctagacttggtattctgctttctcattctgtctctc caaagatttccatatatatatatatatatatatatatatatatatatatatataaatata @@ -72,4 +72,4 @@ taatatatatcatatatttatatattaatatatatcatatatttatatattaatatatat catatatttatatataaatatatatcatatatttatatataaatatatatctacattttt tgagacagagtctggctctgtcaccccagctggagtgcagtggtacgatctcagctcatt gcaacctccacctcccaggttcaagcgattctcgtgcctcagcctccatagtagctggaa -tt +t diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_2cd0e934b9e98532ab4355e1842503c6.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_7253cbbea437a5ec4d1dc4d570629ba6.msa similarity index 83% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_2cd0e934b9e98532ab4355e1842503c6.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_7253cbbea437a5ec4d1dc4d570629ba6.msa index 220ba1f1..5d9682a7 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_2cd0e934b9e98532ab4355e1842503c6.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_7253cbbea437a5ec4d1dc4d570629ba6.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:35580586-35580857 +>syndip_1_chr20:35580586-35580856 gcagtgagccgagatcgcgccactgcagcctaggcagcagagtgagactctgtcttaaaa tatatattatggtttattttattttatggttaaaggccatatatatatatatat-atata tatataatatatatatatataaaatatatatatataaaatatatatatatataaaatata @@ -8,30 +8,30 @@ ataaaatatatatatataaaatatatatatataaaatatatatatatataaaatatatat atataaaatatatatatataaaatatatatatatataatatatatatataaaatatatat atat--aaaatatatataaaatatatatatataaaatatatatataatatatatatataa 
aatatatatatataatatatatatataaaatatatatatataaaatatatatatatataa -aatatatatatatataaaatatatatatg ->syndip_2_chr20:35580586-35580857 +aatatatatatatataaaatatatatat +>syndip_2_chr20:35580586-35580856 gcagtgagccgagatcgcgccactgcagcctaggcagcagagtgagactctgtcttaaaa tatatattatggtttattttattttatggttaaaggccatatatatat---------ata tatatatataaaatatatataaaatatatatatata---------------------ata tatatataaaatatatatatat------aatatatatatataaaatatatatatataaaa -tatatatatatataaaatatatatatatataaaatatatatatg---------------- +tatatatatatataaaatatatatatatataaaatatatatat----------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------ ->p:HG002_1_chr20:35580586-35580857 +---------------------------- +>p:HG002_1_chr20:35580586-35580856 gcagtgagccgagatcgcgccactgcagcctaggcagcagagtgagactctgtcttaaaa tatatattatggtttattttattttatggttaaaggccatatatat-------------- ------------atatatataaaatatatatatataaaatatatatat------aatata tatatataaaatatatatatat------aatatatatatataaaatatatatatataaaa -tatatatatatataaaatatatatatatataaaatatatatatg---------------- +tatatatatatataaaatatatatatatataaaatatatatat----------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------ ->p:HG002_2_chr20:35580586-35580857 +---------------------------- +>p:HG002_2_chr20:35580586-35580856 gcagtgagccgagatcgcgccactgcagcctaggcagcagagtgagactctgtcttaaaa tatatattatggtttattttattttatggttaaaggccatatatatatatatataaaata tatatatataatatatatataaaatatatatatataaaatatatatatatataaaatata @@ -41,15 +41,15 @@ ataaaatatatatatataaaatatatatatataaaatatatatatatataaaatatatat 
atataaaatatatatatataaaatatatatatatataatatatatatataaaatatatat atataaaaaatatatataaaatatatatatataaaatatatatataatatatatatataa aatatatatatataatatatatatataaaatatatatatataaaatatatatatatataa -aatatatatatatataaaatatatatatg ->ref_chr20:35580586-35580857 +aatatatatatatataaaatatatatat +>ref_chr20:35580586-35580856 gcagtgagccgagatcgcgccactgcagcctaggcagcagagtgagactctgtcttaaaa tatatattatggtttattttattttatggttaaaggccatatatatatatatataaaata tatatatataaaatatatataaaatatatatatataaaatatatatat------aatata tatatataaaatatatatatat------aatatatatatataaaatatatatatataaaa -tatatatatatataaaatatatatatatataaaatatatatatg---------------- +tatatatatatataaaatatatatatatataaaatatatatat----------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------ +---------------------------- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_56e2cc0318de6e4a5452f63745c5e250.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_7345416d874be507c2785df6e2e9361d.msa similarity index 92% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_56e2cc0318de6e4a5452f63745c5e250.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_7345416d874be507c2785df6e2e9361d.msa index 48276576..21091b0c 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_56e2cc0318de6e4a5452f63745c5e250.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_7345416d874be507c2785df6e2e9361d.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:35539020-35539825 +>syndip_1_chr20:35539020-35539824 tttttgtatttttagtagagacagggtttcaccatgttagccaggatggtcttgatctcc tgacctcgtgatccgcccgcctcggcctcccaaagtgctgggattacaggcgtgagccac agcgcccggcctatgttttacatatataacaatttatatataactacatatataatatat @@ -12,8 +12,8 @@ atataacagtttatatatataatatatataacagtttatatatataatatatataagttt 
atatatataatatatataacagtttatatatattatatatatatatatgcacacacacaa agagagagagagagactcgctctgtcgcccaggctgtagtgcagtggtgtaatcacagct cactatgaggcaggagaacagggaatcagggtaaccgggggttaagacataagcaaatga -aggggtgcagccagttctaggcagca ->syndip_2_chr20:35539020-35539825 +aggggtgcagccagttctaggcagc +>syndip_2_chr20:35539020-35539824 tttttgtatttttagtagagacagggtttcaccatgttagccaggatggtcttgatctcc tgacctcgtgatccgcccgcctcggcctcccaaagtgctgggattacaggcgtgagccac agcgcccggcctatgttttacatatataacaatttatatataactacatatataatatat @@ -27,8 +27,8 @@ atataacagtttatatatataatatatataacagtttatatatataatatatataagttt atatatataatatatataacagtttatatatattatatatatatatatgcacacacacaa agagagagagagagactcgctctgtcgcccaggctgtagtgcagtggtgtaatcacagct cactatgaggcaggagaacagggaatcagggtaaccgggggttaagacataagcaaatga -aggggtgcagccagttctaggcagca ->p:HG002_1_chr20:35539020-35539825 +aggggtgcagccagttctaggcagc +>p:HG002_1_chr20:35539020-35539824 tttttgtatttttagtagagacagggtttcaccatgttagccaggatggtcttgatctcc tgacctcgtgatccgcccgcctcggcctcccaaagtgctgggattacaggcgtgagccac agcgcccggcctatgttttacatatataacaatttatatataactacatatataatatat @@ -42,8 +42,8 @@ atataacagtttatatatataatatatataacagtttatatatataatatatataagttt atatatataatatatataacagtttatatatattatatatatatatatgcacacacacaa agagagagagagagactcgctctgtcgcccaggctgtagtgcagtggtgtaatcacagct cactatgaggcaggagaacagggaatcagggtaaccgggggttaagacataagcaaatga -aggggtgcagccagttctaggcagca ->p:HG002_2_chr20:35539020-35539825 +aggggtgcagccagttctaggcagc +>p:HG002_2_chr20:35539020-35539824 tttttgtatttttagtagagacagggtttcaccatgttagccaggatggtcttgatctcc tgacctcgtgatccgcccgcctcggcctcccaaagtgctgggattacaggcgtgagccac agcgcccggcctatgttttacatatataacaatttatatataactacatatataatatat @@ -57,8 +57,8 @@ atataacagtttatatatataatatatataacagtttatatatataatatatataagttt atatatataatatatataacagtttatatatattatatatatatatatgcacacacacaa agagagagagagagactcgctctgtcgcccaggctgtagtgcagtggtgtaatcacagct cactatgaggcaggagaacagggaatcagggtaaccgggggttaagacataagcaaatga -aggggtgcagccagttctaggcagca ->ref_chr20:35539020-35539825 +aggggtgcagccagttctaggcagc 
+>ref_chr20:35539020-35539824 tttttgtatttttagtagagacagggtttcaccatgttagccaggatggtcttgatctcc tgacctcgtgatccgcccgcctcggcctcccaaagtgctgggattacaggcgtgagccac agcgcccggcctatgttttacatatataacaatttatatataactacatatataatatat @@ -72,4 +72,4 @@ atataacagtttatatatataaaatatataacagtttatatatataatatatataagttt atatatataatatatataacagtttatatatattatatatatatatatgcacacacacaa agagagagagagagactcgctctgtcgcccaggctgtagtgcagtggtgtaatcacagct cactatgaggcaggagaacagggaatcagggtaaccgggggttaagacataagcaaatga -aggggtgcagccagttctaggcagca +aggggtgcagccagttctaggcagc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_756778e4a53861c2163701e9917f201a.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_76652f704e7f5d8065ba92e110437e4d.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_756778e4a53861c2163701e9917f201a.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_76652f704e7f5d8065ba92e110437e4d.msa index 1f163e65..9b27e60f 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_756778e4a53861c2163701e9917f201a.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_76652f704e7f5d8065ba92e110437e4d.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:62057502-62058869 +>syndip_1_chr20:62057502-62058868 tacctggctacgctcctctcacaactttgagatcaagatgccacgtgtgccttcctcaca tcggcgctttccagtgatgcccagtgggggacacaaagggcccagcaagacctcaggaag gacggacacaggcctacacaccaggccacagacaatggggctcggggggccaacctcagc @@ -47,8 +47,8 @@ accaggccacagacaatggggctc-gggggccaacctcagcaagacctcaggaaggacgg acacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaagac ctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcggggggcc aacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatg -gggctcgggggccaacctcagcaagacctcagga ->syndip_2_chr20:62057502-62058869 +gggctcgggggccaacctcagcaagacctcagg +>syndip_2_chr20:62057502-62058868 tacctggctacgctcctctcacaactttgagatcaagatgccacgtgtgccttcctcaca tcggcgctttccagtgatgcccagtgggggacacaaagggcccagcaagacctcaggaag 
gacggacacaggtctacacaccaggccacagacaatggggctc-gggggccaacctcagc @@ -57,7 +57,7 @@ gggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacaga caatggggctc-gggggccaacctcagcaagacctcaggaaggacggacacaggcctaca caccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacgg acacaggtctacacaccaggccacagacaatggggctc-gggggccaacctcagcaagac -ctcagga----------------------------------------------------- +ctcagg------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -97,8 +97,8 @@ ctcagga----------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------------- ->p:HG002_1_chr20:62057502-62058869 +--------------------------------- +>p:HG002_1_chr20:62057502-62058868 tacctggctacgctcctctcacaactttgagatcaagatgccacgtgtgccttcctcaca tcggcgctttccagtgatgcccagtgggggacacaaagggcccagcaagacctcaggaag gacggacacaggtctacacaccaggccacagacaatggggctc-gggggccaacctcagc @@ -107,7 +107,7 @@ gggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacaga caatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctaca caccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacgg acacaggtctacacaccaggccacagacaatggggctc-gggggccaacctcagcaagac -ctcagga----------------------------------------------------- +ctcagg------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -147,8 +147,8 @@ ctcagga----------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ 
------------------------------------------------------------ ----------------------------------- ->p:HG002_2_chr20:62057502-62058869 +--------------------------------- +>p:HG002_2_chr20:62057502-62058868 tacctggctacgctcctctcacaactttgagatcaagatgccacgtgtgccttcctcaca tcggcgctttccagtgatgcccagtgggggacacaaagggcccagcaagacctcaggaag gacggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagc @@ -197,8 +197,8 @@ accaggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacgg acacaggcctacacaccaggccacagacaatggggctcggggggccaacctcagcaagac ctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctc-gggggcc aacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatg -gggctcgggggccaacctcagcaagacctcagga ->ref_chr20:62057502-62058869 +gggctcgggggccaacctcagcaagacctcagg +>ref_chr20:62057502-62058868 tacctggctacgctcctctcacaactttgagatcaagatgccacgtgtgccttcctcaca tcggcgctttccagtgatgcccagtgggggacacaaagggcccagcaagacctcaggaag gacggacacaggcctacacaccaggccacagacaatggggctcggggggccaacctcagc @@ -207,9 +207,9 @@ gggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacaga caatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctaca caccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacgg acacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaagac -ctcagga----------------------------------------------------- +ctcagg------------------------------------------------------ ------------------------------------------------------------ -----------------------------------aggacggacacaggcctacacaccag +---------------------------------aaggacggacacaggcctacacaccag gccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacag gtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcagga aggacggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctca @@ -224,7 +224,7 @@ gggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacacca ggccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacac aggcctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcag gaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctc 
-agcaagacctcagga--------------------------------------------- +agcaagacctcagg---------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -247,4 +247,4 @@ agcaagacctcagga--------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------------- +--------------------------------- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_0c9dec1fcbfa8df4c43412bac17355db.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_7757219ce7f19ac23c5789d167c1c707.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_0c9dec1fcbfa8df4c43412bac17355db.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_7757219ce7f19ac23c5789d167c1c707.msa index b856638b..fb485d2f 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_0c9dec1fcbfa8df4c43412bac17355db.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_7757219ce7f19ac23c5789d167c1c707.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:61282406-61283770 +>syndip_1_chr20:61282406-61283769 ataaaacattctgtattttcaaaatgccaaaaaaattattgttaaagactttagaaaaca gaaaagtacaaagaataaaagggtaacttcatttccctcctctgaacaaaccacttttaa tctttggcatggtgtgtgtg--tgtgcatgtgtgtgtgtgtgtgtgtatgtctatctgta @@ -26,8 +26,8 @@ tgtgcgtttgcacgcgtgtgctgtggtgtgtgatgtaggtgcatttacacgcgtgtgctg tggtgtgtgatgtaggtgcatttgcacgcgtgtgctgtggtgtgtgtgatgtgtgtgcgt ttgcacgtgtgtgctgtggtgtgtgtgtggtgagtggatgggaggatatttccagtgctc tcctcctgggactgatgccatgatgaaaatctgtgtaaaccagacagcacatctctcatc -atttctttgtgatgggtcattagaagcaggattctgaatt ->syndip_2_chr20:61282406-61283770 +atttctttgtgatgggtcattagaagcaggattctgaat +>syndip_2_chr20:61282406-61283769 
ataaaacattctgtattttcaaaatgccaaaaaaattattgttaaagactttagaaaaca gaaaagtacaaagaataaaagggtaacttcatttccctcctctgaacaaaccacttttaa tctttggcatggtgtgtgtg--tgtgcatgtgtgtgtgtgtgtgtgtatgtctatctgta @@ -55,8 +55,8 @@ ggtgcgtttgcacgcgtgtgctgtggtgtgtgatgtaggtgcgtttgcacgcgtgtgctg tggtgtgtgatgtaggtgcatttgcacgtgtgtgctgtggtgtgtgtgatgtgtgtgcat ttgcacgtgtgtgctgtggtgtgtgtgtggtgagtggatgggaggatatttccagtgctc tcctcctgggactgatgccatgatgaaaatctgtgtaaaccagacagcacatctctcatc -atttctttgtgatgggtcattagaagcaggattctgaatt ->p:HG002_1_chr20:61282406-61283770 +atttctttgtgatgggtcattagaagcaggattctgaat +>p:HG002_1_chr20:61282406-61283769 ataaaacattctgtattttcaaaatgccaaaaaaattattgttaaagactttagaaaaca gaaaagtacaaagaataaaagggtaacttcatttccctcctctgaacaaaccacttttaa tctttggcatggtgtgtgtg--tgtgcatgtgtgtgtgtgtgtgtgtatgtctatctgta @@ -84,8 +84,8 @@ ggtgcgtttgcacgcgtgtgctgtggtgtgtgatgtaggtgcgtttgcacgcgtgtgctg tggtgtgtgatgtaggtgcatttgcacgtgtgtgctgtggtgtgtgtgatgtgtgtgcat ttgcacgtgtgtgctgtggtgtgtgtgtggtgagtggatgggaggatatttccagtgctc tcctcctgggactgatgccatgatgaaaatctgtgtaaaccagacagcacatctctcatc -atttctttgtgatgggtcattagaagcaggattctgaatt ->p:HG002_2_chr20:61282406-61283770 +atttctttgtgatgggtcattagaagcaggattctgaat +>p:HG002_2_chr20:61282406-61283769 ataaaacattctgtattttcaaaatgccaaaaaaattattgttaaagactttagaaaaca gaaaagtacaaagaataaaagggtaacttcatttccctcctctgaacaaaccacttttaa tctttggcatggtgtgtgtg--tgtgcatgtgtgtgtgtgtgtgtgtatgtctatctgta @@ -113,8 +113,8 @@ tgtgcgtttgcacgcgtgtgctgtggtgtgtgatgtaggtgcatttacacgcgtgtgctg tggtgtgtgatgtaggtgcatttgcacgcgtgtgctgtggtgtgtgtgatgtgtgtgcgt ttgcacgtgtgtgctgtggtgtgtgtgtggtgagtggatgggaggatatttccagtgctc tcctcctgggactgatgccatgatgaaaatctgtgtaaaccagacagcacatctctcatc -atttctttgtgatgggtcattagaagcaggattctgaatt ->ref_chr20:61282406-61283770 +atttctttgtgatgggtcattagaagcaggattctgaat +>ref_chr20:61282406-61283769 ataaaacattctgtattttcaaaatgccaaaaaaattattgttaaagactttagaaaaca gaaaagtacaaagaataaaagggtaacttcatttccctcctctgaacaaaccacttttaa tctttggcatggtgtgtgtgcatgtgtgtgtgtgtgtgtgtgtgtgtatgtctatctgta @@ -142,4 +142,4 @@ 
ggtgcatttgcacgcgtgtgctgtggtgtgtgatgtaggtgcatttacacgcgtgtgctg tggtgtgtgatgtaggtgcatttgcacgtgtgtgctgtggtgtgtgtgatgtgtgtgcat ttgcacgtgtgtgctgtggtgtgtgtgtggtgagtggatgggaggatatttccagtgctc tcctcctgggactgatgccatgatgaaaatctgtgtaaaccagacagcacatctctcatc -atttctttgtgatgggtcattagaagcaggattctgaatt +atttctttgtgatgggtcattagaagcaggattctgaat diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_bbd528e31e31fa673aa3dd6ff106fa0b.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_783ef04c1562bdfce45b91b59fe4d678.msa similarity index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_bbd528e31e31fa673aa3dd6ff106fa0b.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_783ef04c1562bdfce45b91b59fe4d678.msa index 69a6d6b4..286d2d50 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_bbd528e31e31fa673aa3dd6ff106fa0b.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_783ef04c1562bdfce45b91b59fe4d678.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:641736-642566 +>syndip_1_chr20:641736-642565 atcatccggtgacagggatcatcatccctgcctggcatagaaacccggacatcggaccct gccctcccgctgcttccgcagggacagggggcagccagcagagcctccctctgggggtgc cccccaatctggttgcctgggaggggggcccagcgggggtggagttgcctgggggggggg @@ -22,8 +22,8 @@ ggggcccagcgggggtggagttgcctg-gggggggcccagcgggggtggagttgcctgtg ggggggggcccagcgggggtggagttgcctggggggggcccagcgggggtggagttgcct gggggggggcccagcgggggtggagcaccctggacggccccgcggagccccctgcgggag gcgagggcggccctcgggctctggaaggggccgcgcggcctggaagacattacacggctg -tcgggcggggggcggcgctcgcagttccggaggggg ->syndip_2_chr20:641736-642566 +tcgggcggggggcggcgctcgcagttccggagggg +>syndip_2_chr20:641736-642565 atcatccggtgacagggatcatcatccctgcctggcatagaaacccggacatcggaccct gccctcccgctgcttccgcagggacagggggcagccagcagagcctccctctgggggtgc cccccaatctggttgcctgggacgggggcccagcgggggtggagttgcct-ggggggggg @@ -47,8 +47,8 @@ ggggcccagcgggggtggagttgcctgtgggggggcccagcgggggtggagttgcctgtg ggggggggcccagcgggggtggagttgcctggggggggcccagcgggggtggagttgcct 
-ggggggggcccagcgggggtggagcaccctggacggccccgcggagccccctgcgggag gcgagggcggccctcgggctctggaaggggccgcgcggcctggaagacattacacggctg -tcgggcggggggcggcgctcgcagttccggaggggg ->p:HG002_1_chr20:641736-642566 +tcgggcggggggcggcgctcgcagttccggagggg +>p:HG002_1_chr20:641736-642565 atcatccggtgacagggatcatcatccctgcctggcatagaaacccggacatcggaccct gccctcccgctgcttccgcagggacagggggcagccagcagagcctccctctgggggtgc cccccaatctggttgcctgggacgggggcccagcgggggtggagttgcct-ggggggggg @@ -72,8 +72,8 @@ ggggcccagcgggggtggagttgcctgtgggggggcccagcgggggtggagttgcctgtg ggggggggcccagcgggggtggagttgcctggggggggcccagcgggggtggagttgcct -ggggggggcccagcgggggtggagcaccctggacggccccgcggagccccctgcgggag gcgagggcggccctcgggctctggaaggggccgcgcggcctggaagacattacacggctg -tcgggcggggggcggcgctcgcagttccggaggggg ->p:HG002_2_chr20:641736-642566 +tcgggcggggggcggcgctcgcagttccggagggg +>p:HG002_2_chr20:641736-642565 atcatccggtgacagggatcatcatccctgcctggcatagaaacccggacatcggaccct gccctcccgctgcttccgcagggacagggggcagccagcagagcctccctctgggggtgc cccccaatctggttgcctgggaggggggcccagcgggggtggagttgcctgggggggggg @@ -97,8 +97,8 @@ ggggcccagcgggggtggagttgcctg-gggggggcccagcgggggtggagttgcctgtg ggggggggcccagcgggggtggagttgcctggggggggcccagcgggggtggagttgcct gggggggggcccagcgggggtggagcaccctggacggccccgcggagccccctgcgggag gcgagggcggccctcgggctctggaaggggccgcgcggcctggaagacattacacggctg -tcgggcggggggcggcgctcgcagttccggaggggg ->ref_chr20:641736-642566 +tcgggcggggggcggcgctcgcagttccggagggg +>ref_chr20:641736-642565 atcatccggtgacagggatcatcatccctgcctggcatagaaacccggacatcggaccct gccctcccgctgcttccgcagggacagggggcagccagcagagcctccctctgggggtgc cccccaatctggttgcctgggacgggggcccagcgggggtggagttgcct-ggggggggg @@ -114,7 +114,7 @@ gggg----ccagcgggggtggagttgcctgggggggggccagcgggggtggagttgcctg tgggggggggccagcgggggtggagttgcctgggggggg----ccagcgggggtggagtt gcctgggggggg----ccagcgggggtggagcaccctggacggcccc--------gcgga gccccctgcgggaggcgagggcggccctcgggctctggaaggggccgcgcggcctggaag -acattacacggctgtcgggcggggggcggcgctcgcagttccggaggggg---------- +acattacacggctgtcgggcggggggcggcgctcgcagttccggagggg----------- 
------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -122,4 +122,4 @@ acattacacggctgtcgggcggggggcggcgctcgcagttccggaggggg---------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------- +----------------------------------- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8b5baa0cc6e1948c19969dc20da6a9e8.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_786136b3c2690bd2db0b1ed0059f1abd.msa similarity index 86% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_8b5baa0cc6e1948c19969dc20da6a9e8.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_786136b3c2690bd2db0b1ed0059f1abd.msa index 74fd3557..9be2c94a 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_8b5baa0cc6e1948c19969dc20da6a9e8.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_786136b3c2690bd2db0b1ed0059f1abd.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:2240860-2241391 +>syndip_1_chr20:2240860-2241390 ctcagtgttgttatctacaaatgggagaatcaaggcaccctcctacaagcggcggctcat cagctcacttggaaaatgtagtcatcttctcaaagttttctttttctttctttctttttc ctttctttctttctttttctttctttctctctctctccttccttccttctttccttccct @@ -9,8 +9,8 @@ cttccttcttccctccctccctctttgtttctttctttctctccctc------------- ------------------------------------------------------------ ----------------------cttccctccctccctttctttctcctt----------- ttcttccttccctccctgcctgtctccctctttctttctcttttttcttttccttccttc -cgtctttttcctcccttctttcctccctcctccctccctccctcctccctctttttcttt ->syndip_2_chr20:2240860-2241391 +cgtctttttcctcccttctttcctccctcctccctccctccctcctccctctttttctt +>syndip_2_chr20:2240860-2241390 ctcagtgttgttatctacaaatgggagaatcaaggcaccctcctacaagcggcggctcat cagctcacttggaaaatgtagtcatcttctcaaagttttctt------------------ 
-tttctttctttctttttctttctttctctctctctccttccttccttctttccttccct @@ -21,8 +21,8 @@ cttccttcttccctccctccctctttgtttctttctttctctccctccttccctccctcc ctttcttccttctttccttatccctcccttctttccttccttccctttgtttctctttct ttctttctttctctctctcttccttccttccctccctccctccctccttcctcccttctt tccttccctccctgcctctctgtctccctctttctttctcttttttcttttccttccttc -cgtctttttcctcccttctttcctccctcctccctccctccctcctccctctttttcttt ->p:HG002_1_chr20:2240860-2241391 +cgtctttttcctcccttctttcctccctcctccctccctccctcctccctctttttctt +>p:HG002_1_chr20:2240860-2241390 ctcagtgttgttatctacaaatgggagaatcaaggcaccctcctacaagcggcggctcat cagctcacttggaaaatgtagtcatcttctcaaagttttctttttctttctttctttttc ctttctttctttctttttctttctttctctctctctccttccttccttctttccttccct @@ -33,8 +33,8 @@ tttcttctttcttttcctccttcctcccttctttccttcccttcctt------------- ------------------------------------------------------------ ----------------------cttccctccctccctctttgtttctttctttctctccc tccttccctccctccctttctgtctccctctttctttctcttttttcttttccttccttc -cgtctttttcctcccttctttcctccctcctccctccctccctcctccctctttttcttt ->p:HG002_2_chr20:2240860-2241391 +cgtctttttcctcccttctttcctccctcctccctccctccctcctccctctttttctt +>p:HG002_2_chr20:2240860-2241390 ctcagtgttgttatctacaaatgggagaatcaaggcaccctcctacaagcggcggctcat cagctcacttggaaaatgtagtcatcttctcaaagttttctttttctttctttctttttc ctttctttctttctttttctttctttctctctctctccttccttccttctttccttccct @@ -45,8 +45,8 @@ tttcttctttcttttcctccttcctcccttctttccttcccttcctt------------- ------------------------------------------------------------ ----------------------cttccctccctccctctttgtttctttctttctctccc tccttccctccctccctttctgtctccctctttctttctcttttttcttttccttccttc -cgtctttttcctcccttctttcctccctcctccctccctccctcctccctctttttcttt ->ref_chr20:2240860-2241391 +cgtctttttcctcccttctttcctccctcctccctccctccctcctccctctttttctt +>ref_chr20:2240860-2241390 ctcagtgttgttatctacaaatgggagaatcaaggcaccctcctacaagcggcggctcat cagctcacttggaaaatgtagtcatcttctcaaagttttctttttctttctttctttttc ctttctttctttctttttctttctttctctctctctccttccttccttctttccttccct @@ -57,4 +57,4 @@ 
tttcttctttcttttcctccttcctcccttctttccttcccttcctt------------- ------------------------------------------------------------ ----------------------cttccctccctccctctttgtttctttctttctctccc tccttccctccctccctttctgtctccctctttctttctcttttttcttttccttccttc -cgtctttttcctcccttctttcctccctcctccctccctccctcctccctctttttcttt +cgtctttttcctcccttctttcctccctcctccctccctccctcctccctctttttctt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_a4866977af1cc1005cd3c013448bd14b.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_7bb50c57d657828978076072c80f8a1f.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_a4866977af1cc1005cd3c013448bd14b.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_7bb50c57d657828978076072c80f8a1f.msa index 0f621360..79860f7c 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_a4866977af1cc1005cd3c013448bd14b.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_7bb50c57d657828978076072c80f8a1f.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:18209039-18210235 +>syndip_1_chr20:18209039-18210234 tacagttatttccaaataaaaaataagcagaaaggaaaagctaatcatctccccctattc cagtcccccaggcatgtatccttccgtagcttctccacacctacacgaacctagacgatt ccttccacaccttccccacacccacacgaacctagacgattccttccacaccttccccac @@ -24,8 +24,8 @@ acacgaacctagacaattccttccacaccttctcacacccacacaaacctagatgaattc cttccacaccttccccacacccacacagacctagacgaattccttccacaccttct---- -------------------------------------ccacacccacacgaacctagaca attccttgcacaccttctcacacccacacaaacctagacgaattccttccacaccttctc -cacacccacacaaaccta ->syndip_2_chr20:18209039-18210235 +cacacccacacaaacct +>syndip_2_chr20:18209039-18210234 tacagttatttccaaataaaaaataagcagaaaggaaaagctaatcatctccccctattc cagtcccccaggcatgtatccttccgtagcttctccacacctacacgaacctagacgatt ccttccacaccttccccacacccacacgaacctagacgattccttccacaccttccccac @@ -51,8 +51,8 @@ acacgaacctagacaattccttccacaccttctcacacccacacaaacctagatgaattc cttccacaccttccccacacccacacagacctagacgaattccttccacaccttct---- 
-------------------------------------ccacacccacacgaacctagaca attccttgcacaccttctcacacccacacaaacctagacgaattccttccacaccttctc -cacacccacacaaaccta ->p:HG002_1_chr20:18209039-18210235 +cacacccacacaaacct +>p:HG002_1_chr20:18209039-18210234 tacagttatttccaaataaaaaataagcagaaaggaaaagctaatcatctccccctattc cagtcccccaggcatgtatccttccgtagcttctccacacctacacgaacctagacgatt ccttccacaccttccccacacccacacgaacctagacgattccttccacaccttccccac @@ -78,8 +78,8 @@ acacgaacctagacaattccttccacaccttcccacacccacacagacctagacgaattc cttccacaccttctccacacccacacgaacctagacg-attccttccacaccttcc---- -------------------------------------ccacacccacacgaacctagaca attccttgcacaccttctcacacccacacaaacctagacgaattccttccacaccttctc -cacacccacacaaaccta ->p:HG002_2_chr20:18209039-18210235 +cacacccacacaaacct +>p:HG002_2_chr20:18209039-18210234 tacagttatttccaaataaaaaataagcagaaaggaaaagctaatcatctccccctattc cagtcccccaggcatgtatccttccgtagcttctccacacctacacgaacctagacgatt ccttccacaccttccccacacccacacgaacctagacgattccttccacaccttccccac @@ -105,8 +105,8 @@ acacgaacctagacaattccttccacaccttctcacacccacacaaacctagatgaattc cttccacaccttccccacacccacacagacctagacgaattccttccacaccttctccac acccacacgaacctagacgattccttccacaccttccccacacccacacgaacctagaca attccttgcacaccttctcacacccacacaaacctagacgaattccttccacaccttctc -cacacccacacaaaccta ->ref_chr20:18209039-18210235 +cacacccacacaaacct +>ref_chr20:18209039-18210234 tacagttatttccaaataaaaaataagcagaaaggaaaagctaatcatctccccctattc cagtcccccaggcatgtatccttccgtagcttctccacacct------------------ -----------------------acacgaacctagacgattccttccacaccttccccac @@ -129,7 +129,7 @@ ccttccccacacccacacag---------------------------------------- -acctagacgaattccttccacaccttctccacacccacacgaacctagacg-attcctt ccacaccttccccacac-----------------------------------------cc acacgaacctagacaattccttgcacaccttctcacacccacacaaacctagacgaattc -cttccacaccttctccacacccacacaaaccta--------------------------- +cttccacaccttctccacacccacacaaacct---------------------------- ------------------------------------------------------------ 
------------------------------------------------------------ ------------------- +----------------- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_861c06fcaf8a2444ad0e8a092dbbb547.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_7ca10eb32dd50e600280f16b4afb82f5.msa similarity index 90% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_861c06fcaf8a2444ad0e8a092dbbb547.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_7ca10eb32dd50e600280f16b4afb82f5.msa index 0aa2b622..7f787dda 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_861c06fcaf8a2444ad0e8a092dbbb547.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_7ca10eb32dd50e600280f16b4afb82f5.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:57948698-57949591 +>syndip_1_chr20:57948698-57949590 cggagcgggtgtgccccgtggtctctcttcactgttcttaagacattcacaacctcattt tgatggtgggacgctgcagctcacgtctgagtcccatccaagacttggcccactgctgtg ccctgcacagatggacagatggttggaa----ggatggatggatggatggatggatggat @@ -13,8 +13,8 @@ gaatggatgatggatggatgaatgagatggatggatt------------gatggatggat ggatggatggaagaatggatgatggatggatgaatgagatgggtggattgatggatggat ggatggatggatggatggatggatggatggatggcagatggatggatccatctatgtcag gcctgcatgacacctggtgtggaggagccacccagtcaacatttgctgacaggatggaca -gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctgggc ->syndip_2_chr20:57948698-57949591 +gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctggg +>syndip_2_chr20:57948698-57949590 cggagcgggtgtgccccgtggtctctcttcactgttcttaagacattcacaacctcattt tgatggtgggacgctgcagctcacgtctgagtcccatccaagacttggcccactgctgtg ccctgcacagatggacagatggttggaaggatggatggatggatggatggatggatggat @@ -29,8 +29,8 @@ gaatggatgatggatggatgaatgagatggatggattgatggatggatggatggatggat ggatggatggaagaatggatgatggatggatgaatgagatgggtggatt----gatggat ggatggatggatggatggatggatggatggatggcagatggatggatccatctatgtcag gcctgcatgacacctggtgtggaggagccacccagtcaacatttgctgacaggatggaca -gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctgggc ->p:HG002_1_chr20:57948698-57949591 
+gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctggg +>p:HG002_1_chr20:57948698-57949590 cggagcgggtgtgccccgtggtctctcttcactgttcttaagacattcacaacctcattt tgatggtgggacgctgcagctcacgtctgagtcccatccaagacttggcccactgctgtg ccctgcacagatggacagatggttggaaggatggatggatggatggatggatggatggat @@ -45,8 +45,8 @@ gaatggatgatggatggatgaatgagatggatggattgatggatggatggatggatggat ggatggatggaagaatggatgatggatggatgaatgagatgggtggatt----gatggat ggatggatggatggatggatggatggatggatggcagatggatggatccatctatgtcag gcctgcatgacacctggtgtggaggagccacccagtcaacatttgctgacaggatggaca -gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctgggc ->p:HG002_2_chr20:57948698-57949591 +gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctggg +>p:HG002_2_chr20:57948698-57949590 cggagcgggtgtgccccgtggtctctcttcactgttcttaagacattcacaacctcattt tgatggtgggacgctgcagctcacgtctgagtcccatccaagacttggcccactgctgtg ccctgcacagatggacagatggttggaa----ggatggatggatggatggatggatggat @@ -61,8 +61,8 @@ gaatggatgatggatggatgaatgagatggatggatt------------gatggatggat ggatggatggaagaatggatgatggatggatgaatgagatgggtggattgatggatggat ggatggatggatggatggatggatggatggatggcagatggatggatccatctatgtcag gcctgcatgacacctggtgtggaggagccacccagtcaacatttgctgacaggatggaca -gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctgggc ->ref_chr20:57948698-57949591 +gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctggg +>ref_chr20:57948698-57949590 cggagcgggtgtgccccgtggtctctcttcactgttcttaagacattcacaacctcattt tgatggtgggacgctgcagctcacgtctgagtcccatccaagacttggcccactgctgtg ccctgcacagatggacagatggttggaaggatggatggatggatggatggatggatggat @@ -77,4 +77,4 @@ gaatggatgatggatggatgaatgagatggatggattgatggatggatggatggatggat ggatggatggaagaatggatgatggatggatgaatgagatgggtggatt----gatggat ggatggatggatggatggatggatggatggatggcagatggatggatccatctatgtcag gcctgcatgacacctggtgtggaggagccacccagtcaacatttgctgacaggatggaca -gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctgggc +gacagagaggaggtgagtgacaggggccctcaggtgctttttctttggagacctggg diff --git 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_76590aeb93d34a21c775c3a21a1e3bdf.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_7d4315446a621c1d4a1d8ccb36fdb130.msa similarity index 92% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_76590aeb93d34a21c775c3a21a1e3bdf.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_7d4315446a621c1d4a1d8ccb36fdb130.msa index 206e6406..f1266eda 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_76590aeb93d34a21c775c3a21a1e3bdf.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_7d4315446a621c1d4a1d8ccb36fdb130.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63154485-63155096 +>syndip_1_chr20:63154485-63155095 gaagagggagatggactggcctcagccaccccggagtacagggatgtcatcacaccagcc ctccagcggctgaaagagccagtgagaggcaggtgggggtgtggatcaggcctgatctcc cctagtctcctcctgcaatgcccagattcttgcccccgtccttccttctccctcctccct @@ -9,8 +9,8 @@ caccctcctccccctttccctccctcctcccgactccctctccccttctccctcttctca ---------------------tcccttcctcctcctccctccttcctccctcctccctcc tccccacactgtccatttgcaggctgcctggtcctagcccatatccaagggcctgtctca gctacaaagttctgctctctgcttttaaatgaggacacaaggtggtttccaaaacacatg -tacaaggcagaaattg ->syndip_2_chr20:63154485-63155096 +tacaaggcagaaatt +>syndip_2_chr20:63154485-63155095 gaagagggagatggactggcctcagccaccccggagtacagggatgtcatcacaccagcc ctccagcggctgaaagagccagtgagaggcaggtgggggtgtggatcaggcctgatctcc cctagtctcctcctgcaatgcccagattcttgcccccgtccttccttctccctcctccct @@ -21,8 +21,8 @@ caccctcctccccctttccctccctcctcccgactccctctccccttctccctcttctca tcccttcctcctcctccctcctcccttcctcctcctccctccttcctccctcctccctcc tccccacactgtccatttgcaggctgcctggtcctagcccatatccaagggcctgtctca gctacaaagttctgctctctgcttttaaatgaggacacaaggtggtttccaaaacacatg -tacaaggcagaaattg ->p:HG002_1_chr20:63154485-63155096 +tacaaggcagaaatt +>p:HG002_1_chr20:63154485-63155095 gaagagggagatggactggcctcagccaccccggagtacagggatgtcatcacaccagcc ctccagcggctgaaagagccagtgagaggcaggtgggggtgtggatcaggcctgatctcc cctagtctcctcctgcaatgcccagattcttgcccccgtccttccttctccctcctccct @@ 
-33,8 +33,8 @@ caccctcctccccctttccctccctcctcccgactccctctccccttctccctcttctca tcccttcctcctcctccctcctcccttcctcctcctccctccttcctccctcctccctcc tccccacactgtccatttgcaggctgcctggtcctagcccatatccaagggcctgtctca gctacaaagttctgctctctgcttttaaatgaggacacaaggtggtttccaaaacacatg -tacaaggcagaaattg ->p:HG002_2_chr20:63154485-63155096 +tacaaggcagaaatt +>p:HG002_2_chr20:63154485-63155095 gaagagggagatggactggcctcagccaccccggagtacagggatgtcatcacaccagcc ctccagcggctgaaagagccagtgagaggcaggtgggggtgtggatcaggcctgatctcc cctagtctcctcctgcaatgcccagattcttgcccccgtccttccttctccctcctccct @@ -45,8 +45,8 @@ caccctcctccccctttccctccctcctcccgactccctctccccttctccctcttctca ---------------------tcccttcctcctcctccctccttcctccctcctccctcc tccccacactgtccatttgcaggctgcctggtcctagcccatatccaagggcctgtctca gctacaaagttctgctctctgcttttaaatgaggacacaaggtggtttccaaaacacatg -tacaaggcagaaattg ->ref_chr20:63154485-63155096 +tacaaggcagaaatt +>ref_chr20:63154485-63155095 gaagagggagatggactggcctcagccaccccggagtacagggatgtcatcacaccagcc ctccagcggctgaaagagccagtgagaggcaggtgggggtgtggatcaggcctgatctcc cctagtctcctcctgcaatgcccagattcttgcccccgtccttccttctccctcctccct @@ -57,4 +57,4 @@ caccctcctccccctttccctccctcctcccgactccctctccccttctccctcttctca tcccttcctcctcctccctcctcccttcctcctcctccctccttcctccctcctccctcc tccccacactgtccatttgcaggctgcctggtcctagcccatatccaagggcctgtctca gctacaaagttctgctctctgcttttaaatgaggacacaaggtggtttccaaaacacatg -tacaaggcagaaattg +tacaaggcagaaatt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_66a3a22d415ebc8e9c28636a422a27fb.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_83e1386bb11a239de07f648aed2478a4.msa similarity index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_66a3a22d415ebc8e9c28636a422a27fb.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_83e1386bb11a239de07f648aed2478a4.msa index f560985e..f81be9c5 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_66a3a22d415ebc8e9c28636a422a27fb.msa +++ 
b/repo_utils/test_files/external/fake_mafft/lookup/fm_83e1386bb11a239de07f648aed2478a4.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:61744088-61744755 +>syndip_1_chr20:61744088-61744754 tcaaactgggcactgcaaattctggaatctctcctacctcccgaagtttgtagtccacca ggcccagggcgttctgaagcctggagggatgagaagcaagcggagactgtgtggttggaa aggagccaaccagagagtggggaggcgatggagagaataacagagaggaaggcaggtgca @@ -10,8 +10,8 @@ gggagagagatggaaagaggtggagggagagagagaaggaaagggagggaga-------- -------------------------gagagagacagacagacagcttcacacagacataa aacatctgctgttcaggatcccagctgccctccgactctgcctccttctaacccggctgc ctggctcacccgggtgacagggaggggcgccttcaccatgccctgtcctgaggctccctt -tcatcctg ->syndip_2_chr20:61744088-61744755 +tcatcct +>syndip_2_chr20:61744088-61744754 tcaaactgggcactgcaaattctggaatctctcctacctcccgaagtttgtagtccacca ggcccagggcgttctgaagcctggagggatgagaagcaagcggagactgtgtggttggaa aggagccaaccagagagtggggaggcgatggagagaataagagagaggaaggcaggtgca @@ -23,8 +23,8 @@ gagg-------------------------------------------------------- -------------------------gagagagagagacagacagcttcacacagacataa aacatctgctgttcaggatcccagctgccctccgactctgcctccttctaacccggctgc ctggctcacccgggtgacagggaggggcgccttcaccatgccctgtcctgaggctccctt -tcatcctg ->p:HG002_1_chr20:61744088-61744755 +tcatcct +>p:HG002_1_chr20:61744088-61744754 tcaaactgggcactgcaaattctggaatctctcctacctcccgaagtttgtagtccacca ggcccagggcgttctgaagcctggagggatgagaagcaagcggagactgtgtggttggaa aggagccaaccagagagtggggaggcgatggagagaataagagagaggaaggcaggtgca @@ -36,8 +36,8 @@ gagg-------------------------------------------------------- -------------------------gagagagagagacagacagcttcacacagacataa aacatctgctgttcaggatcccagctgccctccgactctgcctccttctaacccggctgc ctggctcacccgggtgacagggaggggcgccttcaccatgccctgtcctgaggctccctt -tcatcctg ->p:HG002_2_chr20:61744088-61744755 +tcatcct +>p:HG002_2_chr20:61744088-61744754 tcaaactgggcactgcaaattctggaatctctcctacctcccgaagtttgtagtccacca ggcccagggcgttctgaagcctggagggatgagaagcaagcggagactgtgtggttggaa aggagccaaccagagagtggggaggcgatggagagaataacagagaggaaggcaggtgca @@ -49,8 +49,8 @@ 
gggagagagatggaaagaggtggagggagagagagaaggaaagggagggaga-------- -------------------------gagagagacagacagacagcttcacacagacataa aacatctgctgttcaggatcccagctgccctccgactctgcctccttctaacccggctgc ctggctcacccgggtgacagggaggggcgccttcaccatgccctgtcctgaggctccctt -tcatcctg ->ref_chr20:61744088-61744755 +tcatcct +>ref_chr20:61744088-61744754 tcaaactgggcactgcaaattctggaatctctcctacctcccgaagtttgtagtccacca ggcccagggcgttctgaagcctggagggatgagaagcaagcggagactgtgtggttggaa aggagccaaccagagagtggggaggcgatggagagaataacagagaggaaggcaggtgca @@ -62,4 +62,4 @@ gaggggagaggaagagagggagagagagaaggagggagagagatggagagaggtggaggg agagagagaaggaaagggagggagagagagagacagacagacagcttcacacagacataa aacatctgctgttcaggatcccagctgccctccgactctgcctccttctaacccggctgc ctggctcacccgggtgacagggaggggcgccttcaccatgccctgtcctgaggctccctt -tcatcctg +tcatcct diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_1c62e00ef65f42cbba04638af2263531.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_85836283fdfecd2a963f6b695137738c.msa similarity index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_1c62e00ef65f42cbba04638af2263531.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_85836283fdfecd2a963f6b695137738c.msa index 8de86f4e..a2467e55 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_1c62e00ef65f42cbba04638af2263531.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_85836283fdfecd2a963f6b695137738c.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:62349541-62349927 +>syndip_1_chr20:62349541-62349926 aagagggaggacaaaacagccgttttcagacaattccggatagactgtcatggaatagaa aaactacagggtgcatttcaggaaggaggaagcagcaacccaggggatggggatggggat ggggatggggatggtgggggtgatgggggtgatgatggtgatggtgatggtgatggtgat @@ -23,8 +23,8 @@ ggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcag------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ 
--------------------------------aaaattgctggctga ->syndip_2_chr20:62349541-62349927 +-------------------------------aaaattgctggctg +>syndip_2_chr20:62349541-62349926 aagagggaggacaaaacagccgttttcagacaattccggatagactgtcatggaatagaa aaactacagggtgcatttcaggaaggaggaagcagcaaccca------ggggatggggat ggggatggggatggggatggtgggggtgatgggggtgatgatggtgatggtgggggtgat @@ -49,8 +49,8 @@ ggggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgatg gtagaggtgatgatggtgggggtgatggtgggggtgggggtgggggtgggggtgatgatg gtgatggtgggggtgatgatggtgagggtgatgatggtgggggtgatggtgggggtgggg gtgggggtgggggtgatgatggtgggagtgggctggtagtgggggtgatggtttgggtgg -gagtaaggtgatggtggaggtaaagaggcagaaaattgctggctga ->p:HG002_1_chr20:62349541-62349927 +gagtaaggtgatggtggaggtaaagaggcagaaaattgctggctg +>p:HG002_1_chr20:62349541-62349926 aagagggaggacaaaacagccgttttcagacaattccggatagactgtcatggaatagaa aaactacagggtgcatttcaggaaggaggaagcagcaaccca------ggggatggggat ggggatggggatggtgggggtgatgggggtgatgatggtgatggtgatggtgatggtgat @@ -75,8 +75,8 @@ ggggtgatgacggtgggggtgggggtgatgacggtgggggtgggggtgatgacggtgatg gtagaggtgatgatggtgggggtgatggtgggggtgggggtgggggtgggggtgatgatg gtgatggtgggggtgatgatggtgagggtgatgatggtgggggtgatggtgggggtggcg gtgatggtgggggtgatgatggtgggagtgggctggtagtgggggtgatggtttgggtgg -gagtaaggtgatggtggaggtaaagaggcagaaaattgctggctga ->p:HG002_2_chr20:62349541-62349927 +gagtaaggtgatggtggaggtaaagaggcagaaaattgctggctg +>p:HG002_2_chr20:62349541-62349926 aagagggaggacaaaacagccgttttcagacaattccggatagactgtcatggaatagaa aaactacagggtgcatttcaggaaggaggaagcagcaaccca------ggggatggggat ggggatggggatggtgggggtgatgggggtgatgatggtgatggtgatggtgatggtgat @@ -101,8 +101,8 @@ ggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcag------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------------------------aaaattgctggctga ->ref_chr20:62349541-62349927 +-------------------------------aaaattgctggctg +>ref_chr20:62349541-62349926 
aagagggaggacaaaacagccgttttcagacaattccggatagactgtcatggaatagaa aaactacagggtgcatttcaggaaggaggaagcagcaaccca------ggggatggggat ggggatggggatggtgggggtgatgggggtgatgatggtgatggtgatggtgatggtgat @@ -127,4 +127,4 @@ ggagtaaggtgatggtggaggtaaagaggcag---------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------------------------aaaattgctggctga +-------------------------------aaaattgctggctg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_a6937b7b4931c9433881ec8a54af98fd.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_86dba3a0aeae01ccc8257baa7389c2b2.msa similarity index 73% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_a6937b7b4931c9433881ec8a54af98fd.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_86dba3a0aeae01ccc8257baa7389c2b2.msa index a8f660dc..d136d90a 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_a6937b7b4931c9433881ec8a54af98fd.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_86dba3a0aeae01ccc8257baa7389c2b2.msa @@ -1,30 +1,30 @@ ->syndip_1_chr20:7720811-7721104 +>syndip_1_chr20:7720811-7721103 tcacacatttaagatacaattcctttccccaccccaactctccaggtttcctgaattaat tggaaggaaactgttatatttccttaattcctgaagtgggagttaaatctaaatctaaca aatgacattttctttttcttttcttttctttctttttttttttttttttgggatggagtt ccactcttgttgcccaggctggacaggctggaatacaatggcgtgatcttggctctctgc -aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagct ->syndip_2_chr20:7720811-7721104 +aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagc +>syndip_2_chr20:7720811-7721103 tcacacatttaagatacaattcctttccccaccccaactctccaggtttcctgaattaat tggaaggaaactgttatatttccttaattcctgaagtgggagttaaatctaaatctaaca aatgacgttttctttttcttttc---------------tttttttttttgggatggagtt ccactcttgttgcccaggctggacaggctggaatacaatggcgtgatcttggctctctgc -aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagct 
->p:HG002_1_chr20:7720811-7721104 +aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagc +>p:HG002_1_chr20:7720811-7721103 tcacacatttaagatacaattcctttccccaccccaactctccaggtttcctgaattaat tggaaggaaactgttatatttccttaattcctgaagtgggagttaaatctaaatctaaca aatgacgttttctttttcttttc---------------tttttttttttgggatggagtt ccactcttgttgcccaggctggacaggctggaatacaatggcgtgatcttggctctctgc -aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagct ->p:HG002_2_chr20:7720811-7721104 +aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagc +>p:HG002_2_chr20:7720811-7721103 tcacacatttaagatacaattcctttccccaccccaactctccaggtttcctgaattaat tggaaggaaactgttatatttccttaattcctgaagtgggagttaaatctaaatctaaca aatgacattttctttttcttttcttttctttctttttttttttttttttgggatggagtt ccactcttgttgcccaggctggacaggctggaatacaatggcgtgatcttggctctctgc -aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagct ->ref_chr20:7720811-7721104 +aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagc +>ref_chr20:7720811-7721103 tcacacatttaagatacaattcctttccccaccccaactctccaggtttcctgaattaat tggaaggaaactgttatatttccttaattcctgaagtgggagttaaatctaaatctaaca aatgacattttctttttcttttcttttctttctttttttttttttttttgggatggagtt ccactcttgttgcccaggctggacaggctggaatacaatggcgtgatcttggctctctgc -aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagct +aacctccgactcccaggttcaagtgattctcctgcctcagcctcccaactagc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_e42e2915a756c6b9535d501ffcc3f9b1.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_8a7bc612e0bdea73ad6aea0abc428b43.msa similarity index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_e42e2915a756c6b9535d501ffcc3f9b1.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_8a7bc612e0bdea73ad6aea0abc428b43.msa index 782feb92..1e6f680f 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_e42e2915a756c6b9535d501ffcc3f9b1.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_8a7bc612e0bdea73ad6aea0abc428b43.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:44763796-44764386 
+>syndip_1_chr20:44763796-44764385 gactccattctcttttccagatggatcactcgtctcaacagcatcagaaattaaaaggtt gcacagcccttgtggtgattataggttatgtgattctcaaaggtttaggagtttacgggt tccctgtccatccattcactacatgctcacctatccatccatccatccatttatccatcc @@ -11,8 +11,8 @@ atccatttatccatccgtccatttatccatccatccatccacccacccatccatccatcc atccacttatccatccacccatccatccatccgtccatccattcatccatccatccatcc tcccacaaactcaccaagcctacaatacaatcaattctgctgtaacatgaaatatacatt tctaaaaattactgtgcaacgcaaaatcttgctataaaaatcacacagcttatgggaaaa -tggggtcagtg ->syndip_2_chr20:44763796-44764386 +tggggtcagt +>syndip_2_chr20:44763796-44764385 gactccattctcttttccagatggatcactcgtctcaacagcatcagaaattaaaaggtt gcacagcccttgtggtgattataggttatgtgattctcaaaggtttaggagtttacgggt tccctgtccatccattcactacatgctcacctatccatccatccatccatttatccatcc @@ -25,8 +25,8 @@ atccatttatccatccgtccatttatccatccatccatccacccacccatccatccatcc atccatttatccatccacccatccatccatccgtccatccattcatccatccatccatcc tcccacaaactcaccaagcctacaatacaatcaattctgctgtaacatgaaatatacatt tctaaaaattactgtgcaacgcaaaatcttgctataaaaatcacacagcttatgggaaaa -tggggtcagtg ->p:HG002_1_chr20:44763796-44764386 +tggggtcagt +>p:HG002_1_chr20:44763796-44764385 gactccattctcttttccagatggatcactcgtctcaacagcatcagaaattaaaaggtt gcacagcccttgtggtgattataggttatgtgattctcaaaggtttaggagtttacgggt tccctgtccatccattcactacatgctcacctatccatccatccatccatttatccatcc @@ -39,8 +39,8 @@ atccatttatccatccgtccatttatccatccatccatccacccacccatccatccatcc atccatttatccatccacccatccatccatccgtccatccattcatccatccatccatcc tcccacaaactcaccaagcctacaatacaatcaattctgctgtaacatgaaatatacatt tctaaaaattactgtgcaacgcaaaatcttgctataaaaatcacacagcttatgggaaaa -tggggtcagtg ->p:HG002_2_chr20:44763796-44764386 +tggggtcagt +>p:HG002_2_chr20:44763796-44764385 gactccattctcttttccagatggatcactcgtctcaacagcatcagaaattaaaaggtt gcacagcccttgtggtgattataggttatgtgattctcaaaggtttaggagtttacgggt tccctgtccatccattcactacatgctcacctatccatccatccatccatttatccatcc @@ -53,8 +53,8 @@ atccatttatccatccgtccatttatccatccatccatccacccacccatccatccatcc atccacttatccatccacccatccatccatccgtccatccattcatccatccatccatcc 
tcccacaaactcaccaagcctacaatacaatcaattctgctgtaacatgaaatatacatt tctaaaaattactgtgcaacgcaaaatcttgctataaaaatcacacagcttatgggaaaa -tggggtcagtg ->ref_chr20:44763796-44764386 +tggggtcagt +>ref_chr20:44763796-44764385 gactccattctcttttccagatggatcactcgtctcaacagcatcagaaattaaaaggtt gcacagcccttgtggtgattataggttatgtgattctcaaaggtttaggagtttacgggt tccctgtccatccattcactacatgctcacctatccatccatccatccatttatccatcc @@ -67,4 +67,4 @@ atccatccatccatccatccatccatccatccatttatcc-------------------- atccatccatccatccacccatccatccatccgtccatccattcatccatccatccatcc tcccacaaactcaccaagcctacaatgcaatcaattctgctgtaacatgaaatatacatt tctaaaaattactgtgcaacgcaaaatcttgctataaaaatcacacagcttatgggaaaa -tggggtcagtg +tggggtcagt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_3f97b2feb3fc3f1fe6d75a09563e2084.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_8e86d26235c230b6c03a21914919f32d.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_3f97b2feb3fc3f1fe6d75a09563e2084.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_8e86d26235c230b6c03a21914919f32d.msa index c4967b74..ca0778f7 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_3f97b2feb3fc3f1fe6d75a09563e2084.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_8e86d26235c230b6c03a21914919f32d.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:18208997-18210828 +>syndip_1_chr20:18208997-18210827 gtattctttgtactgtttttatttttgcaacttttctgtgtttacagttatttccaaata aaaaataagcagaaaggaaaagctaatcatctccccctattccagtcccccaggcatgta tccttccgtagcttctccacacctacacgaacctagacgattccttccacaccttcccca @@ -34,8 +34,8 @@ tccacaccttcctaacacccacacgaacctagagaattccttccacaccttcacacccac acaaacctagacgaattccttccacaccttctccacacccacacaaacctagacaaattc ttatacacttaaaatgggattttgttgttttattaaaatgcagtcattctctatgcatta attattgtgcagcttgatttttctcctttttttttgttaaatcttcctccgtcatccagg -ctggagtgcag ->syndip_2_chr20:18208997-18210828 +ctggagtgca +>syndip_2_chr20:18208997-18210827 gtattctttgtactgtttttatttttgcaacttttctgtgtttacagttatttccaaata 
aaaaataagcagaaaggaaaagctaatcatctccccctattccagtcccccaggcatgta tccttccgtagcttctccacacctacacgaacctagacgattccttccacaccttcccca @@ -71,8 +71,8 @@ tccacaccttcctaacacccacacgaacctagagaattccttccacaccttcacacccac acaaacctagacgaattccttccacaccttctccacacccacacaaacctagacaaattc ttatacacttaaaatgggattttgttgttttattaaaatgcagtcattctctatgcatta attattgtgcagcttgatttttctcctttttttttgttaaatcttcctccgtcatccagg -ctggagtgcag ->p:HG002_1_chr20:18208997-18210828 +ctggagtgca +>p:HG002_1_chr20:18208997-18210827 gtattctttgtactgtttttatttttgcaacttttctgtgtttacagttatttccaaata aaaaataagcagaaaggaaaagctaatcatctccccctattccagtcccccaggcatgta tccttccgtagcttctccacacctacacgaacctagacgattccttccacaccttcccca @@ -108,8 +108,8 @@ tccacaccttcctaacacccacacgaacctagagaattccttccacaccttcacacccac acaaacctagacgaattccttccacaccttctccacacccacacaaacctagacaaattc ttatacacttaaaatgggattttgttgttttattaaaatgcagtcattctctatgcatta attattgtgcagcttgatttttctcctttttttttgttaaatcttcctccgtcatccagg -ctggagttcag ->p:HG002_2_chr20:18208997-18210828 +ctggagttca +>p:HG002_2_chr20:18208997-18210827 gtattctttgtactgtttttatttttgcaacttttctgtgtttacagttatttccaaata aaaaataagcagaaaggaaaagctaatcatctccccctattccagtcccccaggcatgta tccttccgtagcttctccacacctacacgaacctagacgattccttccacaccttcccca @@ -145,8 +145,8 @@ tccacaccttcctaacacccacacgaacctagagaattccttccacaccttcacacccac acaaacctagacgaattccttccacaccttctccacacccacacaaacctagacaaattc ttatacacttaaaatgggattttgttgttttattaaaatgcagtcattctctatgcatta attattgtgcagcttgatttttctcctttttttttgttaaatcttcctccgtcatccagg -ctggagtgcag ->ref_chr20:18208997-18210828 +ctggagtgca +>ref_chr20:18208997-18210827 gtattctttgtactgtttttatttttgcaacttttctgtgtttacagttatttccaaata aaaaataagcagaaaggaaaagctaatcatctccccctattccagtcccccaggcatgta tccttccgtagcttctccacacct------------------------------------ @@ -182,4 +182,4 @@ tccacaccttcctaacacccacacgaacctagagaattccttccacaccttcacacccac acaaacctagacgaattccttccacaccttctccacacccacacaaacctagacaaattc ttatacacttaaaatgggattttgttgttttattaaaatgcagtcattctctatgcatta 
attattgtgcagcttgatttttctcctttttttttgttaaatcttcctccgtcatccagg -ctggagtgcag +ctggagtgca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8f0a476d665d82b7248a60d8b6288338.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_8f0a476d665d82b7248a60d8b6288338.msa deleted file mode 100644 index 611f55b3..00000000 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_8f0a476d665d82b7248a60d8b6288338.msa +++ /dev/null @@ -1,65 +0,0 @@ ->syndip_1_chr20:63641747-63642116 -acgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcctcggagtcc -ccggccccgctctgcgcccctccgagctccgccctagccccgcccccgcccagtgccccg -ccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgagctccgccc -tggccccgccccggcccctgcccgctccgagctccgccctggccccgccccccgcccagt -gccccgccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgagct -ccgccccgg--------------------------------------------------- ------------------------------------------------------------- -----------------------ccccgccccggcccctgcccgctccgagctccgccctg -gccccgcccccgcccagtgccccgccccctgactgctgctagccctgcccccgccccggc -ccctgcccgctccgagctccgccccggccccgccccggcccctgcccgctccgagctccg -ccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgccccggcccc -tgcccgctccgagctccgccccggccccgcccccgcaccttctcgcg ->syndip_2_chr20:63641747-63642116 -acgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcctcggagtcc -ccggccccgctctgcgcccctccgagctccgccctagccccgcccccgcccagtgccccg -ccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgagctccgccc -cggccccgccccggcccctgcccgctccgagctccgccctggccccg-cccccgcccagt -gccccgccccctgactgctgctagccctgcccccgccccggcccctgcccgctccgagct -ccaccctggccccgcccccgcccagtgccccgccccctgcctgctgcttgccctgccccc -tccccggcccctgcccgctccgagctccgccccggccccgccccggcccagtgccccgcc -ccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgagctccgccctg -gccccgcccccgcccagtgccccgccccctgcctgctgcttgccctgccccctccccggc -ccctgcccgctccgagctccgccccggcctcgccccggcccctgcccgctccgagcttcg -ccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgccccggcccc -tgcccgctccgagctccgccccggccccgcccccgcaccttctcgcg 
->p:HG002_1_chr20:63641747-63642116 -acgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcctcggagtcc -ccggccccgctctgcgcccctccgagctccgccctagccccgcccccgcccagtgccccg -ccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgagctccgccc -cggccccgccccggcccctgcccgctccgagctccgccctggccccg-cccccgcccagt -gccccgccccctgactgctgctagccctgcccccgccccggcccctgcccgctccgagct -ccaccctggccccgcccccgcccagtgccccgccccctgcctgctgcttgccctgccccc -tccccggcccctgcccgctccgagctccgccccggccccgccccggcccagtgccccgcc -ccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgagctccgccctg -gccccgcccccgcccagtgccccgccccctgcctgctgcttgccctgccccctccccggc -ccctgcccgctccgagctccgccccggcctcgccccggcccctgcccgctccgagcttcg -ccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgccccggcccc -tgcccgctccgagctccgccccggccccgcccccgcaccttctcgcg ->p:HG002_2_chr20:63641747-63642116 -acgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcctcggagtcc -ccggccccgctctgcgcccctccgagctccgccctagccccgcccccgcccagtgccccg -ccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgagctccgccc -cggccccgccccggcccctgcccgctccgagctccgccctggccccg-cccccgcccagt -gccccgccccctgactgctgctagccctgcccccgccccggcccctgcccgctccgagct -ccgccccgg--------------------------------------------------- ------------------------------------------------------------- -----------------------ccccgccccggcccctgcccgctccgagctccgccctg -gccccgcccccgcccagtgccccgccccctgactgctgctagccctgcccccgccccggc -ccctgcccgctccgagctccgccccggccccgccccggcccctgcccgctccgagctccg -ccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgccccggcccc -tgcccgctccgagctccgccccggccccgcccccgcaccttctcgcg ->ref_chr20:63641747-63642116 -acgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcctcggagtcc -ccggccccgctctgcgcccctccgagctccgccctagccccgcccccgcccagtgccccg -ccccctgcctgctgctagccctgcccccgccccggcccctgcccgctccgagctccgccc -cggccccgccccggcccctgcccgctccgagctccgccctggccccg-cccccgcccagt -gccccgccccctgactgctgctagccctgcccccgccccggcccctgcccgctccgagct -tcgccccgg--------------------------------------------------- ------------------------------------------------------------- 
-----------------------ccccgccccggcccctgcccgctccgagctccgccccg -gccccgcccccgcaccttctcgcg------------------------------------ ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------ diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_eee9610d6083d41536c3c93d53b55f73.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_8f863700689b63e4e012e6bd667b5518.msa similarity index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_eee9610d6083d41536c3c93d53b55f73.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_8f863700689b63e4e012e6bd667b5518.msa index 60de427f..4942c214 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_eee9610d6083d41536c3c93d53b55f73.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_8f863700689b63e4e012e6bd667b5518.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63535574-63536346 +>syndip_1_chr20:63535574-63536345 acttccaggtggagcttgcagaagctccaggtgacctgcgccctttgtgtgtctgactcc ctcaggtcactggaacacaaaccccacacagaagaccacggcccagcatggagcccctgg ggtccaccccctccacttccgcacacctggaagcactgccccacctgcccctccaaccac @@ -15,8 +15,8 @@ ccgccccccccacctggcctcccgccccccctcacctggcctccagccccccccacctgg cctcccgct-ccccccccacctggcctcccaccccctcctcacctggcctcctccccact tctcacctgggcgagttctcaggactcagggtctttgcacctgggagctgaccagcctcc ggccagaccccacccctgcacccctgctcactaacgaggtcgtgggtctgcgcgtgaccc -tgctaacgaggtcgtgggtct ->syndip_2_chr20:63535574-63536346 +tgctaacgaggtcgtgggtc +>syndip_2_chr20:63535574-63536345 acttccaggtggagcttgcagaagctccaggtgacctgcgccctttgtgtgtctgactcc ctcaggtcactggaacacaaaccccacacagaagaccacggcccagcatggagcccctgg ggtccaccccctccacttccgcacacctggaagcactgccccacctgcccctccaacca- @@ -33,8 +33,8 @@ ccgccccccccacctggcctcccgccccccctcacctggcctccagccccccccacctag cctcccgctcccccccccacctggcctcccaccccctcctcacctggcctcctccccact tctcacctgggcgagttctcaggactcagggtctttgcacctgggagctgaccagcctcc 
ggccagaccccacccctgcacccctgctcactaacgaggtcgtgggtctgcgcgtgaccc -tgctaacgaggtcgtgggtct ->p:HG002_1_chr20:63535574-63536346 +tgctaacgaggtcgtgggtc +>p:HG002_1_chr20:63535574-63536345 acttccaggtggagcttgcagaagctccaggtgacctgcgccctttgtgtgtctgactcc ctcaggtcactggaacacaaaccccacacagaagaccacggcccagcatggagcccctgg ggtccaccccctccacttccgcacacctggaagcactgccccacctgcccctccaacca- @@ -51,8 +51,8 @@ ccgccccccccacctggcctcccgccccccctcacctggcctccagccccccccacctag cctcccgctcccccccccacctggcctcccaccccctcctcacctggcctcctccccact tctcacctgggcgagttctcaggactcagggtctttgcacctgggagctgaccagcctcc ggccagaccccacccctgcacccctgctcactaacgaggtcgtgggtctgcgcgtgaccc -tgctaacgaggtcgtgggtct ->p:HG002_2_chr20:63535574-63536346 +tgctaacgaggtcgtgggtc +>p:HG002_2_chr20:63535574-63536345 acttccaggtggagcttgcagaagctccaggtgacctgcgccctttgtgtgtctgactcc ctcaggtcactggaacacaaaccccacacagaagaccacggcccagcatggagcccctgg ggtccaccccctccacttccgcacacctggaagcactgccccacctgcccctccaaccac @@ -69,8 +69,8 @@ ccgccccccccacctggcctcccgccccccctcacctggcctccagccccccccacctgg cctcccgct-ccccccccacctggcctcccaccccctcctcacctggcctcctccccact tctcacctgggcgagttctcaggactcagggtctttgcacctgggagctgaccagcctcc ggccagaccccacccctgcacccctgctcactaacgaggtcgtgggtctgcgcgtgaccc -tgctaacgaggtcgtgggtct ->ref_chr20:63535574-63536346 +tgctaacgaggtcgtgggtc +>ref_chr20:63535574-63536345 acttccaggtggagcttgcagaagctccaggtgacctgcgccctttgtgtgtctgactcc ctcaggtcactggaacacaaaccccacacagaagaccacggcccagcatggagcccctgg ggtccaccccctccacttccgcacacctggaagcactgccccacctgcccctccaacca- @@ -87,4 +87,4 @@ ccgccccccccacctggcctcccgccccccctcacctggcctccagccccccccacctag cctcccgctcccccccccacctggcctcccaccccctcctcacctggcctcctccccact tctcacctgggcgagttctcaggactcagggtctttgcacctgggagctgaccagcctcc ggccagaccccacccctgcacccctgctcactaacgaggtcgtgggtctgcgcgtgaccc -tgctaacgaggtcgtgggtct +tgctaacgaggtcgtgggtc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8e66fbe0004e8e66a6d2f5eda4fc033d.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_9b9b387681fd363ece224e3813679abe.msa similarity index 91% 
rename from repo_utils/test_files/external/fake_mafft/lookup/fm_8e66fbe0004e8e66a6d2f5eda4fc033d.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_9b9b387681fd363ece224e3813679abe.msa index cd1e16cd..80617d12 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_8e66fbe0004e8e66a6d2f5eda4fc033d.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_9b9b387681fd363ece224e3813679abe.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:61561820-61562604 +>syndip_1_chr20:61561820-61562603 acaacccacaaaggtttatagctcctttcctgtgctgaagagctggcccttctctgggcc ccagtggctgcatgcgccatgtttctgggcagtgaccatatgatggcagtaatgctgtgt ctggaaatcctgaccccagggctcccagagagagagagggaccttcgtgtggagaggtgg @@ -14,8 +14,8 @@ tgtggagaggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtg gaccccagggctcccggagagagagagggaccttcgtgtggagaggtggaccccaggact cccggagagagagaggggcctccgtgtgcagaggtggtttgtcgcttctttcatcttcag gacagtcattgcctgggaaactggggggattggtccgaacccaccttgtacaccccagac -ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagccc ->syndip_2_chr20:61561820-61562604 +ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagcc +>syndip_2_chr20:61561820-61562603 acaacccacaaaggtttatagctcctttcctgtgctgaagagctggcccttctctgggcc ccagtggctgcatgcgccatgtttctgggcagtgaccatatgatggcagtaatgctgtgt ctggaaatcctgaccccagggctcccagagagagagagggaccttcgtgtggagaggtgg @@ -31,8 +31,8 @@ tgtggagaggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtg gaccccagggctcccggagagagagggggaccttcgtgtggagaggtggaccccaggact cccggagagagagaggggcctccgtgtgcagaggtggtttgtcgcttctttcatcttcag gacagtcattgcctgggaaactggggggattggtccgaacccaccttgtacaccccagac -ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagccc ->p:HG002_1_chr20:61561820-61562604 +ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagcc +>p:HG002_1_chr20:61561820-61562603 acaacccacaaaggtttatagctcctttcctgtgctgaagagctggcccttctctgggcc ccagtggctgcatgcgccatgtttctgggcagtgaccatatgatggcagtaatgctgtgt ctggaaatcctgaccccagggctcccagagagagagagggaccttcgtgtggagaggtgg @@ -48,8 +48,8 @@ tgtggagaggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtg 
gaccccagggctcccggagagagagggggaccttcgtgtggagaggtggaccccaggact cccggagagagagaggggcctccgtgtgcagaggtggtttgtcgcttctttcatcttcag gacagtcattgcctgggaaactggggggattggtccgaacccaccttgtacaccccagac -ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagccc ->p:HG002_2_chr20:61561820-61562604 +ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagcc +>p:HG002_2_chr20:61561820-61562603 acaacccacaaaggtttatagctcctttcctgtgctgaagagctggcccttctctgggcc ccagtggctgcatgcgccatgtttctgggcagtgaccatatgatggcagtaatgctgtgt ctggaaatcctgaccccagggctcccagagagagagagggaccttcgtgtggagaggtgg @@ -65,8 +65,8 @@ tgtggagaggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtg gaccccagggctcccggagagagagagggaccttcgtgtggagaggtggaccccaggact cccggagagagagaggggcctccgtgtgcagaggtggtttgtcgcttctttcatcttcag gacagtcattgcctgggaaactggggggattggtccgaacccaccttgtacaccccagac -ttggtgcatccctgcaggatctgagcacagaacggcctgcttcagccc ->ref_chr20:61561820-61562604 +ttggtgcatccctgcaggatctgagcacagaacggcctgcttcagcc +>ref_chr20:61561820-61562603 acaacccacaaaggtttatagctcctttcctgtgctgaagagctggcccttctctgggcc ccagtggctgcatgcgccatgtttctgggcagtgaccatatgatggcagtaatgctgtgt ctggaaatcctgaccccagggctcccagagagagagagggaccttcgtgtggagaggtgg @@ -82,4 +82,4 @@ tgtggagaggtggaccccagggctcccggagagagagagggacctccgtgtggagaggtg gaccccagggctcccggagagagagggggaccttcgtgtggagaggtggaccccaggact cccggagagagagaggggcctccgtgtgcagaggtggtttgtcgcttctttcatcttcag gacagtcattgcctgggaaactggggggattggtccgaacccaccttgtacaccccagac -ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagccc +ttggtgcgtccctgcaggatctgagcacagaacggcctgcttcagcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_06b42ffdbc8be646eb8718b5d6951dcc.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_9e47b8fad37392ee138dfca4c2353754.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_06b42ffdbc8be646eb8718b5d6951dcc.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_9e47b8fad37392ee138dfca4c2353754.msa index 50ae4b6d..1b7c4215 100644 --- 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_06b42ffdbc8be646eb8718b5d6951dcc.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_9e47b8fad37392ee138dfca4c2353754.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:61100692-61102788 +>syndip_1_chr20:61100692-61102787 ttggtacatgtacagattgtgcaagacaataggacaccagacagatccccgggctccaca gcagaagtctggttactgggcaggcaggccccaaacactgtatttaaaacaaaaccccac tgacaagtcctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatc @@ -36,8 +36,8 @@ acctggggagaccccgagtgagggcagatggccacatcctctaattcctgtgtcacacct ggagacacacagagtaaaggtggacagccacctcctctacgtcctgtgccacacctggag acaccctgagttaaggcagatggccacctcctctaattatttttaagaacttttttgggg tataatttacatgccagaaagttcacccatttcaagcatatgattcaatattaaagctta -aaaatgttatcaatttaataacattgatggttacagagctgtgtaa ->syndip_2_chr20:61100692-61102788 +aaaatgttatcaatttaataacattgatggttacagagctgtgta +>syndip_2_chr20:61100692-61102787 ttggtacatgtacagattgtgcaagacaataggacaccagacagatccccgggctccaca gcagaagtctggttactgggcaggcaggccccaaacactgtatttaaaacaaaaccccac tgacaagtcctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatc @@ -75,8 +75,8 @@ acctggggagaccccgagtgagggcagacggccacatcctctaattcctgtgtcacacct ggagacacacagagtaaaggtggacagccacctcctctacgtcctgtgccacacctggag acaccctgagttaaggcagatggccacctcctctaattatttttaagaacttttttgggg tataatttacatgccagaaagttcacccatttcaagcatatgattcaatattaaagctta -aaaatgttatcaatttaataacattgatggttacagagctgtgtaa ->p:HG002_1_chr20:61100692-61102788 +aaaatgttatcaatttaataacattgatggttacagagctgtgta +>p:HG002_1_chr20:61100692-61102787 ttggtacatgtacagattgtgcaagacaataggacaccagacagatccccgggctccaca gcagaagtctggttactgggcaggcaggccccaaacactgtatttaaaacaaaaccccac tgacaagtcctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatc @@ -114,8 +114,8 @@ acctggggagaccccgagtgagggcagacggccacatcctctaattcctgtgtcacacct ggagacacacagagtaaaggtggacagccacctcctctacgtcctgtgccacacctggag acaccctgagttaaggcagatggccacctcctctaattatttttaagaacttttttgggg tataatttacatgccagaaagttcacccatttcaagcatatgattcaatattaaagctta -aaaatgttatcaatttaataacattgatggttacagagctgtgtaa 
->p:HG002_2_chr20:61100692-61102788 +aaaatgttatcaatttaataacattgatggttacagagctgtgta +>p:HG002_2_chr20:61100692-61102787 ttggtacatgtacagattgtgcaagacaataggacaccagacagatccccgggctccaca gcagaagtctggttactgggcaggcaggccccaaacactgtatttaaaacaaaaccccac tgacaagtcctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatc @@ -153,8 +153,8 @@ acctggggagaccccgagtgagggcagatggccacatcctctaattcctgtgtcacacct ggagacacacagagtaaaggtggacagccacctcctctacgtcctgtgccacacctggag acaccctgagttaaggcagatggccacctcctctaattatttttaagaacttttttgggg tataatttacatgccagaaagttcacccatttcaagcatatgattcaatattaaagctta -aaaatgttatcaatttaataacattgatggttacagagctgtgtaa ->ref_chr20:61100692-61102788 +aaaatgttatcaatttaataacattgatggttacagagctgtgta +>ref_chr20:61100692-61102787 ttggtacatgtacagattgtgcaagacaataggacaccagacagatccccgggctccaca gcagaagtctggttactgggcaggcaggccccaaacactgtatttaaaacaaaaccccac tgacaagtcctgtgactggagagaccccgagtgaaggcagacctccatgtcctctacatc @@ -192,4 +192,4 @@ acctggggagaccccgagtgagggcagatggccacatcctctaattcctgtgtcacacct ggagacacacagagtaaaggtggacagccacctcctctacgtcctgtgccacacctggag acaccctgagttaaggcagatggccacctcctctaattatttttaagaacttttttgggg tataatttacatgccagaaagttcacccatttcaagcatatgattcaatattaaagctta -aaaatgttatcaatttaataacattgatggttacagagctgtgtaa +aaaatgttatcaatttaataacattgatggttacagagctgtgta diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_4d5d51153a3589a4d6cd04e486f7c879.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_9e8a807906028744f45dff50046a00aa.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_4d5d51153a3589a4d6cd04e486f7c879.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_9e8a807906028744f45dff50046a00aa.msa index 55b397dc..ea31e8a0 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_4d5d51153a3589a4d6cd04e486f7c879.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_9e8a807906028744f45dff50046a00aa.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:20337100-20337808 +>syndip_1_chr20:20337100-20337807 
tacgtggaagcatagggaaggaagggatgtttgttgaggggtggtgtgatgcagcagagg aagaggaccagaggaaagcagccactggaaataaggaaacagggtttaggaaggcctcac tagacctgttaggatggatgcatggatggatggatggatggacagaatgggtgggtggat @@ -35,8 +35,8 @@ ggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggtgggt ggatggatggatggatagatgggtgggtggatggatggatggatagatgggtgggtgggt ggatggatggatggatggatggatagaataaaaagaaagctaggacatggttctagtgtt cttagcagacgtcccaccatggaagaggtcatggagcacaggggctttgggaaaggtgtt -tacaatcctgcctgcctctcacccagacctcttg ->syndip_2_chr20:20337100-20337808 +tacaatcctgcctgcctctcacccagacctctt +>syndip_2_chr20:20337100-20337807 tacgtggaagcatagggaaggaagggatgtttgttgaggggtggtgtgatgcagcagagg aagaggaccagaggaaagcagccactggaaataaggaaacagggtttaggaaggcctcac tagacctgttaggatggatgcatggatggatggatggatggacagaatgggtgggtggat @@ -73,8 +73,8 @@ ggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggtgggt ggatggatggatggatagatgggtgggtggatggatggatggatagatgggtgggtgggt ggatggatggatggatggatggatagaataaaaagaaagctaggacatggttctagtgtt cttagcagacgtcccaccatggaagaggtcatggagcacaggggctttgggaaaggtgtt -tacaatcctgcctgcctctcacccagacctcttg ->p:HG002_1_chr20:20337100-20337808 +tacaatcctgcctgcctctcacccagacctctt +>p:HG002_1_chr20:20337100-20337807 tacgtggaagcatagggaaggaagggatgtttgttgaggggtggtgtgatgcagcagagg aagaggaccagaggaaagcagccactggaaataaggaaacagggtttaggaaggcctcac tagacctgttaggatggatgcatggatggatggatggatggacagaatgggtgggtggat @@ -111,8 +111,8 @@ ggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggtgggt ggatggatggatggatagatgggtgggtggatggatggatggatagatgggtgggtgggt ggatggatggatggatggatggatagaataaaaagaaagctaggacatggttctagtgtt cttagcagacgtcccaccatggaagaggtcatggggcacaggggctttgggaaaggtgtt -tacaatcctgcctccctctcacccagacctcttg ->p:HG002_2_chr20:20337100-20337808 +tacaatcctgcctccctctcacccagacctctt +>p:HG002_2_chr20:20337100-20337807 tacgtggaagcatagggaaggaagggatgtttgttgaggggtggtgtgatgcagcagagg aagaggaccagaggaaagcagccactggaaataaggaaacagggtttaggaaggcctcac tagacctgttaggatggatgcatggatggatggatggatggacagaatgggtgggtggat @@ -149,8 +149,8 @@ 
ggatggatggatagatgggtgggtggatggatggatggatagatggatagatgggtgggt ggatggatggatggatagatgggtgggtggatggatggatggatagatgggtgggtgggt ggatggatggatggatggatggatagaataaaaagaaagctaggacatggttctagtgtt cttagcagacgtcccaccatggaagaggtcatggggcacaggggctttgggaaaggtgtt -tacaatcctgcctccctctcacccagacctcttg ->ref_chr20:20337100-20337808 +tacaatcctgcctccctctcacccagacctctt +>ref_chr20:20337100-20337807 tacgtggaagcatagggaaggaagggatgtttgttgaggggtggtgtgatgcagcagagg aagaggaccagaggaaagcagccactggaaataaggaaacagggtttaggaaggcctcac tagacctgttaggatggatgcatggatggatggatggatggacagaatgggtgggtggat @@ -187,4 +187,4 @@ ggatagaataaaaaagaaa----------------------------------------- ------------------------------------------------------------ --------------------------------------gctaggacatggttctagtgtt cttagcagacgtcccaccatggaagaggtcatggagcacaggggctttgggaaaggtgtt -tacaatcctgcctgcctctcacccagacctcttg +tacaatcctgcctgcctctcacccagacctctt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_4bb181db4c68004bc40764f9b6d76b42.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_a2808b205fb6e4ff2a694b9b59fe99d7.msa similarity index 89% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_4bb181db4c68004bc40764f9b6d76b42.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_a2808b205fb6e4ff2a694b9b59fe99d7.msa index 5c387ca0..c5bff334 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_4bb181db4c68004bc40764f9b6d76b42.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_a2808b205fb6e4ff2a694b9b59fe99d7.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:61289562-61290374 +>syndip_1_chr20:61289562-61290373 tgaccacaggccagtggcagtccctgcctttggattaaagagccctttgaggcttgcttg gtgatgtcatggaaaacagcatatgacaccaagatatccccatatccaacgagacttgct gggtttatccccgtatccaacgagacttgctgggtttatccccgtatccaacgagacttg @@ -11,9 +11,9 @@ atgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccgtatc caatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccgta tccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccg 
tatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccc -gatatccaatgagacttgctggatttatccc----------------------------- ---------------------------------- ->syndip_2_chr20:61289562-61290374 +gatatccaatgagacttgctggatttatcc------------------------------ +-------------------------------- +>syndip_2_chr20:61289562-61290373 tgaccacaggccagtggcagtccctgcctttggattaaagagccctttgaggcttgcttg gtgatgtcatggaaaacagcatatgacaccaagatatccccatatccaacgagacttgct gggtttatccccgtatccaacgagacttgctgggtttatccccgtatccaacgagacttg @@ -27,8 +27,8 @@ caatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccgta tccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccg tatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccc cgtatccaatgagacttgctgggtttatcccgatatccaatgagacttgctggatttatc -cc------------------------------- ->p:HG002_1_chr20:61289562-61290374 +c------------------------------- +>p:HG002_1_chr20:61289562-61290373 tgaccacaggccagtggcagtccctgcctttggattaaagagccctttgaggcttgcttg gtgatgtcatggaaaacagcatatgacaccaagatatccccatatccaacgagacttgct gggtttatccccgtatccaacgagacttgctgggtttatccccgtatccaacgagacttg @@ -42,8 +42,8 @@ caatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccgta tccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccg tatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccc cgtatccaatgagacttgctgggtttatcccgatatccaatgagacttgctggatttatc -cc------------------------------- ->p:HG002_2_chr20:61289562-61290374 +c------------------------------- +>p:HG002_2_chr20:61289562-61290373 tgaccacaggccagtggcagtccctgcctttggattaaagagccctttgaggcttgcttg gtgatgtcatggaaaacagcatatgacaccaagatatccccatatccaacgagacttgct gggtttatccccgtatccaacgagacttgctgggtttatccccgtatccaacgagacttg @@ -56,9 +56,9 @@ atgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccgtatc caatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccgta tccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccg tatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccc 
-gatatccaatgagacttgctggatttatccc----------------------------- ---------------------------------- ->ref_chr20:61289562-61290374 +gatatccaatgagacttgctggatttatcc------------------------------ +-------------------------------- +>ref_chr20:61289562-61290373 tgaccacaggccagtggcagtccctgcctttggattaaagagccctttgaggcttgcttg gtgatgtcatggaaaacagcatatgacaccaagatatccccatatccaacgagacttgct gggtttatccccgtatccaacgagacttgctgggtttatccccgtatccaacgagacttg @@ -72,4 +72,4 @@ caatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccgta tccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatccccg tatccaatgagacttgctgggtttatcccgatatccaatgagacttgctggatttatccc cgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgctgggtttatc -ccgatatccaatgagacttgctggatttatccc +ccgatatccaatgagacttgctggatttatcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_a4fa20a9084e2838ff934c5663169e02.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_a4fa20a9084e2838ff934c5663169e02.msa deleted file mode 100644 index 0334106a..00000000 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_a4fa20a9084e2838ff934c5663169e02.msa +++ /dev/null @@ -1,190 +0,0 @@ ->syndip_1_chr20:63948494-63948754 -ccatctgtataaaatccaggaaacgaaggacgctcaccgccatgcgcctcggtgtgagga -gggaggcgtgtgtgagagggaggggatgtgtgtgagaggga-ggggcgtgtgtgagaggg -aggggcgtgtgtgaggg---aggggtgtgtgtgagagggaggggcgtgtgtgagagggag -ggggcgtgtgtgagaggcagggg-gcgtgtgtgtga-ggggcgtgtgtgagagggagggg -cgtgtgtgagagggagggg-gcgtgtgtgagggggggcgtgtgtgagaggtaggggcgtg -tgtgagagggaggggcgtgtgtgagggagggggcgtgtgtgagaggg------------- -----aggggcgtgtgtgtgagggaggggcgtgtgagag-ggagggggcgtgtgtgagagg -caggggcgtgtgtgtgagggaggggcgtgagagggaggggcgtgtgtgagagggaggggg -cgtgtgtgagggggggcgtgtgtgagaggtaggggcgtgtgt----gggagggggcgtgt -gtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagacggaggggcgtgtgt -gagagggagggggcgtgtgtgagagggagaggcgtgtgtgagagggaggggcgtgtgtga -gggagggg---gcgtgtgagagggaggggcgtgtgtgagagggagggggcgtgtgtgaga -gggagaggcgtgtgtgagagggaggggcgtgtgtgagggagggggcgtgtgtgagaggga 
-ggggcgtgtgagaggg-aggggcgtgtgtgagagg----------gagggggcgtgtgtg -agagggagggggcgtgtgtgagagggaggggcgtgtgtgaagggaggggcgtgtgtgaga -gggagggggcgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggagg -gggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggagggg -cgtgtgtgagaggcaggggcgtgtgtgagggaggggcgtgtgtgagag--------ggag -ggggcgtgtgagagggagggggcgtgtgtgagagggagaggcgtgtgtgagaggga-ggg -gcgtgtgtgagaggcaggggcgtgtgtgagggaggggcgtgtgtgagagggagggggcgt -gtgtgagagggagggggcgtgt-------------gtgagagggagaggcgtgtgtgaga -gggaggggcgtgtgtgagggagggggcgtgtgtgagagggagaggcgtgtgtgagaggga -gggggcgtgtgtgagagggagaggcgtgtgtgagagggaggggcgtgtgtgagggagggg -gcgtgtgtgagagggaggggcgtgtgtgagagggagggggcgtgtgtgagagggagaggc -gtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtg -tgtgagagggag--------------ggggcgtgtgtgagagggaggggcgtgtgtgaga -gggaggggcgtgtgtgagagggagggggcgtgtgtgagaggga-ggggcgtgtgtgagag -ggaggggcgtgtgtgaagggaggggcgtgtgtgagagtgggggcgtgtgtgagttagggg -cgtgagaggtaggggcgtgtgtgagagggaggggcgtgtgtgagagggagggggcgtg-- ----agagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtg -agagggaaggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgag -aggcaggggcgtgtgtgagggaggggcgtgtgtgagagggagggg-gcgtgtgagaggga -ggggcgtgtgtgagagggaggggc--gtgtgagagggaggggcgtgtgtgagaggga-gg -ggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgagagggagggggcgtg -tgtgagggaggggcgtgtgtgagagggaggggggcgtgtgtgagagggagggggcgtgtg -tgagagggagggggtgtgtgtgagaccgagggggcgtgtaagggagggggtgtgtgtgag -agggaagcgtgtgcacacaccagccctaacc ->syndip_2_chr20:63948494-63948754 -ccatctgtataaaatccaggaaacgaaggacgctcaccgccatgcgcctcggtgtgagga -gggaggcgtgtgtgagagggaggggatgtgtgtgagagggagggggcgtgtgtgagaggg -aggggcgtgtgtgagag---aggggtgtgtgtgagagggaggggcgtgtgtgagagggag -ggggcgtgtgtgaaagggaggggcgtgtgtgaggga-ggggcgtgtgtgagagggagggg -cgtgtgtgagaggcaggggcgtgtgtgagggagggggcgtgtgtgagagggaggggcgtg -tgtgagaggcaggggcgtgtgtgagagtgggggcgtgtgtgagggggggcgtgtgtgaga -gggagggggcgtgtgtgagaggtaggggcgtgtgtgagagggaggggcgtgtgtgagagg -gaggggcgtgtgtgagaggcaggggcgtgtgtggggggggcgtgtgtgag-gggggggcg 
-tgtgtgagagggagggggcgtgtgagagggaggggcgtgtgtgagagggagggggcgtgt -gtgtgaggga-ggggcgtgtgtgagagggaggggcgtgtgtga---agggaggggtgtgt -gtgagagtgggggcgtgtgtgagaggg--------gcgtgagaggtaggggcgtgtgtga -gagggaggggtgtgtgtgagagggaggggcgtgtgtgagaggga-ggggcgtgtgtgag- -ggagggggcgtgtgtgagagggaggggcgtgtgtgagggagggggcgtgtgtgagaggga -ggggcgtgtgtgagagtgggggcgtgtgtgggggggcgtgtgtgagggggggcgtgtgtg -agaggta-ggggcgtgtgtgagagggaggggcgtgtgtg-agggaggggcgtgtgtgaga -ggcaggggcgtgtgtgggggggggcgtgtgt--gaggggggggcgtgtgtgagagggagg -gggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggagggg -cgtgtgtgagaggcaggggcgtgtgtggggggggcgtgtgtgaggggggcgtgtgagagg -gaggggcgtgtgtggggaggggcgtgtgtgagagggaggggcgtgtgtgagagggagggg -gcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgaagggaggggtgt -gtgtgagagtgggggcgtgtgtgagagttaggggcgtgagaggtaggggcgtgtgtgaga -gggaggggcgtgtgtgaggga-ggggcgtgtgtgagagggaggggcgtgtgtgagaggga -gggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagggagggg -gcgtgtgcgagagggaggggcgtgtgtgagagggagggggcgtgtgtgag-ggagggggc -gtgtgtgagagggaggggcgtgtgagag-ggagggggcgtgtgtgagaggcaggggcgtg -tgtgtgagggaggggcgtgagagggaggggcgtgtgtga---gggggggcgtgtgtgaga -ggtaggggcgtgtgtgagagggagggggcgtgtgtgagagggagggggcgtgtgtgagag -ggaggggcgtgtgtgagggagggggcgtgt------------------------------ --gtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggagggggcgtgtg -tgagagggagggggcgtgtgtgagagggaggggcgtgtgtga-agggaggggcgtgtgtg -agagtg--ggggcgtgtgtgagaggg--------gcgtgagaggtaggggcgtgtgtgag -agggagggg-gcgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagaggga -ggggcgtgtgtgagagggaggggcgtgtgtgagaggcaggggcgtgtgtgagagggaggg -ggcgtgtgtgagagggaggggcgtgt---------------gtgagagggaaggggcgtg -tgagagggaggggcgtgtgtgagaggaaggggcgtgtgtgaga---gggaggggcgtgtg -tgagagggagggggtgtgtgtgagaccgagggggcgtgtaagggagggggtgtgtgtgag -agggaagcgtgtgcacacaccagccctaacc ->p:HG002_1_chr20:63948494-63948754 -ccatctgtataaaatccaggaaacgaaggacgctcaccgccatgcgcctcggtgtgagga -gggaggcgtgtgtgagagggaggggatgtgtgtgagagggagggggcgtgtgtgagaggg -aggggcgtgtgtgagag---aggggtgtgtgtgagagggaggggcgtgtgtgagagggag 
-ggggcgtgtgtgaaagggaggggcgtgtgtgaggga-ggggcgtgtgtgagagggagggg -cgtgtgtgagaggcaggggcgtgtgtgagggagggggcgtgtgtgagagggaggggcgtg -tgtgagaggcaggggcgtgtgtgagagtgggggcgtgtgtgagggggggcgtgtgtgaga -gggagggggcgtgtgtgagaggtaggggcgtgtgtgagagggaggggcgtgtgtgagagg -gaggggcgtgtgtgagaggcaggggcgtgtgtggggggggcgtgtgtgag-gggggggcg -tgtgtgagagggagggggcgtgtgagagggaggggcgtgtgtgagagggagggggcgtgt -gtgtgaggga-ggggcgtgtgtgagagggaggggcgtgtgtga---agggaggggtgtgt -gtgagagtgggggcgtgtgtgagaggg--------gcgtgagaggtaggggcgtgtgtga -gagggaggggtgtgtgtgagagggaggggcgtgtgtgagaggga-ggggcgtgtgtgag- -ggagggggcgtgtgtgagagggaggggcgtgtgtgagggagggggcgtgtgtgagaggga -ggggcgtgtgtgagagtgggggcgtgtgtgggggggcgtgtgtgagggggggcgtgtgtg -agaggta-ggggcgtgtgtgagagggaggggcgtgtgtg-agggaggggcgtgtgtgaga -ggcaggggcgtgtgtgggggggggcgtgtgt--gaggggggggcgtgtgtgagagggagg -gggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggagggg -cgtgtgtgagaggcaggggcgtgtgtggggggggcgtgtgtgaggggggcgtgtgagagg -gaggggcgtgtgtggggaggggcgtgtgtgagagggaggggcgtgtgtgagagggagggg -gcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgaagggaggggtgt -gtgtgagagtgggggcgtgtgtgagagttaggggcgtgagaggtaggggcgtgtgtgaga -gggaggggcgtgtgtgaggga-ggggcgtgtgtgagagggaggggcgtgtgtgagaggga -gggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagggagggg -gcgtgtgcgagagggaggggcgtgtgtgagagggagggggcgtgtgtgag-ggagggggc -gtgtgtgagagggaggggcgtgtgagag-ggagggggcgtgtgtgagaggcaggggcgtg -tgtgtgagggaggggcgtgagagggaggggcgtgtgtga---gggggggcgtgtgtgaga -ggtaggggcgtgtgtgagagggagggggcgtgtgtgagagggagggggcgtgtgtgagag -ggaggggcgtgtgtgagggagggggcgtgt------------------------------ --gtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggagggggcgtgtg -tgagagggagggggcgtgtgtgagagggaggggcgtgtgtga-agggaggggcgtgtgtg -agagtg--ggggcgtgtgtgagaggg--------gcgtgagaggtaggggcgtgtgtgag -agggagggg-gcgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagaggga -ggggcgtgtgtgagagggaggggcgtgtgtgagaggcaggggcgtgtgtgagagggaggg -ggcgtgtgtgagagggaggggcgtgt---------------gtgagagggaaggggcgtg -tgagagggaggggcgtgtgtgagaggaaggggcgtgtgtgaga---gggaggggcgtgtg 
-tgagagggagggggtgtgtgtgagaccgagggggcgtgtaagggagggggtgtgtgtgag -agggaagcgtgtgcacacaccagccctaacc ->p:HG002_2_chr20:63948494-63948754 -ccatctgtataaaatccaggaaacgaaggacgctcaccgccatgcgcctcggtgtgagga -gggaggcgtgtgtgagagggaggggatgtgtgtgagagggagggggcgtgtgtgagaggg -aggggcgtgtgtgag-----aggggtgtgtgtgagagggaggggcgtgtgtgagagggag -ggggcgtgtgtgagaggcagggg-gcgtgtgtgtga-ggggcgtgtgtgagagggagggg -cgtgtgtgagagggagggg-gcgtgtgtgagggggggcgtgtgtgagaggtaggggcgtg -tgtgagagggaggggcgtgtgtgagggagggggcgtgtgtgagaggg------------- -----aggggcgtgtgtgtgagggaggggcgtgtgagag-ggagggggcgtgtgtgagagg -caggggcgtgtgtgtgagggaggggcgtgagagggaggggcgtgtgtgagagggaggggg -cgtgtgtgagggggggcgtgtgtgagaggtaggggcgtgtgt----gggagggggcgtgt -gtgagagggagggggcgtgtgtgagagggaggggcgtgtgtgagacggaggggcgtgtgt -gagagggagggggcgtgtgtgagagggagaggcgtgtgtgagagggaggggcgtgtgtga -gggagggg---gcgtgtgagagggaggggcgtgtgtgagagggagggggcgtgtgtgaga -gggagaggcgtgtgtgagagggaggggcgtgtgtgagggagggggcgtgtgtgagaggga -ggggcgtgtgagaggg-aggggcgtgtgtgagagg----------gagggggcgtgtgtg -agagggagggggcgtgtgtgagagggaggggcgtgtgtgaagggaggggcgtgtgtgaga -gggagggggcgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggagg -gggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggagggg -cgtgtgtgagaggcaggggcgtgtgtgagggaggggcgtgtgtgagag--------ggag -ggggcgtgtgagagggagggggcgtgtgtgagagggagaggcgtgtgtgagaggga-ggg -gcgtgtgtgagaggcaggggcgtgtgtgagggaggggcgtgtgtgagagggagggggcgt -gtgtgagagggagggggcgtgt-------------gtgagagggagaggcgtgtgtgaga -gggaggggcgtgtgtgagggagggggcgtgtgtgagagggagaggcgtgtgtgagaggga -gggggcgtgtgtgagagggagaggcgtgtgtgagagggaggggcgtgtgtgagggagggg -gcgtgtgtgagagggaggggcgtgtgtgagagggagggggcgtgtgtgagagggagaggc -gtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtg -tgtgagagggag--------------ggggcgtgtgtgagagggaggggcgtgtgtgaga -gggaggggcgtgtgtgagagggagggggcgtgtgtgagaggga-ggggcgtgtgtgagag -ggaggggcgtgtgtgaagggaggggcgtgtgtgagagtgggggcgtgtgtgagttagggg -cgtgagaggtaggggcgtgtgtgagagggaggggcgtgtgtgagagggagggggcgtg-- ----agagggaggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtg 
-agagggagggggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgtgtgag -aggcaggggcgtgtgtgagggaggggcgtgtgtgagagggagggg-gcgtgtgagaggga -ggggcgtgtgtgagagggaggggc--gtgtgagagggaggggcgtgtgtgagaggga-gg -ggcgtgtgtgagagggaggggcgtgtgtgagagggaggggcgtgagagggagggggcgtg -tgtgaggaaggggcgtgtgtgagagggaggggggcgtgtgtgagagggagggggcgtgtg -tgagagggagggggtgtgtgtgagaccgagggggcgtgtaagggagggggtgtgtgtgag -agggaagcgtgtgcacacaccagccctaacc ->ref_chr20:63948494-63948754 -ccatctgtataaaatccaggaaacgaaggacgctcaccgccatgcgcctcggtgtgagga -gggaggcgtgtgtgagagggaggggatgtgtgtgagagggagggggcgtgtgtgagaggg -aggggcgtgtgtgagagggaaggggcgtgtgtgagagggaggggcgtgtgtgagagggag -ggggtgtgtgtgagaccgagggg-gcgtgtaagggagggggtgtgtgtgagagggaagcg -tgtgcaca---------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- 
------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------caccagccctaacc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_31498c5319740498de0c72700ff85d56.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_a9d398ab39129efb27eabfa2cdf49d07.msa similarity index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_31498c5319740498de0c72700ff85d56.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_a9d398ab39129efb27eabfa2cdf49d07.msa index 8dbb59b0..b50a92ee 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_31498c5319740498de0c72700ff85d56.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_a9d398ab39129efb27eabfa2cdf49d07.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:62349512-62350033 +>syndip_1_chr20:62349512-62350032 gaatcctacagtcagctcaactatcattcaagagggaggacaaaacagccgttttcagac aattccggatagactgtcatggaatagaaaaactacagggtgcatttcaggaaggaggaa gcagcaacccaggggatggggatggggatggggatggggatggtgggggtgatgggggtg @@ -25,8 +25,8 @@ ggtgggggtgggggtgatgacggtgat--------------------------------- ------------------------ggtgggggtgatgatggtgggagtgggctggtagtg ggggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcagaaaattgctg gctgatgacatggagcccccttccctggaggagaggtggggtggcgggtaaagggtgcag -ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtcc ->syndip_2_chr20:62349512-62350033 
+ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtc +>syndip_2_chr20:62349512-62350032 gaatcctacagtcagctcaactatcattcaagagggaggacaaaacagccgttttcagac aattccggatagactgtcatggaatagaaaaactacagggtgcatttcaggaaggaggaa gcagcaacccaggggatggggatggggatggggatggggatggtgggggtgatgggggtg @@ -53,8 +53,8 @@ gggggtgggggtgatgatggtgatggtgggggtgatgatggtgagggtgatgatggtggg ggtgatggtgggggtgggggtgggggtgggggtgatgatggtgggagtgggctggtagtg ggggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcagaaaattgctg gctgatgacatggagcccccttccctggaggagaggtggggtggcgggtaaagggtgcag -ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtcc ->p:HG002_1_chr20:62349512-62350033 +ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtc +>p:HG002_1_chr20:62349512-62350032 gaatcctacagtcagctcaactatcattcaagagggaggacaaaacagccgttttcagac aattccggatagactgtcatggaatagaaaaactacagggtgcatttcaggaaggaggaa gcagcaacccaggggatggggatggggatggggatggggatggtgggggtgatgggggtg @@ -81,8 +81,8 @@ gggggtgggggtgatgatggtgatggtgggggtgatgatggtgagggtgatgatggtggg ggtgatggtgggggtgggggtgggggtgggggtgatgatggtgggagtgggctggtagtg ggggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcagaaaattgctg gctgatgacatggagcccccttccctggaggagaggtggggtggcgggtaaagggtgcag -ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtcc ->p:HG002_2_chr20:62349512-62350033 +ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtc +>p:HG002_2_chr20:62349512-62350032 gaatcctacagtcagctcaactatcattcaagagggaggacaaaacagccgttttcagac aattccggatagactgtcatggaatagaaaaactacagggtgcatttcaggaaggaggaa gcagcaacccaggggatggggatggggatggggatggggatggtgggggtgatgggggtg @@ -109,8 +109,8 @@ ggtgggggtgggggtgatgacggtgat--------------------------------- ------------------------ggtgggggtgatgatggtgggagtgggctggtagtg ggggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcagaaaattgctg gctgatgacatggagcccccttccctggaggagaggtggggtggcgggtaaagggtgcag -ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtcc ->ref_chr20:62349512-62350033 +ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtc +>ref_chr20:62349512-62350032 
gaatcctacagtcagctcaactatcattcaagagggaggacaaaacagccgttttcagac aattccggatagactgtcatggaatagaaaaactacagggtgcatttcag---------- ------------------------------------------------------------ @@ -137,4 +137,4 @@ ggtgatggtgggggtgatgacggtgat--------------------------------- ------------------------ggtgggggtgatgatggtgggagtgggctggtagtg ggggtgatggtttgggtgggagtaaggtgatggtggaggtaaagaggcagaaaattgctg gctgatgacatggagcccccttccctggaggagaggtggggtggcgggtaaagggtgcag -ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtcc +ggagaacatgggcaggctaatgttttgggacactggtgggtgaccaggtc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_67b17e44da59d4e4cbdbdfe00749d51f.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_aaf3284dfcd9e22479f7e2d5c0551083.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_67b17e44da59d4e4cbdbdfe00749d51f.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_aaf3284dfcd9e22479f7e2d5c0551083.msa index 7c44fb6d..b9a35234 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_67b17e44da59d4e4cbdbdfe00749d51f.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_aaf3284dfcd9e22479f7e2d5c0551083.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:22081861-22084223 +>syndip_1_chr20:22081861-22084222 ggatggttccatatttttgcagttgtgtattttgctgctataaacatgtgtgtgcaggtg tctttttcatataacaacttcttttcctttgggtaggtttacaaaaactatctttttccc gttcttttacttatatatgcttatttatatatattatatatacacttttaatatatactt @@ -45,8 +45,8 @@ atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt atatataatatatacatatgttgtatatgtattatatgtatatattacatattatatata cttatgtatgtataatatatatataatatatatatatcttaggcaacttttcattgctca tttttattaaaccctgaaaactttcactttactccatttctaagtttgatattctggtag -aattaaagtaagagttttctttctttcatctcctttttatctctctca ->syndip_2_chr20:22081861-22084223 +aattaaagtaagagttttctttctttcatctcctttttatctctctc +>syndip_2_chr20:22081861-22084222 ggatggttccatatttttgcagttgtgtattttgctgctataaacatgtgtgtgcaggtg 
tctttttcatataacaacttcttttcctttgggtaggtttacaaaaactatctttttccc gttcttttacttatatatgcttatttatatatattatatatacacttttaatatatactt @@ -93,8 +93,8 @@ atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt atatataatatatacatatgttgtatatgtattatatgtatatattacatattatatata cttatgtatgtataatatatatataatatatatatatcttaggcaacttttcattgctca tttttattaaaccctgaaaactttcactttactccatttctaagtttgatattctggtag -aattaaagtaagagttttctttctttcatctcctttttatctctctca ->p:HG002_1_chr20:22081861-22084223 +aattaaagtaagagttttctttctttcatctcctttttatctctctc +>p:HG002_1_chr20:22081861-22084222 ggatggttccatatttttgcagttgtgtattttgctgctataaacatgtgtgtgcaggtg tctttttcatataacaacttcttttcctttgggtaggtttacaaaaactatctttttccc gttcttttacttatatatgcttatttatatatattatatatacacttttaatatatactt @@ -141,8 +141,8 @@ atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt atatataatatatacatatgttgtatatgtattatatgtatatattacatattatatata cttatgtatgtataatatatatataatatatatatatcttaggcaacttttcattgctca tttttattaaaccctgaaaactttcactttactccatttctaagtttgatattctggtag -aattaaagtaagagttttctttctttcatctcctttttatctctctca ->p:HG002_2_chr20:22081861-22084223 +aattaaagtaagagttttctttctttcatctcctttttatctctctc +>p:HG002_2_chr20:22081861-22084222 ggatggttccatatttttgcagttgtgtattttgctgctataaacatgtgtgtgcaggtg tctttttcatataacaacttcttttcctttgggtaggtttacaaaaactatctttttccc gttcttttacttatatatgcttatttatatatattatatatacacttttaatatatactt @@ -189,8 +189,8 @@ atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt atatataatatatacatatgttgtatatgtattatatgtatatattacatattatatata cttatgtatgtataatatatatataatatatatatatcttaggcaacttttcattgctca tttttattaaaccctgaaaactttcactttactccatttctaagtttgatattctggtag -aattaaagtaagagttttctttctttcatctcctttttatctctctca ->ref_chr20:22081861-22084223 +aattaaagtaagagttttctttctttcatctcctttttatctctctc +>ref_chr20:22081861-22084222 ggatggttccatatttttgcagttgtgtattttgctgctataaacatgtgtgtgcaggtg tctttttcatataacaacttcttttcctttgggtaggtttacaaaaactatctttttccc gttcttttacttatatatgcttatttatatatattatatatacacttttaatatatactt @@ -237,4 +237,4 @@ 
atatataatatatacatatgttgtatatgtatatataatatatacatatgttgtatatgt atatataatatatacatatgttgtatatgtattatatgtatatattacatattatatata cttatgtatgtataatatatatataatatatatatatcttaggcaacttttcattgctca tttttattaaaccctgaaaactttcactttactccatttctaagtttgatattctggtag -aattaaagtaagagttttctttctttcatctcctttttatctctctca +aattaaagtaagagttttctttctttcatctcctttttatctctctc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_c7c98c4fca6fe269a9994162cd302ea0.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ac49004ec0dccaa0aa222eebaca554b5.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_c7c98c4fca6fe269a9994162cd302ea0.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_ac49004ec0dccaa0aa222eebaca554b5.msa index 02da97d1..360afd32 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_c7c98c4fca6fe269a9994162cd302ea0.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ac49004ec0dccaa0aa222eebaca554b5.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:8661811-8662758 +>syndip_1_chr20:8661811-8662757 gccaacctacgttaagtaattcataacttcagtatttaataaatcttcagtgaaaggcaa gaggctacacacagtgacagacccagagataaaccctgtgcccctagagatttgcactcc tcttaaaagtgtatatatatataattatataattatttattatataattatatataatta @@ -31,8 +31,8 @@ ataatatataattttttataattatgtgtaatatataattatttatcatataattacaat atatattattaacataatatctaaatatattaataatctaatatctataattaatataat atataaatattacttcttgaaatccaacgtgcacctgaatcacctgagccgttgttaaaa tgcagattcgagatcctgcatttctaacaagttactgggtgaggcagaagatgcttagca -aaggcccacacc ->syndip_2_chr20:8661811-8662758 +aaggcccacac +>syndip_2_chr20:8661811-8662757 gccaacctacgttaagtaattcataacttcagtatttaataaatcttcagtgaaaggcaa gaggctacacacagtgacagacccagagataaaccctgtgcccctagagatttgcactcc tcttaaaagtgtatatatatataattatataattatttattatataatgatatataatta @@ -65,8 +65,8 @@ ataatatataattttttataattatgtgtaatatataattatttatcatataattacaat atatattattaacataatatctaaatatattaataatctaatatctataattaatataat atataaatattacttcttgaaatccaacgtgcacctgaatcacctgagccgttgttaaaa 
tgcagattcgagatcctgcatttctaacaagttactgggtgaggcagaagatgcttagca -aaggcccacacc ->p:HG002_1_chr20:8661811-8662758 +aaggcccacac +>p:HG002_1_chr20:8661811-8662757 gccaacctacgttaagtaattcataacttcagtattcaataaatcttcagtgaaaggcaa gaggctacacacagtgacagacccagagataaaccctgtgcccctagagatttgcactcc tcttaaaagtgtatatatatataattatataattatttattatataatgatatataatta @@ -99,8 +99,8 @@ ataatatataattttttataattatgtgtaatatataattatttatcatataattacaat atatattattaacataatatctaaatatattaataatctaatatctataattaatataat atataaatattacttcttgaaatccaacgtgcacctgaatcacctgagccgttgttaaaa tgcagattcgagatcctgcatttctaacaagttactgggtgaggcagaagatgcttagca -aaggcccacacc ->p:HG002_2_chr20:8661811-8662758 +aaggcccacac +>p:HG002_2_chr20:8661811-8662757 gccaacctacgttaagtaattcataacttcagtatttaataaatcttcagtgaaaggcaa gaggctacacacagtgacagacccagagataaaccctgtgcccctagagatttgcactcc tcttaaaagtgtatatatatataattatataattatttattatataattatatataatta @@ -133,8 +133,8 @@ ataatatataattttttataattatgtgtaatatataattatttatcatataattacaat atatattattaacataatatctaaatatattaataatctaatatctataattaatataat atataaatattacttcttgaaatccaacgtgcacctgaatcacctgagccgttgttaaaa tgcagattcgagatcctgcatttctaacaagttactgggtgaggcagaagatgcttagca -aaggcccacacc ->ref_chr20:8661811-8662758 +aaggcccacac +>ref_chr20:8661811-8662757 gccaacctacgttaagtaattcataacttcagtatttaataaatcttcagtgaaaggcaa gaggctacacacagtgacagacccagagataaaccctgtgcccctagagatttgcactcc tcttaaaag--------------------------------------------------- @@ -167,4 +167,4 @@ ataatatataattttttataattatgtgtaatatataattatttatcatataattacaat atatattattaacataatatctaaatatattaataatctaatatctataattaatataat atataaatattacttcttgaaatccaacgtgcacctgaatcacctgagccgttgttaaaa tgcagattcgagatcctgcatttctaacaagttactgggtgaggcagaagatgcttagca -aaggcccacacc +aaggcccacac diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_ae16a0bdeb31c91e26795edee8ad33fb.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ae16a0bdeb31c91e26795edee8ad33fb.msa deleted file mode 100644 index a5d29a6d..00000000 --- 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_ae16a0bdeb31c91e26795edee8ad33fb.msa +++ /dev/null @@ -1,660 +0,0 @@ ->syndip_1_chr20:63693225-63693985 -tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg -taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt -cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct -atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccaccttc -accaccaccacctccaccaccacctccaccacctccacctccaccacctccaccacctcc -accaccaccacctccaccaccaccacctccaccaccaccaccaccaccacctccaccacc -accaccaccaccaccacctccacctccaccacctccaccaccacctccacctccaccacc -acctccacctccacctccaccaccaccacctccaccacctccaccacctccacctccacc -accacctccacctccaccaccacctccacctccacctccacctccacctccaccaccacc -acctccaccacctccaccacctccacctccaccaccaccacctccacctccacctccacc -acctccacctccaccacctccacctccaccacctccaccaccaccaccaccaccaccacc -acctccaccaccaccaccaccaccacctccaccacctccacctccaccacctccaccacc -accacctccaccaccacctccaccaccacctccacctccaccacctccacctccaccacc -tccacctccacctccaccaccacctccaccacctccaccaccacctccaccacctccacc -tccaccaccaccacctccaccaccaccaccaccaccacctccaccacctccacctccacc -acctccaccaccaccacctccaccaccacctccaccaccacctccacctccaccacctcc -acctccaccacctccacctccacctccaccaccacctccaccacctccaccaccacctcc -accacctccaccacctccaccaccacctccaccacctccacctccaccaccaccacctcc -acctccaccaccaccacctccaccaccacctccacctccaccaccaccacctccacctcc -accaccaccacctccacca---ccacctccaccaccaccaccaccaccaccacctccacc -tccaccacctccaccaccaccaccaccacctccacctccaccacctccaccacctccacc -accaccaccaccacctccaccacctccaccaccacctccaccacctccacctccaccacc -accacctccaccaccaccacctccacctccacctccacctccacctccaccaccacctcc -acctccaccaccacctccaccaccacctccacctccaccacctccacctccacctccacc -acctccaccaccaccaccaccaccaccaccaccaccaccacctccaccacctccacctcc -acctccaccaccaccaccaccacctccaccaccaccaccaccacctccaccacctccacc -accacctccaccaccaccaccaccaccaccaccaccacctccaccacctccacctccacc -tccacctccaccacctccaccacctccaccaccacctccaccaccaccaccaccacctcc -accaccacctccacctccaccaccacctccaccaccacctccacctccaccacctccacc 
-tccaccacctccaccacctccacctccaccacctccacctccacctccaccacctccacc -accaccaccaccaccaccaccaccaccaccacctccaccaccaccacctccaccaccacc -tccaccacctccaccaccacctccaccaccaccaccaccaccaccaccaccaccaccacc -tccaccaccacctccacctccaccaccacctccaccaccacctccacctccaccacctcc -acctccaccacctccaccacctccaccaccaccaccaccacctccaccaccacctccacc -tccaccaccaccaccaccaccaccacctccaccaccaccacctccaccaccacctccacc -acctccaccaccacctccacctccaccacctccaccaccacctccaccaccacctccacc -accaccacctccaccaccacctccaccacctccaccaccacctccacctccaccacctcc -accaccacctccaccaccacctccaccaccaccacctccaccaccacctccaccacctcc -accaccacctccacctccaccacctccacctccacctccaccacctccaccaccaccacc -accaccaccaccaccaccaccaccaccaccaccaccacctccacctccacctccaccacc -tccacctccaccacctccaccaccacctccacctccaccacctccacctccacctccacc -acctccacctccaccacctccacctccacctccaccacctccaccaccaccacctccacc -accacctccaccacctccaccaccacctccacctccaccacctccaccaccacctccacc -accacctccaccaccaccacctccaccaccacctccaccacctccaccaccacctccacc -tccaccacctccacctccacctccaccacctccaccaccaccaccaccaccaccaccacc -accaccacctccaccaccaccacctccaccaccacctccaccaccaccaccaccacctcc -accaccaccaccaccacctccaccaccacctccaccacctccaccaccaccaccaccacc -accaccacctccaccaccacctccaccaccaccaccaccaccaccaccacctccaccacc -tccacctccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccacc -accaccacctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccacc -accaccaccacctccaccacctccaccacctccaccaccacctccaccacctccaccacc -accaccaccacctccaccaccacctccaccaccaccaccaccaccaccacctccacctcc -acctccaccacctccacctccaccacctccacctccaccacctccacctccacctccacc -acctccaccaccacctccacctccaccaccacctcctccaccaccaccacctccaccacc -acctccaccaccaccacctccacctccaccacctccacctccaccaccaccacctccacc -tccaccaccaccacctccaccacctccaccaccacctccaccacctccacctccaccacc -accacctccaccaccaccacctccacctccaccaccaccaccaccaccacctccacctcc -accaccacctccaccaccacctccacctccacctccaccacctccaccaccacctccacc -acctccaccacctccacctccaccaccaccaccacctccaccaccaccaccaccacctct -accacctctaccaccaccaccaccacctccaccaccaccaccaccacctccaccaccacc -accaccaccacctccaccaccacctccaccacctccaccaccaccaccaccacctctacc 
-acctccacctccaccaccaccaccacctctaccaccaccaccaccacctccaccaccacc -accacctccaccaccaccaccacctccaccaccacctccaccaccacctccaccacctcc -accaccaccaccacctccacctccaccacctccaccaccacctccaccaccaccaccacc -accacctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctcc -acctccaccaccaccaccacctccacctccaccaccaccaccaccaccaccaccaccacc -acctccaccaccaccaccaccaccacctctaccaccaccaccaccacctccaccaccacc -accacctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccaccacc -accacctctaccaccaccaccaccacctccaccaccaccaccacctccaccaccaccacc -acctccaccaccacctccaccaccaccaccacctccaccaccacctccaccaccacctcc -accacctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccacc -accacctccaccaccaccaccaccaccaccacctccaccaccacctccaccaccacctcc -accacctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccacc -accacctccaccaccacctccaccaccaccaccaccaccacctccaccaccacctccacc -acctccaccaccaccaccaccacctctaccacctctaccaccaccaccaccacctccacc -accaccaccacctccaccaccaccaccacctccaccaccaccacctccacctccaccacc -accacctccaccacctccaccaccacctccaccaccaccacctccacctccaccaccacc -acctccaccacctccaccaccaccaccaccaccaccaccacctccaccaccaccaccacc -accacctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctct -accaccaccaccaccacctccaccaccaccaccacctccaccaccaccaccacctccacc -accacctccaccaccaccacctccacctccaccaccaccacctccaccaccaccacctcc -acctccaccaccaccacctccaccacctccaccaccacctccaccaccaccacctccacc -tccaccaccaccacctccaccacctccaccaccaccacctccaccaccaccacctccacc -acctccaccaccaccaccaccacctccaccacctccaccacctccaccaccaccaccacc -acctccacctccacctccaccacctccaccaccaccacctccaccaccaccaccaccacc -accaccacctccacctccacctccaccacctccacctccaccaccaccaccaccaccacc -accaccaccaccacctccacctccacctccaccacctccaccaccaccacctccaccacc -accacctccacctccaccacctccaccaccacctccaccaccaccacctccacctccacc -accaccacctccaccaccaccacctccacctccaccaccaccacctccaccacctccacc -accacctccaccaccaccacctccacctccaccaccaccacctccaccacctccaccacc -accacctccaccaccaccacctccaccacctccaccaccaccaccaccacctccaccacc -tccaccacctccaccaccaccaccaccacctccacctccacctccaccacctccaccacc -accacctccaccaccaccaccaccaccacctccaccacctccaccaccaccaccaccacc 
-tccacctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccacc -accaccacctccacctccacctccaccacctccacctccaccaccaccaccaccaccacc -acctccaccaccacctccacctccacctccaccacctccacctccaccaccaccaccacc -accaccaccaccaccaccacctccacctccacctccaccaccaccacctctaccaccacc -tccaccaccaccaccaccaccaccaccaccacctccaccacctccaccaccaccacctcc -accaccaccacctccacctccaccaccaccaccacctccaccacctccaccaccacctcc -acctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccaccacc -tccaccacctccaccaccaccacctccaccaccaccacctccacctccaccacctccacc -acctccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccaccacc -tctaccaccacctccaccaccaccaccaccaccacctccacctccaccacctccacctcc -accacctctaccaccacctccaccaccaccacctccacctccacctccaccaccaccacc -tccacctccacctccaccaccaccaccaccaccaccacctccaccaccaccaccaccacc -accaccaccacctccacctccaccaccaccacctccaccaccaccacctccaccacctcc -accaccacctccacctccaccaccaccacctccacctccacctccaccaccaccaccacc -acctccaccaccaccacctccaccaccaacacctccacctccacctccaccaccaccacc -accacctccacctccaccaccacctccaccaccaccacctccacctccaccaccaccacc -tccaccaccacctccaccacctccaccaccaccacctccaccaccacctccaccaccacc -accaccaccacctccacctccacctccaccaccaccacctctaccaccacctccaccacc -accaccaccaccaccaccaccacctccacctccacctccaccacctccacctccaccacc -accaccaccaccaccacctccaccaccacctccacctccacctccaccacctccacctcc -accaccaccaccaccaccaccaccaccaccaccacctccacctccacctccaccaccacc -acctctaccaccacctccaccaccaccaccaccaccaccaccaccacctccaccaccacc -acctccacctccaccaccaccacctccacctccaccaccaccacctccacctccaccacc -accaccaccaccaccacctccaccaccaccacctccaccacctccaccaccacctccacc -accaccacctccaccacctccaccaccacctccacctccaccaccaccacctccaccacc -accacctccacctccaccaccaccacctccacctccacctccaccacctccacctccacc -acctccaccaccaccacctccaccacctccaccaccacctccacctccaccaccaccacc -tccaccaccaccacctccaccacctccaccaccacctccacctccaccaccaccacctcc -acctccacctccaccaccaccaccaccacctccactaccaccacctccacctccaccacc -accacctccaccaccaacacctccacctccacctccacctccaccaccaccacctccacc -tccaccaccaccacctccaccaccaccacctccaccacctccacctccaccaccaccacc -tccaccaccacctccaccaccaccaccacctccaccaccaccacctccaccaccacctcc 
-accaccaccacctccaccaccacctccaccaccacctccacctccaccaccaccacctgc -accaccacctccacctccaccaccaccaccacctccacctccaccagcagcagcatcact -tgttggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccc -taagcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctgggggg -tcaactgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctct -gtttggggtggagtccaagtctc ->syndip_2_chr20:63693225-63693985 -tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg -taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt -cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct -atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacca-- --------------------------------cctccacctccaccacct----------- -----------------ccacctccacctccaccaccacctccacctccacctccacctcc -acctccaccaccaccaccaccacctccaccacctccacctccacctccacctccacctcc -acctccacctccacctccaccacgtcca---------------------ccacctccacc -accacctccaccaccacctccacctccacctccacctccaccaccacctccaccaccacc -tccaccaccacct------------------ccaccaccacctccaccaccaccaccacc -accaccacctccaccacctccaccacctccacctccacctccaccaccaccacctccacc -acctccaccacctccaccaccaccaccaccaccacctccacctccaccacct-------- -----------------------------------------ccacctccacctccaccacc -tccacctccacctccaccaccacctccaccacct-------------------------- --------------------------------ccaccacctccaccacctccacctccacc -acctccaccacctccaccaccaccaccaccaccacctccaccaccacctccaccacc--- ----------acctccaccaccacctccaccaccacctccaccacctccacctccacctcc -accacctccaccacctccaccaccaccaccaccaccaccacctccacca----------- --------------------------------ccacctccaccaccaccacctccacctcc -accaccaccacctccaccacctccaccacctccaccaccaccaccaccaccacctccacc -tccaccacctccacctccacctccaccacctccacctccacctccaccaccacctccacc -acctccaccacctccaccacctccacctccaccacctccaccacctccaccaccaccacc -accaccacctccaccaccacctccaccaccacctccaccaccacctccaccaccacct-- ------------------------------------------------------------- -----------------------ccaccacctccacctccacctccaccacctccacctcc -acctccaccaccaccacctccacct---------ccaccaccacctccaccaccaccacc 
-tccacctccaccacctccaccaccagcaccacctccaccaccaccacctccacctccacc -tccacctccacctccacctccacctccaccaccacctccacctccaccacctccacctcc -accaccaccaccaccaccaccaccaccaccacctccacctccaccaccacctccaccacc -tccaccacctccaccaccaccacctccaccacctccacct-------------------- --------ccaccacctccaccaccacctccaccaccaccacctccacctccaccaccacc -tccaccacct---ccaccacctccaccaccaccaccaccacctccaccacctccaccacc -accacctccacctccacctccaccaccaccaccaccacctccaccacctccaccaccacc -acctccacctccaccaccacctccaccacctccaccaccaccaccaccacctccaccacc -tccaccaccaccacctccaccaccacctccacca---------------------ccacc -acctccacctccaccaccacctccaccacctccaccacctccaccacctccacctccacc -tccaccacctccaccaccacctccaccacctccaccaccacctccaccacctccacctcc -acctccacct---------------------ccacctccaccaccacctccaccacctcc -accaccacctccacctccaccacctccacctccacctccaccacctccaccaccacctcc -accacctccaccacctccaccaccaccacca----------------------------- --ccacctccaccacctccaccaccacctccaccacctccacctccacctccacct----- --------------ccacctccaccaccacctccaccacct---ccaccaccacctccacc -accacctccaccacctccacctccacctccacct-------------------------- -----------------ccacctccaccaccacctccaccacctccaccaccacctccacc -tccaccacctccacctccacctccaccacctccaccaccacctccaccacctccaccacc -acctccacct-------------------------------------------------- -----------ccaccacctccaccaccacctccaccacctccaccacctccaccaccacc -accaccacct-------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------ccaccacctccaccaccacctccaccacctccacctccacctcc -acct---------------------------------ccacctccaccaccacctccacc -acctccaccaccacctccacctccaccacctccacctccacctccaccacctccaccacc -accacctccaccaccacctccaccaccaccacctccacctccaccaccacctccaccacc -tc---------------------------------------------------------- ------------------------------------------------------------- ------------------------------------caccacctccaccaccacctccacc 
-acctccaccacctccaccaccaccacc--------------------------------- -accacctccaccacctccaccaccacctccaccaccaccacctccacctccacctccacc -acctccaccacctccaccaccacctccaccacctccaccacctccaccaccacctccacc -acctcca----------------------------------------------------- ------------------------------------------------------------- --------------------------------ccacctccacctccaccaccaccacctcc -accacctccaccaccacctccaccacct-------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------ccacctccacctccacctccacctccaccaccacctcc -accacctccaccaccacctccacctccaccacctccacctccacctccaccacctccacc -tccacctccacctccaccaccaccacctccacctccacctccacctccaccacgtccacc -a----------------------------------------------------------- --------------------------------------------------------cctcc -accacctccacctccacctccacctccacctccaccacctccacctccacctccaccacc -tccacctccacctccacctccaccacgtccaccaccaccacctccaccaccaccaccacc -acct-------------------------------------------------------- ------------------------------------------------------------- --------------------------ccaccaccacctccacctccaccaccacctccacc -accacctccaccaccaccacct---------------ccacctccaccaccaccacctcc -acctccaccaccaccaccaccaccacctccaccaccacctccacctccacca---ccacc -tccacctccaccacctccaccaccagcaccaccaccacctccaccaccaccacctccacc -tccaccaccaccaccaccaccacct---------------------ccaccaccacctcc -acctccacctccacctccaccaccacctccaccaccacctccacca-------------- ------------------------------------------------------------- -----------------------------------------------------cctccacc -accacctccacctccaccacctccacctccacctccaccacctccacctccacctccacc -acctccacct-------------------------------------------------- -----------------ccacctccacctccaccaccaccacct----------------- 
-----------------------------------ccacctccacctccacctccaccacg -tccaccacctccaccacctccacctccacctccacctccacctccaccacct-------- -----------------------------------ccacctccacctccaccacctccacc -tccacctccacctccaccacgtccaccaccaccacctccaccaccaccaccaccacctcc -a---------------------ccaccacctccacctccaccaccacctccaccaccacc -tccaccaccaccacctccacctccaccaccaccacctccacctccaccaccaccaccacc -acca-------------------------------------------------------- -----------------------------------------------------------cc -tccaccaccacctccacctccaccaccacctccacctccaccacctccaccaccaccacc -acctccacctccacca------------------ccaccacctccacctccacctccacc -tccaccaccacctccaccaccacctccaccaccaccacctccacctccacca-------- -----ccaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccacc -acctccaccacctccaccaccaccaccacctccacctccaccaccaccacctccacctcc -accaccaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccacc -tccacctccacctccacctccacctccaccacctccacctccacctccaccaccaccacc -acctccaccacct----------------------------------------------- -----ccaccaccacctccaccacctccacctccacctccacctccaccaccacctccacc -acctccaccacca---------------------------------cctccaccacctcc -accacctccacctccaccaccacctccacctccacctccaccacctccaccaccacca-- --------------------------------------ccaccaccacctccaccaccacc -tccaccaccacctccacctccacct------ccaccacctccaccaccacctccaccacc -accaccacctccacctccaccacctccacctccacctccacca----------------- -----------------------------------------------------ccacctcc -accacctccaccaccaccaccaccacctccaccacctccacctccacct----------- ------------------------------------------------------------- --------------------ccacctccacctccaccacctccacctccacctccaccacc -accaccacctccaccacctccacca----------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------------------------------ccacctccaccacctccacctcc -acctccacctccaccaccacctccaccacctccaccaccacctccaccacctccaccacc 
-tccacct------------------------------------ccaccaccacctccacc -tccacctccaccacctccaccaccaccaccaccaccacctccaccttcaccaccaccacc -acctccaccacctccaccaccacctcca-------------------------------- --------------------------------------ccaccacctccaccaccacctcc -tccacctccaccacctccaccaccaccaccacctccacctccaccagcagcagcatcact -tgttggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccc -taagcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctgggggg -tcaactgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctct -gtttggggtggagtccaagtctc ->p:HG002_1_chr20:63693225-63693985 -tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg -taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt -cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct -atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacca-- --------------------------------cctccacctccaccacct----------- -----------------ccacctccacctccaccaccacctccacctccacctccacctcc -acctccaccaccaccaccaccacctccaccacctccacctccacctccacctccacctcc -acctccacctccacctccaccacgtcca---------------------ccacctccacc -accacctccaccaccacctccacctccacctccacctccaccaccacctccaccaccacc -tccaccaccacct------------------ccaccaccacctccaccaccaccaccacc -accaccacctccaccacctccaccacctccacctccacctccaccaccaccacctccacc -acctccaccacctccaccaccaccaccaccaccacctccacctccaccacct-------- -----------------------------------------ccacctccacctccaccacc -tccacctccacctccaccaccacctccaccacct-------------------------- --------------------------------ccaccacctccaccacctccacctccacc -acctccaccacctccaccaccaccaccaccacca------------cctccaccaccacc -tccaccaccacctccaccaccacctccaccaccacctccaccacctccacctccacctcc -accacctccaccacctccaccaccaccaccaccaccaccacctccacca----------- --------------------------------ccacctccaccaccaccacctccacctcc -accaccaccacctccaccacctccaccacctccaccaccaccaccaccaccacctccacc -tccaccacctccacctccacctccaccacctccacctccacctccaccaccacctccacc -acctccaccacctccaccacctccacctccaccacctccaccacctccaccaccaccacc -accaccacctccaccaccacctccaccaccacctccaccaccacctccaccaccacct-- 
------------------------------------------------------------- -----------------------ccaccacctccacctccacctccaccacctccacctcc -acctccaccaccaccacctccacct---------ccaccaccacctccaccaccaccacc -tccacctccaccacctccaccaccagcaccacctccaccaccaccacctccacctccacc -tccacctccacctccacctccacctccaccaccacctccacctccaccacctccacctcc -accaccaccaccaccaccaccaccaccaccacctccacctccaccaccacctccaccacc -tccaccacctccaccaccaccacctccaccacctccacct-------------------- --------ccaccacctccaccaccacctccaccaccaccacctccacctccaccaccacc -tccaccacct---ccaccacctccaccaccaccaccaccacctccaccacctccaccacc -accacctccacctccacctccaccaccaccaccaccacctccaccacctccaccaccacc -acctccacctccaccaccacctccaccacctccaccaccaccaccaccacctccaccacc -tccaccaccaccacctccaccaccacctccacca---------------------ccacc -acctccacctccaccaccacctccaccacctccaccacctccaccacctccacctccacc -tccaccacctccaccaccacctccaccacctccaccaccacctccaccacctccacctcc -acctccacct---------------------ccacctccaccaccacctccaccacctcc -accaccacctccacctccaccacctccacctccacctccaccacctccaccaccacctcc -accacctccaccacctccaccaccaccacca----------------------------- --ccacctccaccacctccaccaccacctccaccacctccacctccacctccacct----- --------------ccacctccaccaccacctccaccacct---ccaccaccacctccacc -accacctccaccacctccacctccacctccacct-------------------------- -----------------ccacctccaccaccacctccaccacctccaccaccacctccacc -tccaccacctccacctccacctccaccacctccaccaccacctccaccacctccaccacc -acctccacct-------------------------------------------------- -----------ccaccacctccaccaccacctccaccacctccaccacctccaccaccacc -accaccacct-------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------ccaccacctccaccaccacctccaccacctccacctccacctcc -acct---------------------------------ccacctccaccaccacctccacc -acctccaccaccacctccacctccaccacctccacctccacctccaccacctccaccacc -accacctccaccaccacctccaccaccaccacctccacctccaccaccacctccaccacc 
-tc---------------------------------------------------------- ------------------------------------------------------------- ------------------------------------caccacctccaccaccacctccacc -acctccaccacctccaccaccaccacc--------------------------------- -accacctccaccacctccaccaccacctccaccaccaccacctccacctccacctccacc -acctccaccacctccaccaccacctccaccacctccaccacctccaccaccacctccacc -acctcca----------------------------------------------------- ------------------------------------------------------------- --------------------------------ccacctccacctccaccaccaccacctcc -accacctccaccaccacctccaccacct-------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------ccacctccacctccacctccacctccaccaccacctcc -accacctccaccaccacctccacctccaccacctccacctccacctccaccacctccacc -tccacctccacctccaccaccaccacctccacctccacctccacctccaccacgtccacc -a----------------------------------------------------------- --------------------------------------------------------cctcc -accacctccacctccacctccacctccacctccaccacctccacctccacctccaccacc -tccacctccacctccacctccaccacgtccaccaccaccacctccaccaccaccaccacc -acct-------------------------------------------------------- ------------------------------------------------------------- --------------------------ccaccaccacctccacctccaccaccacctccacc -accacctccaccaccaccacct---------------ccacctccaccaccaccacctcc -acctccaccaccaccaccaccaccacctccaccaccacctccacctccacca---ccacc -tccacctccaccacctccaccaccagcaccaccaccacctccaccaccaccacctccacc -tccaccaccaccaccaccaccacct---------------------ccaccaccacctcc -acctccacctccacctccaccaccacctccaccaccacctccacca-------------- ------------------------------------------------------------- -----------------------------------------------------cctccacc 
-accacctccacctccaccacctccacctccacctccaccacctccacctccacctccacc -acctccacct-------------------------------------------------- -----------------ccacctccacctccaccaccaccacct----------------- -----------------------------------ccacctccacctccacctccaccacg -tccaccacctccaccacctccacctccacctccacctccacctccaccacct-------- -----------------------------------ccacctccacctccaccacctccacc -tccacctccacctccaccacgtccaccaccaccacctccaccaccaccaccaccacctcc -a---------------------ccaccacctccacctccaccaccacctccaccaccacc -tccaccaccaccacctccacctccaccaccaccacctccacctccaccaccaccaccacc -acca-------------------------------------------------------- -----------------------------------------------------------cc -tccaccaccacctccacctccaccaccacctccacctccaccacctccaccaccaccacc -acctccacctccacca------------------ccaccacctccacctccacctccacc -tccaccaccacctccaccaccacctccaccaccaccacctccacctccacca-------- -----ccaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccacc -acctccaccacctccaccaccaccaccacctccacctccaccaccaccacctccacctcc -accaccaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccacc -tccacctccacctccacctccacctccaccacctccacctccacctccaccaccaccacc -acctccaccacct----------------------------------------------- -----ccaccaccacctccaccacctccacctccacctccacctccaccaccacctccacc -acctccaccacca---------------------------------cctccaccacctcc -accacctccacctccaccaccacctccacctccacctccaccacctccaccaccacca-- --------------------------------------ccaccaccacctccaccaccacc -tccaccaccacctccacctccacct------ccaccacctccaccaccacctccaccacc -accaccacctccacctccaccacctccacctccacctccacca----------------- -----------------------------------------------------ccacctcc -accacctccaccaccaccaccaccacctccaccacctccacctccacct----------- ------------------------------------------------------------- --------------------ccacctccacctccaccacctccacctccacctccaccacc -accaccacctccaccacctccacca----------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- 
------------------------------------------------------------- --------------------------------------ccacctccaccacctccacctcc -acctccacctccaccaccacctccaccacctccaccaccacctccaccacctccaccacc -tccacct------------------------------------ccaccaccacctccacc -tccacctccaccacctccaccaccaccaccaccaccacctccaccttcaccaccaccacc -acctccaccacctccaccaccacctcca-------------------------------- --------------------------------------ccaccacctccaccaccacctcc -tccacctccaccacctccaccaccaccaccacctccacctccaccagcagcagcatcact -tgttggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccc -taagcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctgggggg -tcaactgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctct -gtttggggtggagtccaagtctc ->p:HG002_2_chr20:63693225-63693985 -tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg -taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt -cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct -atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccaccttc -accaccaccacctccaccaccacctccaccacctccacctccaccacctccaccacctcc -accaccaccacctccaccaccaccacctccaccaccaccaccaccaccacctccaccacc -accaccaccaccaccacctccacctccaccacctccaccaccacctccacctccaccacc -acctccacctccacctccaccaccaccacctccaccacctccaccacctccacctccacc -accacctccacctccaccaccacctccacctccacctccacctccacctccaccaccacc -acctccaccacctccaccacctccacctccaccaccaccacctccacctccacctccacc -acctccacctccaccacctccacctccaccacctccaccaccaccaccaccaccaccacc -acctccaccaccaccaccaccaccacctccaccacctccacctccaccacctccaccacc -accacctccaccaccacctccaccaccacctccacctccaccacctccacctccaccacc -tccacctccacctccaccaccacctccaccacctccaccaccacctccaccacctccacc -tccaccaccaccacctccaccaccaccaccaccaccacctccaccacctccacctccacc -acctccaccaccaccacctccaccaccacctccaccaccacctccacctccaccacctcc -acctccaccacctccacctccacctccaccaccacctccaccacctccaccaccacctcc -accacctccaccacctccaccaccacctccaccacctccacctccaccaccaccacctcc -acctccaccaccaccacctccaccaccacctccacctccaccaccaccacctccacctcc -accaccaccacctccacca---ccacctccaccaccaccaccaccaccaccacctccacc 
-tccaccacctccaccaccaccaccaccacctccacctccaccacctccaccacctccacc -accaccaccaccacctccaccacctccaccaccacctccaccacctccacctccaccacc -accacctccaccaccaccacctccacctccacctccacctccacctccaccaccacctcc -acctccaccaccacctccaccaccacctccacctccaccacctccacctccacctccacc -acctccaccaccaccaccaccaccaccaccaccaccaccacctccaccacctccacctcc -acctccaccaccaccaccaccacctccaccaccaccaccaccacctccaccacctccacc -accacctccaccaccaccaccaccaccaccaccaccacctccaccacctccacctccacc -tccacctccaccacctccaccacctccaccaccacctccaccaccaccaccaccacctcc -accaccacctccacctccaccaccacctccaccaccacctccacctccaccacctccacc -tccaccacctccaccacctccacctccaccacctccacctccacctccaccacctccacc -accaccaccaccaccaccaccaccaccaccacctccaccaccaccacctccaccaccacc -tccaccacctccaccaccacctccaccaccaccaccaccaccaccaccaccaccaccacc -tccaccaccacctccacctccaccaccacctccaccaccacctccacctccaccacctcc -acctccaccacctccaccacctccaccaccaccaccaccacctccaccaccacctccacc -tccaccaccaccaccaccaccaccacctccaccaccaccacctccaccaccacctccacc -acctccaccaccacctccacctccaccacctccaccaccacctccaccaccacctccacc -accaccacctccaccaccacctccaccacctccaccaccacctccacctccaccacctcc -accaccacctccaccaccacctccaccaccaccacctccaccaccacctccaccacctcc -accaccacctccacctccaccacctccacctccacctccaccacctccaccaccaccacc -accaccaccaccaccaccaccaccaccaccaccaccacctccacctccacctccaccacc -tccacctccaccacctccaccaccacctccacctccaccacctccacctccacctccacc -acctccacctccaccacctccacctccacctccaccacctccaccaccaccacctccacc -accacctccaccacctccaccaccacctccacctccaccacctccaccaccacctccacc -accacctccaccaccaccacctccaccaccacctccaccacctccaccaccacctccacc -tccaccacctccacctccacctccaccacctccaccaccaccaccaccaccaccaccacc -accaccacctccaccaccaccacctccaccaccacctccaccaccaccaccaccacctcc -accaccaccaccaccacctccaccaccacctccaccacctccaccaccaccaccaccacc -accaccacctccaccaccacctccaccaccaccaccaccaccaccaccacctccaccacc -tccacctccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccacc -accaccacctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccacc -accaccaccacctccaccacctccaccacctccaccaccacctccaccacctccaccacc -accaccaccacctccaccaccacctccaccaccaccaccaccaccaccacctccacctcc 
-acctccaccacctccacctccaccacctccacctccaccacctccacctccacctccacc -acctccaccaccacctccacctccaccaccacctcctccaccaccaccacctccaccacc -acctccaccaccaccacctccacctccaccacctccacctccaccaccaccacctccacc -tccaccaccaccacctccaccacctccaccaccacctccaccacctccacctccaccacc -accacctccaccaccaccacctccacctccaccaccaccaccaccaccacctccacctcc -accaccacctccaccaccacctccacctccacctccaccacctccaccaccacctccacc -acctccaccacctccacctccaccaccaccaccacctccaccaccaccaccaccacctct -accacctctaccaccaccaccaccacctccaccaccaccaccaccacctccaccaccacc -accaccaccacctccaccaccacctccaccacctccaccaccaccaccaccacctctacc -acctccacctccaccaccaccaccacctctaccaccaccaccaccacctccaccaccacc -accacctccaccaccaccaccacctccaccaccacctccaccaccacctccaccacctcc -accaccaccaccacctccacctccaccacctccaccaccacctccaccaccaccaccacc -accacctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctcc -acctccaccaccaccaccacctccacctccaccaccaccaccaccaccaccaccaccacc -acctccaccaccaccaccaccaccacctctaccaccaccaccaccacctccaccaccacc -accacctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccaccacc -accacctctaccaccaccaccaccacctccaccaccaccaccacctccaccaccaccacc -acctccaccaccacctccaccaccaccaccacctccaccaccacctccaccaccacctcc -accacctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccacc -accacctccaccaccaccaccaccaccaccacctccaccaccacctccaccaccacctcc -accacctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccacc -accacctccaccaccacctccaccaccaccaccaccaccacctccaccaccacctccacc -acctccaccaccaccaccaccacctctaccacctctaccaccaccaccaccacctccacc -accaccaccacctccaccaccaccaccacctccaccaccaccacctccacctccaccacc -accacctccaccacctccaccaccacctccaccaccaccacctccacctccaccaccacc -acctccaccacctccaccaccaccaccaccaccaccaccacctccaccaccaccaccacc -accacctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctct -accaccaccaccaccacctccaccaccaccaccacctccaccaccaccaccacctccacc -accacctccaccaccaccacctccacctccaccaccaccacctccaccaccaccacctcc -acctccaccaccaccacctccaccacctccaccaccacctccaccaccaccacctccacc -tccaccaccaccacctccaccacctccaccaccaccacctccaccaccaccacctccacc -acctccaccaccaccaccaccacctccaccacctccaccacctccaccaccaccaccacc 
-acctccacctccacctccaccacctccaccaccaccacctccaccaccaccaccaccacc -accaccacctccacctccacctccaccacctccacctccaccaccaccaccaccaccacc -accaccaccaccacctccacctccacctccaccacctccaccaccaccacctccaccacc -accacctccacctccaccacctccaccaccacctccaccaccaccacctccacctccacc -accaccacctccaccaccaccacctccacctccaccaccaccacctccaccacctccacc -accacctccaccaccaccacctccacctccaccaccaccacctccaccacctccaccacc -accacctccaccaccaccacctccaccacctccaccaccaccaccaccacctccaccacc -tccaccacctccaccaccaccaccaccacctccacctccacctccaccacctccaccacc -accacctccaccaccaccaccaccaccacctccaccacctccaccaccaccaccaccacc -tccacctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccacc -accaccacctccacctccacctccaccacctccacctccaccaccaccaccaccaccacc -acctccaccaccacctccacctccacctccaccacctccacctccaccaccaccaccacc -accaccaccaccaccaccacctccacctccacctccaccaccaccacctctaccaccacc -tccaccaccaccaccaccaccaccaccaccacctccaccacctccaccaccaccacctcc -accaccaccacctccacctccaccaccaccaccacctccaccacctccaccaccacctcc -acctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccaccacc -tccaccacctccaccaccaccacctccaccaccaccacctccacctccaccacctccacc -acctccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccaccacc -tctaccaccacctccaccaccaccaccaccaccacctccacctccaccacctccacctcc -accacctctaccaccacctccaccaccaccacctccacctccacctccaccaccaccacc -tccacctccacctccaccaccaccaccaccaccaccacctccaccaccaccaccaccacc -accaccaccacctccacctccaccaccaccacctccaccaccaccacctccaccacctcc -accaccacctccacctccaccaccaccacctccacctccacctccaccaccaccaccacc -acctccaccaccaccacctccaccaccaacacctccacctccacctccaccaccaccacc -accacctccacctccaccaccacctccaccaccaccacctccacctccaccaccaccacc -tccaccaccacctccaccacctccaccaccaccacctccaccaccacctccaccaccacc -accaccaccacctccacctccacctccaccaccaccacctctaccaccacctccaccacc -accaccaccaccaccaccaccacctccacctccacctccaccacctccacctccaccacc -accaccaccaccaccacctccaccaccacctccacctccacctccaccacctccacctcc -accaccaccaccaccaccaccaccaccaccaccacctccacctccacctccaccaccacc -acctctaccaccacctccaccaccaccaccaccaccaccaccaccacctccaccaccacc -acctccacctccaccaccaccacctccacctccaccaccaccacctccacctccaccacc 
-accaccaccaccaccacctccaccaccaccacctccaccacctccaccaccacctccacc -accaccacctccaccacctccaccaccacctccacctccaccaccaccacctccaccacc -accacctccacctccaccaccaccacctccacctccacctccaccacctccacctccacc -acctccaccaccaccacctccaccacctccaccaccacctccacctccaccaccaccacc -tccaccaccaccacctccaccacctccaccaccacctccacctccaccaccaccacctcc -acctccacctccaccaccaccaccaccacctccactaccaccacctccacctccaccacc -accacctccaccaccaacacctccacctccacctccacctccaccaccaccacctccacc -tccaccaccaccacctccaccaccaccacctccaccacctccacctccaccaccaccacc -tccaccaccacctccaccaccaccaccacctccaccaccaccacctccaccaccacctcc -accaccaccacctccaccaccacctccaccaccacctccacctccaccaccaccacctgc -accaccacctccacctccaccaccaccaccacctccacctccaccagcagcagcatcact -tgttggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccc -taagcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctgggggg -tcaactgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctct -gtttggggtggagtccaagtctc ->ref_chr20:63693225-63693985 -tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg -taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt -cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct -atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacct-- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- 
------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------ccacctccacct-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- 
------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------------------------------------------cc -acctccaccaccacctccacctccaccaccacctcctccaccaccaccacctccaccacc -accaccaccaccaccacctccacctccaccacctccacctccaccaccaccacctccacc -t----------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- 
------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------------------------------------ccaccacc -accacctccacctccaccaccacctccaccaccaccacctccacctccacca-------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- -----------------------------------------------------ccacctcc 
-accaccaccaccaccaccaccaccaccaccacca-------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------------- --------------------------------------ccacctccaccaccaccacctgc -accaccacctccacctccaccaccaccaccacctccacctccaccagcagcagcatcact -tgttggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccc -taagcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctgggggg -tcaactgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctct -gtttggggtggagtccaagtctc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_21e1c5c85ccc2580c7576396ab027d39.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ae89171a433f770e25870ba2623263b5.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_21e1c5c85ccc2580c7576396ab027d39.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_ae89171a433f770e25870ba2623263b5.msa index ecf894da..15777a46 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_21e1c5c85ccc2580c7576396ab027d39.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ae89171a433f770e25870ba2623263b5.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:48449605-48450562 +>syndip_1_chr20:48449605-48450561 catgagaatatgctcagtgataaggcagtggcgttaaatctgcaggcagtgggctatctg tgggtccttaaatctatttagtgcatcttgaacagtatttttaagctgggataagtggaa taggaaggagtggagtggagcatgatggtatgggatgggatggatgggatgggatggatg @@ -23,8 +23,8 
@@ tgggatgggatggggtggggtggggtgggatgggatgggatggggtggggtgggatggga ttggatggggtgggatgcgatgg-----gatgagatgagatggatggcatgggatgggtg ggaggggatgggatattatcagatggcaaaagtaagaagtgttcatattttgtcttcaaa atataaatatgagtgcataatggcttgtgatgcaaaatacatttcttactaaagatcatg -gacaaagcacttaaaaccacacagt ->syndip_2_chr20:48449605-48450562 +gacaaagcacttaaaaccacacag +>syndip_2_chr20:48449605-48450561 catgagaatatgctcagtgataaggcagtggcgttaaatctgcaggcagtgggctatctg tgggtccttaaatctatttagtgcatcttgaacagtatttttaagctgggataagtggaa taggaaggagtggagtggagcatgatggtatgggatgggatggatgggatgggatggatg @@ -49,8 +49,8 @@ tgggatgggatggggtggggtggggtgggatgggatgggatggggtggggtgggatggga ttggatggggtgggatgcgatgggatgagatgagatgagatgaatggcatgggatgggtg ggaggggatgggatattatcagatggcaaaagtaagaagtgttcatattttgtcttcaaa atataaatatgagtgcataatggcttgtgatgcaaaatacatttcttactaaagatcatg -gacaaagcacttaaaaccacacagt ->p:HG002_1_chr20:48449605-48450562 +gacaaagcacttaaaaccacacag +>p:HG002_1_chr20:48449605-48450561 catgagaatatgctcagtgataaggcagtggcgttaaatctgcaggcagtgggctatctg tgggtccttaaatctatttagtgcatcttgaacagtatttttaagctgggataagtggaa taggaaggagtggagtggagcatgatggtatgggatgggatggatgggatgggatggatg @@ -75,8 +75,8 @@ tgggatgggatggggtggggtggggtgggatgggatgggatggggtggggtgggatggga ttggatggggtgggatgcgatgggatgagatgagatgagatgaatggcatgggatgggtg ggaggggatgggatattatcagatggcaaaagtaagaagtgttcatattttgtcttcaaa atataaatatgagtgcataatggcttgtgatgcaaaatacatttcttactaaagatcatg -gacaaagcacttaaaaccacacagt ->p:HG002_2_chr20:48449605-48450562 +gacaaagcacttaaaaccacacag +>p:HG002_2_chr20:48449605-48450561 catgagaatatgctcagtgataaggcagtggcgttaaatctgcaggcagtgggctatctg tgggtccttaaatctatttagtgcatcttgaacagtatttttaagctgggataagtggaa taggaaggagtggagtggagcatgatggtatgggatgggatggatgggatgggatggatg @@ -101,8 +101,8 @@ tgggatgggatggggtggggtggggtgggatgggatgggatggggtggggtgggatggga ttggatggggtgggatgcgatgg-----gatgagatgagatggatggcatgggatgggtg ggaggggatgggatattatcagatggcaaaagtaagaagtgttcatattttgtcttcaaa atataaatatgagtgcataatggcttgtgatgcaaaatacatttcttactaaagatcatg -gacaaagcacttaaaaccacacagt 
->ref_chr20:48449605-48450562 +gacaaagcacttaaaaccacacag +>ref_chr20:48449605-48450561 catgagaatatgctcagtgataaggcagtggcgttaaatctgcaggcagtgggctatctg tgggtccttaaatctatttagtgcatcttgaacagtatttttaagctgggataagtggaa taggaaggagtggagtggagcatgatggtatgggatgggatggatgggatgggatggatg @@ -127,4 +127,4 @@ tgggatgggatggggtggggtggggtgggatgggatgggatggggtggggtgggatggga ttggatggggtgggatgcgatgg-----gatgagatgagatggatggcatgggatgggtg ggaggggatgggatattatcagatggcaaaagtaagaagtgttcatattttgtcttcaaa atataaatatgagtgcataatggcttgtgatgcaaaatacatttcttactaaagatcatg -gacaaagcacttaaaaccacacagt +gacaaagcacttaaaaccacacag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_9977644ffc467b937358e85a6ba26103.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_b05e3a291363048d6af177602ce2b043.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_9977644ffc467b937358e85a6ba26103.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_b05e3a291363048d6af177602ce2b043.msa index 285703c8..40bd6c04 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_9977644ffc467b937358e85a6ba26103.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_b05e3a291363048d6af177602ce2b043.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:14861837-14862842 +>syndip_1_chr20:14861837-14862841 gctctcctgctgctggccaagcaggaccccaaggtccactgggatgtacatacttatggt attatgctctgcaatacttgtgggggctgtggaatgcacacttccaaagtgaaataggac attggaggttttatatataaatatatatatatatttatatatatatttatatatatatat @@ -28,8 +28,8 @@ aaatatatatataaatataaatatatatgtataaatatatatataaatatatatatataa atatatatatacatatataaatatatatataaatatatataaaaatatatatatatatat ttttttaatagagaggatgaagaccaaagcagtgagttccacctaagctggcccttgcct tttgtctaacaggaaacccagtccttcctaatgggcacagaattttcagagaagagagca -gaag ->syndip_2_chr20:14861837-14862842 +gaa +>syndip_2_chr20:14861837-14862841 gctctcctgctgctggccaagcaggaccccaaggtccactgggatgtacatacttatggt attatgctctgcaatacttgtgggggctgtggaatgcacacttccaaagtgaaataggac 
attggaggttttatatataaatatatatatatatatttatatatatatttatatatatat @@ -59,8 +59,8 @@ aaatatatatataaatatatataaatatatataaaaatatatataaatatatatataaat atatatataaatatatataaatatatatataaatatatataaaaatatatatatattttt ttttttaatagagaggatgaagaccaaagcagtgagttccacctaagctggcccttgcct tttgtctaacaggaaacccagtccttcctaatgggcacagaattttcagagaagagagca -gaag ->p:HG002_1_chr20:14861837-14862842 +gaa +>p:HG002_1_chr20:14861837-14862841 gctctcctgctgctggccaagcaggaccccaaggtccactgggatgtacatacttatggt attatgctctgcaatacttgtgggggctgtggaatgcacacttccaaagtgaaataggac attggaggttttatatataaatatatatatatatatttatatatatatttatatatatat @@ -90,8 +90,8 @@ aaatatatatataaatatatataaatatatataaaaatatatataaatatatatataaat atatatataaatatatataaatatatatataaatatatataaaaatatatatatattttt ttttttaatagagaggatgaagaccaaagcagtgagttccacctaagctggcccttgcct tttgtctaacaggaaacccagtccttcctaatgggcacagaattttcagagaagagagca -gaag ->p:HG002_2_chr20:14861837-14862842 +gaa +>p:HG002_2_chr20:14861837-14862841 gctctcctgctgctggccaagcaggaccccaaggtccactgggatgtacatacttatggt attatgctctgcaatacttgtgggggctgtggaatgcacacttccaaagtgaaataggac attggaggttttatatataaatatatatatatatttatatatatatttatatatatatat @@ -121,8 +121,8 @@ aaatatatatataaatataaatatatatgtataaatatatatataaatatatatatataa atatatatatacatatataaatatatatataaatatatataaaaatatatatatatatat ttttttaatagagaggatgaagaccaaagcagtgagttccacctaagctggcccttgcct tttgtctaacaggaaacccagtccttcctaatgggcacagaattttcagagaagagagca -gaag ->ref_chr20:14861837-14862842 +gaa +>ref_chr20:14861837-14862841 gctctcctgctgctggccaagcaggaccccaaggtccactgggatgtacatacttatggt attatgctctgcaatacttgtgggggctgtggaatgcacacttccaaagtgaaataggac attggaggttttatatataaatatatatatatatatttatatatatatttatatatatat @@ -152,4 +152,4 @@ aaatatatatataaat------------------------------atatatatataaat atatatataaatatatataaatatatatataaatatatataaatatatatatatatattt ttttttaatagagaggatgaagaccaaagcagtgagttccacctaagctggcccttgcct tttgtctaacaggaaacccagtccttcctaatgggcacagaattttcagagaagagagca -gaag +gaa diff --git 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_c3e3f9602fee97132e713a8085d0122f.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_b179a5501273075e792cbf44d023019b.msa similarity index 98% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_c3e3f9602fee97132e713a8085d0122f.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_b179a5501273075e792cbf44d023019b.msa index eda44657..0b5b5c0f 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_c3e3f9602fee97132e713a8085d0122f.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_b179a5501273075e792cbf44d023019b.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:38462831-38464884 +>syndip_1_chr20:38462831-38464883 caccatgttacctaggctgatcttgaattcctgggctcaagtgttagactcacttcggcc tcccaaagtgttgggattacaggcgtgagccactgcacccagcctgttttgtttggggca agggttttaacatttatatatatataaatatatatatattttatatatacaaatatatat @@ -67,8 +67,8 @@ agtggctcacacctgtaatcccagcactttgggaggctgaggtgggcagatcacgaggtc agcagttcgagaccagcctggccagcatggtgaaaccccatctctactaaaaatacaaaa cattagccggacatggtggcacacacctgtaattccagctactcaggaagctgaggcagg agaatagcttgaacccaggaggcgaaagtttcagtcagccaaaatcacaccactgcactc -cagcctgggcaacagagcaagactctgtctccaaaaaaa- ->syndip_2_chr20:38462831-38464884 +cagcctgggcaacagagcaagactctgtctccaaaaaa- +>syndip_2_chr20:38462831-38464883 caccatgttacctaggctgatcttgaattcctgggctcaagtgttagactcacttcggcc tcccaaagtgttgggattacaggcgtgagccactgcacccagcctgttttgtttggggca agggttttaacatttatatatatataaatatatatatattttatatatacaaatatatat @@ -137,8 +137,8 @@ agtggctcacacctgtaatcccagcactttgggaggctgaggtgggcagatcacgaggtc agcagttcgagaccagcctggccagcatggtgaaaccccatctctactaaaaatacaaaa cattagccggacatggtggcacacacctgtaattccagctactcaggaagctgaggcagg agaatagcttgaacccaggaggcgaaagtttcagtcagccaaaatcacaccactgcactc -cagcctgggcaacagagcaagactctgtctccaaaaaaaa ->p:HG002_1_chr20:38462831-38464884 +cagcctgggcaacagagcaagactctgtctccaaaaaaa +>p:HG002_1_chr20:38462831-38464883 caccatgttacctaggctgatcttgaattcctgggctcaagtgttagactcacttcggcc 
tcccaaagtgttgggattacaggcgtgagccactgcacccagcctgttttgtttggggca agggttttaacatttatatatatataaatatatatatattttatatatacaaatatatat @@ -207,8 +207,8 @@ agtggctcacacctgtaatcccagcactttgggaggctgaggtgggcagatcacgaggtc agcagttcgagaccagcctggccagcatggtgaaaccccatctctactaaaaatacaaaa cattagccggacatggtggcacacacctgtaattccagctactcaggaagctgaggcagg agaatagcttgaacccaggaggcgaaagtttcagtcagccaaaatcacaccactgcactc -cagcctgggcaacagagcaagactctgtctccaaaaaaaa ->p:HG002_2_chr20:38462831-38464884 +cagcctgggcaacagagcaagactctgtctccaaaaaaa +>p:HG002_2_chr20:38462831-38464883 caccatgttacctaggctgatcttgaattcctgggctcaagtgttagactcacttcggcc tcccaaagtgttgggattacaggcgtgagccactgcacccagcctgttttgtttggggca agggttttaacatttatatatatataaatatatatatattttatatatacaaatatatat @@ -277,8 +277,8 @@ agtggctcacacctgtaatcccagcactttgggaggctgaggtgggcagatcacgaggtc agcagttcgagaccagcctggccagcatggtgaaaccccatctctactaaaaatacaaaa cattagccggacatggtggcacacacctgtaattccagctactcaggaagctgaggcagg agaatagcttgaacccaggaggcgaaagtttcagtcagccaaaatcacaccactgcactc -cagcctgggcaacagagcaagactctgtctccaaaaaaa- ->ref_chr20:38462831-38464884 +cagcctgggcaacagagcaagactctgtctccaaaaaa- +>ref_chr20:38462831-38464883 caccatgttacctaggctgatcttgaattcctgggctcaagtgttagactcacttcggcc tcccaaagtgttgggattacaggcgtgagccactgcacccagcctgttttgtttggggca agggttttaacatttatatatatataaatatatatatattttatatatacaaatatatat @@ -347,4 +347,4 @@ agtggctcacacctgtaatcccagcactttgggaggctgaggtgggcagatcacgaggtc agcagttcgagaccagcctggccagcatggtgaaaccccatctctactaaaaatacaaaa cattagccggacatggtggcacacacctgtaattccagctactcaggaagctgaggcagg agaatagcttgaacccaggaggcgaaagtttcagtcagccaaaatcacaccactgcactc -cagcctgggcaacagagcaagactctgtctccaaaaaaa- +cagcctgggcaacagagcaagactctgtctccaaaaaa- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_4abdcd3bd686c6653b94c23219e870d1.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_b206928da83df6d728521bb51cfc7a4d.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_4abdcd3bd686c6653b94c23219e870d1.msa rename to 
repo_utils/test_files/external/fake_mafft/lookup/fm_b206928da83df6d728521bb51cfc7a4d.msa index 5f9abff0..29d71e41 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_4abdcd3bd686c6653b94c23219e870d1.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_b206928da83df6d728521bb51cfc7a4d.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:4032119-4033702 +>syndip_1_chr20:4032119-4033701 cgggattgcagatgaagtctcgttcactcagtgctcagtggtgtccaggctggagtgcag cggcgtgatctcggctcgctacaa---ccacctcccagccgcctgccttggcctcccaaa gagccgagattgcagcctctgcccggccgccaccccgtctgggaagtgaggagcgtctct @@ -31,8 +31,8 @@ agtgggggggtcagccccccgcccggccagccgccccatccgggaggtgaggggcgcttc tgcccggccgcccctactgggaagtgaggagcccctctgcccggccacgaccccgtctgg gaggtgtgcccagcggctcattggggatgggccatgatgacaatggcggttttgtggaat agaaaggcgggaagggtggggaaaaaattgagaaatcggatggttgccgggtctgtgtgg -atggaagtagacatg ->syndip_2_chr20:4032119-4033702 +atggaagtagacat +>syndip_2_chr20:4032119-4033701 cgggattgcagatgaagtctcgttcactcagtgctcagtggtgtccaggctggagtgcag cggcgtgatctcggctcgctacaa---ccacctcccagccgcctgccttggcctcccaaa gagccgagattgcagcctctgcccggccgccaccccgtctgggaagtgaggagcgtctct @@ -65,8 +65,8 @@ agtgggggggtcagccccccgcccggccagccgccccatccgggaggtgaggggcgcttc tgcccggccgcccctactgggaagtgaggagcccctctgcccggccacgaccccgtctgg gaggtgtgcccagcggctcattggggatgggccatgatgacaatggcggttttgtggaat agaaaggcgggaagggtggggaaaaaattgagaaatcggatggttgccgggtctgtgtgg -atggaagtagacatg ->p:HG002_1_chr20:4032119-4033702 +atggaagtagacat +>p:HG002_1_chr20:4032119-4033701 cgggattgcagatgaagtctcgttcactcagtgctcagtggtgtccaggctggagtgcag cggcgtgatctcggctcgctacaa---ccacctcccagccgcctgccttggcctcccaaa gagccgagattgcagcctctgcccggccgccaccccgtctgggaagtgaggagcgtctct @@ -99,8 +99,8 @@ agtgggggggtcagccccccgcccggccagccgccccatccgggaggtgaggggcgcttc tgcccggccgcccctactgggaagtgaggagcccctctgcccggccacgaccccgtctgg gaggtgtgcccagcggctcattggggatgggccatgatgacaatggcggttttgtggaat agaaaggcgggaagggtggggaaaaaattgagaaatcggatggttgccgggtctgtgtgg -atggaagtagacatg ->p:HG002_2_chr20:4032119-4033702 +atggaagtagacat 
+>p:HG002_2_chr20:4032119-4033701 cgggattgcagatgaagtctcgttcactcagtgctcaatggtgcccaggctggagtgcag tggcgtgatctccgctcgctacaacctccacctcccagccgcctgccttggcctcccaaa gagccgagattgcagcctctgcccggccgccaccccgtctgggaagtgaggagcgtctct @@ -133,8 +133,8 @@ agtgggggggtcagccccccgcccggccagccgccccatccgggaggtgaggggcgcttc tgcccggccgcccctactgggaagtgaggagcccctctgcccggccacgaccccgtctgg gaggtgtgcccagcggctcattggggatgggccatgatgacaatggcggttttgtggaat agaaaggcgggaagggtggggaaaaaattgagaaatcggatggttgccgggtctgtgtgg -atggaagtagacatg ->ref_chr20:4032119-4033702 +atggaagtagacat +>ref_chr20:4032119-4033701 cgggattgcagatgaagtctcgttcactcagtgctcagtggtgtccaggctggagtgcag cggcgtgatctcggctcgctacaa---ccacctcccagccgcctgccttggcctcccaaa gagccgagattgcagcctctgcccggccgccaccccgtctgggaagtgaggagcgtctct @@ -167,4 +167,4 @@ agtgggggggtcagccccccgcccggccagccgccccatccgggaggtgaggggcgcttc tgcccggccgcccctactgggaagtgaggagcccctctgcccggccacgaccccgtctgg gaggtgtgcccagcggctcattggggatgggccatgatgacaatggcggttttgtggaat agaaaggcgggaagggtggggaaaaaattgagaaatcggatggttgccgggtctgtgtgg -atggaagtagacatg +atggaagtagacat diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_ef7cdfd52679d33bb0bc51f849df45e1.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_b4b25fe75fa812465ce27c19d1fe0ff9.msa similarity index 98% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_ef7cdfd52679d33bb0bc51f849df45e1.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_b4b25fe75fa812465ce27c19d1fe0ff9.msa index 6107bb73..2b529a23 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_ef7cdfd52679d33bb0bc51f849df45e1.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_b4b25fe75fa812465ce27c19d1fe0ff9.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:56280322-56282215 +>syndip_1_chr20:56280322-56282214 ggtggacatagtgtatctacagatgggacaatctaagatggttacaacattactacatag tatcgcaaatagaaatggaaagcctgaatctaaccatgagaaaatttaaaataactatca aaaaggtaacatccatatacagatataatatctataacatatatagatatatggatataa @@ -42,8 +42,8 @@ 
tagagagatatatagatacatctacatagagagatatatagatacatctacatagagata tatatagatacatctacatagagatatatatagatacatctatatagagatatatctacc tatacttcctgtgagttgtgtttctctggagaactaataggtagggtgactgtataattt actctccatactgggacacttttaatcattgctataatacattaattttaatctaatctt -cttatccct ->syndip_2_chr20:56280322-56282215 +cttatccc +>syndip_2_chr20:56280322-56282214 ggtggacatagtgtatctacagatgggacaatctaagatggttacaacattactacatag tatcgcaaatagaaatggaaagcctgaatctaaccatgagaaaatttaaaataactatca aaaaggtaacatccatatacagatataatatctataacatatatagatatatggatataa @@ -87,8 +87,8 @@ tagagagatatatagatacatctacatagagagatatatagatacatctacatagagata tatatagatacatctacatagagatatatatagatacatctatatagagatatatctacc tatacttcctgtgagttgtgtttctctggagaactaataggtagggtgactgtataattt actctccatactgggacacttttaatcattgctataatacattaattttaatctaatctt -cttatccct ->p:HG002_1_chr20:56280322-56282215 +cttatccc +>p:HG002_1_chr20:56280322-56282214 ggtggacatagtgtatctacagatgggacaatctaagatggttacaacattactacatag tatcgcaaatagaaatggaaagcctgaatctaaccatgagaaaatttaaaataactatca aaaaggtaacatccatatacagatataatatctataacatatatagatatatggatataa @@ -132,8 +132,8 @@ tagagagatatatagatacatctacatagagagatatatagatacatctacatagagata tatatagatacatctacatagagatatatatagatacatctatatagagatatatctacc tatacttcctgtgagttgtgtttctctggagaactaataggtagggtgactgtataattt actctccatactgggacacttttaatcattgctataatacattaattttaatctaatctt -cttatccct ->p:HG002_2_chr20:56280322-56282215 +cttatccc +>p:HG002_2_chr20:56280322-56282214 ggtggacatagtgtatctacagatgggacaatctaagatggttacaacattactacatag tatcgcaaatagaaatggaaagcctgaatctaaccatgagaaaatttaaaataactatca aaaaggtaacatccatatacagatataatatctataacatatatagatatatggatataa @@ -177,8 +177,8 @@ tagagagatatatagatacatctacatagagagatatatagatacatctacatagagata tatatagatacatctacatagagatatatatagatacatctatatagagatatatctacc tatacttcctgtgagttgtgtttctctggagaactaataggtagggtgactgtataattt actctcgatactgggacacttttaatcattgctataatacattaattttaatctaatctt -cttatccct ->ref_chr20:56280322-56282215 +cttatccc +>ref_chr20:56280322-56282214 
ggtggacatagtgtatctacagatgggacaatctaagatggttacaacattactacatag tatcgcaaatagaaatggaaagcctgaatctaaccatgagaaaatttaaaataactatca aaaaggtaacatccatatacagatataatatctataacatatatagatatatggatataa @@ -222,4 +222,4 @@ tagagagatatatagatacatctacatagagagatatatagatacatctacatagagata tatatagatacatctacatagagatatatatagatacatctatatagagatatatctacc tatacttcctgtgagttgtgtttctctggagaactaataggtagggtgactgtataattt actctccatactgggacacttttaatcattgctataatacattaattttaatctaatctt -cttatccct +cttatccc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_dbde05d75f12d66b45769c5ffa50cc28.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_b592bd7606102ee30703461d14728bdd.msa similarity index 93% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_dbde05d75f12d66b45769c5ffa50cc28.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_b592bd7606102ee30703461d14728bdd.msa index 9af4c6cc..82b0e36d 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_dbde05d75f12d66b45769c5ffa50cc28.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_b592bd7606102ee30703461d14728bdd.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:2240825-2241827 +>syndip_1_chr20:2240825-2241826 gtgacccttaggtaaatcgttccaactctctgagcctcagtgttgttatctacaaatggg agaatcaaggcaccctcctacaagcggcggctcatcagctcacttggaaaatgtagtcat cttctcaaagttttctttttctttctttctttttcctttctttctttctttttctttctt @@ -16,8 +16,8 @@ ctataggtgcatgccatcacacccagctaattttttaaaagaaattttgtaaagacaggg gtcttgctatgttccccaggctggtctcaaactcctgggctcaagtgatcctccagcctc agtatcccaaagtgctgggattataggtgtgagtcactgcagccagctcaaatattttct taaatgaatgtaaggttgaggatgcattattataatggctaatttagatctaaaaaatgc -tttgcagtttaccagattcacttatgtacattctc ->syndip_2_chr20:2240825-2241827 +tttgcagtttaccagattcacttatgtacattct +>syndip_2_chr20:2240825-2241826 gtgacccttaggtaaatcgttccaactctctgagcctcagtgttgttatctacaaatggg agaatcaaggcaccctcctacaagcggcggctcatcagctcacttggaaaatgtagtcat cttctcaaagttttctt-------------------tttctttctttctttttctttctt @@ -35,8 +35,8 @@ 
ctataggtgcatgccatcacacccagctaattttttaaaagaaattttgtaaagacaggg gtcttgctatgttccccaggctggtctcaaactcctgggctcaagtgatcctccagcctc agtatcccaaagtgctgggattataggtgtgagtcactgcagccagctcaaatattttct taaatgaatgtaaggttgaggatgcattattataatggctaatttagatctaaaaaatgc -tttgcagtttaccagattcacttatgtacattctc ->p:HG002_1_chr20:2240825-2241827 +tttgcagtttaccagattcacttatgtacattct +>p:HG002_1_chr20:2240825-2241826 gtgacccttaggtaaatcgttccaactctctgagcctcagtgttgttatctacaaatggg agaatcaaggcaccctcctacaagcggcggctcatcagctcacttggaaaatgtagtcat cttctcaaagttttctt-------------------tttctttctttctttttctttctt @@ -54,8 +54,8 @@ ctataggtgcatgccatcacacccagctaattttttaaaagaaattttgtaaagacaggg gtcttgctatgttccccaggctagtctcaaactcctgggctcaagtgatcctccagcctc agtatcccaaagtgctgggattataggtgtgagtcactgcagccagctcaaatattttct taaatgaatgtaaggttgaggatgcattattataatggctaatttagatctaaaaaatgc -tttgcagtttaccagattcacttatgtacattctc ->p:HG002_2_chr20:2240825-2241827 +tttgcagtttaccagattcacttatgtacattct +>p:HG002_2_chr20:2240825-2241826 gtgacccttaggtaaatcgttccaactctctgagcctcagtgttgttatctacaaatggg agaatcaaggcaccctcctacaagcggcggctcatcagctcacttggaaaatgtagtcat cttctcaaagttttctttttctttctttctttttcctttctttctttctttttctttctt @@ -73,8 +73,8 @@ ctataggtgcatgccatcacacccagctaattttttaaaagaaattttgtaaagacaggg gtcttgctatgttccccaggctggtctcaaactcctgggctcaagtgatcctccagcctc agtatcccaaagtgctgggattataggtgtgagtcactgcagccagctcaaatattttct taaatgaatgtaaggttgaggatgcattattataatggctaatttagatctaaaaaatgc -tttgcagtttaccagattcacttatgtacattctc ->ref_chr20:2240825-2241827 +tttgcagtttaccagattcacttatgtacattct +>ref_chr20:2240825-2241826 gtgacccttaggtaaatcgttccaactctctgagcctcagtgttgttatctacaaatggg agaatcaaggcaccctcctacaagcggcggctcatcagctcacttggaaaatgtagtcat cttctcaaagttttctttttctttctttctttttcctttctttctttctttttctttctt @@ -92,4 +92,4 @@ ctataggtgcatgccatcacacccagctaattttttaaaagaaattttgtaaagacaggg gtcttgctatgttccccaggctggtctcaaactcctgggctcaagtgatcctccagcctc agtatcccaaagtgctgggattataggtgtgagtcactgcagccagctcaaatattttct taaatgaatgtaaggttgaggatgcattattataatggctaatttagatctaaaaaatgc 
-tttgcagtttaccagattcacttatgtacattctc +tttgcagtttaccagattcacttatgtacattct diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_cc4f07f5b329711758a6a183602ba892.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_b8963eabf50fbefd5fbf150779abc34d.msa similarity index 93% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_cc4f07f5b329711758a6a183602ba892.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_b8963eabf50fbefd5fbf150779abc34d.msa index a3c838e8..c9333764 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_cc4f07f5b329711758a6a183602ba892.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_b8963eabf50fbefd5fbf150779abc34d.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:57189846-57190641 +>syndip_1_chr20:57189846-57190640 ctgggagctgactggcgagggagatcagctcaggggtgcacagtgctgtgtctgatggtg gcaaggactaggaagaacctaggacagggtggcagggacatgacagtggggcacagtgta cagggcagagtgagccatgggaggttttggggaggaaatgttcctggcagagggaatggc @@ -12,8 +12,8 @@ gtgaggagcttgaaggggtgaggctggagacagtgagcgaggaaccagagggggtgaggc tggagagcgtgagtgaggagcccgagggggtgaggctggagagagtgagtgaggagccca agggggtgaagccgaggtgggtatggggctgggtcacctggggtctcccagcccagtggg ggatttgggcagggggcgacgtggccagggttctgtggtgggagacccattctgaatgcc -gtgtggggaaccaacaaggc ->syndip_2_chr20:57189846-57190641 +gtgtggggaaccaacaagg +>syndip_2_chr20:57189846-57190640 ctgggagctgactggcgagggagatcagctcaggggtgcacagtgctgtgtctgatggtg gcaaggactaggaagaacctaggacagggtggcagggacatgacagtggggcacagtgta cagggcagagtgagccatgggaggttttggggaggaaatgttcctggcagagggaatggc @@ -27,8 +27,8 @@ aaggggtgaggctggagacagtgagcgaggaacca------------------------- tggagagcgtgagtgaggagcccgagggggtgaggctggagagagtgagtgaggagccca agggggtgaagccgaggtgggtatggggctgggtcacctggggtctcccagcccagtggg ggatttgggcagggggcgacgtggccagggttctgtggtgggagacccattctgaatgcc -gtgtggggaaccaacaaggc ->p:HG002_1_chr20:57189846-57190641 +gtgtggggaaccaacaagg +>p:HG002_1_chr20:57189846-57190640 ctgggagctgactggcgagggagatcagctcaggggtgcacagtgctgtgtctgatggtg 
gcaaggactaggaagaacctaggacagggtggcagggacatgacagtggggcacagtgta cagggcagagtgagccatgggaggctttggggaggaaatgttcctggcagagggaatggc @@ -42,8 +42,8 @@ aaggggtgaggctggagacagtgagcgaggaacca------------------------- tggagagcgtgagtgaggagcccgagggggtgaggctggagagagtgagtgaggagccca agggggtgaagccgaggtgggtatggggctgggtcacctggggtctcccagcccagtggg ggatttgggcagggggcgacgtggccagggttctgtggtgggagacccattctgaatgcc -gtgtggggaaccaacaaggc ->p:HG002_2_chr20:57189846-57190641 +gtgtggggaaccaacaagg +>p:HG002_2_chr20:57189846-57190640 ctgggagctgactggcgagggagatcagctcaggggtgcacagtgctgtgtctgatggtg gcaaggactaggaagaacctaggacagggtggcagggacatgacagtggggcacagtgta cagggcagagtgagccatgggaggctttggggaggaaatgttcctggcagagggaatggc @@ -57,8 +57,8 @@ gtgaggagcttgaaggggtgaggctggagacagtgagcgaggaaccagagggggtgaggc tggagagcgtgagtgaggagcccgagggggtgaggctggagagagtgagtgaggagccca agggggtgaagccgaggtgggtatggggctgggtcacctggggtctcccagcccagtggg ggatttgggcagggggcgacgtggccagggttctgtggtgggagacccattctgaatgcc -gtgtggggaaccaacaaggc ->ref_chr20:57189846-57190641 +gtgtggggaaccaacaagg +>ref_chr20:57189846-57190640 ctgggagctgactggcgagggagatcagctcaggggtgcacagtgctgtgtctgatggtg gcaaggactaggaagaacctaggacagggtggcagggacatgacagtggggcacagtgta cagggcagagtgagccatgggaggttttggggaggaaatgttcctggcagagggaatggc @@ -72,4 +72,4 @@ gtgaggagcccgagggggtgaggctggagagcgtgagtgaggagcccgagggggtgaggc tggagagcgtgagtgaggagcccgagggggtgaggctggagagagtgagtgaggagccca agggggtgaagccgaggtgggtatggggctgggtcacctggggtctcccagcccagtggg ggatttgggcagggggcgacgtggccagggttctgtggtgggagacccattctgaatgcc -gtgtggggaaccaacaaggc +gtgtggggaaccaacaagg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_0dad6d58073d73f50143dbc812c12c48.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_bb633e8a0fe19e3e0ef87827050edb6d.msa similarity index 98% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_0dad6d58073d73f50143dbc812c12c48.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_bb633e8a0fe19e3e0ef87827050edb6d.msa index db18f85a..5353344b 100644 --- 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_0dad6d58073d73f50143dbc812c12c48.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_bb633e8a0fe19e3e0ef87827050edb6d.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:64125009-64128075 +>syndip_1_chr20:64125009-64128074 attacaggtgtgcaccaccacacccagctaatttttttgtattttcagtagagatgaggt ttcactgtgttagccaggatggtctcgatttcctgacctcatgatccacccaccttggcc tcccaaagtgctgggattacaggtgtgagccactgcgcccggacacactttgctagtttt @@ -53,8 +53,8 @@ tacactgtcttgattactac---------------------------------------- atagctttatactcaaataccacactgtcttgattactatagctttatactctgttttga ggtcaggtagtgtcagtccctctaacttcatttttcaaagttgttaggggattataggtt ctttgcaaatttctatgcaaattttggaatcagtttgttaatttccagaaaaaagcctct -tgc ->syndip_2_chr20:64125009-64128075 +tg +>syndip_2_chr20:64125009-64128074 attacaggtgtgcaccaccacacccagctaatttttttgtattttcagtagagatgaggt ttcactgtgttagccaggatggtctcgatttcctgacctcatgatccacccaccttggcc tcccaaagtgctgggattacaggtgtgagccactgcgcccggacacactttgctagtttt @@ -109,8 +109,8 @@ cacactgtcttgattactac---------------------------------------- atagctttatactcaaataccacactgtcttgattactatagctttatactctgttttga ggtcaggtagtgtcagtccctctaacttcatttttcaaagttgttaggggattataggtt ctttgcaaatttctatgcaaattttggaatcagtttgttaatttccagaaaaaagcctct -tgc ->p:HG002_1_chr20:64125009-64128075 +tg +>p:HG002_1_chr20:64125009-64128074 attacaggtgtgcaccaccacacccagctaatttttttgtattttcagtagagatgaggt ttcactgtgttagccaggatggtctcgatttcctgacctcatgatccacccaccttggcc tcccaaagtgctgggattacaggtgtgagccactgcgcccggacacactttgctagtttt @@ -165,8 +165,8 @@ cacactgtcttgattactac---------------------------------------- atagctttatactcaaataccacactgtcttgattactatagctttatactctgttttga ggtcaggtagtgtcagtccctctaacttcatttttcaaagttgttaggggattataggtt ctttgcaaatttctatgcaaattttggaatcagtttgttaatttccagaaaaaagcctct -tgc ->p:HG002_2_chr20:64125009-64128075 +tg +>p:HG002_2_chr20:64125009-64128074 attacaggtgtgcaccaccacacccagctaatttttttgtattttcagtagagatgaggt ttcactgtgttagccaggatggtctcgatttcctgacctcatgatccacccaccttggcc 
tcccaaagtgctgggattacaggtgtgagccactgcgcccggacacactttgctagtttt @@ -221,8 +221,8 @@ tacactgtcttgattactac---------------------------------------- atagctttatactcaaataccacactgtcttgattactatagctttatactctgttttga ggtcaggtagtgtcagtccctctaacttcatttttcaaagttgttaggggattataggtt ctttgcaaatttctatgcaaattttggaatcagtttgttaatttccagaaaaaagcctct -tgc ->ref_chr20:64125009-64128075 +tg +>ref_chr20:64125009-64128074 attacaggtgtgcaccaccacacccagctaatttttttgtattttcagtagagatgaggt ttcactgtgttagccaggatggtctcgatttcctgacctcatgatccacccaccttggcc tcccaaagtgctgggattacaggtgtgagccactgcgcccggacacactttgctagtttt @@ -277,4 +277,4 @@ atactacactgtcttgattaccacagctttatactcaaatactacactgtcttgattact atagctttatactcaaataccacactgtcttgattactatagctttatactctgttttga ggtcaggtagtgtcagtccctctaacttcatttttcaaagttgttaggggattataggtt ctttgcaaatttctatgcaaattttggaatcagtttgttaatttccagaaaaaagcctct -tgc +tg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_80443f0aed254f4a2d537e2b754a9a7c.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_bb7fd33627eeebb611fd58a76e4c129f.msa similarity index 88% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_80443f0aed254f4a2d537e2b754a9a7c.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_bb7fd33627eeebb611fd58a76e4c129f.msa index 26e8b92e..92f76260 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_80443f0aed254f4a2d537e2b754a9a7c.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_bb7fd33627eeebb611fd58a76e4c129f.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:61562009-61562353 +>syndip_1_chr20:61562009-61562352 ctcccggagagagagaggggccttcgtgtggagaggtggaccccagggctcccggagaga gagaggggcctccgtgtggagaggtggaccccagggctcccggagagagggacttccgtg tggagaggtggaccccagggctcccggagagagagagggacctccctgagagagagaggg @@ -7,8 +7,8 @@ agaggtggaccccagggctcccagagagagagagggaccttcgtgtggagaggtggaccc cagggctcccggagagagagagggacctccgtgtggagaggtggaccccagggctcccgg agagagagagggacctccgtgtggagaggtggaccccagggctcccg--gagagagggac 
ctccgtgtggagaggtggaccccagggctcccggagagagagagggacctctgtgtggag -aggtggaccccagggctcccggagagagag ->syndip_2_chr20:61562009-61562353 +aggtggaccccagggctcccggagagaga +>syndip_2_chr20:61562009-61562352 ctcccggagagagagaggggccttcgtgtggagaggtggaccccagggctcccggagaga gagaggggcctccgtgtggagaggtggaccccagggctcccggagagagggacctccgtg tggagaggtggaccccagggctcccggagagagagagggacctccc-------------- @@ -17,8 +17,8 @@ tggagaggtggaccccagggctcccggagagagagagggacctccc-------------- -------------------------------tgtggagaggtggaccccagggctcccag agagagagagggaccttcgtgtggagaggtggaccccagggctcccggagagagagggac ctccgtgtggagaggtggaccccagggctcccggagagagagagggacctctgtgtggag -aggtggaccccagggctcccggagagagag ->p:HG002_1_chr20:61562009-61562353 +aggtggaccccagggctcccggagagaga +>p:HG002_1_chr20:61562009-61562352 ctcccggagagagagaggggccttcgtgtggagaggtggaccccagggctcccggagaga gagaggggcctccgtgtggagaggtggaccccagggctcccggagagagggacctccgtg tggagaggtggaccccagggctcccggagagagagagggacctccc-------------- @@ -27,8 +27,8 @@ tggagaggtggaccccagggctcccggagagagagagggacctccc-------------- -------------------------------tgtggagaggtggaccccagggctcccag agagagagagggaccttcgtgtggagaggtggaccccagggctcccggagagagagggac ctccgtgtggagaggtggaccccagggctcccggagagagagagggacctctgtgtggag -aggtggaccccagggctcccggagagagag ->p:HG002_2_chr20:61562009-61562353 +aggtggaccccagggctcccggagagaga +>p:HG002_2_chr20:61562009-61562352 ctcccggagagagagaggggccttcgtgtggagaggtggaccccagggctcccggagaga gagaggggcctccgtgtggagaggtggaccccagggctcccggagagagggacctccgtg tggagaggtggaccccagggctcccggagagagagagggacctccctgagagagagaggg @@ -37,8 +37,8 @@ agaggtggaccccagggctcccagagagagagagggaccttcgtgtggagaggtggaccc cagggctcccggagagagagagggacctccgtgtggagaggtggaccccagggctcccag agagagagagggaccttcgtgtggagaggtggaccccagggctcccggagagagagggac ctccgtgtggagaggtggaccccagggctcccggagagagagagggacctctgtgtggag -aggtggaccccagggctcccggagagagag ->ref_chr20:61562009-61562353 +aggtggaccccagggctcccggagagaga +>ref_chr20:61562009-61562352 ctcccggagagagagaggggccttcgtgtggagaggtggaccccagggctcccggagaga 
gagaggggcctccgtgtggagaggtggaccccagggctcccggagagagggacctccgtg tggagaggtggaccccagggctcccggagagagagagggacctccc-------------- @@ -47,4 +47,4 @@ tggagaggtggaccccagggctcccggagagagagagggacctccc-------------- -------------------------------tgtggagaggtggaccccagggctcccag agagagagagggaccttcgtgtggagaggtggaccccagggctcccggagagagagggac ctccgtgtggagaggtggaccccagggctcccggagagagagagggacctctgtgtggag -aggtggaccccagggctcccggagagagag +aggtggaccccagggctcccggagagaga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_bbc70dbaa5d67ccdda8d60a17b075dca.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_bbc70dbaa5d67ccdda8d60a17b075dca.msa new file mode 100644 index 00000000..94768ca5 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_bbc70dbaa5d67ccdda8d60a17b075dca.msa @@ -0,0 +1,295 @@ +>syndip_1_chr20:62057473-62059239 +tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag +atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga +cacaaagggcccagcaagacctcaggaaggacggacacaggtctacacaccaggccacag +acaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctac +acaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacg +gacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaaga +cctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcgggggcc +aacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacagacaatg +gggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacacca +ggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacaca +ggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcag +gaaggacggacacaggcctacacaccaggccacagacaatggggctcgggggccaacctc +agcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctc +gggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccaca +gacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggccta +cacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggac +ggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaag +acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggg 
+ccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaa +tggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacacc +aggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacac +aggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctca +ggaaggacggacacaggcctacacaccaggccacagacaatggggctcggggggccaacc +tcagcaagacctcaggaaggacggacacaggcctacacaccaggccacagacaatggggc +tcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggcca +cagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtct +acacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaagga +cggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaag +acctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcgggggg +ccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacagacaa +tggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacac +caggccacagacagtggggctcggggggccaacctcagcaagacctcaggaaggacggac +acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc +aggaaggacggacacaggtctacacaccaggccacagacaatggggctcggggggccaac +ctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacagacaatgggg +ctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggc +cacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggt +ctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcaggaa +ggacggacacaggcctacacaccaggccacagacaatggggctcggggggccaacctcag +caagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgg +gggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacaga +caatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctaca +caccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacgg +acacaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacc +tcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaa +cctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggg +gctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccagg +ccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacag +gtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcagga +aggacg------------------------------------------------------ 
+-------------------gacacaggtctacacaccaggccacagacaatggggctcgg +gggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacaga +caatggggctc------------------------------------------------- +------------------------ggggggccaacctctccagtccccaaacacagtccc +caacagacaacttctcctgtccccacatgcggctcccaacaaacaatttctcctgccctc +acacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaagct +gggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaaggca +ggcctggagaagagcctgggaccaccgcggactgac +>syndip_2_chr20:62057473-62059239 +tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag +atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga +cacaaagggcccagcaagacctcaggaaggacggacacaggtctacacaccaggccacag +acaatggggctc------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------------ggggg +ccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaa +tggggctcgggggccaacctca-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------------------gcaagacc +tcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaa +cctcagcaagacctcaggaaggacggacacaggt-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +--ctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcagga +aggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcag +caagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgg +gggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacaga +caatggggctc------------------------------------------------- +-------------------------gggggccaacctctccagtccccaaacacagtccc +caacagacaacttctcctgtcctcacatgcggctcccaacaaacaatttctcctgccctc +acacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaagct 
+gggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaaggca +ggcctggagaagagcctgggaccaccgcggactgac +>p:HG002_1_chr20:62057473-62059239 +tccaagatccatgtaccccaacaccaccctacctggccacgctcctctcacaactttgag +atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga +cacaaagggcccagcaagacctcaggaaggacggacacaggtctacacaccaggccacag +acaatggggctc------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------------ggggg +ccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaa +tggggctcgggggccaacctca-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------------------------gcaagacc +tcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaa +cctcagcaagacctcaggaaggacggacacaggt-------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +--ctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcagga +aggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcag +caagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgg +gggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacaga +caatggggctc------------------------------------------------- +-------------------------gggggccaacctctccagtccccaaacacagtccc +caacagacaacttctcctgtcctcacatgcggctcccaacaaacaatttctcctgccctc +acacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaagct +gggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaaggca +ggcctggagaagagcctgggaccaccgcggactgac +>p:HG002_2_chr20:62057473-62059239 +tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag +atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga +cacaaagggcccagcaagacctcaggaaggacggacacaggtctacacaccaggccacag +acaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctac 
+acaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacg +gacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaaga +cctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcgggggcc +aacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacagacaatg +gggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacacca +ggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacaca +ggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcag +gaaggacggacacaggcctacacaccaggccacagacaatggggctcgggggccaacctc +agcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctc +gggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccaca +gacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggccta +cacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggac +ggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaag +acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggg +ccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaa +tggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacacc +aggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacac +aggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctca +ggaaggacggacacaggcctacacaccaggccacagacaatggggctcggggggccaacc +tcagcaagacctcaggaaggacggacacaggcctacacaccaggccacagacaatggggc +tcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggcca +cagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtct +acacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaagga +cggacacaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaag +acctcaggaaggacggacacaggcctacacaccaggccacagacaatggggctcgggggg +ccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacagacaa +tggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacac +caggccacagacagtggggctcggggggccaacctcagcaagacctcaggaaggacggac +acaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctc +aggaaggacggacacaggtctacacaccaggccacagacaatggggctcggggggccaac +ctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacagacaatgggg +ctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggc 
+cacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggt +ctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcaggaa +ggacggacacaggcctacacaccaggccacagacaatggggctcggggggccaacctcag +caagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgg +gggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacaga +caatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctaca +caccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacgg +acacaggtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacc +tcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaa +cctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggg +gctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccagg +ccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacag +gtctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcagga +aggacg------------------------------------------------------ +-------------------gacacaggtctacacaccaggccacagacaatggggctcgg +gggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacaga +caatggggctc------------------------------------------------- +------------------------ggggggccaacctctccagtccccaaacacagtccc +caacagacaacttctcctgtccccacatgcggctcccaacaaacaatttctcctgccctc +acacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaagct +gggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaaggca +ggcctggagaagagcctgggaccaccgcggactgac +>ref_chr20:62057473-62059239 +tccaagatccatgtaccccaacaccaccctacctggctacgctcctctcacaactttgag +atcaagatgccacgtgtgccttcctcacatcggcgctttccagtgatgcccagtggggga +cacaaagggcccagcaagacctcaggaaggacggacacaggcctacacaccaggccacag +acaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctac +acaccaggccacagacaatggggctcg--------------------------------- +------------------------------------------------------------ +-----------------------------------------------------gggggcc +aacctcagcaagacctcaggaaggacggacacaggcctacacaccaggccacagacaatg +gggctcggggggccaacctcagcaagacctcaggaaggacggacacaggtctacacacca +ggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggacggacaca 
+ggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaagacctcag +gaaggacggacacaggcctacacaccaggccacagacaatggggctcgggggccaacctc +agcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctc +gggggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccaca +gacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacaggccta +cacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcaggaaggac +ggacacaggtctacacaccaggccacagacaatggggctcggggggccaacctcagcaag +acctcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggg +ccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaa +tggggctcgggggccaacctca-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +----------------------------------------------------gcaagacc +tcaggaaggacggacacaggtctacacaccaggccacagacaatggggctcgggggccaa +cctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacagacaatggg +gctcggggggccaacctcagcaagacctcaggaaggacggacacaggcctacacaccagg +ccacagacaatggggctcggggggccaacctcagcaagacctcaggaaggacggacacag +gcctacacaccaggccacagacaatggggctcgggggccaacctcagcaagacctcagga +aggacg------------------------------------------------------ +-------------------gacacaggtctacacaccaggccacagacaatggggctcgg +gggccaacctcagcaagacctcaggaaggacggacacaggtctacacaccaggccacaga +caatggggctcgggggccaacctcagcaagacctcaggaaggacggacacaggtctacac +accaggccacagacaatggggctcggggggccaacctctccagtccccaaacacagtccc +caacagacaacttctcctgtccccacatgcggctcccaacaaacaatttctcctgccctc +acacgcggttcccaacgacaacttctgctccacaacatgacacccacggtgggagaagct +gggggcacttcactacatcatcctgcacactgcagcatggcctgctccaggcagaaggca +ggcctggagaagagcctgggaccaccgcggactgac diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_b1f9512148e344cd8be442d0b872be80.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_bd402172ece6674cf9fbe88768975d74.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_b1f9512148e344cd8be442d0b872be80.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_bd402172ece6674cf9fbe88768975d74.msa index 359a8d58..32620fa1 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_b1f9512148e344cd8be442d0b872be80.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_bd402172ece6674cf9fbe88768975d74.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:64173309-64176630 +>syndip_1_chr20:64173309-64176629 cagaggtgggctgttggacaggtgcggctgatagagtgggtgtcttttgtgggcttctcc ccagcctgtggcctggcctagtcagtagagatggtcaggggagcaggagggagcccatac cccagtgcctgtagttgtgtccatttcccctccctgacttctcctcctgcagcatccttc @@ -68,8 +68,8 @@ gtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgt 
-------------------ccatttcccctccctggcttctcctgcagcatctttccctg tagttgtgtccatttcccctccctggcttctcctcttctagcatctttcctgctccctgg tagcctcttgggagccacctatttctggaggtccccactctgggttccttgtcctgggtg -tggggcgaatgtgctggactggggtcacagcattgaaccccacttggagctgagga ->syndip_2_chr20:64173309-64176630 +tggggcgaatgtgctggactggggtcacagcattgaaccccacttggagctgagg +>syndip_2_chr20:64173309-64176629 cagaggtgggctgttggacaggtgcggctgatagagtgggtgtcttttgtgggcttctcc ccagcctgtggcctggcctagtcagtagagatggtcaggggagcaggagggagcccatac cccagtgcctgtagttgtgtccatttcccctccctgacttctcctcctgcagcatccttc @@ -139,8 +139,8 @@ gtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgt -------------------ccatttcccctccctggcttctcctgcagcatctttccctg tagttgtgtccatttcccctccctggcttctcctcttctagcatctttcctgctccctgg tagcctcttgggagccacctatttctggaggtccccactctgggttccttgtcctgggtg -tggggcgaatgtgctggactggggtcacagcattgaaccccacttggagctgagga ->p:HG002_1_chr20:64173309-64176630 +tggggcgaatgtgctggactggggtcacagcattgaaccccacttggagctgagg +>p:HG002_1_chr20:64173309-64176629 cagaggtgggctgttggacaggtgcggctgatagagtgggtgtcttttgtgggcttctcc ccagcctgtggcctggcctagtcagtagagatggtcaggggagcaggagggagcccatac cccagtgcctgtagttgtgtccatttcccctccctgacttctcctcctgcagcatccttc @@ -210,8 +210,8 @@ gtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgt -------------------ccatttcccctccctggcttctcctgcagcatctttccctg tagttgtgtccatttcccctccctggcttctcctcttctagcatctttcctgctccctgg tagcctcttgggagccacctatttctggaggtccccactctgggttccttgtcctgggtg -tggggcgaatgtgctggactggggtcacagcattgaaccccacttggagctgagga ->p:HG002_2_chr20:64173309-64176630 +tggggcgaatgtgctggactggggtcacagcattgaaccccacttggagctgagg +>p:HG002_2_chr20:64173309-64176629 cagaggtgggctgttggacaggtgcggctgatagagtgggtgtcttttgtgggcttctcc ccagcctgtggcctggcctagtcagtagagatggtcaggggagcaggagggagcccatac cccagtgcctgtagttgtgtccatttcccctccctgacttctcctcctgcagcatccttc @@ -281,8 +281,8 @@ gtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctgtagttgtgt -------------------ccatttcccctccctggcttctcctgcagcatctttccctg 
tagttgtgtccatttcccctccctggcttctcctcttctagcatctttcctgctccctgg tagcctcttgggagccacctatttctggaggtccccactctgggttccttgtcctgggtg -tggggcgaatgtgctggactggggtcacagcattgaaccccacttggagctgagga ->ref_chr20:64173309-64176630 +tggggcgaatgtgctggactggggtcacagcattgaaccccacttggagctgagg +>ref_chr20:64173309-64176629 cagaggtgggctgttggacaggtgcggctgatagagtgggtgtcttttgtgggcttctcc ccagcctgtggcctggcctagtcagtagagatggtcaggggagcaggagggagcccatac cccagtgcctgtagttgtgtccatttcccctccctgacttctcctcctgcagcatccttc @@ -352,4 +352,4 @@ tcctgcagcatctttccctgtagttgtgtccatttcccctccctggcttctcctgcagca tctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctg tagttgtgtccatttcccctccctggcttctcctcttctagcatctttcctgctccctgg tagcctcttgggagccacctatttctggaggtccccactctgggttccttgtcctgggtg -tggggcgaatgtgctggactggggtcacagcattgaaccccacttggagctgagga +tggggcgaatgtgctggactggggtcacagcattgaaccccacttggagctgagg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_16e133fb197adcc9bd932887b742868b.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_c00504a27ac55beec570218a4eedf56f.msa similarity index 83% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_16e133fb197adcc9bd932887b742868b.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_c00504a27ac55beec570218a4eedf56f.msa index cb9e5d5b..9a20f373 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_16e133fb197adcc9bd932887b742868b.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_c00504a27ac55beec570218a4eedf56f.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:60702842-60703216 +>syndip_1_chr20:60702842-60703215 tatggtaactgcatgttgactcttttgaggagcttccagactgtttttcaaagtgatggc acaattttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaac atttgtcattgtctttctttcttttctttctctctctctctttcttttccttctttcttt @@ -6,8 +6,8 @@ ctttctttctttctttctttctttctttctttctttctttctttcttttctttccttctt ctttctttctttctttctttctttctttctttctttctttctttctttctttctttcttt ctttctttctttctttcttctttctttctttttctttcttttttttttgggatggagtct 
cgctctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaatctccacct -cccaggttcaagcgattgaactcactgcctcagcctcccaagtagctg ->syndip_2_chr20:60702842-60703216 +cccaggttcaagcgattgaactcactgcctcagcctcccaagtagct +>syndip_2_chr20:60702842-60703215 tatggtaactgcatgttgactcttttgaggagcttccagactgtttttcaaagtgatggc acaattttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaac atttgtcattgtctttctttcttttctttctctctctctctctctttcttttccttcttt @@ -15,8 +15,8 @@ ctttctttctttctttctttcttcctttctttctttctttctttctttctttctttcttt ctttct---tttctttccttctttctttctttctttc----------------------- ---------------------------------------tttttttttgggatggagtct cgctctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaatctccacct -cccaggttcaagcgattgaactcactgcctcagcctcccaagtagctg ->p:HG002_1_chr20:60702842-60703216 +cccaggttcaagcgattgaactcactgcctcagcctcccaagtagct +>p:HG002_1_chr20:60702842-60703215 tatggtaactgcatgttgactcttttgaggagcttccagactgtttttcaaagtgatggc acaattttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaac atttgtcattgtctttctttcttttctttctctctctctctctctttcttttccttcttt @@ -24,8 +24,8 @@ ctttctttctttctttctttcttcctttctttctttctttctttctttctttctttcttt ctttct---tttctttccttctttctttctttctttc----------------------- ---------------------------------------tttttttttgggatggagtct cgctctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaacctccacct -cccaggttcaagcgattgaactcactgcctcagcctcccaagtagctg ->p:HG002_2_chr20:60702842-60703216 +cccaggttcaagcgattgaactcactgcctcagcctcccaagtagct +>p:HG002_2_chr20:60702842-60703215 tatggtaactgcatgttgactcttttgaggagcttccagactgtttttcaaagtgatggc acaattttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaac atttgtcattgtctttctttcttttctttctctctctctctttcttttccttctttcttt @@ -33,8 +33,8 @@ ctttctttctttctttctttctttctttctttctttctttctttcttttctttccttctt ctttctttctttctttctttctttctttctttctttctttctttctttctttctttcttt ctttctttctttctttcttctttctttctttttctttcttttttttttgggatggagtct cgctctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaacctccacct -cccaggttcaagcgattgaactcactgcctcagcctcccaagtagctg ->ref_chr20:60702842-60703216 
+cccaggttcaagcgattgaactcactgcctcagcctcccaagtagct +>ref_chr20:60702842-60703215 tatggtaactgcatgttgactcttttgaggagcttccagactgtttttcaaagtgatggc acaattttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaac atttgtcattgtctttctttcttttctttctctctctctctttct--------------- @@ -42,4 +42,4 @@ atttgtcattgtctttctttcttttctttctctctctctctttct--------------- cttttc---tttccttctttctttctttctttctttc----------------------- ---------------------------------------tttttttttgggatggagtct cgctctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaatctccacct -cccaggttcaagcgattgaactcactgcctcagcctcccaagtagctg +cccaggttcaagcgattgaactcactgcctcagcctcccaagtagct diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_751763f0a26e66cde66421e24b5f03e2.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_c09854a1826e6b88567130408713a7f9.msa similarity index 84% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_751763f0a26e66cde66421e24b5f03e2.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_c09854a1826e6b88567130408713a7f9.msa index 2be47406..c0ef88d2 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_751763f0a26e66cde66421e24b5f03e2.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_c09854a1826e6b88567130408713a7f9.msa @@ -1,15 +1,15 @@ ->syndip_1_chr20:24681888-24682378 +>syndip_1_chr20:24681888-24682377 ctctgctgctggggtgtactcgatgtcctggctcgactgacacagtcctgatcttgccag ttggtgtctgtggacctgtgttttggcatggacaggcacaaggctaaggaggtcagcatg cagcaggaggcaaactgtgaagtacctctctggc-------------------------- ---------------------------------ctggcctttgtgaattaccacagtgc-- ---------------------------------------------------------gggg +------------------------------------------------------------ +------------------------------ctggcctttgtgaattaccacagtgcgggg tccacccaggaaactgtgaagtacctctctgggcgttgtgaattaccacagttaggggtt tacccaggaaactgtgaagtatctctctggcctttgtgaatcaccacagttaggggtcta tgcaggagtactggtcattcacagactcagctcattccaggccttagcagtgtgcacctc 
agagtgaagcagagaggccccaggagaagccatggccacgccctcaacctcctgctccga -ggaggagac ->syndip_2_chr20:24681888-24682378 +ggaggaga +>syndip_2_chr20:24681888-24682377 ctctgctgctggggtgtactcgatgtcctggctcgactgacacagtcctgatcttgccag ttggtgtctgtggacctgtgttttggcatggacaggcacaaggctaaggaggtcagcatg cagcaggaggcaaactgtgaagtacctctctggcctggcctttgtgaattaccacagtca @@ -19,8 +19,8 @@ tccacccaggaaactgtgaagtacctctctgggcgttgtgaattaccacagttaggggtt tacccaggaaactgtgaagtatctctctggcctttgtgaatcaccacagttaggggtcta tgcaggagtactggtcattcacagactcagctcattccaggccttagcagtgtgcacctc agagtgaagcagagaggccccaggagaagccatggccacgccctcaacctcctgctccga -ggaggagac ->p:HG002_1_chr20:24681888-24682378 +ggaggaga +>p:HG002_1_chr20:24681888-24682377 ctctgctgctggggtgtactcgatgtcctggctcgactgacacagtcctgatcttgccag ttggtgtctgtggacctgtgttttggcatggacaggcacaaggctaaggaggtcagcatg cagcaggaggcaaactgtgaagtacctctctggcctggcctttgtgaattaccacagtca @@ -30,19 +30,19 @@ tccacccaggaaactgtgaagtacctctctgggcgttgtgaattaccacagttaggggtt tacccaggaaactgtgaagtatctctctggcctttgtgaatcaccacagttaggggtcta tgcaggagtactggtcattcacagactcagctcattccaggccttagcagtgtgcacctc agagtgaagcagagaggccccaggagaagccatggccacgccctcaacctcctgctccga -ggaggagac ->p:HG002_2_chr20:24681888-24682378 +ggaggaga +>p:HG002_2_chr20:24681888-24682377 ctctgctgctggggtgtactcgatgtcctggctcgactgacacagtcctgatcttgccag ttggtgtctgtggacctgtgttttggcatggacaggcacaaggctaaggaggtcagcatg cagcaggaggcaaactgtgaagtacctctctggc-------------------------- ---------------------------------ctggcctttgtgaattaccacagtgc-- ---------------------------------------------------------gggg +------------------------------------------------------------ +------------------------------ctggcctttgtgaattaccacagtgcgggg tccacccaggaaactgtgaagtacctctctgggcgttgtgaattaccacagttaggggtt tacccaggaaactgtgaagtatctctctggcctttgtgaatcaccacagttaggggtcta tgcaggagtactggtcattcacagactcagctcattccaggccttagcagtgtgcacctc agagtgaagcagagaggccccaggagaagccatggccacgccctcaacctcctgctctga -ggaggagac ->ref_chr20:24681888-24682378 +ggaggaga +>ref_chr20:24681888-24682377 
ctctgctgctggggtgtactcgatgtcctggctcgactgacacagtcctgatcttgccag ttggtgtctgtggacctgtgttttggcatggacaggcacaaggctaaggaggtcagcatg cagcaggaggcaaactgtgaagtacctctctggc-------------------------- @@ -52,4 +52,4 @@ tccacccaggaaactgtgaagtacctctctgggcgttgtgaattaccacagttaggggtt tacccaggaaactgtgaagtatctctctggcctttgtgaatcaccacagttaggggtcta tgcaggagtactggtcattcacagactcagctcattccaggccttagcagtgtgcacctc agagtgaagcagagaggccccaggagaagccatggccacgccctcaacctcctgctccga -ggaggagac +ggaggaga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8f54b846b732cff5d58c80e0ddff85e7.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_c3ca784845992e7f185ad2e6a992de0e.msa similarity index 87% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_8f54b846b732cff5d58c80e0ddff85e7.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_c3ca784845992e7f185ad2e6a992de0e.msa index 209611ec..24a437db 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_8f54b846b732cff5d58c80e0ddff85e7.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_c3ca784845992e7f185ad2e6a992de0e.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:57948901-57949447 +>syndip_1_chr20:57948901-57949446 gatggatggatggatagtggatagatggatgaatagtggatggataaatggattaatgga tggatgaatggatgatggatggatgaatgagatggatggatt----------------ga tggatggatggatggatggatggatggaagaatggatgatggatggatga---------- @@ -7,9 +7,9 @@ tggatggatggatggatggatggatggaagaatggatgatggatggatga---------- agatggatgaatagtggatggataaatggattaatggatggatgaatggatgatggatgg atgaatgagatggatggattgatggatggatggatggatggaagaatggatgatggatgg atgaatgagatgggtggattgatggatggatggatggatggatggatggatggatggatg -g----------------------------------------------------------- -------------- ->syndip_2_chr20:57948901-57949447 +------------------------------------------------------------ +------------ +>syndip_2_chr20:57948901-57949446 gatggatggatggatagtggatagatggatgaatagtggatggataaatggattaatgga tggatgaatggatgatggatggatgaatgagatggatggattgatggatggatggatgga 
tggatggatggatggatggatggatggaagaatggatgatggatggatgagatggatgga @@ -19,8 +19,8 @@ agatggatgaatagtgaatggataaatggattaatggatggatggatagtggatagatgg atgaat------agtggatggataaatggattaatggatggatgaatggatgatggatgg atgaatgagatggatggattgatggatggatggatggatggatggatggatggaagaatg gatgatggatggatgaatgagatgggtggattgatggatggatggatggatggatggatg -gatggatggatgg ->p:HG002_1_chr20:57948901-57949447 +gatggatggatg +>p:HG002_1_chr20:57948901-57949446 gatggatggatggatagtggatagatggatgaatagtggatggataaatggattaatgga tggatgaatggatgatggatggatgaatgagatggatggattgatggatggatggatgga tggatggatggatggatggatggatggaagaatggatgatggatggatgagatggatgga @@ -30,8 +30,8 @@ agatggatgaatagtgaatggataaatggattaatggatggatggatagtggatagatgg atgaat------agtggatggataaatggattaatggatggatgaatggatgatggatgg atgaatgagatggatggattgatggatggatggatggatggatggatggatggaagaatg gatgatggatggatgaatgagatgggtggattgatggatggatggatggatggatggatg -gatggatggatgg ->p:HG002_2_chr20:57948901-57949447 +gatggatggatg +>p:HG002_2_chr20:57948901-57949446 gatggatggatggatagtggatagatggatgaatagtggatggataaatggattaatgga tggatgaatggatgatggatggatgaatgagatggatggattgatggatggatggatgga tggatggatggatggatggatggatggaaga----------------------------- @@ -40,9 +40,9 @@ tggatggatggatggatggggggttggatggatggatggatggatggatggatagtggat agatggatgaatagtgaatggataaatggattaatggatggatggatagtggatagatg- -tgatg------gatggatggatggatggatggatggatggaagaatggatgatggatgg atgaatgagatgggtggattgatggatggatggatggatggatggatggatggatggatg -g----------------------------------------------------------- -------------- ->ref_chr20:57948901-57949447 +------------------------------------------------------------ +------------ +>ref_chr20:57948901-57949446 gatggatggatggatagtggatagatggatgaatagtggatggataaatggattaatgga tggatgaatggatgatggatggatgaatgagatggatggattgatggatggatggatgga tggatggatggatggatggatggatggaagaatggatgatggatggatgagatggatgga @@ -52,4 +52,4 @@ agatggatgaatagtgaatggataaatggattaatggatggatggatagtggatagatgg atgaat------agtggatggataaatggattaatggatggatgaatggatgatggatgg 
atgaatgagatggatggattgatggatggatggatggatggatggatggatggaagaatg gatgatggatggatgaatgagatgggtggattgatggatggatggatggatggatggatg -gatggatggatgg +gatggatggatg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_34fd267f3c76809107dec2452a21e62a.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_c50e1186af8d764e9f4a7ebc3e6ca294.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_34fd267f3c76809107dec2452a21e62a.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_c50e1186af8d764e9f4a7ebc3e6ca294.msa index b106611d..2b643f53 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_34fd267f3c76809107dec2452a21e62a.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_c50e1186af8d764e9f4a7ebc3e6ca294.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:20320239-20320620 +>syndip_1_chr20:20320239-20320619 ggaaatttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcag taaagatggtatttcagatatatagatatatatatattatatatatgtaatatatataat atatgtaatatatattatatatatgtaatatatataatatatgtaatatattatatatat @@ -26,8 +26,8 @@ ttatatatgtaatatatattatatatattatatatgtaatatatattatatatattatat atgtaatatatattatatatattatatatgtaatatatattatatatattatatatgtaa tatatattatatatattatatatgtaatatatattatatatattatatatgtaatatata ttatatatattatatatgtaatatatatatatttaaaaacagaaccattatcttttagag -atacatactgaagtgtctggagacatgcttcaagataacccag ->syndip_2_chr20:20320239-20320620 +atacatactgaagtgtctggagacatgcttcaagataaccca +>syndip_2_chr20:20320239-20320619 ggaaatttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcag taaagatggtatttcagatatatagatatatatatattatatatatgtaatatatataat atatgtaatatata---------------------------ttatatatatgtaatatat @@ -55,8 +55,8 @@ tatatatatttaaaaacagaaccattatcttttagagatac------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----atactgaagtgtctggagacatgcttcaagataacccag 
->p:HG002_1_chr20:20320239-20320620 +----atactgaagtgtctggagacatgcttcaagataaccca +>p:HG002_1_chr20:20320239-20320619 ggaaatttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcag taaagatggtatttcagatatatagatatatatatattatatatatgtaatatatataat atatgtaatatata---------------------------ttatatatatgtaatatat @@ -84,8 +84,8 @@ tatatatatttaaaaacagaaccattatcttttagagatac------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----atactgaagtgtctggagacatgcttcaagataacccag ->p:HG002_2_chr20:20320239-20320620 +----atactgaagtgtctggagacatgcttcaagataaccca +>p:HG002_2_chr20:20320239-20320619 ggaaatttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcag taaagatggtatttcagatatatagatatatatatat--tatatatgtaatatatataat atatgtaatatgta--atatatataatatatgtaatatatattatatatatgtaatatat @@ -113,8 +113,8 @@ ttatatatgtaatatatattatatatattatatatgtaatatatattatatatattatat atgtaatatatattatatatattatatatgtaatatatattatatatattatatatgtaa tatatattatatatattatatatgtaatatatatgtaatatatattatatatgtaatata taatatatattatatatgtaatatatatatatttaaaaacagaaccattatcttttagag -atacatactgaagtgtctggagacatgcttcaagataacccag ->ref_chr20:20320239-20320620 +atacatactgaagtgtctggagacatgcttcaagataaccca +>ref_chr20:20320239-20320619 ggaaatttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcag taaagatggtatttcagatatatagatatatatatattatatatatgtaatatatataat atatgtaatatata---------------------------ttatatatatgtaatatat @@ -142,4 +142,4 @@ tatatatatttaaaaacagaaccattatcttttagagatac------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -----atactgaagtgtctggagacatgcttcaagataacccag +----atactgaagtgtctggagacatgcttcaagataaccca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_26bf1dadf04f074d20f37e2e9c8a8e4f.msa 
b/repo_utils/test_files/external/fake_mafft/lookup/fm_c84764c85e3bc2208c93471f25974884.msa similarity index 99% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_26bf1dadf04f074d20f37e2e9c8a8e4f.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_c84764c85e3bc2208c93471f25974884.msa index ce45ee3c..9df36af2 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_26bf1dadf04f074d20f37e2e9c8a8e4f.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_c84764c85e3bc2208c93471f25974884.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63693349-63693833 +>syndip_1_chr20:63693349-63693832 ggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccctatgg gagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccaccttcacca ccaccacctccaccaccacctccaccacctccacctccaccacctccaccacctccacca @@ -125,8 +125,8 @@ ccaccaccacctccaccaccacctccaccaccaccaccacctccaccaccaccacctcca ccaccacctccaccaccaccacctccaccaccacctccaccaccacctccacctccacca ccaccacctgcaccaccacctccacctccaccaccaccaccacctccacctccaccagca gcagcatcacttgttggggagaccctgtgcaactccatgcacagccctgtccctgccata -gc ->syndip_2_chr20:63693349-63693833 +g +>syndip_2_chr20:63693349-63693832 ggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccctatgg gagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccac---cacct ccacctcca---ccacctccacctccacctccaccaccacctccacctccacctccacct @@ -253,8 +253,8 @@ ccacctccacctccaccacctccacca---------------ccaccaccaccaccacct ccaccttcaccaccaccaccacctccaccacctccaccaccacctccaccaccacctcca ccaccacctcctccacctccaccacctccaccaccaccaccacctccacctccaccagca gcagcatcacttgttggggagaccctgtgcaactccatgcacagccctgtccctgccata -gc ->p:HG002_1_chr20:63693349-63693833 +g +>p:HG002_1_chr20:63693349-63693832 ggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccctatgg gagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacctccacct ccacctccacctccaccaccacctccacctccaccaccacctcctccaccaccaccacct @@ -381,8 +381,8 @@ ccacctccacctccaccaccacctcca---------------ccaccaccacctccacct ccaccaccacctccaccaccaccaccaccaccaccaccaccaccaccaccacctccacca 
ccaccacctgcaccaccacctccacctccaccaccaccaccacctccacctccaccagca gcagcatcacttgttggggagaccctgtgcaactccatgcacagccctgtccctgccata -gc ->p:HG002_2_chr20:63693349-63693833 +g +>p:HG002_2_chr20:63693349-63693832 ggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccctatgg gagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacctccacct ccacctccacctccaccaccacctccacctccaccaccacctcctccaccaccaccacct @@ -509,8 +509,8 @@ ccaccaccacctccaccaccacctccaccaccaccaccacctccaccaccacctccacct ccaccaccacctccaccaccaccaccaccaccaccaccaccaccaccaccacctccacca ccaccacctgcaccaccacctccacctccaccaccaccaccacctccacctccaccagca gcagcatcacttgttggggagaccctgtgcaactccatgcacagccctgtccctgccata -gc ->ref_chr20:63693349-63693833 +g +>ref_chr20:63693349-63693832 ggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccctatgg gagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacctccacct ccacctccacctccaccaccacctccacctccaccaccacctcctccaccacc------- @@ -637,4 +637,4 @@ ccaccacctcca------------------------------------------------ ---------------ccaccaccaccaccaccaccaccaccaccaccaccacctccacca ccaccacctgcaccaccacctccacctccaccaccaccaccacctccacctccaccagca gcagcatcacttgttggggagaccctgtgcaactccatgcacagccctgtccctgccata -gc +g diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_95ddd1fba647caa20dd06cdf75db78e8.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_c8767647c15df9e79d6194de26f7cdc5.msa similarity index 52% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_95ddd1fba647caa20dd06cdf75db78e8.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_c8767647c15df9e79d6194de26f7cdc5.msa index 1f228f20..9635edad 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_95ddd1fba647caa20dd06cdf75db78e8.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_c8767647c15df9e79d6194de26f7cdc5.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:20354470-20358424 +>syndip_1_chr20:20354470-20358423 tccagtctgggtgacagagagagaccttgtctcaaaaaaaaaaaaaaaaaaatcaggtac 
agaaaggtaaatactgcacgatctcacttacatgtggaatctaaagaagttgaattcata agggtaatgaggggaggaaggggtggtcatactgtgtgaagggagatggtcacactgagg @@ -40,60 +40,61 @@ tcacactgtgaagggtggtagtcacactgagaggaggtgttcacactgaggggaagtggt cacactgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtc atagtgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtca cagtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggagatggtc -acactgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtca -tagtgtgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcac -actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcaca-- -ctgaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcac -actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacactggggaggtggtcacactgtg -aggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcacactga -ggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgtgag -gggaggtggtcatactgtgagtgaggaggtagtcacac--tgaggggagggggtcatact +acactgaggggaggtggtcacagtgtgaggggaggtggtcacac---------------- +------------------------tgaggggaggtggtcatagtgtgaggggaggtggtc +acactggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcaca +ctgaggggaagtggtcacactgaggggaggtggtcaca--ctgaggggaggtggtcatag +tgtgaggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcaca +ctgaggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcacactgt +gaggggaggtggtcacactgagaggaggtggtcacactgaggggaagtggtcacactgag +gggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcatactgtga +gtgaggaggtagtcacac--tgaggggagggggtcatactgcgagtggaggtagtcacac +tgtgaagggtggtagtcacactgagaggaggtggtcacactgaggggaagtggtcacact +gaggggaggtggtcatactg---------------------------------------- +tgaggggaagtggtcacactgaggggaggtggtcatagtgtga--ggggaggtggtcaca +ctgaggggaggtggtcacagtgtgaggggaggtggtcacac--tgaggggaggtggtcac +actgaggggaggtggtcacagtgtgagaggaggtggtcacactgaggggaggtggtcaca +gtgtgaggggagatggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac +actgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcaca 
+ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag +tgtgaggggagatggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca +ctgaggggaggtggtcaca-------------------gtgtgaggggaggtggtcacac +tgaggggaggtggtcacactgagaggaggtggtcacactgaggggaggtggtcatactgt +gagtgaggaggtagtcacactgaggggaagtggtcacactgaggggaggtg--------- +---------------------------------gtcacactgaggggagggggtcatact gcgagtggaggtagtcacactgtgaagggtggtagtcacactgagaggaggtggtcacac -tgaggggaagtggtcacactgaggggaggtggtcatactg-------------------- ---------------------tgaggggaagtggtcacactgaggggaggtggtcatagtg -tga--ggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca -c--tgaggggaggtggtcacactgaggggaggtggtcacagtgtgagaggaggtggtcac -actgaggggaggtggtcacagtgtgaggggagatggtcacactgaggggaggtggtcaca -gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac -actgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca -ctgaggggaggtggtcacagtgtgaggggagatggtcacactgaggggaggtggtcacag -tgtgaggggaggtggtcacactgaggggaggtggtcaca-------------------gt -gtgaggggaggtggtcacactgaggggaggtggtcacactgagaggaggtggtcacactg -aggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggaagtggtcacact -gaggggaggtg------------------------------------------gtcacac -tgaggggagggggtcatactgcgagtggaggtagtcacactgtgaagggtggtagtcaca -ctgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatactg -tgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggag----------- ------------------------------------------------------------- -------------------------------gtggtcacactgaggggaggtggtcacact -gaggggaggtggtcacagtgtga--ggggaggtggtcacactgaggggaggtggtcacag -tgtacggggaggtggtcatac--tgaggggaggtggtcacagtgtgaggggaggtggtca -cactgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac -actgaggggaggtggtcacactgtgaggggaggtggtcacactgaggagaggtggtcata -gtgtgaggggaggtggtcacactgaggggaggtggtcaca-c-tgaggagaggtggtcat --------------------agtgtgaggggaggtggtcacac------------------ ------------------------------------------------------------- ------------------------tgaggggaggtggtcacactgaggggaggtggtcata 
-gtgtgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcac -actgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcaca -gtg--------------------------------------tgaggggaggtggtcacac -tgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcacat -tgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagtg -tgcggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgt -gaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagtg -tgaggggaggtggtcacactgaggggaggtggtcacactgagggaaggtggtcacactga -ggggaggtggtcatagtgtgaggggaggtggtcatagtgtgaggggaggtggtcacagtg -tgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcacact -gaggggaggtggtcacactgtgaggggaggtgggcacactgtgaggggtggtagtcacac -tgagaggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacactga -ggggaatggtcatactgtgaggggaggtggtcacactgaggggaagtggtcacactgagg -ggaatggtcatactgtgaggggaggtggtcacactgtgaaagaagatgatcacagtatga -gaggtcatccttcaagaggcaagtgcccaaaaatctgtttataatctagagcaatatttg -cctgaaataaactttaaaagtggttttacgctatttaacccagtgtcaga ->syndip_2_chr20:20354470-20358424 +tgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcacact +gaggggaggtggtcatagtgtgaggggag------------------------------- +------------------------------------------------------------ +----------gtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtg +tga--ggggaggtggtcacactgaggggaggtggtcacagtgtacggggaggtggtcata +c--tgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcac +actgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcaca +ctgtgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcac +actgaggggaggtggtcaca-c-tgaggagaggtggtcat-------------------a +gtgtgaggggaggtggtcacac-------------------------------------- +------------------------------------------------------------ +---tgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcac +actgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcaca 
+ctgaggggaggtggtcatagtgtgaggggaggtggtcacagtg----------------- +---------------------tgaggggaggtggtcacactgaggggaggtggtcatagt +gtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactgtgaggggaggtggtcacattgaggagaggtggtcatagt +gtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactgaggagaggtggtcatagtgtgcggggaggtggtcacact +gaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcacagtg +tgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacact +gaggggaggtggtcacactgagggaaggtggtcacactgaggggaggtggtcatagtgtg +aggggaggtggtcatagtgtgaggggaggtggtcacagtgtgaggggaggtggtcacact +gaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacactg +tgaggggaggtgggcacactgtgaggggtggtagtcacactgagaggaggtggtcacact +gaggggaggtggtcacactgaggggaggtggtcacactgaggggaatggtcatactgtga +ggggaggtggtcacactgaggggaagtggtcacactgaggggaatggtcatactgtgagg +ggaggtggtcacactgtgaaagaagatgatcacagtatgagaggtcatccttcaagaggc +aagtgcccaaaaatctgtttataatctagagcaatatttgcctgaaataaactttaaaag +tggttttacgctatttaacccagtgtcag +>syndip_2_chr20:20354470-20358423 tccagtctgggtgacagagagagaccttgtctc--aaaaaaaaaaaaaaaaatcaggtac agaaaggtaaatactgcacgatctcacttacatgtggaatctaaagaagttgaattcata agggtaatgaggggaggaaggggtggtcatactgtgtgaagggagatggtcacactgagg @@ -134,61 +135,62 @@ atactgtgagtgaggaggtagtcacactgaggggaggggtcatactgcgagtggaggtgg tcacactgtgaagggtggtagtcacactgagaggaggtggtcacactgaggggaagtggt cacactgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtc atagtgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtca -cagtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggagatggtc -acactgaggggaggtggtcacagtg-------------------tgaggggaggtggtca -tagtgtgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcac -actgtgaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatagt -gtgaggggaggtggtcacact----ggggaggtggtcacactgtgaggggaggtggtcac -actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacact-------------------- 
--ggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcacactga -ggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtgtgag -gggaggtggtcacac------tggggaggtggtcacactgtgaggggaggtggtcacact -gtgaggggaggtggtcacac--tgagaggaggtggtcacactgaggggaagtggtcacac +cagtgtgaggggagg----------------------------------------tggtc +acactgaggggaggtggtcacagtgtgaggggagatggtcacactgaggggaggtggtca +cagtg-------------------tgaggggaggtggtcatagtgtgaggggaggtggtc +acactggggaggtggtcacactgtgaggggaggtggtcacactgtgaggaggtggtcaca +ctgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacac +t----ggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcaca +ctgaggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacact---------------------ggggaggtggtcacactgt +gaggggaggtggtcacactgagaggaggtggtcacactgaggggaagtggtcacactgag +gggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacac----- +-tggggaggtggtcacactgtgaggggaggtggtcacactgtgaggggaggtggtcacac +--tgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggaagtggtcacac tgaggggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcaca +ctgaggggagggggtcatactgcgagtggaggtagtcacactgtgaagggtggtagtcac +actgagaggaggtggtcaca--ctgaggggaagtggtcacactgaggggaggtggtcata +ctgtgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcac +actgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcaca +ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag +tgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca +ctgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacac +tgaggggaggtggtcacactgagaggaggtggtcacactgaggggaggtggtcatactgt +gagtgaggaagtagtcacactgaggggaagtggtcacactgaggggaggtggtcacactg +aggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggagggggtcatact +gcgagtggaggtagtcacactgtgaagggtggtagtcacactgagaggaggtggtcacac +tgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcacac +tgagaggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcaca 
ctgaggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatactg tgagtgaggaggtagtcacactgaggggagggggtcatactgcgagtggaggtagtcaca -ctgtgaagggtggtagtcacactgagaggaggtggtcaca--ctgaggggaagtggtcac +ctgtgaagggtggtagtcaca--ctgagaggaggtggtcacactgaggggaagtggtcac actgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtcata -gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac -actgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca -ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag +gtgtgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcac +actgaggggaggtggtcacagtgtgcggggaggtggtcatactgaggggaggtggtcaca +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcac +actgaggggaggtggtcatagtgtgaggggaggtggtcatactgaggggaggtggtcaca +ctgtgaggggaggtggtcacactgaggagaggtggtcacactgtgaggggaggtggtcac +actgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcaca +ctgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatag tgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagt -gtgaggggaggtggtcacactgaggggaggtggtcacactgagaggaggtggtcacactg -aggggaggtggtcatactgtgagtgaggaagtagtcacactgaggggaagtggtcacact -gaggggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcacac -tgaggggagggggtcatactgcgagtggaggtagtcacactgtgaagggtggtagtcaca -ctgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatactg -tgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacact -gtgaggggaggtggtcacactgagaggaggtggtcacactgaggggaggtggtcatactg -tgagtgaggaggtagtcacactgaggggaagtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggagggggtcatac -tgcgagtggaggtagtcacactgtgaagggtggtagtcaca--ctgagaggaggtggtca -cactgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcac -actgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcata -gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcat -actgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcaca -gtgtgcggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcat -actgaggggaggtggtcacactgtgaggggaggtggtcacactgaggagaggtggtcaca 
-ctgtgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcac -actgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcaca -ctgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacac -tgaggggaggtggtcatagtgtgaggggaggtggtgacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcacat -tgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagtg -tgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgt -gaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagtg -tgaggggaggtggtcacactgaggggaggtggtcacactgagggaaggtggtcacactga -ggggaggtggtcatagtgtgaggggaggtggtcatagtgtgaggggaggtggtcacagtg +gtgaggggaggtggtgacactgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactgtgaggggaggtggtcacattgaggagaggtggtcatagt +gtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtg tgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcacact -gaggggaggtggtcacactgtgaggggaggtgggcacactgtgaggggtggtagtcacac -tgagaggaggtggtcacactgaggggaggtggtcacactgaggggaagtggtcacactga -ggggaatggtcatactgtgaggggaggtggtcacactgaggggaagtggtcacactgagg -ggaatggtcatactgtgaggggaggtggtcacactgtgaaagaagatgatcacagtatga -gaggtcatccttcaagaggcaagtgcccaaaaatctgtttataatctagagcaatatttg -cctgaaataaactttaaaagtggttttacgctatttaacccagtgtcaga ->p:HG002_1_chr20:20354470-20358424 +gaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcacagtg +tgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacact +gaggggaggtggtcacactgagggaaggtggtcacactgaggggaggtggtcatagtgtg +aggggaggtggtcatagtgtgaggggaggtggtcacagtgtgaggggaggtggtcacact +gaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacactg +tgaggggaggtgggcacactgtgaggggtggtagtcacactgagaggaggtggtcacact +gaggggaggtggtcacactgaggggaagtggtcacactgaggggaatggtcatactgtga +ggggaggtggtcacactgaggggaagtggtcacactgaggggaatggtcatactgtgagg +ggaggtggtcacactgtgaaagaagatgatcacagtatgagaggtcatccttcaagaggc +aagtgcccaaaaatctgtttataatctagagcaatatttgcctgaaataaactttaaaag +tggttttacgctatttaacccagtgtcag 
+>p:HG002_1_chr20:20354470-20358423 tccagtctgggtgacagagagagaccttgtctc--aaaaaaaaaaaaaaaaatcaggtac agaaaggtaaatactgcacgatctcacttacatgtggaatctaaagaagttgaattcata agggtaatgaggggaggaaggggtggtcatactgtgtgaagggagatggtcacactgagg @@ -229,61 +231,62 @@ atactgtgagtgaggaggtagtcacactgaggggaggggtcatactgcgagtggaggtgg tcacactgtgaagggtggtagtcacactgagaggaggtggtcacactgaggggaagtggt cacactgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtc atagtgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtca -cagtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggagatggtc -acactgaggggaggtggtcacagtg-------------------tgaggggaggtggtca -tagtgtgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcac -actgtgaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatagt -gtgaggggaggtggtcacact----ggggaggtggtcacactgtgaggggaggtggtcac -actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacact-------------------- --ggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcacactga -ggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtgtgag -gggaggtggtcacac------tggggaggtggtcacactgtgaggggaggtggtcacact -gtgaggggaggtggtcacac--tgagaggaggtggtcacactgaggggaagtggtcacac +cagtgtgaggggagg----------------------------------------tggtc +acactgaggggaggtggtcacagtgtgaggggagatggtcacactgaggggaggtggtca +cagtg-------------------tgaggggaggtggtcatagtgtgaggggaggtggtc +acactggggaggtggtcacactgtgaggggaggtggtcacactgtgaggaggtggtcaca +ctgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacac +t----ggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcaca +ctgaggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacact---------------------ggggaggtggtcacactgt +gaggggaggtggtcacactgagaggaggtggtcacactgaggggaagtggtcacactgag +gggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacac----- +-tggggaggtggtcacactgtgaggggaggtggtcacactgtgaggggaggtggtcacac +--tgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact +gaggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggaagtggtcacac 
tgaggggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcaca +ctgaggggagggggtcatactgcgagtggaggtagtcacactgtgaagggtggtagtcac +actgagaggaggtggtcaca--ctgaggggaagtggtcacactgaggggaggtggtcata +ctgtgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcac +actgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcaca +ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag +tgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca +ctgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacac +tgaggggaggtggtcacactgagaggaggtggtcacactgaggggaggtggtcatactgt +gagtgaggaagtagtcacactgaggggaagtggtcacactgaggggaggtggtcacactg +aggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggagggggtcatact +gcgagtggaggtagtcacactgtgaagggtggtagtcacactgagaggaggtggtcacac +tgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcacact +gaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcacac +tgagaggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcaca ctgaggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatactg tgagtgaggaggtagtcacactgaggggagggggtcatactgcgagtggaggtagtcaca -ctgtgaagggtggtagtcacactgagaggaggtggtcaca--ctgaggggaagtggtcac +ctgtgaagggtggtagtcaca--ctgagaggaggtggtcacactgaggggaagtggtcac actgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtcata -gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac -actgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca -ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag +gtgtgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcac +actgaggggaggtggtcacagtgtgcggggaggtggtcatactgaggggaggtggtcaca +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcac +actgaggggaggtggtcatagtgtgaggggaggtggtcatactgaggggaggtggtcaca +ctgtgaggggaggtggtcacactgaggagaggtggtcacactgtgaggggaggtggtcac +actgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcaca +ctgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatag tgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagt -gtgaggggaggtggtcacactgaggggaggtggtcacactgagaggaggtggtcacactg 
-aggggaggtggtcatactgtgagtgaggaagtagtcacactgaggggaagtggtcacact -gaggggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcacac -tgaggggagggggtcatactgcgagtggaggtagtcacactgtgaagggtggtagtcaca -ctgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatactg -tgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacact -gtgaggggaggtggtcacactgagaggaggtggtcacactgaggggaggtggtcatactg -tgagtgaggaggtagtcacactgaggggaagtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggagggggtcatac -tgcgagtggaggtagtcacactgtgaagggtggtagtcaca--ctgagaggaggtggtca -cactgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcac -actgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcata -gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcat -actgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcaca -gtgtgcggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcat -actgaggggaggtggtcacactgtgaggggaggtggtcacactgaggagaggtggtcaca -ctgtgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcac -actgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcaca -ctgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacac -tgaggggaggtggtcatagtgtgaggggaggtggtgacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcacat -tgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagtg -tgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgt -gaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagtg -tgaggggaggtggtcacactgaggggaggtggtcacactgagggaaggtggtcacactga -ggggaggtggtcatagtgtgaggggaggtggtcatagtgtgaggggaggtggtcacagtg +gtgaggggaggtggtgacactgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactgtgaggggaggtggtcacattgaggagaggtggtcatagt +gtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtg tgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcacact -gaggggaggtggtcacactgtgaggggaggtgggcacactgtgaggggtggtagtcacac -tgagaggaggtggtcacactgaggggaggtggtcacactgaggggaagtggtcacactga 
-ggggaatggtcatactgtgaggggaggtggtcacactgaggggaagtggtcacactgagg -ggaatggtcatactgtgaggggaggtggtcacactgtgaaagaagatgatcacagtatga -gaggtcatccttcaagaggcaagtgcccaaaaatctgtttataatctagagcaatatttg -cctgaaataaactttaaaagtggttttacgctatttaacccagtgtcaga ->p:HG002_2_chr20:20354470-20358424 +gaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcacagtg +tgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacact +gaggggaggtggtcacactgagggaaggtggtcacactgaggggaggtggtcatagtgtg +aggggaggtggtcatagtgtgaggggaggtggtcacagtgtgaggggaggtggtcacact +gaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacactg +tgaggggaggtgggcacactgtgaggggtggtagtcacactgagaggaggtggtcacact +gaggggaggtggtcacactgaggggaagtggtcacactgaggggaatggtcatactgtga +ggggaggtggtcacactgaggggaagtggtcacactgaggggaatggtcatactgtgagg +ggaggtggtcacactgtgaaagaagatgatcacagtatgagaggtcatccttcaagaggc +aagtgcccaaaaatctgtttataatctagagcaatatttgcctgaaataaactttaaaag +tggttttacgctatttaacccagtgtcag +>p:HG002_2_chr20:20354470-20358423 tccagtctgggtgacagagagagaccttgtctcaaaaaaaaaaaaaaaaaaatcaggtac agaaaggtaaatactgcacgatctcacttacatgtggaatctaaagaagttgaattcata agggtaatgaggggaggaaggggtggtcatactgtgtgaagggagatggtcacactgagg @@ -325,61 +328,62 @@ tcacactgtgaagggtggtagtcacactgagaggaggtgttcacactgaggggaagtggt cacactgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtc atagtgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtca cagtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggagatggtc -acactgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtca -tagtgtgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcac -actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcaca-- -ctgaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcac -actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacactggggaggtggtcacactgtg -aggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcacactga -ggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgtgag -gggaggtggtcatactgtgagtgaggaggtagtcacac--tgaggggagggggtcatact 
+acactgaggggaggtggtcacagtgtgaggggaggtggtcacac---------------- +------------------------tgaggggaggtggtcatagtgtgaggggaggtggtc +acactggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcaca +ctgaggggaagtggtcacactgaggggaggtggtcaca--ctgaggggaggtggtcatag +tgtgaggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcaca +ctgaggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcacactgt +gaggggaggtggtcacactgagaggaggtggtcacactgaggggaagtggtcacactgag +gggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcatactgtga +gtgaggaggtagtcacac--tgaggggagggggtcatactgcgagtggaggtagtcacac +tgtgaagggtggtagtcacactgagaggaggtggtcacactgaggggaagtggtcacact +gaggggaggtggtcatactg---------------------------------------- +tgaggggaagtggtcacactgaggggaggtggtcatagtgtga--ggggaggtggtcaca +ctgaggggaggtggtcacagtgtgaggggaggtggtcacac--tgaggggaggtggtcac +actgaggggaggtggtcacagtgtgagaggaggtggtcacactgaggggaggtggtcaca +gtgtgaggggagatggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac +actgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcaca +ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag +tgtgaggggagatggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca +ctgaggggaggtggtcaca-------------------gtgtgaggggaggtggtcacac +tgaggggaggtggtcacactgagaggaggtggtcacactgaggggaggtggtcatactgt +gagtgaggaggtagtcacactgaggggaagtggtcacactgaggggaggtg--------- +---------------------------------gtcacactgaggggagggggtcatact gcgagtggaggtagtcacactgtgaagggtggtagtcacactgagaggaggtggtcacac -tgaggggaagtggtcacactgaggggaggtggtcatactg-------------------- ---------------------tgaggggaagtggtcacactgaggggaggtggtcatagtg -tga--ggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca -c--tgaggggaggtggtcacactgaggggaggtggtcacagtgtgagaggaggtggtcac -actgaggggaggtggtcacagtgtgaggggagatggtcacactgaggggaggtggtcaca -gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac -actgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca -ctgaggggaggtggtcacagtgtgaggggagatggtcacactgaggggaggtggtcacag 
-tgtgaggggaggtggtcacactgaggggaggtggtcaca-------------------gt -gtgaggggaggtggtcacactgaggggaggtggtcacactgagaggaggtggtcacactg -aggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggaagtggtcacact -gaggggaggtg------------------------------------------gtcacac -tgaggggagggggtcatactgcgagtggaggtagtcacactgtgaagggtggtagtcaca -ctgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatactg -tgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggag----------- ------------------------------------------------------------- -------------------------------gtggtcacactgaggggaggtggtcacact -gaggggaggtggtcacagtgtga--ggggaggtggtcacactgaggggaggtggtcacag -tgtacggggaggtggtcatac--tgaggggaggtggtcacagtgtgaggggaggtggtca -cactgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac -actgaggggaggtggtcacactgtgaggggaggtggtcacactgaggagaggtggtcata -gtgtgaggggaggtggtcacactgaggggaggtggtcaca-c-tgaggagaggtggtcat --------------------agtgtgaggggaggtggtcacac------------------ ------------------------------------------------------------- ------------------------tgaggggaggtggtcacactgaggggaggtggtcata -gtgtgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcac -actgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcaca -gtg--------------------------------------tgaggggaggtggtcacac -tgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacactgtgaggggaggtggtcacat -tgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagtg -tgcggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtgt -gaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagtg -tgaggggaggtggtcacactgaggggaggtggtcacactgagggaaggtggtcacactga -ggggaggtggtcatagtgtgaggggaggtggtcatagtgtgaggggaggtggtcacagtg -tgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcacact -gaggggaggtggtcacactgtgaggggaggtgggcacactgtgaggggtggtagtcacac -tgagaggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacactga -ggggaatggtcatactgtgaggggaggtggtcacactgaggggaagtggtcacactgagg 
-ggaatggtcatactgtgaggggaggtggtcacactgtgaaagaagatgatcacagtatga -gaggtcatccttcaagaggcaagtgcccaaaaatctgtttataatctagagcaatatttg -cctgaaataaactttaaaagtggttttacgctatttaacccagtgtcaga ->ref_chr20:20354470-20358424 -tccagtctgggtgacagagagagaccttgtctc-aaaaaaaaaaaaaaaaaatcaggtac +tgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcacact +gaggggaggtggtcatagtgtgaggggag------------------------------- +------------------------------------------------------------ +----------gtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtg +tga--ggggaggtggtcacactgaggggaggtggtcacagtgtacggggaggtggtcata +c--tgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcac +actgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcaca +ctgtgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcac +actgaggggaggtggtcaca-c-tgaggagaggtggtcat-------------------a +gtgtgaggggaggtggtcacac-------------------------------------- +------------------------------------------------------------ +---tgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcac +actgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcaca +ctgaggggaggtggtcatagtgtgaggggaggtggtcacagtg----------------- +---------------------tgaggggaggtggtcacactgaggggaggtggtcatagt +gtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactgtgaggggaggtggtcacattgaggagaggtggtcatagt +gtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacactgaggagaggtggtcatagtgtgcggggaggtggtcacact +gaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcacagtg +tgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacact +gaggggaggtggtcacactgagggaaggtggtcacactgaggggaggtggtcatagtgtg +aggggaggtggtcatagtgtgaggggaggtggtcacagtgtgaggggaggtggtcacact +gaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacactg +tgaggggaggtgggcacactgtgaggggtggtagtcacactgagaggaggtggtcacact +gaggggaggtggtcacactgaggggaggtggtcacactgaggggaatggtcatactgtga +ggggaggtggtcacactgaggggaagtggtcacactgaggggaatggtcatactgtgagg 
+ggaggtggtcacactgtgaaagaagatgatcacagtatgagaggtcatccttcaagaggc +aagtgcccaaaaatctgtttataatctagagcaatatttgcctgaaataaactttaaaag +tggttttacgctatttaacccagtgtcag +>ref_chr20:20354470-20358423 +tccagtctgggtgacagagagagaccttgtctca-aaaaaaaaaaaaaaaaatcaggtac agaaaggtaaatactgcacgatctcacttacatgtggaatctaaagaagttgaattcata agggtaatgaggggaggaaggggtggtcatactgtgtgaagggagatagtcacactgagg gaaggtggtcacactgtgagaagggaggtagtcacattgtgagagaaaaagtgatcacac @@ -419,40 +423,40 @@ atactgtgagtgaggagctagtcacactgaggggaggggtcatactgcgagtggaggtag tcacactgtgaagggtggtagtcacactgagaggaggtggtcacactgaggggaagtggt cacactgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtc atagtgtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtca -cagtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggagatggtc -acactgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtca -tagtgtgaggggaggtggtcacactggggaggtggtcacactgtgaggggaggtggtcac -actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcatagt -gtgaggggaggtggtcacact----ggggaggtggtcacactgtgaggggaggtggtcac -actgagaggaggtggtcacactgaggggaagtggtcacactgaggggaggtggtcacact -gaggggaggtggtcatagtgtgaggggaggtggtcacact-------------------- --ggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcacactga -ggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtgtgag -gggaggtggtcacac------tggggaggtggtcacactgtgaggggaggtggtcacact -gtgaggggaggtggtcacac--tgagaggaggtggtcacactgaggggaagtggtcacac -tgagaggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcaca -ctgaggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatactg -tgagtgaggaggtagtcacactgaggggagggggtcatactgcgagtggaggtagtcaca -ctgtgaagggtggtagtcacactgagaggaggtggtcaca--ctgaggggaagtggtcac -actgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtcata -gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac -actgaggggaggtggtcaca---------------------------------------- +cagtgtgaggggagg----------------------------------------tggtc +acactgaggggaggtggtcacagtgtgaggggagatggtcacactgaggggaggtggtca +cagtgtgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtc 
+acactggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcaca +ctgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacac +t----ggggaggtggtcacactgtgaggggaggtggtcacactgagaggaggtggtcaca +ctgaggggaagtggtcacactgaggggaggtggtcacactgaggggaggtggtcatagtg +tgaggggaggtggtcacact---------------------ggggaggtggtcacactgt +gaggggaggtggtcacactgagaggaggtggtcacactgaggggaagtggtcacactgag +gggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacac----- +-tggggaggtggtcacactgtgaggggaggtggtcacactgtgaggggaggtggtcacac +--tgagaggaggtggtcacactgaggggaagtggtcacactgagaggaggtggtcacact +gaggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggaagtggtcacac +tgaggggaggtggtcacactgaggggaggtggtcatactgtgagtgaggaggtagtcaca +ctgaggggagggggtcatactgcgagtggaggtagtcacactgtgaagggtggtagtcac +actgagaggaggtggtcaca--ctgaggggaagtggtcacactgaggggaggtggtcata +ctgtgaggggaagtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcac +actgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcaca ------------------------------------------------------------ -----------------------------------------------------------gt -gtgcggggaggtggtcatactgaggggaggtggtcacactgaggggaggtggtcacactg -aggggaggtggtcatactgtgagt------------------------------------ ------------------------------------------------------------ +--------------------------------------gtgtgcggggaggtggtcatac +tgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcatactgt +gagt-------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ --------------------------gaggaggtagtcacactgaggggagggggtcatac -tgcgagtggaggtagtcacactgtgaagggtggtagtcaca--ctgagaggaggtggtca -cactgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcac -actgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcaca 
-gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcat ------------------------------------------------------------ +-----gaggaggtagtcacactgaggggagggggtcatactgcgagtggaggtagtcaca +ctgtgaagggtggtagtcaca--ctgagaggaggtggtcacactgaggggaagtggtcac +actgaggggaggtggtcatactgtgaggggaagtggtcacactgaggggaggtggtcata +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac +actgaggggaggtggtcacagtgtgcggggaggtggtcat-------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -462,14 +466,15 @@ gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcat ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------------actgaggggaggtggtcacagtgt -gaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagtg -tgaggggaggtggtcacactgaggggaggtggtcacactgagggaaggtggtcacactga -ggggaggtggtcatagtgtgaggggaggtggtcatagtgtgaggggaggtggtcacagtg -tgaggggaggtggtcacactgaggagaggtggtcatagtgtgaggggaggtggtcacact -gaggggaggtggtcacactgtgaggggaggtgggcacactgtgaggggtggtagtcacac -tgagaggaggtggtcacactgaggggaggtggtcacactgaggggaagtggtcacactga -ggggaatggtcatactgtgaggggaggtggtcacactgaggggaagtggtcacactgagg -ggaatggtcatactgtgaggggaggtggtcacactgtgaaagaagatgatcacagtatga -gaggtcatccttcaagaggcaagtgcccaaaaatctgtttataatctagagcaatatttg -cctgaaataaactttaaaagtggttttacgctatttaacccagtgtcaga +------------------------------------------------------------ +----------------actgaggggaggtggtcacagtgtgaggggaggtggtcacagtg +tgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcacact +gaggggaggtggtcacactgagggaaggtggtcacactgaggggaggtggtcatagtgtg +aggggaggtggtcatagtgtgaggggaggtggtcacagtgtgaggggaggtggtcacact +gaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacactg +tgaggggaggtgggcacactgtgaggggtggtagtcacactgagaggaggtggtcacact 
+gaggggaggtggtcacactgaggggaagtggtcacactgaggggaatggtcatactgtga +ggggaggtggtcacactgaggggaagtggtcacactgaggggaatggtcatactgtgagg +ggaggtggtcacactgtgaaagaagatgatcacagtatgagaggtcatccttcaagaggc +aagtgcccaaaaatctgtttataatctagagcaatatttgcctgaaataaactttaaaag +tggttttacgctatttaacccagtgtcag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_11e116fedba8bcf09b5d2c98cbaa4d60.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ce6ba4dc0f8a352c89d3dfc839c96d2e.msa similarity index 99% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_11e116fedba8bcf09b5d2c98cbaa4d60.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_ce6ba4dc0f8a352c89d3dfc839c96d2e.msa index d5dd141a..7df1d60e 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_11e116fedba8bcf09b5d2c98cbaa4d60.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ce6ba4dc0f8a352c89d3dfc839c96d2e.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:64173338-64176431 +>syndip_1_chr20:64173338-64176430 gatagagtgggtgtcttttgtgggcttctccccagcctgtggcctggcctagtcagtaga gatggtcaggggagcaggagggagcccataccccagtgcctgtagttgtgtccatttccc ctccctgacttctcctcctgcagcatccttccctttagttgtgtccatttcccctccctg @@ -64,8 +64,8 @@ tctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctg tagttgtgtccatttcccctccctggcttctcctcctgcagcatctttccctgtagttgt gtccattccccctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatt tcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccct -ggc ->syndip_2_chr20:64173338-64176431 +gg +>syndip_2_chr20:64173338-64176430 gatagagtgggtgtcttttgtgggcttctccccagcctgtggcctggcctagtcagtaga gatggtcaggggagcaggagggagcccataccccagtgcctgtagttgtgtccatttccc ctccctgacttctcctcctgcagcatccttccctttagttgtgtccatttcccctccctg @@ -131,8 +131,8 @@ tcctgtagcatctttccctgtagttgtgtccattccccctccctgg-------------- ------------------------------------------------------------ ----------------------ctcctcctgcagcatctttccctgtagttgtgtccatt tcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccct -ggc 
->p:HG002_1_chr20:64173338-64176431 +gg +>p:HG002_1_chr20:64173338-64176430 gatagagtgggtgtcttttgtgggcttctccccagcctgtggcctggcctagtcagtaga gatggtcaggggagcaggagggagcccataccccagtgcctgtagttgtgtccatttccc ctccctgacttctcctcctgcagcatccttccctttagttgtgtccatttcccctccctg @@ -198,8 +198,8 @@ tcctgtagcatctttccctgtagttgtgtccattccccctccctgg-------------- ------------------------------------------------------------ ----------------------ctcctcctgcagcatctttccctgtagttgtgtccatt tcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccct -ggc ->p:HG002_2_chr20:64173338-64176431 +gg +>p:HG002_2_chr20:64173338-64176430 gatagagtgggtgtcttttgtgggcttctccccagcctgtggcctggcctagtcagtaga gatggtcaggggagcaggagggagcccataccccagtgcctgtagttgtgtccatttccc ctccctgacttctcctcctgcagcatccttccctttagttgtgtccatttcccctccctg @@ -265,8 +265,8 @@ tctttccctgtagttgtgtccatttcccctccctggcttctcctgcagcatctttccctg tagttgtgtccatttcccctccctggcttctcctcctgcagcatctttccctgtagttgt gtccattccccctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatt tcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccct -ggc ->ref_chr20:64173338-64176431 +gg +>ref_chr20:64173338-64176430 gatagagtgggtgtcttttgtgggcttctccccagcctgtggcctggcctagtcagtaga gatggtcaggggagcaggagggagcccataccccagtgcctgtagttgtgtccatttccc ctccctgacttctcctcctgcagcatccttccctttagttgtgtccatttcccctccctg @@ -332,4 +332,4 @@ tcctgtagcatctttccctgtagttgtgtccattccccctccctggcgtctc-------- tagttgtgtccatttcccctccctggcttctcctcctgcagcatctttccctgtagttgt gtccattccccctccctggcttctcctcctgcagcatctttccctgtagttgtgtccatt tcccctccctggcttctcctgcagcatctttccctgtagttgtgtccatttcccctccct -ggc +gg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_64dff7b5bf598bbdb48ee4b45e644ad7.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_cea716cb5fb96bc6d8bf0051a8d70890.msa similarity index 94% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_64dff7b5bf598bbdb48ee4b45e644ad7.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_cea716cb5fb96bc6d8bf0051a8d70890.msa index 
a82c95f9..af4d665d 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_64dff7b5bf598bbdb48ee4b45e644ad7.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_cea716cb5fb96bc6d8bf0051a8d70890.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63027966-63029131 +>syndip_1_chr20:63027966-63029130 ctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaagaaccaacg cgggcggggagtggaccggtggggagagggccggtggggagagggccggtggggagagga ccggtggggagggggccggtggagagggggccggtggggagggggccggtggggagagga @@ -35,8 +35,8 @@ tccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcg cctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccg cgtctgtgtgtgcaggtcccccgggcctccgcgtctgtgtgtgcaggtcccccgggcctc cgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcc -tccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccg ->syndip_2_chr20:63027966-63029131 +tccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtccccc +>syndip_2_chr20:63027966-63029130 ctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaagaaccaacg cgggcggggagtggaccggtggggagagggccggtggggagaggaccggtggggag-ggg ccggtggagagggggccggtggggagggggccggcggggag------------------- @@ -60,7 +60,6 @@ ctcagctctcaacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgc aggaggctgagcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatg atgactccaaacgcacccgagtcccccgggcctccgcgcctccgtgtgcaggtcccccgg gcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtccccc -g----------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -73,8 +72,9 @@ g----------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------------------------------------- ->p:HG002_1_chr20:63027966-63029131 
+------------------------------------------------------------ +--------------------------------------------------------- +>p:HG002_1_chr20:63027966-63029130 ctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaagaaccaacg cgggcggggagtggaccggtggggagagggccggtggggagaggaccggtggggag-ggg ccggtggagagggggccggtggggagggggccggcggggag------------------- @@ -98,7 +98,6 @@ ctcagctctcaacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgc aggaggctgagcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatg atgactccaaacgcacccgagtcccccgggcctccgcgcctccgtgtgcaggtcccccgg gcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtccccc -g----------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -111,8 +110,9 @@ g----------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------------------------------------- ->p:HG002_2_chr20:63027966-63029131 +------------------------------------------------------------ +--------------------------------------------------------- +>p:HG002_2_chr20:63027966-63029130 ctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaagaaccaacg cgggcggggagtggaccggtggggagagggccggtggggagaggaccggtggggag-ggg ccggtggagagggggccggtggggagggggccggtggggagggggccggtggggagagga @@ -149,8 +149,8 @@ tccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcg cctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccg cgtctgtgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctc cgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcc -tccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccg ->ref_chr20:63027966-63029131 +tccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtccccc +>ref_chr20:63027966-63029130 
ctcctctccaaggtcccagctgtccttgccctgttccctgtgaagagtgaagaaccaacg cgggcggggagtggaccggtggggagagggccggtggggagaggaccggtggggag-ggg ccggtggagagggggccggtggggagggggccggcggggag------------------- @@ -176,7 +176,7 @@ atgactccaaacgcacccgagtcccccgggcctccgcgcctccgtgtgcaggtcccccgg gcctccgcgcctccgtgtgcaggtcccccgggcctccgcgtctgtgtgtgcaggtccccc gggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtccc ccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtc -ccccg------------------------------------------------------- +cccc-------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -187,4 +187,4 @@ ccccg------------------------------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ----------------------------------------------------------- +--------------------------------------------------------- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_751f12a6ae4022208996c7a282d999de.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_cf9425969f7f5ed38f54639e08586a04.msa similarity index 87% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_751f12a6ae4022208996c7a282d999de.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_cf9425969f7f5ed38f54639e08586a04.msa index 0403e378..db141b39 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_751f12a6ae4022208996c7a282d999de.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_cf9425969f7f5ed38f54639e08586a04.msa @@ -1,40 +1,40 @@ ->syndip_1_chr20:57190156-57190529 +>syndip_1_chr20:57190156-57190528 gctggagagagtgagccaggaatcagaggaggtgaggccagagagagtgagcgaggaacc agagggggtgaggctggagagagtgaccgaggaaccagaggaggtgaggcc----agagt 
gagtgaggagcttgaaggggtgaggctggagacagtgagcgaggaaccagagggggtgag gctggagagagtgagccaggaaccagaggaggtgaggccaga----gtgagtgaggagct tgaaggggtgaggctggagacagtgagcgaggaaccagagggggtgaggctggagagcgt gagtgaggagcccgagggggtgaggctggagagagtgagtgaggagcccaagggggtgaa -gccgaggtgggtatgggg ->syndip_2_chr20:57190156-57190529 +gccgaggtgggtatggg +>syndip_2_chr20:57190156-57190528 gctggagagagtgagccaggaatcagaggaggtgaggccagagagagtgagcgaggaacc agagggggtgaggctggagagagtgaccgaggaaccagaggaggtgaggctggagagagt gagccaggaaccagaggaggtgaggccag----agtgagtgaggagcttgaaggggtgag gctggagacagtgagcgaggaaccagagggggtgaggctggagagcgtgagtgaggagcc cgagggggtgaggctggagagcgtgagtgaggagcccgagggggtgaggctggagagagt gagtgaggagcccaagggggtgaagccgaggtgggt------------------------ -------------atgggg ->p:HG002_1_chr20:57190156-57190529 +------------atggg +>p:HG002_1_chr20:57190156-57190528 gctggagagagtgagccaggaatcagaggaggtgaggccagagagagtgagcgaggaacc agagggggtgaggctggagagagtgaccgaggaaccagaggaggtgaggctggagagagt gagccaggaaccagaggaggtgaggccag----agtgagtgaggagcttgaaggggtgag gctggagacagtgagcgaggaaccagagggggtgaggctggagagcgtgagtgaggagcc cgagggggtgaggctggagagcgtgagtgaggagcccgagggggtgaggctggagagagt gagtgaggagcccaagggggtgaagccgaggtgggt------------------------ -------------atgggg ->p:HG002_2_chr20:57190156-57190529 +------------atggg +>p:HG002_2_chr20:57190156-57190528 gctggagagagtgagccaggaatcagaggaggtgaggccagagagagtgagcgaggaacc agagggggtgaggctggagagagtgaccgaggaaccagaggaggtgaggctggagagagt gagccaggaaccagaggaggtgaggccag----agtgagtgaggagcttgaaggggtgag gctggagacagtgagcgaggaaccagagggggtgaggctggagagcgtgagtgaggagcc cgagggggtgaggctggagagcgtgagtgaggagcccgagggggtgaggctggagagcgt gagtgaggagcccgagggggtgaggctggagagagtgagtgaggagcccaagggggtgaa -gccgaggtgggtatgggg ->ref_chr20:57190156-57190529 +gccgaggtgggtatggg +>ref_chr20:57190156-57190528 gctggagagagtgagccaggaatcagaggaggtgaggccagagagagtgagcgaggaacc agagggggtgaggctggagagagtgaccgaggaaccagaggaggtgaggctggagagagt gagccaggaaccagaggaggtgaggccag----agtgagtgaggagcttgaaggggtgag 
gctggagacagtgagcgaggaaccagagggggtgaggctggagagcgtgagtgaggagcc cgagggggtgaggctggagagcgtgagtgaggagcccgagggggtgaggctggagagcgt gagtgaggagcccgagggggtgaggctggagagagtgagtgaggagcccaagggggtgaa -gccgaggtgggtatgggg +gccgaggtgggtatggg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_71fa13a9fc89d5f2aef29ccb10589a60.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_d2242ccd448a7c982cfcff72641d3495.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_71fa13a9fc89d5f2aef29ccb10589a60.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_d2242ccd448a7c982cfcff72641d3495.msa index cf2b055d..78229f17 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_71fa13a9fc89d5f2aef29ccb10589a60.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_d2242ccd448a7c982cfcff72641d3495.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:51952383-51954993 +>syndip_1_chr20:51952383-51954992 ctctgtctcaaaaaaaaaaaaagtcattgattgatctctttctggtttaactgctattat ttagttttatagcattacattcaagtttttatagcattacaattcctagattctattttc aaatattgatttgcttaa--ttttttttttttttgagatggggttttgctctgtcgccca @@ -47,8 +47,8 @@ ccacacccagctaattttttcatttttagtagagacggggtttcaccatgttggtcaggc ttgtctcgaactcctgacctcaggtgatccgtccaacttggcctcccaaagtgctgagat tacaggcatgagctaccgtgcccagctctcaattgtcacttttagtgcatattttctggt tgtataaaagctctcccatgcaagagggctgatgttataacagtcgattattatactgtc -ctgcattttcatcaggtaaagaaagcttttcatgtttcactgag ->syndip_2_chr20:51952383-51954993 +ctgcattttcatcaggtaaagaaagcttttcatgtttcactga +>syndip_2_chr20:51952383-51954992 ctctgtctc-aaaaaaaaaaaagtcattgattgatctctttctggtttaactgctattat ttagttttatagcattacattcaagtttttatagcattacaattcctagattctattttc aaatattgatttgcttaa-tttttttttttttttgagatggggttttgctctgtcgccca @@ -97,8 +97,8 @@ ccacacccagctaattttttcatttttagtagagacggggtttcaccatgttggtcaggc ttgtctcgaactcctgacctcaggtgatccgtccaacttggcctcccaaagtgctgagat tacaggcatgagctaccgtgcccagctctcaattgtcacttttagtgcatattttctggt tgtataaaagctctcccatgcaagagggctgatgttataacagtcgattattatactgtc 
-ctgcattttcatcaggtaaagaaagcttttcatgtttcactgag ->p:HG002_1_chr20:51952383-51954993 +ctgcattttcatcaggtaaagaaagcttttcatgtttcactga +>p:HG002_1_chr20:51952383-51954992 ctctgtctc-aaaaaaaaaaaagtcattgattgatctctttctggtttaactgctattat ttagttttatagcattacattcaagtttttatagcattacaattcctagattctattttc aaatattgatttgcttaa-tttttttttttttttgagatggggttttgctctgtcgccca @@ -147,8 +147,8 @@ ccacacccagctaattttttcatttttagtagagacggggtttcaccatgttggtcaggc ttgtctcgaactcctgacctcaggtgatccgtccaacttggcctcccaaagtgctgagat tacaggcatgagctaccgtgcccagctctcaattgtcacttttagtgcatattttctggt tgtataaaagctctcccatgcaagagggctgatgttataacagtcgattattatactgtc -ctgcattttcatcaggtaaagaaagcttttcatgtttcactgag ->p:HG002_2_chr20:51952383-51954993 +ctgcattttcatcaggtaaagaaagcttttcatgtttcactga +>p:HG002_2_chr20:51952383-51954992 ctctgtctcaaaaaaaaaaaaagtcattgattgatctctttctggtttaactgctattat ttagttttatagcattacattcaagtttttatagcattacaattcctagattctattttc aaatattgatttgcttaa--ttttttttttttttgagatggggttttgctctgtcgccca @@ -197,8 +197,8 @@ ccacacccagctaattttttcatttttagtagagacggggtttcaccatgttggtcaggc ttgtctcgaactcctgacctcaggtgatccgtccaacttggcctcccaaagtgctgagat tacaggcatgagctaccgtgcccagctctcaattgtcacttttagtgcatattttctggt tgtataaaagctctcccatgcaagagggctgatgttataacagtcgattattatactgtc -ctgcattttcatcaggtaaagaaagcttttcatgtttcactgag ->ref_chr20:51952383-51954993 +ctgcattttcatcaggtaaagaaagcttttcatgtttcactga +>ref_chr20:51952383-51954992 ctctgtctc-aaaaaaaaaaaagtcattgattgatctctttctggtttaactgctattat ttagttttatagcattacattcaagtttttatagcattacaattcctagattctattttc aaatattgatttgcttaattttttttttttttttgagatggggttttgctctgtcgccca @@ -247,4 +247,4 @@ ccacacccagctaattttttcatttttagtagagacggggtttcaccatgttggtcaggc ttgtctcgaactcctgacctcaggtgatccgtccaacttggcctcccaaagtgctgagat tacaggcatgagctaccgtgcccagctctcaattgtcacttttagtgcatattttctggt tgtataaaagctctcccatgcaagagggctgatgttataacagtcgattattatactgtc -ctgcattttcatcaggtaaagaaagcttttcatgtttcactgag +ctgcattttcatcaggtaaagaaagcttttcatgtttcactga diff --git 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_3e3662b58e37c8d091efddc27cbaa4e4.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_d4b126e5900cc00f6c75e82b4be27559.msa similarity index 90% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_3e3662b58e37c8d091efddc27cbaa4e4.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_d4b126e5900cc00f6c75e82b4be27559.msa index 86814ea1..7f48c7be 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_3e3662b58e37c8d091efddc27cbaa4e4.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_d4b126e5900cc00f6c75e82b4be27559.msa @@ -1,40 +1,40 @@ ->syndip_1_chr20:34235798-34236082 +>syndip_1_chr20:34235798-34236081 aaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaggaaggaaggaagg aaggaaggaaaggaaggaaggaaggaaggaaggaaggaaggaaggaagcgggagggaggg aagaaggaaggaaggaggagggagggaagaatgaaggaaggaaggaaggaaggaaggaag gaaggaagcgggagggagggaagaaggaaggaaggaaggaaggagggagggagggaagaa ggaaggaggagggagggagggaagcaggaaagaaggaaggaaggagaaagagagaaagag agagagaagaagaaagagaaagaaacagaaaaggaaggaaggagagagagaaagaaagag -aaagag ->syndip_2_chr20:34235798-34236082 +aaaga +>syndip_2_chr20:34235798-34236081 aaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaggaaggaaggaagg aaggaaggaaaggaaggaaggaaggaaggaaggaaggaaggaaggaagcgggagggaggg aagaaggaaggaaggaggagggagggaagaatgaaggaaggaaggaaggaaggaaggaag gaaggaagcgggagggagggaagaaggaaggaaggaaggaaggagggagggagggaagaa ggaaggaggagggagggagggaagcaggaaagaaggaaggaaggagaaagagagaaagag agagagaagaagaaagagaaagaaacagaaaaggaaggaaggagagagagaaagaaagag -aaagag ->p:HG002_1_chr20:34235798-34236082 +aaaga +>p:HG002_1_chr20:34235798-34236081 aaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaggaaggaaggaagg aaggaaggaaaggaaggaaggaaggaaggaaggaaggaaggaaaggaa-ggaaggaagga aggaaggaaggaagcgggagggagggaagaaggaaggaaggaaggaaggagggaaggaag gaaggaagcgggagggagggaagaaggaaggaaggaaggaaggagggagggagggaagaa ggaaggaggagggagggagggaagaaggaaagaaggaaggaaggagaaagagagaaagag agagagaagaagaaagagaaagaaacagaaaaggaaggaaggagagagagaaagaaagag -aaagag 
->p:HG002_2_chr20:34235798-34236082 +aaaga +>p:HG002_2_chr20:34235798-34236081 aaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaggaaggaaggaagg aaggaaggaaaggaaggaaggaaggaaggaaggaaggaaggaaaggaa-ggaaggaagga aggaaggaaggaagcgggagggagggaagaaggaaggaaggaaggaaggagggaaggaag gaaggaagcgggagggagggaagaaggaaggaaggaaggaaggagggagggagggaagaa ggaaggaggagggagggagggaagaaggaaagaaggaaggaaggagaaagagagaaagag agagagaagaagaaagagaaagaaacagaaaaggaaggaaggagagagagaaagaaagag -aaagag ->ref_chr20:34235798-34236082 +aaaga +>ref_chr20:34235798-34236081 aaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaggaaggaaggaagg aaggaaggaaaggaaggaaggaaggaaggaaggaaggaaggaaaggaa------------ -------------------------------------------ggaaggaaggaaggaag gaaggaagcgggagggagggaagaaggaaggaaggaaggaaggagggagggagggaaga- -------------------------aggaaagaaggaaggaaggagaaagagagaaagag agagagaagaagaaagagaaagaaacagaaaaggaaggaaggagagagagaaagaaagag -aaagag +aaaga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_cd64f44baadcf644191d9a4521bc7d85.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_d68e9f67e20c4afd7f0f9e6fada0a063.msa similarity index 91% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_cd64f44baadcf644191d9a4521bc7d85.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_d68e9f67e20c4afd7f0f9e6fada0a063.msa index 189cda58..60e6926f 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_cd64f44baadcf644191d9a4521bc7d85.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_d68e9f67e20c4afd7f0f9e6fada0a063.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:55944172-55945276 +>syndip_1_chr20:55944172-55945275 atggcaccactgcattccagctgggcaacagagagagactctttcttaaatatatatatt ttatatatatatagatacacacacacacacacacacacacac------------------ ----------------atatatatatatatatatatatatatatatatatgcatgcatgc @@ -19,8 +19,8 @@ gatatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgc tatctagatatctatctagatagtatctgtcgctatctagatatctatctagatagtatc 
tgtcgctatctagatatctatctagatagtatctgtcgctatctagatatctatctagat agtatctgtcgctatctagatatctatctagatagtatctgtcgctatctagatatctat -ctagatagtatctgtcgctatctagatatctatctagatagta ->syndip_2_chr20:55944172-55945276 +ctagatagtatctgtcgctatctagatatctatctagatagt +>syndip_2_chr20:55944172-55945275 atggcaccactgcattccagctgggcaacagagagagactctttcttaaatatatatatt ttatatatatatagatacacacacacacacacacacacacacacacacacacacacacac acatatatatatatatatatatatatatatatatatatatatatatatatgcatgcatgc @@ -38,11 +38,11 @@ tagatagtatctatctagatatcggtctagat-----agtatctatctagatatctgtgt agatgtctagatatctatctagatat---tatctatctagatatctgtcgctatctagat atctatctagatatctgtcgctatctagatatctatctaga---tatctgtcgctatcta gatatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgc -tatctagatatctatctagatagta----------------------------------- +tatctagatatctatctagatagt------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------------------- ->p:HG002_1_chr20:55944172-55945276 +------------------------------------------ +>p:HG002_1_chr20:55944172-55945275 atggcaccactgcattccagctgggcaacagagagagactctttcttaaatatatatatt ttatatatatatagatacacacacacacacacacacacacacacacacacacacacacac acatatatatatatatatatatatatatatatatatatatatatatatatgcatgcatgc @@ -60,11 +60,11 @@ tagatagtatctatctagatatctgtgtagatgtctagatatctatctagatat-----t atctatctagatatctgtcgctatctagatatctatctagatatctgtcgctatctagat atctatctagatatctgtcgctatctagatatctatctaga---tatctgtcgctatcta gatatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgc -tatctagatatctatctagatagta----------------------------------- +tatctagatatctatctagatagt------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------------------- ->p:HG002_2_chr20:55944172-55945276 +------------------------------------------ +>p:HG002_2_chr20:55944172-55945275 
atggcaccactgcattccagctgggcaacagagagagactctttcttaaatatatatatt ttatatatatatagatacacacacacacacacacacacacac------------------ ----------------atatatatatatatatatatatatatatatatatgcatgcatgc @@ -85,8 +85,8 @@ gatatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgc tatctagatatctatctagatagtatctgtcgctatctagatatctatctagatagtatc tgtcgctatctagatatctatctagatagtatctgtcgctatctagatatctatctagat agtatctgtcgctatctagatatctatctagatagtatctgtcgctatctagatatctat -ctagatagtatctgtcgctatctagatatctatctagatagta ->ref_chr20:55944172-55945276 +ctagatagtatctgtcgctatctagatatctatctagatagt +>ref_chr20:55944172-55945275 atggcaccactgcattccagctgggcaacagagagagactctttcttaaatatatatatt ttatatatatatagatacacacacacacacacacacacacac------------------ ----------------atatatatatatatatatatatatatatatatatgcatgcatgc @@ -106,5 +106,5 @@ atctatctagatatctgtcgctatctagatatctatctagatagtatctgtcgctatcta gatatctatctagatagtatctgtcgctatctagatatctatctagatagtatctgtcgc tatctagatatctatctagatagtatctgtcgctatctagatatctatctagatagtatc tgtcgctatctagatatctatctagatagtatctgtcgctatctagatatctatctagat -agta-------------------------------------------------------- -------------------------------------------- +agt--------------------------------------------------------- +------------------------------------------ diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_205f920d471a6a80b754572645188116.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_d758d8d416dd205a6f2a3a988afe6ca4.msa similarity index 91% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_205f920d471a6a80b754572645188116.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_d758d8d416dd205a6f2a3a988afe6ca4.msa index 38013a4c..e5a411fe 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_205f920d471a6a80b754572645188116.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_d758d8d416dd205a6f2a3a988afe6ca4.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:62830512-62830939 +>syndip_1_chr20:62830512-62830938 
cccagggccagaagggcagcatgggagaccccggccttccaggcccccagggcctccgag gtgacgtgggcgaccgggtaagtggccctctcagcaggaagctcccctgcaccccctcta cccatgtaccacagtcccccaccccccaccgtgctccacc-------------------- @@ -10,8 +10,8 @@ cccccatgacagtcccccaacccccaccacagtcccccaacccccaccacagtccccacc ccctaccacagtcccccaacccccaccacagtcccccaccccctaccacaatccccca-c ccccaccacagtcccctgggacgcagacagggagaggcccttgcagctcccagtgggaaa tctggccatgggcagtgtctccctgcgtggcggaggcagtggcatcagggccccgactgt -ggcgcctttggcccctctgaccttcca ->syndip_2_chr20:62830512-62830939 +ggcgcctttggcccctctgaccttcc +>syndip_2_chr20:62830512-62830938 cccagggccagaagggcagcatgggagaccccggccttccaggcccccagggcctccgag gtgacgtgggcgaccgggtaagtggccctctcagcaggaagctcccctgcaccccctcta cccatgtaccacagtcccccaccccccaccacagtcccccaccccccaccccagtcccca @@ -23,8 +23,8 @@ cccccaccacagtcccccaacccctaccacagtcccccaacccccaccacagtccccacc ccctaccacagtcccccaacccccaccacagtcccccaccccctaccacaatcccccacc ccccaccacagtcccctgggacgcagacagggagaggcccttgcagctcccagtgggaaa tctggccatgggcagtgtctccctgcgtggcggaggcagtggcatcagggccccgactgt -ggcgcctttggcccctctgaccttcca ->p:HG002_1_chr20:62830512-62830939 +ggcgcctttggcccctctgaccttcc +>p:HG002_1_chr20:62830512-62830938 cccagggccagaagggcagcatgggagaccccggccttccaggcccccagggcctccgag gtgacgtgggcgaccgggtaagtggccctctcagcaggaagctcccctgcaccccctcta cccatgtaccacagtcccccaccccccaccacagtcccccaccccccaccccagtcccca @@ -36,8 +36,8 @@ cccccaccacagtcccccaacccctaccacagtcccccaacccccaccacagtccccacc ccctaccacagtcccccaacccccaccacagtcccccaccccctaccacaatcccccacc ccccaccacagtcccctgggacgcagacagggagaggcccttgcagctcccagtgggaaa tctggccatgggcagtgtctccctgcgtggcggaggcagtggcatcagggccccgactgt -ggcgcctttggcccctctgaccttcca ->p:HG002_2_chr20:62830512-62830939 +ggcgcctttggcccctctgaccttcc +>p:HG002_2_chr20:62830512-62830938 cccagggccagaagggcagcatgggagaccccggccttccaggcccccagggcctccgag gtgacgtgggcgaccgggtaagtggccctctcagcaggaagctcccctgcaccccctcta cccatgtaccacagtcccccaccccccaccgtgctccacc-------------------- @@ -49,8 +49,8 @@ cccccatgacagtcccccaacccccaccacagtcccccaacccccaccacagtccccacc 
ccctaccacagtcccccaacccccaccacagtcccccaccccctaccacaatccccca-c ccccaccacagtcccctgggacgcagacagggagaggcccttgcagctcccagtgggaaa tctggccatgggcagtgtctccctgcgtggcggaggcagtggcatcagggccccgactgt -ggcgcctttggcccctctgaccttcca ->ref_chr20:62830512-62830939 +ggcgcctttggcccctctgaccttcc +>ref_chr20:62830512-62830938 cccagggccagaagggcagcatgggagaccccggccttccaggcccccagggcctccgag gtgacgtgggcgaccgggtaagtggccctctcagcaggaagctcccctgcaccccctcta cccatgtaccacagtccccc---------------------------------------- @@ -62,4 +62,4 @@ cccccaccacagtcccccaacccctaccacagtcccccaacccccaccacagtccccacc ccctaccacagtcccccaacccccaccacagtcccccaccccctaccacaatcccccacc ccccaccacagtcccctgggacgcagacagggagaggcccttgcagctcccagtgggaaa tctggccatgggcagtgtctccctgcgtggcggaggcagtggcatcagggccccgactgt -ggcgcctttggcccctctgaccttcca +ggcgcctttggcccctctgaccttcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_ca43b50e2a5d770bb34202d8a7b62421.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_d7c982150b9084cabc20a2d51cc2fab7.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_ca43b50e2a5d770bb34202d8a7b62421.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_d7c982150b9084cabc20a2d51cc2fab7.msa index 79f62fef..58ba55a3 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_ca43b50e2a5d770bb34202d8a7b62421.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_d7c982150b9084cabc20a2d51cc2fab7.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:20356430-20357911 +>syndip_1_chr20:20356430-20357910 ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag tgtgaggggagatggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca ctgaggggaggtggtcatagtgtgaggggaggtggtcacactggggaggtggtcacactg @@ -31,8 +31,8 @@ gaggggaggtggtcatactgtga--ggggaagtggtcacactgaggggaggtggtcatag tgtgaggggaggtggtcacac--tgaggggaggtggtcacactgaggggaggtggtcaca gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcat actgaggggaggtggtcacagtgtgaggggaggtggtcaca------------------- 
---ctgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcac ------------------------------------------------------------a +--ctgaggggaggtggtcacactgaggggaggtggtcacagtgtgaggggaggtggtca- +----------------------------------------------------------ca ctgaggggaggtggtcacactgtgaggggaggtggtcacactgaggagaggtggtcatag tgtgaggggaggtggtcacactgaggggaggtggtcaca--ctgaggagaggtggtcata gtgtgaggggaggtggtcacac--tgaggggaggtggtcacactgaggggaggtggtcat @@ -45,8 +45,8 @@ ttgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacac tgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagt gtgcggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtg tgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagt -gtgaggggaggtggtcac ->syndip_2_chr20:20356430-20357911 +gtgaggggaggtggtca +>syndip_2_chr20:20356430-20357910 ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag tgtgaggggagatggtcacactgaggggaggtggtcacagtg------------------ -tgaggggaggtggtcatagtgtgaggggaggtggtcacactggggaggtggtcacactg @@ -79,8 +79,8 @@ gaggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggagggggtcatac tgcgagtggaggtagtcacactgtgaagggtggtagtcacactgagaggaggtggtcaca --ctgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcac actgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcaca -gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcat ------------------------------------------------------------a +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtca- +----------------------------------------------------------ta ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag tgtgcggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcata --ctgaggggaggtggtcacactgtgaggggaggtggtcacactgaggagaggtggtcac @@ -93,8 +93,8 @@ ttgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacac tgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagt gtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtg tgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagt -gtgaggggaggtggtcac ->p:HG002_1_chr20:20356430-20357911 
+gtgaggggaggtggtca +>p:HG002_1_chr20:20356430-20357910 ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag tgtgaggggagatggtcacactgaggggaggtggtcacagtg------------------ -tgaggggaggtggtcatagtgtgaggggaggtggtcacactggggaggtggtcacactg @@ -127,8 +127,8 @@ gaggggaggtggtcatactgtgagtgaggaggtagtcacactgaggggagggggtcatac tgcgagtggaggtagtcacactgtgaagggtggtagtcacactgagaggaggtggtcaca --ctgaggggaagtggtcacactgaggggaggtggtcatactgtgaggggaagtggtcac actgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcaca -gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcat ------------------------------------------------------------a +gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtca- +----------------------------------------------------------ta ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag tgtgcggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcata --ctgaggggaggtggtcacactgtgaggggaggtggtcacactgaggagaggtggtcac @@ -141,8 +141,8 @@ ttgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacac tgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagt gtgaggggaggtggtcacactgaggggaggtggtcacactgaggggaggtggtcacagtg tgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagt -gtgaggggaggtggtcac ->p:HG002_2_chr20:20356430-20357911 +gtgaggggaggtggtca +>p:HG002_2_chr20:20356430-20357910 ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag tgtgaggggagatggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca ctgaggggaggtggtcatagtgtgaggggaggtggtcacactggggaggtggtcacactg @@ -189,8 +189,8 @@ ttgaggagaggtggtcatagtgtgaggggaggtggtcacactgaggggaggtggtcacac tgaggggaggtggtcatagtgtgaggggaggtggtcacactgaggagaggtggtcatagt gtgcggggaggtggtcacactgcggggaggtggtcatactgaggggaggtggtcacagtg tgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcatagt -gtgaggggaggtggtcac ->ref_chr20:20356430-20357911 +gtgaggggaggtggtca +>ref_chr20:20356430-20357910 ctgaggggaggtggtcacagtgtgaggggaggtggtcacactgaggggaggtggtcacag tgtgaggggagatggtcacactgaggggaggtggtcacagtgtgaggggaggtggtcaca 
ctgaggggaggtggtcatagtgtgaggggaggtggtcacactggggaggtggtcacactg @@ -223,7 +223,7 @@ gaggggaggtggtcatactgtga--ggggaagtggtcacactgaggggaggtggtcatag tg---------------------tgaggggaggtggtcacactgaggggaggtggtcaca gtgtgaggggaggtggtcacactgaggggaggtggtcacagtgtgcggggaggtggtcat actgaggggaggtggtcacagtgtgaggggaggtggtcaca------------------- -gtgtgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcac +gtgtgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtca- ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ @@ -237,4 +237,4 @@ gtgtgaggggaggtggtcacactgaggggaggtggtcatagtgtgaggggaggtggtcac ------------------------------------------------------------ ------------------------------------------------------------ ------------------------------------------------------------ ------------------- +----------------- diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_d82a691e489dc8fab00bca3d0d072f7e.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_d82a691e489dc8fab00bca3d0d072f7e.msa deleted file mode 100644 index e85bad4b..00000000 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_d82a691e489dc8fab00bca3d0d072f7e.msa +++ /dev/null @@ -1,65 +0,0 @@ ->syndip_1_chr20:5041841-5042369 -aaccctttttcaagagtctctccccagctgctgctagcgggattttttttttaactctct -atcctatttaacctcctcactgatgaccttttaatgcgtaac------------------ -----------ttttttttttttttttttttttgagacggagtctcgctctcccaggctgg -agtgcagtggtgtgatctcggctcactgcaacctgtgcctcccaggttcaagcgatctcc -tgcctcagcctccccagtacctaggattacaggtgcgcgcctccagcatagctaatgttt -gtatttttagtagagacggggtttcaccatgttggccaggctggttatgcataacttttt -atcacagatgtttgatctcagttttaatcttttcaacatttgtaaacccttggttcattg -tgcttctccctttattactc------------ttttttttttttttttgagacggagttt -cgctcttgttgcccaggctggagtgcagtggcgcactctcggttcactgcaatctccacc -tcataggttcaagcgat------------------------------------------- 
------------------------------------------------------------- ------------------------------------------------------ ->syndip_2_chr20:5041841-5042369 -aaccctttttcaagagtctctccccagctgctgctagcgggattttttttttaactctct -atcctatttaacctcctcactgatgaccttttaatgcgtaactttttttttttttttttt -ttttttttttttttttttttttttttttttttgagacggagtctcgctctcccaggctgg -agtgcagtggtgtgatctcggctcactgcaacctgtgcctcccaggttcaagcgatctcc -tgcctcagcctccccagtacctaggattacaggtgcgcgcctccagcatagctaatgttt -gtatttttagtagagacggggtttcaccatgttggccaggctggttatgcataacttttt -atcacagatgtttgatctcagttttaatcttttcaacatttgtaaacccttggttcattg -tgcttctccctttattactcttttttttttttttttttttttttttttgagacggagttt -cgctcttgttgcccaggctggagtgcagtggcgcactctcggttcactgcaatctccacc -tcataggttcaagcgat------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------ ->p:HG002_1_chr20:5041841-5042369 -aaccctttttcaagagtctctccccagctgctgctagcgggattttttttttaactctct -atcctatttaacctcctcactgatgaccttttaatgcgtaactttttttttttttttttt -ttttttttttttttttttttttttttttttttgagacggagtctcgctctcccaggctgg -agtgcagtggtgtgatctcggctcactgcaacctgtgcctcccaggttcaagcgatctcc -tgcctcagcctccccagtacctaggattacaggtgcgcgcctccagcatagctaatgttt -gtatttttagtagagacggggtttcaccatgttggccaggctggttatgcataacttttt -atcacagatgtttgatctcagttttaatcttttcaacatttgtaaacccttggttcattg -tgcttctccctttattactcttttttttttttttttttttttttttttgagacggagttt -cgctcttgttgcccaggctggagtgcagtggcgcactctcggttcactgcaatctccacc -tcataggttcaagcgat------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------ ->p:HG002_2_chr20:5041841-5042369 -aaccctttttcaagagtctctccccagctgctgctagcgggattttttttttaactctct -atcctatttaacctcctcactgatgaccttttaatgcgtaac------------------ -----------ttttttttttttttttttttttgagacggagtctcgctctcccaggctgg -agtgcagtggtgtgatctcggctcactgcaacctgtgcctcccaggttcaagcgatctcc 
-tgcctcagcctccccagtacctaggattacaggtgcgcgcctccagcatagctaatgttt -gtatttttagtagagacggggtttcaccatgttggccaggctggttatgcataacttttt -atcacagatgtttgatctcagttttaatcttttcaacatttgtaaacccttggttcattg -tgcttctccctttattactc------------ttttttttttttttttgagacggagttt -cgctcttgttgcccaggctggagtgcagtggagcactctcggttcactgcaacctccacc -tcataggttcaagcgattctcctgcctcagcctcctgagtagctgggattacaggtgccc -accaccatgcccttttttttttttgagacggagtttcgctcttgttgcccaggctggagt -gcagtggcgcactctcggttcactgcaatctccacctcataggttcaagcgat ->ref_chr20:5041841-5042369 -aaccctttttcaagagtctctccccagctgctgctagcgggattttttttttaactctct -atcctatttaacctcctcactgatgaccttttaatgcgtaac------------------ -----------ttttttttttttttttttttttgagacggagtctcgctctcccaggctgg -agtgcagtggtgtgatctcggctcactgcaacctgtgcctcccaggttcaagcgatctcc -tgcctcagcctccccagtacctaggattacaggtgcgcgcctccagcatagctaatgttt -gtatttttagtagagacggggtttcaccatgttggccaggctggttatgcataacttttt -atcacagatgtttgatctcagttttaatcttttcaacatttgtaaacccttggttcattg -tgcttctccctttattactcttttttttttttttttttttttttttttgagacggagttt -cgctcttgttgcccaggctggagtgcagtggcgcactctcggttcactgcaatctccacc -tcataggttcaagcgat------------------------------------------- ------------------------------------------------------------- ------------------------------------------------------ diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_146844a1e2baa87f3462a854600507de.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_d83685774be2675f1a182646478490ed.msa similarity index 85% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_146844a1e2baa87f3462a854600507de.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_d83685774be2675f1a182646478490ed.msa index b7aa8658..73601803 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_146844a1e2baa87f3462a854600507de.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_d83685774be2675f1a182646478490ed.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:21120112-21120640 +>syndip_1_chr20:21120112-21120639 
acagactgagactttgtcttaaaatcaaattaatttaatttaaaaaacaaatgcctttga gcaaagtgctgttagtaacctcatgtcttcctgtattcatcagggttcttcagagaaaca gaaccagtaggacacacacacacgcacacacacacacgtgtgtgtatatatatgtgtgta @@ -7,8 +7,8 @@ tgtatatatatatata-------------------------------------------- --------------------tatatttatatatatatatatatt----tatatatatata tatttatatatatatatatatatatggagagagagagatttatttttaggaagtggttca tgagattgtgagggacttcattcttttctcctaaggccttcaactgactggatgaggccc -atccacattttggagagcagtctactttcctcaaagcctactgcttcaa ->syndip_2_chr20:21120112-21120640 +atccacattttggagagcagtctactttcctcaaagcctactgcttca +>syndip_2_chr20:21120112-21120639 acagactgagactttgtcttaaaatcaaattaatttaatttaaaaaacaaatgcctttga gcaaagtgctgttagtaacctcatgtcttcctgtattcatcagggttcttcagagaaaca gaaccagtaggacacacacacacgcacacacacacacgtgtgtgtatatatatgtgtgta @@ -17,8 +17,8 @@ tgtatatatatatata-------------------------------------------- --------------------tatatttatatatatatatatatttatatatatatatata tatttatatatatatatatatatatggagagagagagatttatttttaggaagtggttca tgagattgtgagggacttcattcttttctcctaaggccttcaactgactggatgaggccc -atccacattttggagagcagtctactttcctcaaagcctactgcttcaa ->p:HG002_1_chr20:21120112-21120640 +atccacattttggagagcagtctactttcctcaaagcctactgcttca +>p:HG002_1_chr20:21120112-21120639 acagactgagactttgtcttaaaatcaaattaatttaatttaaaaaacaaatgcctttga gcaaagtgctgttagtaacctcatgtcttcctgtattcatcagggttcttcagagaaaca gaaccagtaggacacacacacacgcacacacacacacgtgtgtgtatatatatgtgtgta @@ -27,8 +27,8 @@ tgtatatatatatata-------------------------------------------- --------------------tatatttatatatatatatatatttatatatatatatata tatttatatatatatatatatatatggagagagagagatttatttttaggaagtggttca tgagattgtgagggacttcattcttttctcctaaggccttcaactgactggatgaggccc -atccacattttggagagcagtctactttcctcaaagcctactgcttcaa ->p:HG002_2_chr20:21120112-21120640 +atccacattttggagagcagtctactttcctcaaagcctactgcttca +>p:HG002_2_chr20:21120112-21120639 acagactgagactttgtcttaaaatcaaattaatttaatttaaaaaacaaatgcctttga gcaaagtgctgttagtaacctcatgtcttcctgtattcatcagggttcttcagagaaaca 
gaaccagtaggacacacacacacgcacacacacacacgtgtgtgtatatatatgtgtgta @@ -37,8 +37,8 @@ tgtatatatatatata-------------------------------------------- --------------------tatatttatatatatatatatatt----tatatatatata tatttatatatatatatatatatatggagagagagagatttatttttaggaagtggttca tgagattgtgagggacttcattcttttctcctaaggccttcaactgactggatgaggccc -atccacattttggagagcagtctactttcctcaaagcctactgcttcaa ->ref_chr20:21120112-21120640 +atccacattttggagagcagtctactttcctcaaagcctactgcttca +>ref_chr20:21120112-21120639 acagactgagactttgtcttaaaatcaaattaatttaatttaaaaaacaaatgcctttga gcaaagtgctgttagtaacctcatgtcttcctgtattcatcagggttcttcagagaaaca gaaccagtaggacacacacacacgcacacacacacacgtgtgtgtatatatatgtgtgta @@ -47,4 +47,4 @@ tatttatatatatatatatttatatatatatatttatatatatatatttatatatatata tatttatatatatatatatttatatatatatatttatatatatatatatttatatatata tatttatatatatatatatttatatggagagagagagatttatttttaggaagtggttca tgagattgtgagggacttcattcttttctcctaaggccttcaactgactggatgaggccc -atccacattttggagagcagtctactttcctcaaagcctactgcttcaa +atccacattttggagagcagtctactttcctcaaagcctactgcttca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_198bdf87b60e4ea96dbdacfeb79a01dc.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_da9e583f8bb0c9329678fe2798c29f69.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_198bdf87b60e4ea96dbdacfeb79a01dc.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_da9e583f8bb0c9329678fe2798c29f69.msa index d10b6f5d..518d1963 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_198bdf87b60e4ea96dbdacfeb79a01dc.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_da9e583f8bb0c9329678fe2798c29f69.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63028776-63029503 +>syndip_1_chr20:63028776-63029502 caacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgcaggaggctg agcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatgatgactcca aacgcacccgagtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcg @@ -23,8 +23,7 @@ 
gcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcct ccgcgcctccgtgtgcaggtcccccgggcctctgcgtctctgtgtgcaggctcaagtttg ccaacgtccatgcacgtctcagcctctcagcctggactggacaactgggcttcgggaatt catttaaattctacccgctacacgccttccctggattcagggcggcgtccagtgcattca -t ->syndip_2_chr20:63028776-63029503 +>syndip_2_chr20:63028776-63029502 caacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgcaggaggctg agcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatgatgactcca aa---------------------------------------------------------- @@ -49,8 +48,7 @@ gcgtctgtgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcct ccgcgtctccgtgtgcaggtcccccgggcccctgcgtctctgtgtgcaggctcaagtttg ccaacgtccatgcacgtctcagcctctcagcctggactggacaactgggcttcgggaatt catttaaattctacccgctacacgccttccctggattcagggcggcgtccagtgcattca -t ->p:HG002_1_chr20:63028776-63029503 +>p:HG002_1_chr20:63028776-63029502 caacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgcaggaggctg agcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatgatgactcca aa---------------------------------------------------------- @@ -75,8 +73,7 @@ gcgtctgtgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcct ccgcgtctccgtgtgcaggtcccccgggcccctgcgtctctgtgtgcaggctcaagtttg ccaacgtccatgcacgtctcagcctctcagcctggactggacaactgggcttcgggaatt catttaaattctacccgctacacgccttccctggattcagggcggcgtccagtgcattca -t ->p:HG002_2_chr20:63028776-63029503 +>p:HG002_2_chr20:63028776-63029502 caacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgcaggaggctg agcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatgatgactcca aacgcacccgagtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcctccgcg @@ -101,8 +98,7 @@ gcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcct ccgcgcctccgtgtgcaggtcccccgggcctctgcgtctctgtgtgcaggctcaagtttg ccaacgtccatgcacgtctcagcctctcagcctggactggacaactcggcttcgggaatt catttaaattctacccgctacacgccttccctggattcagggcggcgtccagtgaattca -t ->ref_chr20:63028776-63029503 +>ref_chr20:63028776-63029502 caacaatgcacaagtggcttgagtagaagagctgctcctgctgggaggcgcaggaggctg agcgaggcccaccctgcaggggcgaggccacggtttgtgttatttcccatgatgactcca 
aa---------------------------------------------------------- @@ -127,4 +123,3 @@ gcgcctccgtgtgcaggtcccccgggcctccgcgcctccgtgtgcaggtcccccgggcct ccgcgtctccgtgtgcaggtcccccgggcctctgcgtctctgtgtgcaggctcaagtttg ccaacgtccatgcacgtctcagcctctcagcctggactggacaactgggcttcgggaatt catttaaattctacccgctacacgccttccctggattcagggcggcgtccagtgcattca -t diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_37a09937c45cf5969ce12d15082abfbb.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ddb507ec4de0d55fd44561115d512b4c.msa similarity index 87% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_37a09937c45cf5969ce12d15082abfbb.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_ddb507ec4de0d55fd44561115d512b4c.msa index 96858d9d..d1f1f4b4 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_37a09937c45cf5969ce12d15082abfbb.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ddb507ec4de0d55fd44561115d512b4c.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:49834009-49834659 +>syndip_1_chr20:49834009-49834658 gaggctgagacaggcggatcacctgaggttgggagttcaagaccagcttggccaacatgg tgaaactctgtctctgctaaaaatacaaaaattagccgggcgtggtggtgcacgcttgta atattagcttgtctctctctctctctctctctctctctctctctctctctctctc----- @@ -9,8 +9,8 @@ acagtgtgtatatatatatatactgtgtgtatatatatatatactgtata---------- ---------------------------------------tgtatatatatatatatactg tatatatacacacacacactatatatatgccatatataatatctggtggccagttgtgtg aaatacattcattttctctgttgggtagattgatttgacctaactatgagcctcaatctc -aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaag ->syndip_2_chr20:49834009-49834659 +aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaa +>syndip_2_chr20:49834009-49834658 gaggctgagacaggcggatcacctgaggttgggagttcaagaccagcttggccaacatgg tgaaactctgtctctgctaaaaatacaaaaattagccgggcgtggtggtgcacgcttgta atattagcttgtctctctctctctctctctctctctctctctctctctctctctctctct @@ -21,8 +21,8 @@ gtgtatatatatatatactgtatatatatatatactgtgtatatatatatactgtatata tatactgtatatatatatatatactgtatatatatatactgtgtatatatatatatactg 
tatatatacacacacacactatatatatgccatatataatatctggtggccagttgtgtg aaatacattcattttctctgttgggtagattgatttgacctaactatgagcctcaatctc -aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaag ->p:HG002_1_chr20:49834009-49834659 +aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaa +>p:HG002_1_chr20:49834009-49834658 gaggctgagacaggcggatcacctgaggttgggagttcaagaccagcttggccaacatgg tgaaactctgtctctgctaaaaatacaaaaattagccgggcgtggtggtgcacgcttgta atattagcttgtctctctctctctctctctctctctctctctctctctctctctctctct @@ -33,8 +33,8 @@ gtgtatatatatatatactgtatatatatatatactgtgtatatatatatactgtatata tatactgtatatatatatatatactgtatatatatatactgtgtatatatatatatactg tatatatacacacacacactatatatatgccatatataatatctggtggccagttgtgtg aaatacattcattttctctgttgggtagattgatttgacctaactatgagcctcaatctc -aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaag ->p:HG002_2_chr20:49834009-49834659 +aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaa +>p:HG002_2_chr20:49834009-49834658 gaggctgagacaggcggatcacctgaggttgggagttcaagaccagcttggccaacatgg tgaaactctgtctctgctaaaaatacaaaaattagccgggcgtggtggtgcacgcttgta atattagcttgtctctctctctctctctctctctctctctctctctctctctctc----- @@ -45,8 +45,8 @@ acagtgtgtatatatatatatactgtgtgtatatatatatatactgtata---------- ---------------------------------------tgtatatatatatatatactg tatatatacacacacacactatatatatgccatatataatatctggtggccagttgtgtg aaatacattcattttctctgttgggtagattgatttgacctaactttgagcctcaatctc -aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaag ->ref_chr20:49834009-49834659 +aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaa +>ref_chr20:49834009-49834658 gaggctgagacaggcggatcacctgaggttgggagttcaagaccagcttggccaacatgg tgaaactctgtctctgctaaaaatacaaaaattagccgggcgtggtggtgcacgcttgta atattagcttg--tctctctctctctctctctctctctctctctctctctctctctctct @@ -57,4 +57,4 @@ gtgtatatatatatatactgtgtatatatatatactgtgtatatatatatactgtatata tatactgtatatatatatatatactgtatatatatatactgtgtatatatatatatactg tatatatacacacacacactatatatatgccatatataatatctggtggccagttgtgtg aaatacattcattttctctgttgggtagattgatttgacctaactatgagcctcaatctc 
-aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaag +aagaggtaaagtggtagtttgccaatttaatgctgcacaaaggattatttaa diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_047c63d2654e7637150b4279117c89de.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ddb6122dabcdfbfdfd1b60aa0d3cffd6.msa similarity index 91% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_047c63d2654e7637150b4279117c89de.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_ddb6122dabcdfbfdfd1b60aa0d3cffd6.msa index 0cb1fc7f..818a0ba0 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_047c63d2654e7637150b4279117c89de.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ddb6122dabcdfbfdfd1b60aa0d3cffd6.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:62360090-62360813 +>syndip_1_chr20:62360090-62360812 aaggacatttccccaccccactccagggagtccctgcccccaccctgaccaaggctggca gccccctggcctaggccgagacaggggccttggtagatacatccacgtggtcaggaaagt gggtgagaaggaaggaagacggtgcaggtgtggagggaggaatgagcagacaggtggaca @@ -14,8 +14,8 @@ gtgggtgggtgggtagaaggataggtaggtgggtgggtgggtggatggataggtgggtgg gtgggtggaggaata--------gatgggtgggtggatgggtgggtggagggatagatgg gtgggtgggtgggtggagggatagatgggtgggtgggtggagggaccacttcccaggtgg agtggaaagggttcctcttccctccgtatcattttacagttggtgaaactgaggctgtga -gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaagc ->syndip_2_chr20:62360090-62360813 +gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaag +>syndip_2_chr20:62360090-62360812 aaggacatttccccaccccactccagggagtccctgcccccaccctgaccaaggctggca gccccctggcctaggccgagacaggggccttggtagatacatccacgtggtcaggaaagt gggtgagaaggaaggaagacggtgcaggtgtggagggaggaatgagcagacaggtggaca @@ -31,8 +31,8 @@ ggtg-------------------------------------------------------- gtgggtggaggaata--------gatgggtgggtggatgggtgggtggagggatagatgg gtgggtgggtgggtggagggatagatgggtgggtgggtggagggaccacttcccaggtgg agtggaaagggttcctcttccctccgtatcattttacagttggtgaaactgaggctgtga -gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaagc ->p:HG002_1_chr20:62360090-62360813 
+gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaag +>p:HG002_1_chr20:62360090-62360812 aaggacatttccccaccccactccagggagtccctgcccccaccctgaccaaggctggca gccccctggcctaggccgagacaggggccttggtagatacatccacgtggtcaggaaagt gggtgagaaggaaggaagacggtgcaggtgtggagggaggaatgagcagacaggtggaca @@ -48,8 +48,8 @@ ggtg-------------------------------------------------------- gtgggtggaggaata--------gatgggtgggtggatgggtgggtggagggatagatgg gtgggtgggtgggtggagggatagatgggtgggtgggtggagggaccacttcccaggtgg agtggaaagggttcctcttccctccgtatcattttacagttggtgaaactgaggctgtga -gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaagc ->p:HG002_2_chr20:62360090-62360813 +gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaag +>p:HG002_2_chr20:62360090-62360812 aaggacatttccccaccccactccagggagtccctgcccccaccctgaccaaggctggca gccccctggcctaggccgagacaggggccttggtagatacatccacgtggtcaggaaagt gggtgagaaggaaggaagacggtgcaggtgtggagggaggaatgagcagacaggtggaca @@ -65,8 +65,8 @@ gtgggtgggtgggtagaaggataggtaggtgggtgggtgggtggatggataggtgggtgg gtgggtggaggaata--------gatgggtgggtggatgggtgggtggagggatagatgg gtgggtgggtgggtggagggatagatgggtgggtgggtggagggaccacttcccaggtgg agtggaaagggttcctcttccctccgtatcattttacagttggtgaaactgaggctgtga -gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaagc ->ref_chr20:62360090-62360813 +gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaag +>ref_chr20:62360090-62360812 aaggacatttccccaccccactccagggagtccctgcccccaccctgaccaaggctggca gccccctggcctaggccgagacaggggccttggtagatacatccacgtggtcaggaaagt gggtgagaaggaaggaagacggtgcaggtgtggagggaggaatgagcagacaggtggaca @@ -82,4 +82,4 @@ ggt--------------------------------------------------------- gtgggtagaaggataggtaggtgggtgggtgggtggatgggtgggtggagggatagatgg gtgggtgggtgggtggagggatagatgggtgggtgggtggagggaccacttcccaggtgg agtggaaagggttcctcttccctccgtatcattttacagttggtgaaactgaggctgtga -gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaagc +gaggcaagacgctcacatggcggaacctgatttctggcccagaacaatctaaaag diff --git 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_6f2f5299f80273895addda455b7332af.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_df55c8c1ea86802a42a7e9b21d75a6f1.msa similarity index 88% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_6f2f5299f80273895addda455b7332af.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_df55c8c1ea86802a42a7e9b21d75a6f1.msa index e922cf47..fd3568ef 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_6f2f5299f80273895addda455b7332af.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_df55c8c1ea86802a42a7e9b21d75a6f1.msa @@ -1,40 +1,40 @@ ->syndip_1_chr20:23560839-23561199 +>syndip_1_chr20:23560839-23561198 gtattctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttc ttcagcattcctttctctttctttctttctttctttctttctttccttccttccttcctt ccttccttccttccttccttccttccttccttc--------------------------- ------------------------------------------------------------ --------------------------------ctttctttctttctttctttctttcttt ctttctttctttctttctttcttctttctttctttttcttttacagagtcttactcttgt -tgcccaggctgta ->syndip_2_chr20:23560839-23561199 +tgcccaggctgt +>syndip_2_chr20:23560839-23561198 gtattctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttc ttcagcattcctttctctttctttctttctttctttctttctttctttctttctttcttt ccttctttccttccttccttccttccttccttccttccttccttccttccttccttcctt ccttctttccttctttccttctttccttctttccttctttccttctttccttctttcctt ctttctttctttctttccttctttccttctttctttctttctttctttctttctttcttt ctttctttctttctttctttcttctttctttctttttcttttacagagtcttactcttgt -tgcccaggctgta ->p:HG002_1_chr20:23560839-23561199 +tgcccaggctgt +>p:HG002_1_chr20:23560839-23561198 gtattctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttc ttcagcattcctttctctttctttctttctttctttctttctttctttctttctttcttt ccttctttccttccttccttccttccttccttccttccttccttccttccttccttcttt ccttctttccttctttccttctttccttctttccttctttccttctttccttctttcctt ctttctttctttctttctttctttccttctttctttctttctttctttctttctttcttt 
ctttctttctttctttctttcttctttctttctttttcttttacagagtcttactcttgt -tgcccaggctgta ->p:HG002_2_chr20:23560839-23561199 +tgcccaggctgt +>p:HG002_2_chr20:23560839-23561198 gtattctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttc ttcagcattcctttctctttctttctttctttctttctttctttctttctttctttcttt ccttctttccttccttccttccttccttccttc--------------------------- ------------------------------------------------------------ --------------------------------ctttctttctttctttctttctttcttt ctttctttctttctttctttcttctttctttctttttcttttacagagtcttactcttgt -tgcccaggctgta ->ref_chr20:23560839-23561199 +tgcccaggctgt +>ref_chr20:23560839-23561198 gtattctgaatgtcagtatgtacttagtttcactagggagatttttattccctaatgttc ttcagcattcctttctctttctttctttctttctttctttctttctttctttctttcttt ccttctttccttccttccttccttccttccttccttccttccttccttccttccttcctt ccttctttccttctttccttctttccttctttccttctttccttctttccttctttcc-- ----------ttctttccttctttccttctttctttctttctttctttctttctttcttt ctttctttctttctttctttcttctttctttctttttcttttacagagtcttactcttgt -tgcccaggctgta +tgcccaggctgt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_73d49ee7e8516143213fa7125202aeab.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_e449e9cc3dfd676b949352d747ef2fae.msa similarity index 88% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_73d49ee7e8516143213fa7125202aeab.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_e449e9cc3dfd676b949352d747ef2fae.msa index 54b133c9..48e09ece 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_73d49ee7e8516143213fa7125202aeab.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_e449e9cc3dfd676b949352d747ef2fae.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:37361652-37362159 +>syndip_1_chr20:37361652-37362158 catcaggggtggggcagctgggggctctcggaaagatgacctacggctgggagagagcaa aacttgccacctcagtgttctctgtgatcctctcctgacactgagcaggggcagggcagg agtgctgctgagggc--------------------------------------------- @@ -7,8 +7,8 @@ tgtgcaggtagagatgccggggtctctgtgtgcaggtagagatgccggggtctctgtgtg 
caggtagagatgtagagatgctggggtctctgtgtgcaggtagagatgctggggtctctg tgtgcaggcagagatgctgaagcccaggttagctcttatttatgtctttatttatttttt tgagacaaggtctcactctgttgcccaggctggagtgcagtggtgtaatcatagctcact -gcagcctcaaactcctgggtgcaagcaa ->syndip_2_chr20:37361652-37362159 +gcagcctcaaactcctgggtgcaagca +>syndip_2_chr20:37361652-37362158 catcaggggtggggcagctgggggctctcggaaagatgacctacggctgggagagagcaa aacttgccacctcagtgttctctgtgatcctctcctgacactgagcaggggcagggcagg agtgctgctgagggcctctgtgtgcaggtagagatgccggggtctctgtgtgcag----- @@ -17,8 +17,8 @@ tgtgcaggtagagatgccggggtctctgtgtgcaggtagagatgccggggtctctgtgtg caggtagagatgtagagatgctggggtctctgtgtgcaggtagagatgctggggtctctg tgtgcaggcagagatgctgaagcccaggttagctcttatttatgtctttatttatttttt tgagacaaggtctcactctgttgcccaggctggagtgcagtggtgtaatcatagctcact -gcagcctcaaactcctgggtgcaagcaa ->p:HG002_1_chr20:37361652-37362159 +gcagcctcaaactcctgggtgcaagca +>p:HG002_1_chr20:37361652-37362158 catcaggggtggggcagctgggcgctctcggaaagatgacctacggctgggagagagcaa aacttgccacctcagtgttctctgtgatcctctcctgacactgagcaggggcagggcagg agtgctgctgagggcctctgtgtgcaggtagagatgccggggtctctgtgtgcag----- @@ -27,8 +27,8 @@ tgtgcaggtagagatgccggggtctctgtgtgcaggtagagatgccggggtctctgtgtg caggtagagatgtagagatgctggggtctctgtgtgcaggtagagatgctggggtctctg tgtgcaggcagagatgctgaagcccaggttagctcttatttatgtctttatttatttttt tgagacaaggtctcactctgttgcccaggctggagtgcagtggtgtaatcatagctcact -gcagcctcaaactcctgggtgcaagcaa ->p:HG002_2_chr20:37361652-37362159 +gcagcctcaaactcctgggtgcaagca +>p:HG002_2_chr20:37361652-37362158 catcaggggtggggcagctgggggctctcggaaagatgacctacggctgggagagagcaa aacttgccacctcagtgttctctgtgatcctctcctgacactgagcaggggcagggcagg agtgctgctgagggc--------------------------------------------- @@ -37,8 +37,8 @@ tgtgcaggtagagatgccggggtctctgtgtgcaggtagagatgccggggtctctgtgtg caggtagagatgtagagatgctggggtctctgtgtgcaggtagagatgctggggtctctg tgtgcaggcagagatgctgaagcccaggttagctcttatttatgtctttatttatttttt tgagacaaggtctcactctgttgcccaggctggagtgcagtggtgtaatcatagctcact -gcagcctcaaactcctgggtgcaagcaa ->ref_chr20:37361652-37362159 +gcagcctcaaactcctgggtgcaagca 
+>ref_chr20:37361652-37362158 catcaggggtggggcagctgggggctctcggaaagatgacctacggctgggagagagcaa aacttgccacctcagtgttctctgtgatcctctcctgacactgagcaggggcagggcagg agtgctgctgagggcctctgtgtgcaggtagagatgccggggtctctgtgtgcaggtaga @@ -47,4 +47,4 @@ tgtgcaggtagagatgccggggtctctgtgtgcaggtagagatgccggggtctctgtgtg caggtagagatgtagagatgctggggtctctgtgtgcaggtagagatgctggggtctctg tgtgcaggcagagatgctgaagcccaggttagctcttatttatgtctttatttatttttt tgagacaaggtctcactctgttgcccaggctggagtgcagtggtgtaatcatagctcact -gcagcctcaaactcctgggtgcaagcaa +gcagcctcaaactcctgggtgcaagca diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8cddeaa2d934e71e817f7dc57cefd1ed.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_e595ac0692ae672cf3ea7a03e1e6ad6c.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_8cddeaa2d934e71e817f7dc57cefd1ed.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_e595ac0692ae672cf3ea7a03e1e6ad6c.msa index 2d37ed2b..eaecc408 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_8cddeaa2d934e71e817f7dc57cefd1ed.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_e595ac0692ae672cf3ea7a03e1e6ad6c.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63964668-63966718 +>syndip_1_chr20:63964668-63966717 cctggcctgcttgggtggaattgggcaggtgtccaggctgtccgcagccctgcagggagc aggtgaggtgagggccaggagggcctaactgtgggccccattacccccaggccgtggccg aggag------------------------------------------------------- @@ -33,8 +33,8 @@ actggcacgggcctagtgaccgggatggccttggtgactggcatgggtctgccgaccgag actggcttgctgatgccaatgggtttgctgacacccacaggtttgccaatagtgacaggt ttgctcactccgatgggcttgctgaccccaacaggccggctgatggtgaccggcctgctg atgggcccaggcttggaggcaggcaggggtcggtccatgtggttccagatccggtgtttc -tccagctgggt ->syndip_2_chr20:63964668-63966718 +tccagctggg +>syndip_2_chr20:63964668-63966717 cctggcctgcttgggtggaattgggcaggtgtccaggctgtccgcagccctgcagggagc aggtgaggtgagggccaggagggcctaactgtgggccccattacccccaggccgtggccg aggagcaccctgacctggaacgagcccccataccttttctcaccactgttccccgacctg @@ -69,8 +69,8 @@ 
actggcacgggcctagtgaccgggatggccttggtgactggcatgggtctgccgaccgag actggcttgctgatgccaatgggtttgctgacacccacaggtttgccaatagtgacaggt ttgctcactccgatgggcttgctgaccccaacaggccggctgatggtgaccggcctgctg atgggcccaggcttggaggcaggcaggggtcggtccatgtggttccagatccggtgtttc -tccagctgggt ->p:HG002_1_chr20:63964668-63966718 +tccagctggg +>p:HG002_1_chr20:63964668-63966717 cctggcctgcttgggtggaattgggcaggtgtccaggctgtccgcagccctgcagggagc aggtgaggtgagggccaggagggcctaactgtgggccccattacccccaggccgtggccg aggagcaccctgacctggaacgagcccccataccttttctcaccactgttccccgacctg @@ -105,8 +105,8 @@ actggcacgggcctagtgaccgggatggccttggtgactggcatgggtctgccgaccgag actggcttgctgatgccaatgggtttgctgacacccacaggtttgccaatagtgacaggt ttgctcactccgatgggcttgctgaccccaacaggccggctgatggtgaccggcctgctg atgggcccaggcttggaggcaggcaggggtcggtccatgtggttccagatccggtgtttc -tccagctgggt ->p:HG002_2_chr20:63964668-63966718 +tccagctggg +>p:HG002_2_chr20:63964668-63966717 cctggcctgcttgggtggaattgggcaggtgtccaggctgtccgcagccctgcagggagc aggtgaggtgagggccaggagggcctaactgtgggccccattacccccaggccgtggccg aggag------------------------------------------------------- @@ -141,8 +141,8 @@ actggcacgggcctagtgaccgggatggccttggtgactggcatgggtctgccgaccgag actggcttgctgatgccaatgggtttgctgacacccacaggtttgccaatagtgacaggt ttgctcactccgatgggcttgctgaccccaacaggccggctgatggtgaccggcctgctg atgggcccaggcttggaggcaggcaggggtcggtccatgtggttccagatccggtgtttc -tccagctgggt ->ref_chr20:63964668-63966718 +tccagctggg +>ref_chr20:63964668-63966717 cctggcctgcttgggtggaattgggcaggtgtccaggctgtccgcagccctgcagggagc aggtgaggtgagggccaggagggcctaactgtgggccccattacccccaggccgtggccg aggagcaccctgacctggaacgagcccccataccttttctcaccactgttccccgacctg @@ -177,4 +177,4 @@ actggcacgggcctagtgaccgggatggccttggtgactggcatgggtctgccgaccgag actggcttgctgatgccaatgggtttgctgacacccacaggtttgccaatagtgacaggt ttgctcactccgatgggcttgctgaccccaacaggccggctgatggtgaccggcctgctg atgggcccaggcttggaggcaggcaggggtcggtccatgtggttccagatccggtgtttc -tccagctgggt +tccagctggg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8ac3ffdda713c1cb5ce15ef3970c84f1.msa 
b/repo_utils/test_files/external/fake_mafft/lookup/fm_e7315f7330dde260068b31e9e14647d0.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_8ac3ffdda713c1cb5ce15ef3970c84f1.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_e7315f7330dde260068b31e9e14647d0.msa index d04a37da..7b2e62c3 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_8ac3ffdda713c1cb5ce15ef3970c84f1.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_e7315f7330dde260068b31e9e14647d0.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:53203887-53204574 +>syndip_1_chr20:53203887-53204573 cataccacatggggctactaagcccttcaaatgtgactggtccaaattaaaatatactgt aagtataaaacacactgaacttgaaaggcttagtccaaaaaccgtagtgcacaatatctc cttcatttttataatatgatactattatatcataattttataatatactattatatcata @@ -25,8 +25,8 @@ atatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgata ctatatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatga tactattatatcataatatgatataatattttggacatactaggttaaataagatatagt tatataaatcaatttcacttatttctttctcattctttaatgtgactactgaagcatatg -aaattccatgcatagctgacattctacttctattg ->syndip_2_chr20:53203887-53204574 +aaattccatgcatagctgacattctacttctatt +>syndip_2_chr20:53203887-53204573 cataccacatggggctactaagcccttcaaatgtgactggtccaaattaaaatatactgt aagtataaaacacactgaacttgaaaggcttagtccaaaaaccgtagtgcacaatatctc cttcatttttataatatgatactattatatcataattttataatatactattatatcata @@ -53,8 +53,8 @@ atatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgata ctatatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatga tactattatatcataatatgatataatattttggacatactaggttaaataagatatagt tatataaatcaatttcacttatttctttctcattctttaatgtgactactgaagcatatg -aaattccatgcatagctgacattctacttctattg ->p:HG002_1_chr20:53203887-53204574 +aaattccatgcatagctgacattctacttctatt +>p:HG002_1_chr20:53203887-53204573 cataccacatggggctactaagcccttcaaatgtgactggtccaaattaaaatatactgt aagtataaaacacactgaacttgaaaggcttagtccaaaaaccgtagtgcacaatatctc cttcatttttataatatgatactattatatcataattttataatatactattatatcata @@ -81,8 +81,8 @@ 
atatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgata ctatatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatga tactattatatcataatatgatataatattttggacatactaggttaaataagatatagt tatataaatcaatttcacttatttctttctcattctttaatgtgactactgaagcatatg -aaattccatgcatagctgacattctacttctattg ->p:HG002_2_chr20:53203887-53204574 +aaattccatgcatagctgacattctacttctatt +>p:HG002_2_chr20:53203887-53204573 cataccacatggggctactaagcccttcaaatgtgactggtccaaattaaaatatactgt aagtataaaacacactgaacttgaaaggcttagtccaaaaaccgtagtgcacaatatctc cttcatttttataatatgatactattatatcataattttataatatactattatatcata @@ -109,8 +109,8 @@ atatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgata ctatatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatga tactattatatcataatatgatataatattttggacatactaggttaaataagatatagt tatataaatcaatttcacttatttctttctcattctttaatgtgactactgaagcatatg -aaattccatgcatagctgacattctacttctattg ->ref_chr20:53203887-53204574 +aaattccatgcatagctgacattctacttctatt +>ref_chr20:53203887-53204573 cataccacatggggctactaagcccttcaaatgtgactggtccaaattaaaatatactgt aagtataaaacacactgaacttgaaaggcttagtccaaaaaccgtagtgcacaatatctc cttcatttttataatatgatactattatatcataattttataatatactattatatcata @@ -137,4 +137,4 @@ atatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatgata ctatatcatcatataacatgatgatatgatactatatcatcatataacatgatgatatga tactattatatcataatatgatataatattttggacatactaggttaaataagatatagt tatataaatcaatttcacttatttctttctcattctttaatgtgactactgaagcatatg -aaattccatgcatagctgacattctacttctattg +aaattccatgcatagctgacattctacttctatt diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_3a4684a9cebe728f709cb7d7bf349ea7.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_e98e8b7a2d661c3ba62b21a0dbc4f7d7.msa similarity index 87% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_3a4684a9cebe728f709cb7d7bf349ea7.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_e98e8b7a2d661c3ba62b21a0dbc4f7d7.msa index 7e9c806f..26a82542 100644 --- 
a/repo_utils/test_files/external/fake_mafft/lookup/fm_3a4684a9cebe728f709cb7d7bf349ea7.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_e98e8b7a2d661c3ba62b21a0dbc4f7d7.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:45600469-45600847 +>syndip_1_chr20:45600469-45600846 aatattgggaattttactgatggtaaaattaaaataggaggaggaggttacatcactatc atatggtaacattcccttaaaaatcatctctttatctttcctaaatattccactgtcttt gactgctatacctctctctcttttctttcaccaagcctctctctttgctttccttctggt @@ -8,8 +8,8 @@ tctctctctctccctctctctctctttctccctctctctccctctctctctctctctctc cctctctctctctttctccctctctctccctctctccccctctctctccctctctccctc tctccctctctctccctctctctctacccctgaatctggcctttctaaacaccctcaact tgtgtacctgtatactatactgagctacactaaagtttctatatttagccaaatccagcc -catcataccagctcttcccctggtatattcctaggagtgga ->syndip_2_chr20:45600469-45600847 +catcataccagctcttcccctggtatattcctaggagtgg +>syndip_2_chr20:45600469-45600846 aatattgggaattttactgatggtaaaattaaaataggaggaggaggttacatcactatc atatggtaacattcccttaaaaatcatctctttatctttcctaaatattccactgtcttt gactgctatacctctctctcttttctttcaccaagcctctctctttgctttccttctggt @@ -19,8 +19,8 @@ tctctctctctccctctctct--------------------------------------- -------------------------------------ccctctctccccctctctctccc tctctccctctctccctctctctctacccctgaatctggcctttctaaacaccctcaact tgtgtacctgtatactatactgagctacactaaagtttctatatttagccaaatccagcc -catcataccagctcttcccctggtatattcctaggagtgga ->p:HG002_1_chr20:45600469-45600847 +catcataccagctcttcccctggtatattcctaggagtgg +>p:HG002_1_chr20:45600469-45600846 aatattgggaattttactgatggtaaaattaaaataggaggaggaggttacatcactatc atatggtaacattcccttaaaaatcatctctttatctttcctaaatattccactgtcttt gactgctatacctctctctcttttctttcaccaagcctctctctttgctttccttctggt @@ -30,8 +30,8 @@ tctctctctctccctctctct--------------------------------------- -------------------------------------ccctctctccccctctctctccc tctctccctctctccctctctctctacccctgaatctggcctttctaaacaccctcaact tgtgtacctgtatactatactgagctacactaaagtttctatatttagccaaatccagcc -catcataccagctcttcccctggtatattcctaggagtgga ->p:HG002_2_chr20:45600469-45600847 
+catcataccagctcttcccctggtatattcctaggagtgg +>p:HG002_2_chr20:45600469-45600846 aatattgggaattttactgatggtaaaattaaaataggaggaggaggttacatcactatc atatggtaacattcccttaaaaatcatctctttatctttcctaaatattccactgtcttt gactgctatacctctctctcttttctttcaccaagcctctctctttgctttccttctggt @@ -41,8 +41,8 @@ tctctctctctccctctctctctctttctccctctctctccctctctctctctctctctc cctctctctctctttctccctctctctccctctctccccctctctctccctctctccctc tctccctctctctccctctctctctacccctgaatctggcctttctaaacaccctcaact tgtgtacctgtatactatactgagctacactaaagtttctatatttagccaaatccagcc -catcataccagctcttcccctggtatattcctaggagtgga ->ref_chr20:45600469-45600847 +catcataccagctcttcccctggtatattcctaggagtgg +>ref_chr20:45600469-45600846 aatattgggaattttactgatggtaaaattaaaataggaggaggaggttacatcactatc atatggtaacattcccttaaaaatcatctctttatctttcctaaatattccactgtcttt gactgctatacctctctctcttttctttcaccaagcctctctctttgctttccttctggt @@ -52,4 +52,4 @@ aaaagcagtctctctctctctctctctctctctct------------------------- ---------------------------------------------------------ctc tatccccccctctccctctctctctacccctgaatctggcctttctaaacaccctcaact tgtgtacctgtatactatactgagctacactaaagtttctatatttagccaaatccagcc -catcataccagctcttcccctggtatattcctaggagtgga +catcataccagctcttcccctggtatattcctaggagtgg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_fbbef252bd5883cf143b0f4c4649c512.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ed1042b42c059891aecf75a626d0d1a8.msa similarity index 98% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_fbbef252bd5883cf143b0f4c4649c512.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_ed1042b42c059891aecf75a626d0d1a8.msa index e6db8916..9504a597 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_fbbef252bd5883cf143b0f4c4649c512.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ed1042b42c059891aecf75a626d0d1a8.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:55943819-55945908 +>syndip_1_chr20:55943819-55945907 gtgctgggtactgtcccagtcctcctacctgtttacctcacttccacccttgcctccttc 
caagtcaatcccatgggaaccagcattttctgaaactcaataaaattagctagcatttca gccaggtgcagtggctcaagcctgtaatcccagcactttgggaggccaaggtgggtggat @@ -36,8 +36,8 @@ ctagatagctatctagatagtatctgtccagatatctgtagctatctagattgtatctgt ctagatatctgtagatatctagatatctatatctatctggatatctatagatatttaact gccgggcaatgtcctaaaccctttacactcaacacatttcattttataacaaaatatgag gtagatattattatttccctcaactcacagagaagaaaaaaatgaagcatatggagccca -tgggc ->syndip_2_chr20:55943819-55945908 +tggg +>syndip_2_chr20:55943819-55945907 gtgctgggtactgtcccagtcctcctacctgtttacctcacttccacccttgcctccttc caagtcaatcccatgggaaccagcattttctgaaactcaataaaattagctagcatttca gccaggtgcagtggctcaagcctgtaatcccagcactttgggaggccaaggtgggtggat @@ -75,8 +75,8 @@ ctagatagctatctagatagtatctgtccagatatctgtagctatctagattgtatctgt ctagatatctgtagatatctagatatctatatctatctggatatctatagatatttaact gccgggcaatgtcctaaaccctttacactcaacacatttcattttataacaaaatatgag gtagatattattatttccctcaactcacagagaagaaaaaaatgaagcatatggagccca -tgggc ->p:HG002_1_chr20:55943819-55945908 +tggg +>p:HG002_1_chr20:55943819-55945907 gtgctgggtactgtcccagtcctcctacctgtttacctcacttccacccttgcctccttc caagtcaatcccatgggaaccagcattttctgaaactcaataaaattagctagcatttca gccaggtgcagtggctcaagcctgtaatcccagcactttgggaggccaaggtgggtggat @@ -114,8 +114,8 @@ ctagatagctatctagatagtatctgtccagatatctgtagctatctagattgtatctgt ctagatatctgtagatatctagatatctatatctatctggatatctatagatatttaact gccgggcaatgtcctaaaccctttacactcaacacatttcattttataacaaaatatgag gtagatattattatttccctcaactcacagagaagaaaaaaatgaagcatatggagccca -tgggc ->p:HG002_2_chr20:55943819-55945908 +tggg +>p:HG002_2_chr20:55943819-55945907 gtgctgggtactgtcccagtcctcctacctgtttacctcacttccacccttgcctccttc caagtcaatcccatgggaaccagcattttctgaaactcaataaaattagctagcatttca gccaggtgcagtggctcaagcctgtaatcccagcactttgggaggccaaggtgggtggat @@ -153,8 +153,8 @@ ctagatagctatctagatagtatctgtccagatatctgtagctatctagattgtatctgt ctagatatctgtagatatctagatatctatatctatctggatatctatagatatttaact gccgggcaatgtcctaaaccctttacactcaacacatttcattttataacaaaatatgag gtagatattattatttccctcaactcacagagaagaaaaaaatgaagcatatggagccca -tgggc 
->ref_chr20:55943819-55945908 +tggg +>ref_chr20:55943819-55945907 gtgctgggtactgtcccagtcctcctacctgtttacctcacttccacccttgcctccttc caagtcaatcccatgggaaccagcattttctgaaactcaataaaattagctagcatttca gccaggtgcagtggctcaagcctgtaatcccagcactttgggaggccaaggtgggtggat @@ -192,4 +192,4 @@ ctagatagctatctagatagtatctgtccagatatctgtagctatctagattgtatctgt ctagatatctgtagatatctagatatctatatctatctggatatctatagatatttaact gccgggcaatgtcctaaaccctttacactcaacacatttcattttataacaaaatatgag gtagatattattatttccctcaactcacagagaagaaaaaaatgaagcatatggagccca -tgggc +tggg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_67fd23689620e3340fe2a29e2ea77933.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ed11e856d3a32487f771047b0b77f027.msa similarity index 96% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_67fd23689620e3340fe2a29e2ea77933.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_ed11e856d3a32487f771047b0b77f027.msa index fbf24462..156d29b9 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_67fd23689620e3340fe2a29e2ea77933.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ed11e856d3a32487f771047b0b77f027.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:20320185-20320680 +>syndip_1_chr20:20320185-20320679 atcctgattcagataaactgttaaacaaaaatctgtgccatttatgaggtgattggaaat ttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcagtaaaga tggtatttcagatatatag--atatatatattatatatatgtaatatatataatatatgt @@ -28,8 +28,8 @@ tatatattatatatattatatatgtaatatatattatatatattatatatgtaatatata ttatatatattatatatgtaatatatattatatatattatatatgtaatatatattatat atattatatatgtaatatatatatatttaaaaacagaaccattatcttttagagatacat actgaagtgtctggagacatgcttcaagataacccaggagggagaatggtagaaggaact -agagatgacccaagactgcccttgagctaataactgt ->syndip_2_chr20:20320185-20320680 +agagatgacccaagactgcccttgagctaataactg +>syndip_2_chr20:20320185-20320679 atcctgattcagataaactgttaaacaaaaatctgtgccatttatgaggtgattggaaat ttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcagtaaaga 
tggtatttcagatatatagatatatatatattatatatatgtaatatatataatatatgt @@ -59,8 +59,8 @@ gaagtgtctggagacatgcttcaagata-------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------acccaggagggagaatggtagaaggaact -agagatgacccaagactgcccttgagctaataactgt ->p:HG002_1_chr20:20320185-20320680 +agagatgacccaagactgcccttgagctaataactg +>p:HG002_1_chr20:20320185-20320679 atcctgattcagataaactgttaaacaaaaatctgtgccatttatgaggtgattggaaat ttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcagtaaaga tggtatttcagatatatagatatatatatattatatatatgtaatatatataatatatgt @@ -90,8 +90,8 @@ gaagtgtctggagacatgcttcaagata-------------------------------- ------------------------------------------------------------ ------------------------------------------------------------ -------------------------------acccaggagggagaatggtagaaggaact -agagatgacccaagactgcccttgagctaataactgt ->p:HG002_2_chr20:20320185-20320680 +agagatgacccaagactgcccttgagctaataactg +>p:HG002_2_chr20:20320185-20320679 atcctgattcagataaactgttaaacaaaaatctgtgccatttatgaggtgattggaaat ttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcagtaaaga tggtatttcagatatatag--atatatatattatatatatgtaatatatataatatatgt @@ -121,8 +121,8 @@ tatatattatatatattatatatgtaatatatattatatatattatatatgtaatatata ttatatatattatatatgtaatatatattatatatattatatatgtaatatatattatat atattatatatgtaatatatatatatttaaaaacagaaccattatcttttagagatacat actgaagtgtctggagacatgcttcaagataacccaggagggagaatggtagaaggaact -agagatgacccaagactgcccttgagctaataactgt ->ref_chr20:20320185-20320680 +agagatgacccaagactgcccttgagctaataactg +>ref_chr20:20320185-20320679 atcctgattcagataaactgttaaacaaaaatctgtgccatttatgaggtgattggaaat ttgaaacctggagagttaatgatgataagcaattatgattaatttttttagcagtaaaga tggtatttcagatatatagatatatatatattatatatatgtaatatatataatatatgt @@ -152,4 +152,4 @@ gaagtgtctggagacatgcttcaagata-------------------------------- ------------------------------------------------------------ 
------------------------------------------------------------ -------------------------------acccaggagggagaatggtagaaggaact -agagatgacccaagactgcccttgagctaataactgt +agagatgacccaagactgcccttgagctaataactg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_4891d56deb85c16ff9f7722808d6cd91.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_edce4871de28f10080aaa834a9139704.msa similarity index 91% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_4891d56deb85c16ff9f7722808d6cd91.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_edce4871de28f10080aaa834a9139704.msa index f4bd8df9..dca76d89 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_4891d56deb85c16ff9f7722808d6cd91.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_edce4871de28f10080aaa834a9139704.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:62270179-62271195 +>syndip_1_chr20:62270179-62271194 cgtcctagtccctggtgcagacgaggagcctgagaggcttagcagctcatgtgtgtgtca gctgggaacccacagcaagtggattccaaagcctgccatgtccctgttgccactagcacc tcctcctccactccctcctggcctctctgggtcctctccttgtccctcctggcctctctg @@ -15,8 +15,8 @@ tccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctcctt gtccctctctggcctctctgggtcctctccttgtccctctctggcctctctgggtccttt ccttgtccctttctggtctctctggtcctctccttctctccttgtccctctccctctgcc tctgctttgcccaccccagaccctgctccttcctgaagagcctgccctgaggtgacagct -gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtgggaa ->syndip_2_chr20:62270179-62271195 +gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtggga +>syndip_2_chr20:62270179-62271194 cgtcctagtccctggtgcagacgaggagcctgagaggcttagcagctcatgtgtgtgtca gctgggaacccacagcaagtggattccaaagcctgccatgtccctgttgccactagcacc tcctcctccactccctcctggcctctctgggtcctctccttgtccctcctggcctctctg @@ -33,8 +33,8 @@ tccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctcctt gtccctctctggcctctctgggtcctctccttgtccctctctggcctctctgggtccttt ccttgtccctttctggtctctctggtcctctccttctctccttgtccctctccctctgcc tctgctttgcccaccccagaccctgctccttcctgaagagcctgccctgaggtgacagct 
-gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtgggaa ->p:HG002_1_chr20:62270179-62271195 +gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtggga +>p:HG002_1_chr20:62270179-62271194 cgtcctagtccctggtgcagacgaggagcctgagaggcttagcagctcatgtgtgtgtca gctgggaacccacagcaagtggattccaaagcctgccatgtccctgttgccactagcacc tcctcctccactccctcctggcctctctgggtcctctccttgtccctcctggcctctctg @@ -51,8 +51,8 @@ tccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctcctt gtccctctctggcctctctgggtcctctccttgtccctctctggcctctctgggtccttt ccttgtccctttctggtctctctggtcctctccttctctccttgtccctctccctctgcc tctgctttgcccaccccagaccctgctccttcctgaagagcctgccctgaggtgacagct -gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtgggaa ->p:HG002_2_chr20:62270179-62271195 +gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtggga +>p:HG002_2_chr20:62270179-62271194 cgtcctagtccctggtgcagacgaggagcctgagaggcttagcagctcatgtgtgtgtca gctgggaacccacagcaagtggattccaaagcctgccatgtccctgttgccactagcacc tcctcctccactccctcctggcctctctgggtcctctccttgtccctcctggcctctctg @@ -69,8 +69,8 @@ tccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctcctt gtccctctctggcctctctgggtcctctccttgtccctctctggcctctctgggtccttt ccttgtccctttctggtctctctggtcctctccttctctccttgtccctctccctctgcc tctgctttgcccaccccagaccctgctccttcctgaagagcctgccctgaggtgacagct -gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtgagaa ->ref_chr20:62270179-62271195 +gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtgaga +>ref_chr20:62270179-62271194 cgtcctagtccctggtgcagacgaggagcctgagaggcttagcagctcatgtgtgtgtca gctgggaacccacagcaagtggattccaaagcctgccatgtccctgttgccactagcacc tcctcctccactccctcctggcctctctgggtcctctccttgtccctcctggcctctctg @@ -87,4 +87,4 @@ tccttgtccctctctggcctctctgggtcctctccttgtccctctctgggtcctctcctt gtccctctctggcctctctgggtcctctccttgtccctctctggcctctctgggtccttt ccttgtccctttctggtctctctggtcctctccttctctccttgtccctctccctctgcc tctgctttgcccaccccagaccctgctccttcctgaagagcctgccctgaggtgacagct -gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtgggaa 
+gtgtttccctggaatgcctactgcccttgggatccctttcctggaagcccggtggga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_89449e11a504a87076c2d55f89a99cbd.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_f421dd5eadc752e68b598f461ab861a8.msa similarity index 95% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_89449e11a504a87076c2d55f89a99cbd.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_f421dd5eadc752e68b598f461ab861a8.msa index df5fb594..faa3b7a3 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_89449e11a504a87076c2d55f89a99cbd.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_f421dd5eadc752e68b598f461ab861a8.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:23155404-23156076 +>syndip_1_chr20:23155404-23156075 gggaaagacacagaggtagaatctgcgttaggaataaaaaccgctactctccgttgttct gtgtgcttttgcagtcatgattgatgcaggcagcacccttctgcagaagtaaattttgcc ctgcagcacaaaagaggaaggaaagcaagggaagggaaggggaggggaggaagggaaggg @@ -14,8 +14,8 @@ gaaagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag aaagaaagaaagaaagaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaag gtcttaacaccttggtaatttttgtgctcttccctggactttggactttgcagccttagg gagacagagctctttaatgtacactttcccatcaggcactgcatggccaagaacctgcct -gcaaagccccca ->syndip_2_chr20:23155404-23156076 +gcaaagccccc +>syndip_2_chr20:23155404-23156075 gggaaagacacagaggtagaatctgcgttaggaataaaaaccgctactctccgttgttct gtgtgcttttgcagtcatgattgatgcaggcagcacccttctgcagaagtaaattttgcc ctgcagcacaaaagaggaaggaaagcaag-------------------ggaagggaaggg @@ -31,8 +31,8 @@ gaaagaaag------------------------aagaaagaaagaaagaaagaaagaaag aaagaaagaaagaaagaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaag gtcttaacaccttggtaatttttgtgctcttccctggactttggactttgcagccttagg gagacagagctctttaatgtacactttcccatcaggcactgcatggccaagaacctgcct -gcaaagccccca ->p:HG002_1_chr20:23155404-23156076 +gcaaagccccc +>p:HG002_1_chr20:23155404-23156075 gggaaagacacagaggtagaatctgcgttaggaataaaaaccgctactctccgttgttct gtgtgcttttgcagtcatgattgatgcaggcagcacccttctgcagaagtaaattttgcc 
ctgcagcacaaaagaggaaggaaagcaag-------------------ggaagggaaggg @@ -48,8 +48,8 @@ gaaagaaag------------------------aagaaagaaagaaagaaagaaagaaag aaagaaagaaagaaagaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaag gtcttaacaccttggtaatttttgtgctcttccctggactttggactttgcagccttagg gagacagagctctttaatgtacactttcccatcaggcactgcatggccaagaacctgcct -gcaaagccccca ->p:HG002_2_chr20:23155404-23156076 +gcaaagccccc +>p:HG002_2_chr20:23155404-23156075 gggaaagacacagaggtagaatctgcgttaggaataaaaaccgctactctccgttgttct gtgtgcttttgcagtcatgattgatgcaggcagcacccttctgcagaagtaaattttgcc ctgcagcacaaaagaggaaggaaagcaagggaagggaaggggaggggaggaagggaaggg @@ -65,8 +65,8 @@ gaaagaaagaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaagaaag aaagaaagaaagaaagaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaag gtcttaacaccttggtaatttttgtgctcttccctggactttggactttgcagccttagg gagacagagctctttaatgtacactttcccatcaggcactgcatggccaagaacctgcct -gcaaagccccca ->ref_chr20:23155404-23156076 +gcaaagccccc +>ref_chr20:23155404-23156075 gggaaagacacagaggtagaatctgcgttaggaataaaaaccgctactctccgttgttct gtgtgcttttgcagtcatgattgatgcaggcagcacccttctgcagaagtaaattttgcc ctgcagcacaaaagaggaaggaaagcaag-------------------ggaagggaaggg @@ -82,4 +82,4 @@ gaaagaaag------------aagaaagaaagaaagaaagaaagaaagaaagaaagaaag aaagaaagaaagaaagaaagaaagaaagaaaaagaaagagaaaggaaggaaggaaggaag gtcttaacaccttggtaatttttgtgctcttccctggactttggactttgcagccttagg gagacagagctctttaatgtacactttcccatcaggcactgcatggccaagaacctgcct -gcaaagccccca +gcaaagccccc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_85aa1e76d010d424a529fed9eb355830.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_f5ab8103fdc862862724ddfcfe43644a.msa similarity index 97% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_85aa1e76d010d424a529fed9eb355830.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_f5ab8103fdc862862724ddfcfe43644a.msa index af467528..39e41974 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_85aa1e76d010d424a529fed9eb355830.msa +++ 
b/repo_utils/test_files/external/fake_mafft/lookup/fm_f5ab8103fdc862862724ddfcfe43644a.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:61201583-61202575 +>syndip_1_chr20:61201583-61202574 aatacagcatatcaaaatgtatgggatgcagctaaagcagtgctcagaggacaatctata ctactagaggcttatattagaaaaaaaaatttaaatcaataacctaagctttcaccgtta gagaaggaatttcctcactcaggaccctctgcggatgtcacctccatcctcatcaggacc @@ -30,8 +30,8 @@ cacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctc cgtgagtgccagctctgtctaccctcgggaccctccatgagtgtctcctctatctaccct caggaccctctgtgaatgtcctcgccatccatactcccccctcttagacccagactgttt cagggttggacactgagctgtggcccctgagcctagctcttctccgagcttctcttgggc -acctctctctcagactaggct ->syndip_2_chr20:61201583-61202575 +acctctctctcagactaggc +>syndip_2_chr20:61201583-61202574 aatacagcatatcaaaatgtatgggatgcagctaaagcagtgctcagaggacaatctata ctactagaggcttatattagaaaaaaaaatttaaatcaataacctaagctttcaccatta gagaaggaatttcctcactcaggaccctctgcggatgtcgcctccatcctcatcaggacc @@ -63,8 +63,8 @@ cacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctc cgtgagtgccagctctgtctaccctcgggaccctccatgagtgtctcctctatctaccct aaggaccctctgtgaatgtcctcgccatccatactcccccctcttagacccagactgttt cagggttggacactgagctgtggcccctgagcctagctcttctccgagcttctcttgggc -acctctctctcagactaggct ->p:HG002_1_chr20:61201583-61202575 +acctctctctcagactaggc +>p:HG002_1_chr20:61201583-61202574 aatacagcatatcaaaatgtatgggatgcagctaaagcagtgctcagaggacaatctata ctactagaggcttatattagaaaaaaaaatttaaatcaataacctaagctttcaccatta gagaaggaatttcctcactcaggaccctctgcggatgtcgcctccatcctcatcaggacc @@ -96,8 +96,8 @@ cacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctc cgtgagtgccagctctgtctaccctcgggaccctccatgagtgtctcctctatctaccct aaggaccctctgtgaatgtcctcgccatccatactcccccctcttagacccagactgttt cagggttggacactgagctgtggcccctgagcctagctcttctccgagcttctcttgggc -acctctctctcagactaggct ->p:HG002_2_chr20:61201583-61202575 +acctctctctcagactaggc +>p:HG002_2_chr20:61201583-61202574 aatacagcatatcaaaatgtatgggatgcagctaaagcagtgctcagaggacaatctata ctactagaggcttatattagaaaaaaaaatttaaatcaataacctaagctttcaccgtta 
gagaaggaatttcctcactcaggaccctctgcggatgtcacctccatcctcatcaggacc @@ -129,8 +129,8 @@ cacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctc cgtgagtgccagctctgtctaccctcgggaccctccatgagtgtctcctctatctaccct caggaccctctgtgaatgtcctcgccatccatactcccccctcttagacccagactgttt cagggttggacactgagctgtggcccctgagcctagctcttctccgagcttctcttgggc -acctctctctcagactaggct ->ref_chr20:61201583-61202575 +acctctctctcagactaggc +>ref_chr20:61201583-61202574 aatacagcatatcaaaatgtatgggatgcagctaaagcagtgctcagaggacaatctata ctactagaggcttatattagaaaaaaaaatttaaatcaataacctaagctttcaccatta gagaaggaatttcctcactcaggaccctctgcggatgtcgcctccatcctcatcaggacc @@ -162,4 +162,4 @@ cacctccatcctcactcaggaccctccatggtgtcacctccatcctcactcaggaccctc cgtgagtgccagctctgtctaccctcgggaccctccatgagtgtctcctctatctaccct caggaccctctgtgaatgtcctcgccatccatactcccccctcttagacccagactgttt cagggttggacactgagctgtggcccctgagcctagctcttctccgagcttctcttgggc -acctctctctcagactaggct +acctctctctcagactaggc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_652adf9a4e7824ea572dca71a0562a04.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_f80b3949aaede63b2c726a14abd6e835.msa similarity index 92% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_652adf9a4e7824ea572dca71a0562a04.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_f80b3949aaede63b2c726a14abd6e835.msa index d158507f..0379a39e 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_652adf9a4e7824ea572dca71a0562a04.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_f80b3949aaede63b2c726a14abd6e835.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:61289525-61290656 +>syndip_1_chr20:61289525-61290655 tgtgtgggtgcccaggggcatgagtgcctggggagtgtgaccacaggccagtggcagtcc ctgcctttggattaaagagccctttgaggcttgcttggtgatgtcatggaaaacagcata tgacaccaagatatccccatatccaacgagacttgctgggtttatccccgtatccaacga @@ -17,8 +17,8 @@ gatttatccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgc tgggtttatcccgatatccaatgagacttgctggatttatccccgtatccaatgagactt 
gctggatttatccccatatccaatgagacttgctgggtttagcaccacagatgccagtca cagaggaaattgatggcatggcagacagcgtgacatgttgactacctgccctggtccttg -cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgcc ->syndip_2_chr20:61289525-61290656 +cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgc +>syndip_2_chr20:61289525-61290655 tgtgtgggtgcccaggggcatgagtgcctggggagtgtgaccacaggccagtggcagtcc ctgcctttggattaaagagccctttgaggcttgcttggtgatgtcatggaaaacagcata tgacaccaagatatccccatatccaacgagacttgctgggtttatccccgtatccaacga @@ -37,8 +37,8 @@ gatttatccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgc tgggtttatcccgatatccaatgagacttgctggatttatccccgtatccaatgagactt gctggatttatccccatatccaatgagacttgctgggtttagcaccacagatgccagtca cagaggaaattgatggcatggcagacagcgtgacatgttgactacctgccctggtccttg -cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgcc ->p:HG002_1_chr20:61289525-61290656 +cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgc +>p:HG002_1_chr20:61289525-61290655 tctgtgggtgcccaggggcatgagtgcctggggagtgtgaccacaggccagtggcagtcc ctgcctttggattaaagagccctttgaggcttgcttggtgatgtcatggaaaacagcata tgacaccaagatatccccatatccaacgagacttgctgggtttatccccgtatccaacga @@ -57,8 +57,8 @@ gatttatccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgc tgggtttatcccgatatccaatgagacttgctggatttatccccgtatccaatgagactt gctggatttatccccatatccaatgagacttgctgggtttagcaccacagatgccagtca cagaggaaattgatggcatggcagacagcgtgacatgttgactacctgccctggtccttg -cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgcc ->p:HG002_2_chr20:61289525-61290656 +cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgc +>p:HG002_2_chr20:61289525-61290655 tgtgtgggtgcccaggggcatgagtgcctggggagtgtgaccacaggccagtggcagtcc ctgcctttggattaaagagccctttgaggcttgcttggtgatgtcatggaaaacagcata tgacaccaagatatccccatatccaacgagacttgctgggtttatccccgtatccaacga @@ -77,8 +77,8 @@ gatttatccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgc tgggtttatcccgatatccaatgagacttgctggatttatccccgtatccaatgagactt gctggatttatccccatatccaatgagacttgctgggtttagcaccacagatgccagtca cagaggaaattgatggcttggcagacagcgtgacatgttgactacctgccctggtccttg 
-cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgcc ->ref_chr20:61289525-61290656 +cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgc +>ref_chr20:61289525-61290655 tgtgtgggtgcccaggggcatgagtgcctggggagtgtgaccacaggccagtggcagtcc ctgcctttggattaaagagccctttgaggcttgcttggtgatgtcatggaaaacagcata tgacaccaagatatccccatatccaacgagacttgctgggtttatccccgtatccaacga @@ -97,4 +97,4 @@ gatttatccccgtatccaatgagacttgctgggtttatccccgtatccaatgagacttgc tgggtttatcccgatatccaatgagacttgctggatttatccccgtatccaatgagactt gctggatttatccccatatccaatgagacttgctgggtttagcaccacagatgccagtca cagaggaaattgatggcatggcagacagcgtgacatgttgactacctgccctggtccttg -cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgcc +cagatggcatgggcacaaaggggatgtggggtgaagagttgaaacgagtgc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_72f8e0bf8162cc30b5455b515c643479.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_fb52ef3187571c67509303c4c8e9c08d.msa similarity index 93% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_72f8e0bf8162cc30b5455b515c643479.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_fb52ef3187571c67509303c4c8e9c08d.msa index 09fa7e28..d35585ef 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_72f8e0bf8162cc30b5455b515c643479.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_fb52ef3187571c67509303c4c8e9c08d.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:41196110-41196749 +>syndip_1_chr20:41196110-41196748 agagtgttgttgaaatcctctatttcctgattgatcttctgtctggtttctatatccatt atcggacatggaatattaaagtctccaactatttttgtctttctccctttaattctgtca tttctgtttcatatgttttggagctctgttaggggtatatatatatataatatatataaa @@ -12,8 +12,8 @@ tatatattatatattataaatatatattatatattatatattatataaatattatatata atatatattatatataatatatataaaaatatatatatttatatatataaatattttatc aatataatatccttgtctcttgtaacctttcttgatttaacatttattttgtgtgccttt ttcaaaagaggacactggaggtgaagaaggaagctcttgactctcctaaagctgatccca -aagtgaaggctttgaaggcca ->syndip_2_chr20:41196110-41196749 +aagtgaaggctttgaaggcc 
+>syndip_2_chr20:41196110-41196748 agagtgttgttgaaatcctctatttcctgattgatcttctgtctggtttctatatccatt atcggacatggaatattaaagtctccaactatttttgtctttctccctttaattctgtca tttctgtttcatatgttttggagctctgttaggggtatatatatatataatatatataaa @@ -27,8 +27,8 @@ tatataatatatattataaatatatattatatattatatattatataaatattatatata atatatattatatataatatatataaaaatatatatatttatatatataaatattttatc aatataatatccttgtctcttgtaacctttcttgatttaacatttattttgtgtgccttt ttcaaaagaggacactggaggtgaagaaggaagctcttgactctcctaaagctgatccca -aagtgaaggctttgaaggcca ->p:HG002_1_chr20:41196110-41196749 +aagtgaaggctttgaaggcc +>p:HG002_1_chr20:41196110-41196748 agagtgttgttgaaatcctctatttcctgattgatcttctgtctggtttctatatccatt atcggacatggaatattaaagtctccaactatttttgtctttctccctttaattctgtca tttctgtttcatatgttttggagctctgttaggggtatatatatatataatatatataaa @@ -42,8 +42,8 @@ tatataatatatattataaatatatattatatattatatattatataaatattatatata atatatattatatataatatatataaaaatatatatatttatatatataaatattttatc aatataatatccttgtctcttgtaacctttcttgatttaacatttattttgtgtgccttt ttcaaaagaggacactggaggtgaagaaggaagctcttgactctcctaaagctgatccca -aagtgaaggctttgaaggcca ->p:HG002_2_chr20:41196110-41196749 +aagtgaaggctttgaaggcc +>p:HG002_2_chr20:41196110-41196748 agagtgttgttgaaatcctctatttcctgattgatcttctgtctggtttctatatccatt atcggacatggaatattaaagtctccaactatttttgtctttctccctttaattctgtca tttctgtttcatatgttttggagctctgttaggggtatatatatatataatatatataaa @@ -57,8 +57,8 @@ tatatattatatattataaatatatattatatattatatattatataaatattatatata atatatattatatataatatatataaaaatatatatatttatatatataaatattttatc aatataatatccttgtctcttgtaacctttcttgatttaacatttattttgtgtgccttt ttcaaaagaggacactggaggtgaagaaggaagctcttgactctcctaaagctgatccca -aagtgaaggctttgaaggcca ->ref_chr20:41196110-41196749 +aagtgaaggctttgaaggcc +>ref_chr20:41196110-41196748 agagtgttgttgaaatcctctatttcctgattgatcttctgtctggtttctatatccatt atcggacatggaatattaaagtctccaactatttttgtctttctccctttaattctgtca tttctgtttcatatgttttggagctctgttaggggtatatatatatataatatatataaa @@ -72,4 +72,4 @@ atattatatataatatatttatatataatatatatttatatattataaatatatat---- 
atatatattatatataatatatataaaaatatatatatttatatatataaatattttatc aatataatatccttgtctcttgtaacctttcttgatttaacatttattttgtgtgccttt ttcaaaagaggacactggaggtgaagaaggaagctcttgactctcctaaagctgatccca -aagtgaaggctttgaaggcca +aagtgaaggctttgaaggcc diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_8f331d26b3bb88ff009d81b04c1c6f26.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_fc40819c3b411d6b2869794670b475f9.msa similarity index 87% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_8f331d26b3bb88ff009d81b04c1c6f26.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_fc40819c3b411d6b2869794670b475f9.msa index 950ef9f8..9d83252e 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_8f331d26b3bb88ff009d81b04c1c6f26.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_fc40819c3b411d6b2869794670b475f9.msa @@ -1,40 +1,40 @@ ->syndip_1_chr20:60702905-60703188 +>syndip_1_chr20:60702905-60703187 attttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaacatt tgtcattgtctttctttcttttctttctctctctctctttcttttccttctttctttctt tctttctttctttctttctttctttctttctttctttctttcttttctttccttcttctt tctttctttctttctttctttctttctttctttctttctttctttctttctttctttctt tctttctttctttcttctttctttctttttctttcttttttttttgggatggagtctcgc tctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaatctccacctccc -aggttcaagcgattgaa ->syndip_2_chr20:60702905-60703188 +aggttcaagcgattga +>syndip_2_chr20:60702905-60703187 attttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaacatt tgtcattgtctttctttcttttctttctctctctctctttcttttccttctttctttctt tctttctttctttcttc----------------------------------------ctt tctttctttctttctttctttctttctttctttctttcttt------------------- ------tctttccttctttctttctttctttctttctttttttttgggatggagtctcgc tctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaatctccacctccc -aggttcaagcgattgaa ->p:HG002_1_chr20:60702905-60703188 +aggttcaagcgattga +>p:HG002_1_chr20:60702905-60703187 attttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaacatt 
tgtcattgtctttctttcttttctttctctctctctctttctttcttttccttctttctt tctttctttctttcctt----------------------------------------ctt tctttctttctttctttctttctttctttctttctttcttt------------------- ------tctttccttctttctttctttctttctttctttttttttgggatggagtctcgc tctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaatctccacctccc -aggttcaagcgattgaa ->p:HG002_2_chr20:60702905-60703188 +aggttcaagcgattga +>p:HG002_2_chr20:60702905-60703187 attttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaacatt tgtcattgtctttctttcttttctttctctctctctctttcttttccttctttctttctt tctttctttctttctttctttctttctttctttctttctttcttttctttccttcttctt tctttctttctttctttctttctttctttctttctttctttctttctttctttctttctt tctttctttctttcttcttctttctttctttctttctttttttttgggatggagtctcgc tctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaatctccacctccc -aggttcaagcgattgaa ->ref_chr20:60702905-60703188 +aggttcaagcgattga +>ref_chr20:60702905-60703187 attttcctttcccctcaataatgtatgagggttgcaatgtcgctacatattaccaacatt tgtcattgtctttctttcttttctttctctctctctctttcttttcctt----------- ---------------------------------------------------------ctt tctttctttctttctttctttctttctttctttctttcttt------------------- ------tctttccttctttctttctttctttctttctttttttttgggatggagtctcgc tctgtcacccaggctggagtgcagtggcatgatctcggctcgctgcaatctccacctccc -aggttcaagcgattgaa +aggttcaagcgattga diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_79d4b9c2180cea7a5edfdeebbbf56834.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_fd198272bc91bdb42e2dbb32cc9c9935.msa similarity index 99% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_79d4b9c2180cea7a5edfdeebbbf56834.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_fd198272bc91bdb42e2dbb32cc9c9935.msa index c27b67dc..d7d2eb13 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_79d4b9c2180cea7a5edfdeebbbf56834.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_fd198272bc91bdb42e2dbb32cc9c9935.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63167287-63167728 
+>syndip_1_chr20:63167287-63167727 aatggccagcccgtcccgccgaggccagtggacatgccaaatccctcccgcaacaagttg ctatccatcccctcgactaatacttaccaaattaattccagatttggagtcactcacggt tcccaccagcactcagccccatctcagcccccatctcagcccccaccgcagcctcacctc @@ -74,8 +74,8 @@ ccccacctcagcccccacctccgcccccacctcagcccctcctcagcccccacctctgcc cccacctcagaccctcctcagcccctcctcagcccctcctcatcccctcctcagcagctg ctggaatgggctaccctcaggagttggcaccccacaggctccttcccttcccacctccgc cacaccctgacgagggaaaaatggggagctgctgggagggaacttgcaccctgttgggcg -ccttgt ->syndip_2_chr20:63167287-63167728 +ccttg +>syndip_2_chr20:63167287-63167727 aatggccagcccgtcccgccgaggccagtggacatgccaaatccctcccgcaacaagttg ctatccatcccctcgactaatacttaccaaattaattccagatttggattcactcacggt tcccaccagcactcagccccatctcagcccccatctcagcccccaccgcagcctcacctc @@ -151,8 +151,8 @@ cccctcctcagcccccacctcagcccccacctcagcccctcctcagcccccacctccgcc cccacctcaga-----------ccctcctcagcccctcctcatcccctcctcagcagctg ctggaatgggctaccctcaggagttggcaccccacaggctccttcccttcccacctccgc cacaccctgacgagggaaaaatggggagctgctgggagggaacttgcaccctgttgggcg -ccttgt ->p:HG002_1_chr20:63167287-63167728 +ccttg +>p:HG002_1_chr20:63167287-63167727 aatggccagcccgtcccgccgaggccagtggacatgccaaatccctcccgcaacaagttg ctatccatcccctcgactaatacttaccaaattaattccagatttggattcactcacggt tcccaccagcactcagccccatctcagcccccatctcagcccccaccgcagcctcacctc @@ -228,8 +228,8 @@ cccctcctcagcccccacctcagcccccacctcagcccctcctcagcccccacctccgcc cccacctcaga-----------ccctcctcagcccctcctcatcccctcctcagcagctg ctggaatgggctaccctcaggagttggcaccccacaggctccttcccttcccacctccgc cacaccctgacgagggaaaaatggggagctgctgggagggaacttgcaccctgttgggcg -ccttgt ->p:HG002_2_chr20:63167287-63167728 +ccttg +>p:HG002_2_chr20:63167287-63167727 aatggccagcccgtcccgccgaggccagtggacatgccaaatccctcccgcaacaagttg ctatccatcccctcgactaatacttaccaaattaattccagatttggagtcactcacggt tcccaccagcactcagccccatctcagcccccatctcagcccccaccgcagcctcacctc @@ -305,8 +305,8 @@ ccccacctcagcccccacctccgcccccacctcagcccctcctcagcccccacctctgcc cccacctcagaccctcctcagcccctcctcagcccctcctcatcccctcctcagcagctg 
ctggaatgggctaccctcaggagttggcaccccacagcctccttcccttcccacctccgc cacaccctgacgagggaaaaatggggagctgctgggagggaacttgcaccctgttgggcg -ccttgt ->ref_chr20:63167287-63167728 +ccttg +>ref_chr20:63167287-63167727 aatggccagcccgtcccgccgaggccagtggacatgccaaatccctcccgcaacaagttg ctatccatcccctcgactaatacttaccaaattaattccagatttggagtcactcatggt tcccaccagcactcagccccatctcagcccccatctcagcccccaccgcagcctcacctc @@ -382,4 +382,4 @@ agtgcccacctcagccccctcctcagccccca---------------------------- cccacctcaga-----------ccctcctcagcccctcctcatcccctcctcagcagctg ctggaatgggctaccctcaggagttggcaccccacaggctccttcccttcccacctccgc cacaccctgacgagggaaaaatggggagctgctgggagggaacttgcaccctgttgggcg -ccttgt +ccttg diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_fdcb2c8d04f052ec0422554bbca6ba2d.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_fdcb2c8d04f052ec0422554bbca6ba2d.msa new file mode 100644 index 00000000..761b2923 --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_fdcb2c8d04f052ec0422554bbca6ba2d.msa @@ -0,0 +1,165 @@ +>syndip_1_chr20:60314230-60315120 +acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac +acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt +cctcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc +catttgtccttccttccttccttccttccttc---cttccttccttccttccttccttcc +ttccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaacc +atcctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatcc +atccatccatcctcccatccatcctcccatccattatcccattcatccatcctcccatcc +atcctcctatccatccatccatccatccaccctcccatccatccatcctcccatccatcc +actcttccatccatccatccacccaccctt----------------ccatccatccatcc +tcccatccatccatcctcccattcatctatccatccatccctcgatcctcccatccatcc +atcctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcc +tcccatccatctaccctcccatccatccatcctcccacccaccctcccatccatccatcc +tcccatcctcccatccatccatccatcctcccatccgtccatccatccttccatccac-- +--------------cctcccatccatccatcctcccatccatctaccctcccatccatcc +atcctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatct 
+atccatccatcctcccatcctcccatccatccatcatccatccatccatccatccatcct +cccataatctatcttcccatccatttgtccttccttccttccttccttccttccttcctt +cgttccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaa +ccatcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctat +ccat--------------------ccatcctcccatccatcctcccatcctcccatccat +tctcccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctc +ccatccatccatcctcccatccatccacccttccatccatccatccacccacccttccat +ccatccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctc +ccatccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccat +ccatcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccat +ccaccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccat +cctcccatccatccatccatccatccatccatcctcccatcctcccatcctcccatccat +cgtcccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccat +ccatcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccat +ccatccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgt +gagttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaa +aatgaaaaaaagaatatattgatttcaggccagttacatag +>syndip_2_chr20:60314230-60315120 +acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac +acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt +cgtcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc +catttg--------------------tccttc---cttccttccttccttccttccttcc +ttccacccatccatcctcccatgcatcctcccatgcatcctcccatccatccacccaacc +atcctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatcc +atccatccatcctcccatccatcctcccatccattatcccattcatccatcctcccatcc +atcctcctatccatccatccatccatccaccctctcatccatccatcctcccatccatcc +acccttccatccatccatccacccacccttccatccatccatcctcccatccatccatcc +tcccatccatccatcctcccattcatctatccatccatccctcgatcctcccatccatcc +atcctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcc +tcccatccatctaccctcccatccatccatcctcccatccaccctcccatccatccatcc +atccatccaccctctcatccatccatcctcccatccgtccatccatccttccatccaccc +tcccatccatccatcctcccatccatccatcctcccatccatctaccctcccatccatcc +atcctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatct 
+atccatccatcctcccatcctcccatccatccatcatccatccatccatccatccatcct +cccataatctatcttcccatccatttgtccttccttccttccttccttccttcc------ +--ttccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaa +ccatcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctat +ccatccatcctcccatccatcctcccatcctcccatccatcctcccatcctcccatccat +tctcccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctc +ccatccatccatcctcccatccatccacccttccatccatccatccacccacccttccat +ccatccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctc +ccatccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccat +ccatcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccat +ccaccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccat +cctc----ccatccatccatccatccatccatcctcccatcctcccatcctcccatccat +cgtcccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccat +ccatcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccat +ccatccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgt +gagttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaa +aatgaaaaaaagaatatattgatttcaggccagttacatag +>p:HG002_1_chr20:60314230-60315120 +acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac +acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt +cgtcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc +catttg--------------------tccttc---cttccttccttccttccttccttcc +ttccacccatccatcctcccatgcatcctcccatgcatcctcccatccatccacccaacc +atcctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatcc +atccatccatcctcccatccatcctcccatccattatcccattcatccatcctcccatcc +atcctcctatccatccatccatccatccaccctctcatccatccatcctcccatccatcc +acccttccatccatccatccacccacccttccatccatccatcctcccatccatccatcc +tcccatccatccatcctcccattcatctatccatccatccctcgatcctcccatccatcc +atcctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcc +tcccatccatctaccctcccatccatccatcctcccatccaccctcccatccatccatcc +atccatccaccctctcatccatccatcctcccatccgtccatccatccttccatccaccc +tcccatccatccatcctcccatccatccatcctcccatccatctaccctcccatccatcc 
+atcctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatct +atccatccatcctcccatcctcccatccatccatcatccatccatccatccatccatcct +cccataatctatcttcccatccatttgtccttccttccttccttccttccttcc------ +--ttccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaa +ccatcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctat +ccatccatcctcccatccatcctcccatcctcccatccatcctcccatcctcccatccat +tctcccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctc +ccatccatccatcctcccatccatccacccttccatccatccatccacccacccttccat +ccatccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctc +ccatccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccat +ccatcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccat +ccaccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccat +cctc----ccatccatccatccatccatccatcctcccatcctcccatcctcccatccat +cgtcccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccat +ccatcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccat +ccatccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgt +gagttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaa +aatgaaaaaaagaatatattgatttcaggccagttacatag +>p:HG002_2_chr20:60314230-60315120 +acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac +acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt +cctcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc +catttgtccttccttccttccttccttccttc---cttccttccttccttccttccttcc +ttccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaacc +atcctcccatccatcctcacatccatcttcgcatccatccatccatccatctatccatcc +atccatccatcctcccatccatcctcccatccattatcccattcatccatcctcccatcc +atcctcctatccatccatccatccatccaccctcccatccatccatcctcccatccatcc +actcttccatccatccatccacccaccctt----------------ccatccatccatcc +tcccatccatccatcctcccattcatctatccatccatccctcgatcctcccatccatcc +atcctcccatccatccatccatcctcccatccatccatccaccctcccatccatccatcc +tcccatccatctaccctcccatccatccatcctcccacccaccctcccatccatccatcc +tcccatcctcccatccatccatccatcctcccatccgtccatccatccttccatccac-- 
+--------------cctcccatccatccatcctcccatccatctaccctcccatccatcc +atcctcccacccaccctcccatccatccaccctcccatccatccatcctcccatccatct +atccatccatcctcccatcctcccatccatccatcatccatccatccatccatccatcct +cccataatctatcttcccatccatttgtccttccttccttccttccttccttccttcctt +cgttccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaa +ccatcctcccatccatcctcacatccatcttcgcatccatccatccatccatccatctat +ccat--------------------ccatcctcccatccatcctcccatcctcccatccat +tctcccattcatccatcctcccatccatcctcctatccatccatccatccatccaccctc +ccatccatccatcctcccatccatccacccttccatccatccatccacccacccttccat +ccatccaccctcccatccatccatcctcccattcatctatccatccatccctcgatcctc +ccatccatccatccatcctcccatccgtccatccatccttccatccaccctcccatccat +ccatcctcccatccatctaccctcccatccatccatcctcccacccaccctcccatccat +ccaccctcccatccatccatcctcccatccatctatccatccatcctcccatcctcccat +cctcccatccatccatccatccatccatccatcctcccatcctcccatcctcccatccat +cgtcccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccat +ccatcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccat +ccatccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgt +gagttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaa +aatgaaaaaaagaatatattgatttcaggccagttacatag +>ref_chr20:60314230-60315120 +acttgtttcctcatttattcattaacaatattcatttcttagccaaggatggtggtgcac +acctgtacttccagctactcaggaggctgaggtggaaggattgaaaatccatccatctgt +cctcctacccatcctgctatccatccatccatccatcctcccataatctatcttcccatc +catttgtccttccttccttccttccttccttccttcttccttccttccttccttccttcc +ttccacccatccatcctcccattcatcctcccatgcatcctcccatccatccacccaacc +atcctcccatccatcctcacatcgatcttcgcatccatccatcgatcgatctatccatcc +atccattc----------------------------tcccattcatccatcctcccatcc +atcctcctatccatccatccatccatccaccctcccatccatccatcctcccatccatcc +acccttccatccatccatccacccaccctt----------------ccatccatccaccc +tcccatccatccatcctcccatccatctatccatccatcctcccatcctcccatcctccc +at---------------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +--------ccatccatccatccatccatccatcctcccatcctcccatcctcccatccat +cgtcccatgtgtacatctgtcctgccatctatccatacatccatttctccgtcctcccat +ccatcttcccatccattcatccatccatcttcccatccattcatccatccatcctgccat +ccatccttccattgatccatccatccatcctgcaggagaccaggagttcaaggctgccgt +gagttataattgtgccactgcactacagcttaagtgacagagagagaaaccatctccaaa +aatgaaaaaaagaatatattgatttcaggccagttacatag diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_ff0ae7d50dec8afd13f0c22741c4925e.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ff0ae7d50dec8afd13f0c22741c4925e.msa new file mode 100644 index 00000000..55e2be1d --- /dev/null +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ff0ae7d50dec8afd13f0c22741c4925e.msa @@ -0,0 +1,660 @@ +>syndip_1_chr20:63693225-63693984 +tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg +taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt +cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct +atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccaccttc +accaccaccacctccaccaccacctccaccacctccacctccaccacctccaccacctcc +accaccaccacctccaccaccaccacctccaccaccaccaccaccaccacctccaccacc +accaccaccaccaccacctccacctccaccacctccaccaccacctccacctccaccacc 
+acctccacctccacctccaccaccaccacctccaccacctccaccacctccacctccacc +accacctccacctccaccaccacctccacctccacctccacctccacctccaccaccacc +acctccaccacctccaccacctccacctccaccaccaccacctccacctccacctccacc +acctccacctccaccacctccacctccaccacctccaccaccaccaccaccaccaccacc +acctccaccaccaccaccaccaccacctccaccacctccacctccaccacctccaccacc +accacctccaccaccacctccaccaccacctccacctccaccacctccacctccaccacc +tccacctccacctccaccaccacctccaccacctccaccaccacctccaccacctccacc +tccaccaccaccacctccaccaccaccaccaccaccacctccaccacctccacctccacc +acctccaccaccaccacctccaccaccacctccaccaccacctccacctccaccacctcc +acctccaccacctccacctccacctccaccaccacctccaccacctccaccaccacctcc +accacctccaccacctccaccaccacctccaccacctccacctccaccaccaccacctcc +acctccaccaccaccacctccaccaccacctccacctccaccaccaccacctccacctcc +accaccaccacctccaccaccacctccaccaccaccaccaccaccaccacctccacctcc +accacctccaccaccaccaccaccacctccacctccaccacctccaccacctccaccacc +accaccaccacctccaccacctccaccaccacctccaccacctccacctccaccaccacc +acctccaccaccaccacctccacctccacctccacctccacctccaccaccacctccacc +tccaccaccacctccaccaccacctccacctccaccacctccacctccacctccaccacc +tccaccaccaccaccaccaccaccaccaccaccaccacctccaccacctccacctccacc +tccaccaccaccaccaccacctccaccaccaccaccaccacctccaccacctccaccacc +acctccaccaccaccaccaccaccaccaccaccacctccaccacctccacctccacctcc +acctccaccacctccaccacctccaccaccacctccaccaccaccaccaccacctccacc +accacctccacctccaccaccacctccaccaccacctccacctccaccacctccacctcc +accacctccaccacctccacctccaccacctccacctccacctccaccacctccaccacc +accaccaccaccaccaccaccaccaccacctccaccaccaccacctccaccaccacctcc +accacctccaccaccacctccaccaccaccaccaccaccaccaccaccaccaccacctcc +accaccacctccacctccaccaccacctccaccaccacctccacctccaccacctccacc +tccaccacctccaccacctccaccaccaccaccaccacctccaccaccacctccacctcc +accaccaccaccaccaccaccacctccaccaccaccacctccaccaccacctccaccacc +tccaccaccacctccacctccaccacctccaccaccacctccaccaccacctccaccacc +accacctccaccaccacctccaccacctccaccaccacctccacctccaccacctccacc +accacctccaccaccacctccaccaccaccacctccaccaccacctccaccacctccacc +accacctccacctccaccacctccacctccacctccaccacctccaccaccaccaccacc 
+accaccaccaccaccaccaccaccaccaccaccacctccacctccacctccaccacctcc +acctccaccacctccaccaccacctccacctccaccacctccacctccacctccaccacc +tccacctccaccacctccacctccacctccaccacctccaccaccaccacctccaccacc +acctccaccacctccaccaccacctccacctccaccacctccaccaccacctccaccacc +acctccaccaccaccacctccaccaccacctccaccacctccaccaccacctccacctcc +accacctccacctccacctccaccacctccaccaccaccaccaccaccaccaccaccacc +accacctccaccaccaccacctccaccaccacctccaccaccaccaccaccacctccacc +accaccaccaccacctccaccaccacctccaccacctccaccaccaccaccaccaccacc +accacctccaccaccacctccaccaccaccaccaccaccaccaccacctccaccacctcc +acctccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccaccacc +accacctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccaccacc +accaccacctccaccacctccaccacctccaccaccacctccaccacctccaccaccacc +accaccacctccaccaccacctccaccaccaccaccaccaccaccacctccacctccacc +tccaccacctccacctccaccacctccacctccaccacctccacctccacctccaccacc +tccaccaccacctccacctccaccaccacctcctccaccaccaccacctccaccaccacc +tccaccaccaccacctccacctccaccacctccacctccaccaccaccacctccacctcc +accaccaccacctccaccacctccaccaccacctccaccacctccacctccaccaccacc +acctccaccaccaccacctccacctccaccaccaccaccaccaccacctccacctccacc +accacctccaccaccacctccacctccacctccaccacctccaccaccacctccaccacc +tccaccacctccacctccaccaccaccaccacctccaccaccaccaccaccacctctacc +acctctaccaccaccaccaccacctccaccaccaccaccaccacctccaccaccaccacc +accaccacctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacc +tccacctccaccaccaccaccacctctaccaccaccaccaccacctccaccaccaccacc +acctccaccaccaccaccacctccaccaccacctccaccaccacctccaccacctccacc +accaccaccacctccacctccaccacctccaccaccacctccaccaccaccaccaccacc +acctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctccacc +tccaccaccaccaccacctccacctccaccaccaccaccaccaccaccaccaccaccacc +tccaccaccaccaccaccaccacctctaccaccaccaccaccacctccaccaccaccacc +acctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccaccaccacc +acctctaccaccaccaccaccacctccaccaccaccaccacctccaccaccaccaccacc +tccaccaccacctccaccaccaccaccacctccaccaccacctccaccaccacctccacc +acctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccaccacc 
+acctccaccaccaccaccaccaccaccacctccaccaccacctccaccaccacctccacc +acctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccaccacc +acctccaccaccacctccaccaccaccaccaccaccacctccaccaccacctccaccacc +tccaccaccaccaccaccacctctaccacctctaccaccaccaccaccacctccaccacc +accaccacctccaccaccaccaccacctccaccaccaccacctccacctccaccaccacc +acctccaccacctccaccaccacctccaccaccaccacctccacctccaccaccaccacc +tccaccacctccaccaccaccaccaccaccaccaccacctccaccaccaccaccaccacc +acctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctctacc +accaccaccaccacctccaccaccaccaccacctccaccaccaccaccacctccaccacc +acctccaccaccaccacctccacctccaccaccaccacctccaccaccaccacctccacc +tccaccaccaccacctccaccacctccaccaccacctccaccaccaccacctccacctcc +accaccaccacctccaccacctccaccaccaccacctccaccaccaccacctccaccacc +tccaccaccaccaccaccacctccaccacctccaccacctccaccaccaccaccaccacc +tccacctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccacc +accacctccacctccacctccaccacctccacctccaccaccaccaccaccaccaccacc +accaccaccacctccacctccacctccaccacctccaccaccaccacctccaccaccacc +acctccacctccaccacctccaccaccacctccaccaccaccacctccacctccaccacc +accacctccaccaccaccacctccacctccaccaccaccacctccaccacctccaccacc +acctccaccaccaccacctccacctccaccaccaccacctccaccacctccaccaccacc +acctccaccaccaccacctccaccacctccaccaccaccaccaccacctccaccacctcc +accacctccaccaccaccaccaccacctccacctccacctccaccacctccaccaccacc +acctccaccaccaccaccaccaccacctccaccacctccaccaccaccaccaccacctcc +acctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccaccacc +accacctccacctccacctccaccacctccacctccaccaccaccaccaccaccaccacc +tccaccaccacctccacctccacctccaccacctccacctccaccaccaccaccaccacc +accaccaccaccaccacctccacctccacctccaccaccaccacctctaccaccacctcc +accaccaccaccaccaccaccaccaccacctccaccacctccaccaccaccacctccacc +accaccacctccacctccaccaccaccaccacctccaccacctccaccaccacctccacc +tccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccaccacctcc +accacctccaccaccaccacctccaccaccaccacctccacctccaccacctccaccacc +tccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccaccacctct +accaccacctccaccaccaccaccaccaccacctccacctccaccacctccacctccacc 
+acctctaccaccacctccaccaccaccacctccacctccacctccaccaccaccacctcc +acctccacctccaccaccaccaccaccaccaccacctccaccaccaccaccaccaccacc +accaccacctccacctccaccaccaccacctccaccaccaccacctccaccacctccacc +accacctccacctccaccaccaccacctccacctccacctccaccaccaccaccaccacc +tccaccaccaccacctccaccaccaacacctccacctccacctccaccaccaccaccacc +acctccacctccaccaccacctccaccaccaccacctccacctccaccaccaccacctcc +accaccacctccaccacctccaccaccaccacctccaccaccacctccaccaccaccacc +accaccacctccacctccacctccaccaccaccacctctaccaccacctccaccaccacc +accaccaccaccaccaccacctccacctccacctccaccacctccacctccaccaccacc +accaccaccaccacctccaccaccacctccacctccacctccaccacctccacctccacc +accaccaccaccaccaccaccaccaccaccacctccacctccacctccaccaccaccacc +tctaccaccacctccaccaccaccaccaccaccaccaccaccacctccaccaccaccacc +tccacctccaccaccaccacctccacctccaccaccaccacctccacctccaccaccacc +accaccaccaccacctccaccaccaccacctccaccacctccaccaccacctccaccacc +accacctccaccacctccaccaccacctccacctccaccaccaccacctccaccaccacc +acctccacctccaccaccaccacctccacctccacctccaccacctccacctccaccacc +tccaccaccaccacctccaccacctccaccaccacctccacctccaccaccaccacctcc +accaccaccacctccaccacctccaccaccacctccacctccaccaccaccacctccacc +tccacctccaccaccaccaccaccacctccactaccaccacctccacctccaccaccacc +acctccaccaccaacacctccacctccacctccacctccaccaccaccacctccacctcc +accaccaccacctccaccaccaccacctccaccacctccacctccaccaccaccacctcc +accaccacctccaccaccaccaccacctccaccaccaccacctccaccaccacctccacc +accaccacctccaccaccacctccaccaccacctccacctccaccaccaccacctgcacc +accacctccacctccaccaccaccaccacctccacctccaccagcagcagcatcacttgt +tggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccctaa +gcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctggggggtca +actgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctctgtt +tggggtggagtccaagtct +>syndip_2_chr20:63693225-63693984 +tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg +taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt +cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct +atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacca-- 
+-------------------------------cctccacctccaccacct----------- +----------------ccacctccacctccaccaccacctccacctccacctccacctcc +acctccaccaccaccaccaccacctccaccacct------ccacctccacctccacctcc +acctccacctccacctccacctccaccacgtccaccacctccacca---ccacctccacc +accacctccacctccacctccacctccaccaccacctccaccaccacctccaccaccacc +tccaccaccacctccaccaccaccaccaccaccaccacctccaccacctccaccacctcc +acctccacctccaccaccaccacctccaccacctccaccacctccaccaccaccaccacc +accacctccacctccaccacctccacctccacctccaccacct------------ccacc +tccacctccaccaccacctccaccacctccaccacctccaccacctccacctccaccacc +tccaccacctccaccaccaccaccaccaccacctccaccaccacctccacca---ccacc +tccaccaccacctccaccaccacctccaccacctccacctccacctccaccacctccacc +acctccaccaccaccaccaccaccaccacctccaccaccacctccaccaccaccacctcc +acctccaccaccaccacctccaccacctccaccacctccaccaccaccaccaccaccacc +tccacctccaccacctccacctccacctccaccacctccacct---------ccacctcc +accaccacctccaccacctccaccacctccaccacctccacctccaccacctccaccacc +tccaccaccaccaccaccaccacctccaccaccacctccaccaccacctccaccaccacc +tccaccaccacctccaccacctccacctccacctccaccacct---ccacctccacctcc +accaccaccacct---ccacctccaccaccacctccaccaccaccacctccacctccacc +acctccaccaccagcaccacctccaccaccaccacctccacctccacctccacctccacc +tccacctccacctccaccaccacctccacctccaccacctccacct-------------- +----ccaccaccaccaccaccaccaccaccaccaccacctccacc--------------- +---------------------------------------------------tccaccacc +acctccaccacctccaccacctccaccaccaccacctccaccacctccacct-------- +------------------------------------------------------------ +----------------ccaccacctccaccaccacctccaccaccaccacctccacctcc +acca---------------------------------ccacctccaccacctccaccacc +tccaccaccaccaccaccacctccaccacctccaccaccaccacctccacctccacctcc +accaccaccaccaccacctccaccacctccaccaccaccacctccacctccaccaccacc +tccaccacctccaccaccaccaccaccacctccaccacctccaccaccaccacctcca-- +----------ccaccacctccaccaccaccacctccacctccaccaccacctccaccacc +tccaccacctccaccacctccacctccacctccaccacctccaccaccacctccaccacc +tccaccaccacctccaccacctccacctccacctccacct-------------------- 
+------------------------------------------------------------ +----------------------------ccacctccaccaccacctccaccacctccacc +accacctccacctccaccacctccacctccacctccaccacctccaccaccacctccacc +acctccaccacctccaccaccaccacca------------------------------cc +acctccaccacctccaccaccacctccaccacctccacctccacctccacct-------- +----------ccacctccaccaccacctccaccacct---ccaccaccacctccaccacc +acctccaccacctccacctccacctccacct----------------------------- +-------------ccacctccaccaccacctccaccacctccaccaccacctccacctcc +accacctccacctccacctccaccacctccaccaccacctccaccacctccaccaccacc +tccacct----------------------------------------------------- +-------ccaccacctccaccaccacctccaccacctccaccacctccaccaccaccacc +accacct----------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------ccaccacctccaccaccacctccaccacctccacctccacctccacc +t---------------------------------ccacctccaccaccacctccaccacc +tccaccaccacctccacctccaccacctccacctccacctccaccacctccaccaccacc +acctccaccaccacctccaccaccaccacctccacctccaccaccacctccaccacctc- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------caccacctccaccaccacctccaccacc +tccaccacctccaccaccaccacc---------------------------------acc +acctccaccacctccaccaccacctccaccaccaccacctccacctccacctccaccacc +tccaccacctccaccaccacctccaccacctccaccacctccaccaccacctccaccacc +tcca-------------------------------------------------------- +------------------------------------------------------------ +----------------------------ccacctccacctccaccaccaccacctccacc +acctccaccaccacctccaccacct----------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +-------------------ccacctccacctccacctccacctccaccaccacctccacc +acctccaccaccacctccacctccaccacctccacctccacctccaccacctccacctcc +acctccacctccaccaccaccacctccacctccacctccacctccaccacgtccacca-- +------------------------------------------------------------ +----------------------------------------------------cctccacc +acctccacctccacctccacctccacctccaccacctccacctccacctccaccacctcc +acctccacctccacctccaccacgtccaccaccaccacctccaccaccaccaccaccacc +t----------------------------------------------------------- +------------------------------------------------------------ +----------------------ccaccaccacctccacctccaccaccacctccaccacc +acctccaccaccaccacct---------------ccacctccaccaccaccacctccacc +tccaccaccaccaccaccaccacctccaccaccacctccacctccacca---ccacctcc +acctccaccacctccaccaccagcaccaccaccacctccaccaccaccacctccacctcc +accaccaccaccaccaccacct---------------------ccaccaccacctccacc +tccacctccacctccaccaccacctccaccaccacctccacca----------------- +------------------------------------------------------------ +-------------------------------------------------cctccaccacc +acctccacctccaccacctccacctccacctccaccacctccacctccacctccaccacc +tccacct----------------------------------------------------- +-------------ccacctccacctccaccaccaccacct-------------------- +-------------------------------ccacctccacctccacctccaccacgtcc +accacctccaccacctccacctccacctccacctccacctccaccacct----------- +-------------------------------ccacctccacctccaccacctccacctcc +acctccacctccaccacgtccaccaccaccacctccaccaccaccaccaccacctcca-- +-------------------ccaccacctccacctccaccaccacctccaccaccacctcc +accaccaccacctccacctccaccaccaccacctccacctccaccaccaccaccaccacc +a----------------------------------------------------------- +-------------------------------------------------------cctcc +accaccacctccacctccaccaccacctccacctccaccacctccaccaccaccaccacc +tccacctccacca------------------ccaccacctccacctccacctccacctcc 
+accaccacctccaccaccacctccaccaccaccacctccacctccacca----------- +-ccaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccaccacc +tccaccacctccaccaccaccaccacctccacctccaccaccaccacctccacctccacc +accaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccacctcc +acctccacctccacctccacctccaccacctccacctccacctccaccaccaccaccacc +tccaccacct-------------------------------------------------- +-ccaccaccacctccaccacctccacctccacctccacctccaccaccacctccaccacc +tccaccacca---------------------------------cctccaccacctccacc +acctccacctccaccaccacctccacctccacctccaccacctccaccaccacca----- +----------------------------------ccaccaccacctccaccaccacctcc +accaccacctccacctccacct------ccaccacctccaccaccacctccaccaccacc +accacctccacctccaccacctccacctccacctccacca-------------------- +-------------------------------------------------ccacctccacc +acctccaccaccaccaccaccacctccaccacctccacctccacct-------------- +------------------------------------------------------------ +----------------ccacctccacctccaccacctccacctccacctccaccaccacc +accacctccaccacctccacca-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------ccacctccaccacctccacctccacc +tccacctccaccaccacctccaccacctccaccaccacctccaccacctccaccacctcc +acct------------------------------------ccaccaccacctccacctcc +acctccaccacctccaccaccaccaccaccaccacctccaccttcaccaccaccaccacc +tccaccacctccaccaccacctcca----------------------------------- +----------------------------------ccaccacctccaccaccacctcctcc +acctccaccacctccaccaccaccaccacctccacctccaccagcagcagcatcacttgt +tggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccctaa +gcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctggggggtca +actgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctctgtt +tggggtggagtccaagtct +>p:HG002_1_chr20:63693225-63693984 +tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg 
+taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt +cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct +atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacca-- +-------------------------------cctccacctccaccacct----------- +----------------ccacctccacctccaccaccacctccacctccacctccacctcc +acctccaccaccaccaccaccacctccaccacct------ccacctccacctccacctcc +acctccacctccacctccacctccaccacgtccaccacctccacca---ccacctccacc +accacctccacctccacctccacctccaccaccacctccaccaccacctccaccaccacc +tccaccaccacctccaccaccaccaccaccaccaccacctccaccacctccaccacctcc +acctccacctccaccaccaccacctccaccacctccaccacctccaccaccaccaccacc +accacctccacctccaccacctccacctccacctccaccacct------------ccacc +tccacctccaccaccacctccaccacctccaccacctccaccacctccacctccaccacc +tccaccacctccaccaccaccaccaccaccacctccaccaccacctccacca---ccacc +tccaccaccacctccaccaccacctccaccacctccacctccacctccaccacctccacc +acctccaccaccaccaccaccaccaccacctccaccaccacctccaccaccaccacctcc +acctccaccaccaccacctccaccacctccaccacctccaccaccaccaccaccaccacc +tccacctccaccacctccacctccacctccaccacctccacct---------ccacctcc +accaccacctccaccacctccaccacctccaccacctccacctccaccacctccaccacc +tccaccaccaccaccaccaccacctccaccaccacctccaccaccacctccaccaccacc +tccaccaccacctccaccacctccacctccacctccaccacct---ccacctccacctcc +accaccaccacct---ccacctccaccaccacctccaccaccaccacctccacctccacc +acctccaccaccagcaccacctccaccaccaccacctccacctccacctccacctccacc +tccacctccacctccaccaccacctccacctccaccacctccacct-------------- +----ccaccaccaccaccaccaccaccaccaccaccacctccacc--------------- +---------------------------------------------------tccaccacc +acctccaccacctccaccacctccaccaccaccacctccaccacctccacct-------- +------------------------------------------------------------ +----------------ccaccacctccaccaccacctccaccaccaccacctccacctcc +acca---------------------------------ccacctccaccacctccaccacc +tccaccaccaccaccaccacctccaccacctccaccaccaccacctccacctccacctcc +accaccaccaccaccacctccaccacctccaccaccaccacctccacctccaccaccacc +tccaccacctccaccaccaccaccaccacctccaccacctccaccaccaccacctcca-- 
+----------ccaccacctccaccaccaccacctccacctccaccaccacctccaccacc +tccaccacctccaccacctccacctccacctccaccacctccaccaccacctccaccacc +tccaccaccacctccaccacctccacctccacctccacct-------------------- +------------------------------------------------------------ +----------------------------ccacctccaccaccacctccaccacctccacc +accacctccacctccaccacctccacctccacctccaccacctccaccaccacctccacc +acctccaccacctccaccaccaccacca------------------------------cc +acctccaccacctccaccaccacctccaccacctccacctccacctccacct-------- +----------ccacctccaccaccacctccaccacct---ccaccaccacctccaccacc +acctccaccacctccacctccacctccacct----------------------------- +-------------ccacctccaccaccacctccaccacctccaccaccacctccacctcc +accacctccacctccacctccaccacctccaccaccacctccaccacctccaccaccacc +tccacct----------------------------------------------------- +-------ccaccacctccaccaccacctccaccacctccaccacctccaccaccaccacc +accacct----------------------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------ccaccacctccaccaccacctccaccacctccacctccacctccacc +t---------------------------------ccacctccaccaccacctccaccacc +tccaccaccacctccacctccaccacctccacctccacctccaccacctccaccaccacc +acctccaccaccacctccaccaccaccacctccacctccaccaccacctccaccacctc- +------------------------------------------------------------ +------------------------------------------------------------ +--------------------------------caccacctccaccaccacctccaccacc +tccaccacctccaccaccaccacc---------------------------------acc +acctccaccacctccaccaccacctccaccaccaccacctccacctccacctccaccacc +tccaccacctccaccaccacctccaccacctccaccacctccaccaccacctccaccacc +tcca-------------------------------------------------------- +------------------------------------------------------------ +----------------------------ccacctccacctccaccaccaccacctccacc +acctccaccaccacctccaccacct----------------------------------- 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------ccacctccacctccacctccacctccaccaccacctccacc +acctccaccaccacctccacctccaccacctccacctccacctccaccacctccacctcc +acctccacctccaccaccaccacctccacctccacctccacctccaccacgtccacca-- +------------------------------------------------------------ +----------------------------------------------------cctccacc +acctccacctccacctccacctccacctccaccacctccacctccacctccaccacctcc +acctccacctccacctccaccacgtccaccaccaccacctccaccaccaccaccaccacc +t----------------------------------------------------------- +------------------------------------------------------------ +----------------------ccaccaccacctccacctccaccaccacctccaccacc +acctccaccaccaccacct---------------ccacctccaccaccaccacctccacc +tccaccaccaccaccaccaccacctccaccaccacctccacctccacca---ccacctcc +acctccaccacctccaccaccagcaccaccaccacctccaccaccaccacctccacctcc +accaccaccaccaccaccacct---------------------ccaccaccacctccacc +tccacctccacctccaccaccacctccaccaccacctccacca----------------- +------------------------------------------------------------ +-------------------------------------------------cctccaccacc +acctccacctccaccacctccacctccacctccaccacctccacctccacctccaccacc +tccacct----------------------------------------------------- +-------------ccacctccacctccaccaccaccacct-------------------- +-------------------------------ccacctccacctccacctccaccacgtcc +accacctccaccacctccacctccacctccacctccacctccaccacct----------- +-------------------------------ccacctccacctccaccacctccacctcc +acctccacctccaccacgtccaccaccaccacctccaccaccaccaccaccacctcca-- +-------------------ccaccacctccacctccaccaccacctccaccaccacctcc +accaccaccacctccacctccaccaccaccacctccacctccaccaccaccaccaccacc +a----------------------------------------------------------- 
+-------------------------------------------------------cctcc +accaccacctccacctccaccaccacctccacctccaccacctccaccaccaccaccacc +tccacctccacca------------------ccaccacctccacctccacctccacctcc +accaccacctccaccaccacctccaccaccaccacctccacctccacca----------- +-ccaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccaccacc +tccaccacctccaccaccaccaccacctccacctccaccaccaccacctccacctccacc +accaccaccaccaccacctccaccaccacctccacctccaccaccacctccaccacctcc +acctccacctccacctccacctccaccacctccacctccacctccaccaccaccaccacc +tccaccacct-------------------------------------------------- +-ccaccaccacctccaccacctccacctccacctccacctccaccaccacctccaccacc +tccaccacca---------------------------------cctccaccacctccacc +acctccacctccaccaccacctccacctccacctccaccacctccaccaccacca----- +----------------------------------ccaccaccacctccaccaccacctcc +accaccacctccacctccacct------ccaccacctccaccaccacctccaccaccacc +accacctccacctccaccacctccacctccacctccacca-------------------- +-------------------------------------------------ccacctccacc +acctccaccaccaccaccaccacctccaccacctccacctccacct-------------- +------------------------------------------------------------ +----------------ccacctccacctccaccacctccacctccacctccaccaccacc +accacctccaccacctccacca-------------------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------ccacctccaccacctccacctccacc +tccacctccaccaccacctccaccacctccaccaccacctccaccacctccaccacctcc +acct------------------------------------ccaccaccacctccacctcc +acctccaccacctccaccaccaccaccaccaccacctccaccttcaccaccaccaccacc +tccaccacctccaccaccacctcca----------------------------------- +----------------------------------ccaccacctccaccaccacctcctcc +acctccaccacctccaccaccaccaccacctccacctccaccagcagcagcatcacttgt +tggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccctaa +gcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctggggggtca 
+actgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctctgtt +tggggtggagtccaagtct +>p:HG002_2_chr20:63693225-63693984 +tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg +taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt +cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct +atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccaccttc +accaccaccacctccaccaccacctccaccacctccacctccaccacctccaccacctcc +accaccaccacctccaccaccaccacctccaccaccaccaccaccaccacctccaccacc +accaccaccaccaccacctccacctccaccacctccaccaccacctccacctccaccacc +acctccacctccacctccaccaccaccacctccaccacctccaccacctccacctccacc +accacctccacctccaccaccacctccacctccacctccacctccacctccaccaccacc +acctccaccacctccaccacctccacctccaccaccaccacctccacctccacctccacc +acctccacctccaccacctccacctccaccacctccaccaccaccaccaccaccaccacc +acctccaccaccaccaccaccaccacctccaccacctccacctccaccacctccaccacc +accacctccaccaccacctccaccaccacctccacctccaccacctccacctccaccacc +tccacctccacctccaccaccacctccaccacctccaccaccacctccaccacctccacc +tccaccaccaccacctccaccaccaccaccaccaccacctccaccacctccacctccacc +acctccaccaccaccacctccaccaccacctccaccaccacctccacctccaccacctcc +acctccaccacctccacctccacctccaccaccacctccaccacctccaccaccacctcc +accacctccaccacctccaccaccacctccaccacctccacctccaccaccaccacctcc +acctccaccaccaccacctccaccaccacctccacctccaccaccaccacctccacctcc +accaccaccacctccaccaccacctccaccaccaccaccaccaccaccacctccacctcc +accacctccaccaccaccaccaccacctccacctccaccacctccaccacctccaccacc +accaccaccacctccaccacctccaccaccacctccaccacctccacctccaccaccacc +acctccaccaccaccacctccacctccacctccacctccacctccaccaccacctccacc +tccaccaccacctccaccaccacctccacctccaccacctccacctccacctccaccacc +tccaccaccaccaccaccaccaccaccaccaccaccacctccaccacctccacctccacc +tccaccaccaccaccaccacctccaccaccaccaccaccacctccaccacctccaccacc +acctccaccaccaccaccaccaccaccaccaccacctccaccacctccacctccacctcc +acctccaccacctccaccacctccaccaccacctccaccaccaccaccaccacctccacc +accacctccacctccaccaccacctccaccaccacctccacctccaccacctccacctcc +accacctccaccacctccacctccaccacctccacctccacctccaccacctccaccacc 
+accaccaccaccaccaccaccaccaccacctccaccaccaccacctccaccaccacctcc +accacctccaccaccacctccaccaccaccaccaccaccaccaccaccaccaccacctcc +accaccacctccacctccaccaccacctccaccaccacctccacctccaccacctccacc +tccaccacctccaccacctccaccaccaccaccaccacctccaccaccacctccacctcc +accaccaccaccaccaccaccacctccaccaccaccacctccaccaccacctccaccacc +tccaccaccacctccacctccaccacctccaccaccacctccaccaccacctccaccacc +accacctccaccaccacctccaccacctccaccaccacctccacctccaccacctccacc +accacctccaccaccacctccaccaccaccacctccaccaccacctccaccacctccacc +accacctccacctccaccacctccacctccacctccaccacctccaccaccaccaccacc +accaccaccaccaccaccaccaccaccaccaccacctccacctccacctccaccacctcc +acctccaccacctccaccaccacctccacctccaccacctccacctccacctccaccacc +tccacctccaccacctccacctccacctccaccacctccaccaccaccacctccaccacc +acctccaccacctccaccaccacctccacctccaccacctccaccaccacctccaccacc +acctccaccaccaccacctccaccaccacctccaccacctccaccaccacctccacctcc +accacctccacctccacctccaccacctccaccaccaccaccaccaccaccaccaccacc +accacctccaccaccaccacctccaccaccacctccaccaccaccaccaccacctccacc +accaccaccaccacctccaccaccacctccaccacctccaccaccaccaccaccaccacc +accacctccaccaccacctccaccaccaccaccaccaccaccaccacctccaccacctcc +acctccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccaccacc +accacctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccaccacc +accaccacctccaccacctccaccacctccaccaccacctccaccacctccaccaccacc +accaccacctccaccaccacctccaccaccaccaccaccaccaccacctccacctccacc +tccaccacctccacctccaccacctccacctccaccacctccacctccacctccaccacc +tccaccaccacctccacctccaccaccacctcctccaccaccaccacctccaccaccacc +tccaccaccaccacctccacctccaccacctccacctccaccaccaccacctccacctcc +accaccaccacctccaccacctccaccaccacctccaccacctccacctccaccaccacc +acctccaccaccaccacctccacctccaccaccaccaccaccaccacctccacctccacc +accacctccaccaccacctccacctccacctccaccacctccaccaccacctccaccacc +tccaccacctccacctccaccaccaccaccacctccaccaccaccaccaccacctctacc +acctctaccaccaccaccaccacctccaccaccaccaccaccacctccaccaccaccacc +accaccacctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacc +tccacctccaccaccaccaccacctctaccaccaccaccaccacctccaccaccaccacc 
+acctccaccaccaccaccacctccaccaccacctccaccaccacctccaccacctccacc +accaccaccacctccacctccaccacctccaccaccacctccaccaccaccaccaccacc +acctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctccacc +tccaccaccaccaccacctccacctccaccaccaccaccaccaccaccaccaccaccacc +tccaccaccaccaccaccaccacctctaccaccaccaccaccacctccaccaccaccacc +acctccaccaccaccaccacctccaccaccacctccaccaccaccaccaccaccaccacc +acctctaccaccaccaccaccacctccaccaccaccaccacctccaccaccaccaccacc +tccaccaccacctccaccaccaccaccacctccaccaccacctccaccaccacctccacc +acctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccaccacc +acctccaccaccaccaccaccaccaccacctccaccaccacctccaccaccacctccacc +acctccaccaccaccaccaccacctccaccaccacctccacctccaccaccaccaccacc +acctccaccaccacctccaccaccaccaccaccaccacctccaccaccacctccaccacc +tccaccaccaccaccaccacctctaccacctctaccaccaccaccaccacctccaccacc +accaccacctccaccaccaccaccacctccaccaccaccacctccacctccaccaccacc +acctccaccacctccaccaccacctccaccaccaccacctccacctccaccaccaccacc +tccaccacctccaccaccaccaccaccaccaccaccacctccaccaccaccaccaccacc +acctccaccaccacctccaccacctccaccaccaccaccaccacctctaccacctctacc +accaccaccaccacctccaccaccaccaccacctccaccaccaccaccacctccaccacc +acctccaccaccaccacctccacctccaccaccaccacctccaccaccaccacctccacc +tccaccaccaccacctccaccacctccaccaccacctccaccaccaccacctccacctcc +accaccaccacctccaccacctccaccaccaccacctccaccaccaccacctccaccacc +tccaccaccaccaccaccacctccaccacctccaccacctccaccaccaccaccaccacc +tccacctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccacc +accacctccacctccacctccaccacctccacctccaccaccaccaccaccaccaccacc +accaccaccacctccacctccacctccaccacctccaccaccaccacctccaccaccacc +acctccacctccaccacctccaccaccacctccaccaccaccacctccacctccaccacc +accacctccaccaccaccacctccacctccaccaccaccacctccaccacctccaccacc +acctccaccaccaccacctccacctccaccaccaccacctccaccacctccaccaccacc +acctccaccaccaccacctccaccacctccaccaccaccaccaccacctccaccacctcc +accacctccaccaccaccaccaccacctccacctccacctccaccacctccaccaccacc +acctccaccaccaccaccaccaccacctccaccacctccaccaccaccaccaccacctcc +acctccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccaccacc 
+accacctccacctccacctccaccacctccacctccaccaccaccaccaccaccaccacc +tccaccaccacctccacctccacctccaccacctccacctccaccaccaccaccaccacc +accaccaccaccaccacctccacctccacctccaccaccaccacctctaccaccacctcc +accaccaccaccaccaccaccaccaccacctccaccacctccaccaccaccacctccacc +accaccacctccacctccaccaccaccaccacctccaccacctccaccaccacctccacc +tccacctccaccacctccaccaccaccacctccaccaccaccaccaccaccaccacctcc +accacctccaccaccaccacctccaccaccaccacctccacctccaccacctccaccacc +tccaccaccaccaccacctccaccaccaccaccaccaccacctccacctccaccacctct +accaccacctccaccaccaccaccaccaccacctccacctccaccacctccacctccacc +acctctaccaccacctccaccaccaccacctccacctccacctccaccaccaccacctcc +acctccacctccaccaccaccaccaccaccaccacctccaccaccaccaccaccaccacc +accaccacctccacctccaccaccaccacctccaccaccaccacctccaccacctccacc +accacctccacctccaccaccaccacctccacctccacctccaccaccaccaccaccacc +tccaccaccaccacctccaccaccaacacctccacctccacctccaccaccaccaccacc +acctccacctccaccaccacctccaccaccaccacctccacctccaccaccaccacctcc +accaccacctccaccacctccaccaccaccacctccaccaccacctccaccaccaccacc +accaccacctccacctccacctccaccaccaccacctctaccaccacctccaccaccacc +accaccaccaccaccaccacctccacctccacctccaccacctccacctccaccaccacc +accaccaccaccacctccaccaccacctccacctccacctccaccacctccacctccacc +accaccaccaccaccaccaccaccaccaccacctccacctccacctccaccaccaccacc +tctaccaccacctccaccaccaccaccaccaccaccaccaccacctccaccaccaccacc +tccacctccaccaccaccacctccacctccaccaccaccacctccacctccaccaccacc +accaccaccaccacctccaccaccaccacctccaccacctccaccaccacctccaccacc +accacctccaccacctccaccaccacctccacctccaccaccaccacctccaccaccacc +acctccacctccaccaccaccacctccacctccacctccaccacctccacctccaccacc +tccaccaccaccacctccaccacctccaccaccacctccacctccaccaccaccacctcc +accaccaccacctccaccacctccaccaccacctccacctccaccaccaccacctccacc +tccacctccaccaccaccaccaccacctccactaccaccacctccacctccaccaccacc +acctccaccaccaacacctccacctccacctccacctccaccaccaccacctccacctcc +accaccaccacctccaccaccaccacctccaccacctccacctccaccaccaccacctcc +accaccacctccaccaccaccaccacctccaccaccaccacctccaccaccacctccacc +accaccacctccaccaccacctccaccaccacctccacctccaccaccaccacctgcacc 
+accacctccacctccaccaccaccaccacctccacctccaccagcagcagcatcacttgt +tggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccctaa +gcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctggggggtca +actgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctctgtt +tggggtggagtccaagtct +>ref_chr20:63693225-63693984 +tcggcctgagcacagcattccccgaaggcagcgggcacagccggtcctggaccccactgg +taaatggggccccaggtgggaccctcagactcctgcgtggaaggcagtgtgggccagagt +cctgggctgcttggggtgggcatcctcgggccctgcttggccccgcctctctgttcccct +atgggagtgatgggggcctccacctccaccaccagcaccagcagcaccacctccacct-- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------ccacctccacct----------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------------ccacc +tccaccaccacctccacctccaccaccacctcctccaccaccaccacctccaccaccacc +accaccaccaccacctccacctccaccacctccacctccaccaccaccacctccacct-- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------ccaccaccacc +acctccacctccaccaccacctccaccaccaccacctccacctccacca----------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +-------------------------------------------------ccacctccacc +accaccaccaccaccaccaccaccaccacca----------------------------- +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ +------------------------------------------------------------ 
+------------------------------------------------------------ +------------------------------------------------------------ +----------------------------------ccacctccaccaccaccacctgcacc +accacctccacctccaccaccaccaccacctccacctccaccagcagcagcatcacttgt +tggggagaccctgtgcaactccatgcacagccctgtccctgccatagccccgacccctaa +gcacagccctgtccaactgccacacgtcccctgcctcccatgcatggtcctggggggtca +actgcacacgccagggtcctagggtcctagacccctgtcctccctgtttctgcctctgtt +tggggtggagtccaagtct diff --git a/repo_utils/test_files/external/fake_mafft/lookup/fm_0e637d93ff26bf168ee302bf13d84ca8.msa b/repo_utils/test_files/external/fake_mafft/lookup/fm_ff5bbed9fc29f7f0637d3929908a94a3.msa similarity index 90% rename from repo_utils/test_files/external/fake_mafft/lookup/fm_0e637d93ff26bf168ee302bf13d84ca8.msa rename to repo_utils/test_files/external/fake_mafft/lookup/fm_ff5bbed9fc29f7f0637d3929908a94a3.msa index b9fb50de..25f289a0 100644 --- a/repo_utils/test_files/external/fake_mafft/lookup/fm_0e637d93ff26bf168ee302bf13d84ca8.msa +++ b/repo_utils/test_files/external/fake_mafft/lookup/fm_ff5bbed9fc29f7f0637d3929908a94a3.msa @@ -1,4 +1,4 @@ ->syndip_1_chr20:63641679-63642236 +>syndip_1_chr20:63641679-63642235 tctattactgcggctagttactgtcccgccaggaccagactctggacctgcctcgtgcgc tgctggggacgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcct cggagtccccggccccgctctgcgcccctccgagctccgccctagccccgcccccgccca @@ -13,8 +13,8 @@ gccccggcccctgcccgctccgagctccgccccggccccgccccggcccctgcccgctcc gagctccgccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgcc ccggcccctgcccgctccgagctccgccccggccccgcccccgcaccttctcgcgcagcc gctcgcgcagtgcggccaggtgtgcctcgcggatctccttgctgagctccatcttgtagt -tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgtg ->syndip_2_chr20:63641679-63642236 +tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgt +>syndip_2_chr20:63641679-63642235 tctattactgcggctagttactgtcccgccaggaccagactctggacctgcctcgtgcgc tgctggggacgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcct cggagtccccggccccgctctgcgcccctccgagctccgccctagccccgcccccgccca @@ -29,8 
+29,8 @@ tccccggcccctgcccgctccgagctccgccccggcctcgccccggcccctgcccgctcc gagcttcgccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgcc ccggcccctgcccgctccgaactccgccccggccccgcccccgcaccttctcgcgcagcc gctcgcgcagtgcggccaggtgtgcctcgcggatctccttgctgagctccatcttgtagt -tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgtg ->p:HG002_1_chr20:63641679-63642236 +tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgt +>p:HG002_1_chr20:63641679-63642235 tctattactgcggctagttactgtcccgccaggaccagactctggacctgcctcgtgcgc tgctggggacgcccagtaaacacgggaggagcccccgatccccaccccagctcagcgcct cggagtccccggccccgctctgcgcccctccgagctccgccctagccccgcccccgccca @@ -45,8 +45,8 @@ tccccggcccctgcccgctccgagctccgccccggcctcgccccggcccctgcccgctcc gagcttcgccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgcc ccggcccctgcccgctccgaactccgccccggccccgcccccgcaccttctcgcgcagcc gctcgcgcagtgcggccaggtgtgcctcgcggatctccttgctgagctccatcttgtagt -tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgtg ->p:HG002_2_chr20:63641679-63642236 +tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgt +>p:HG002_2_chr20:63641679-63642235 tctattactgcggctagttactgtcccgccaggaccagactctggacctgcctcgtgcgc tgctggggacgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcct cggagtccccggccccgctctgcgcccctccgagctccgccctagccccgcccccgccca @@ -61,8 +61,8 @@ gccccggcccctgcccgctccgagctccgccccggccccgccccggcccctgcccgctcc gagctccgccccggccccgccccggcccctgcccgctccgagcttcgccccggccccgcc ccggcccctgcccgctccgagctccgccccggccccgcccccgcaccttctcgcgcagcc gctcgcgcagtgcggccaggtgtgcctcgcggatctccttgctgagctccatcttgtagt -tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgtg ->ref_chr20:63641679-63642236 +tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgt +>ref_chr20:63641679-63642235 tctattactgcggctagttactgtcccgccaggaccagactctggacctgcctcgtgcgc tgctggggacgcccagtaaacacgggaggagcccccgacccccaccccagctcagcgcct cggagtccccggccccgctctgcgcccctccgagctccgccctagccccgcccccgccca @@ -77,4 +77,4 @@ cgcccagtgccccgccccctgactgctgctagccctgc---------------------- 
--------------ccccgccccggcccctgcccgctccgagcttcgccccggccccgcc ccggcccctgcccgctccgagctccgccccggccccgcccccgcaccttctcgcgcagcc gctcgcgcagtgcggccaggtgtgcctcgcggatctccttgctgagctccatcttgtagt -tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgtg +tgagcttctcctccgcctggcggctgaagttgttattctcctccagcgccttgt diff --git a/repo_utils/test_files/variants/boundary.vcf.gz b/repo_utils/test_files/variants/boundary.vcf.gz new file mode 100644 index 00000000..18eca1be Binary files /dev/null and b/repo_utils/test_files/variants/boundary.vcf.gz differ diff --git a/repo_utils/test_files/variants/boundary.vcf.gz.tbi b/repo_utils/test_files/variants/boundary.vcf.gz.tbi new file mode 100644 index 00000000..e4a1af46 Binary files /dev/null and b/repo_utils/test_files/variants/boundary.vcf.gz.tbi differ diff --git a/repo_utils/test_files/variants/boundary_cpx.vcf.gz b/repo_utils/test_files/variants/boundary_cpx.vcf.gz new file mode 100644 index 00000000..b5c7614e Binary files /dev/null and b/repo_utils/test_files/variants/boundary_cpx.vcf.gz differ diff --git a/repo_utils/test_files/variants/boundary_cpx.vcf.gz.tbi b/repo_utils/test_files/variants/boundary_cpx.vcf.gz.tbi new file mode 100644 index 00000000..b72f4166 Binary files /dev/null and b/repo_utils/test_files/variants/boundary_cpx.vcf.gz.tbi differ diff --git a/repo_utils/truvari_ssshtests.sh b/repo_utils/truvari_ssshtests.sh index 6f7c17d0..045fcd1c 100644 --- a/repo_utils/truvari_ssshtests.sh +++ b/repo_utils/truvari_ssshtests.sh @@ -19,10 +19,12 @@ source $TESTSRC/sub_tests/consistency.sh source $TESTSRC/sub_tests/divide.sh source $TESTSRC/sub_tests/doctests.sh source $TESTSRC/sub_tests/entry_main.sh +source $TESTSRC/sub_tests/ga4gh.sh source $TESTSRC/sub_tests/phab.sh source $TESTSRC/sub_tests/refine.sh source $TESTSRC/sub_tests/segment.sh source $TESTSRC/sub_tests/stratify.sh +source $TESTSRC/sub_tests/unittest.sh source $TESTSRC/sub_tests/vcf2df.sh source $TESTSRC/sub_tests/version.sh diff --git a/truvari/__init__.py 
b/truvari/__init__.py index 980faaf2..3bfcb603 100644 --- a/truvari/__init__.py +++ b/truvari/__init__.py @@ -19,6 +19,7 @@ :meth:`entry_size_similarity` :meth:`entry_to_hash` :meth:`entry_to_key` +:meth:`entry_within` :meth:`entry_variant_type` Extra methods: @@ -51,6 +52,7 @@ :meth:`chunker` :meth:`cmd_exe` :meth:`consolidate_phab_vcfs` +:meth:`coords_within` :meth:`count_entries` :meth:`file_zipper` :meth:`help_unknown_cmd` @@ -58,6 +60,7 @@ :meth:`opt_gz_open` :meth:`optimize_df_memory` :meth:`performance_metrics` +:meth:`region_filter` :meth:`restricted_float` :meth:`restricted_int` :meth:`setup_logging` @@ -85,7 +88,7 @@ :data:`truvari.SZBINTYPE` """ -__version__ = '4.2.0' +__version__ = '4.2.1' from truvari.annotations.af_calc import ( @@ -101,6 +104,7 @@ ) from truvari.comparisons import ( + coords_within, create_pos_haplotype, entry_boundaries, entry_distance, @@ -116,6 +120,7 @@ entry_to_hash, entry_to_key, entry_variant_type, + entry_within, overlap_percent, overlaps, reciprocal_overlap, @@ -141,7 +146,8 @@ from truvari.region_vcf_iter import ( RegionVCFIterator, - build_anno_tree + build_anno_tree, + region_filter, ) from truvari.stratify import ( diff --git a/truvari/__main__.py b/truvari/__main__.py index 0b0a138c..e079d05e 100755 --- a/truvari/__main__.py +++ b/truvari/__main__.py @@ -20,6 +20,7 @@ from truvari.stratify import stratify_main from truvari.segmentation import segment_main from truvari.consistency import consistency_main +from truvari.make_ga4gh import make_ga4gh_main def flat_version(args): """Print the version""" @@ -39,6 +40,7 @@ def flat_version(args): "divide": divide_main, "phab": phab_main, "refine": refine_main, + "ga4gh": make_ga4gh_main, "version": flat_version} USAGE = f"""\ @@ -55,6 +57,7 @@ def flat_version(args): [bold][cyan]divide[/][/] Divide a VCF into independent shards [bold][cyan]phab[/][/] Variant harmonization using MSA [bold][cyan]refine[/][/] Automated bench result refinement with phab + [bold][cyan]ga4gh[/][/] 
Convert Truvari result to GA4GH [bold][cyan]version[/][/] Print the Truvari version and exit """ diff --git a/truvari/bench.py b/truvari/bench.py index 9f829d37..67891b3d 100644 --- a/truvari/bench.py +++ b/truvari/bench.py @@ -52,7 +52,7 @@ def parse_args(args): thresg.add_argument("-t", "--typeignore", action="store_true", default=defaults.typeignore, help="Don't compare variant types (%(default)s)") thresg.add_argument("--pick", type=str, default=defaults.pick, choices=PICKERS.keys(), - help="Number of matches reported per-call") + help="Number of matches reported per-call (%(default)s)") thresg.add_argument("--dup-to-ins", action="store_true", help="Assume DUP svtypes are INS (%(default)s)") thresg.add_argument("-C", "--chunksize", type=truvari.restricted_int, default=defaults.chunksize, @@ -584,7 +584,7 @@ def check_refine_candidate(self, result): if match.comp is not None: chrom = match.comp.chrom pos.extend(truvari.entry_boundaries(match.comp)) - if has_unmatched and pos: # I don't think I need to confirm pos, but unsure + if has_unmatched and pos: self.refine_candidates.append(f"{chrom}\t{min(*pos)}\t{max(*pos)}") diff --git a/truvari/collapse.py b/truvari/collapse.py index 816a7c0f..de0989ab 100644 --- a/truvari/collapse.py +++ b/truvari/collapse.py @@ -15,6 +15,7 @@ from functools import cmp_to_key, partial import pysam +import numpy as np from intervaltree import IntervalTree import truvari @@ -30,6 +31,7 @@ class CollapsedCalls(): match_id: str matches: list = field(default_factory=list) gt_consolidate_count: int = 0 + genotype_mask: str = "" # bad def combine(self, other): """ @@ -66,6 +68,32 @@ def annotate_entry(self, header, med_info): if med_info: self.entry.info["CollapseStart"], self.entry.info["CollapseEnd"], self.entry.info["CollapseSize"] = self.calc_median_sizepos() + @staticmethod + def make_genotype_mask(entry, gtmode): + """ + Populate the genotype mask + """ + if gtmode == 'off': + return None + to_mask = (lambda x: 1 in x) if gtmode == 
'all' else ( + lambda x: x.count(1) == 1) + return np.array([to_mask(_.allele_indices) for _ in entry.samples.values()], dtype=bool) + + def gt_conflict(self, other, which_gt): + """ + Return true if entry's genotypes conflict with any of the current collapse + which_gt all prevents variants present in the same sample from being collapsed + which_gt het only prevents two het variants from being collapsed. + """ + if which_gt == 'off': + return False + + o_mask = self.make_genotype_mask(other, which_gt) + if (self.genotype_mask & o_mask).any(): + return True + self.genotype_mask |= o_mask + return False + def chain_collapse(cur_collapse, all_collapse, matcher): """ @@ -98,10 +126,12 @@ def collapse_chunk(chunk, matcher): call_id += 1 m_collap = CollapsedCalls(remaining_calls.pop(0), f'{chunk_id}.{call_id}') - unmatched = [] - # Sort based on size difference of current call - remaining_calls.sort(key=partial(relative_size_sorter, m_collap.entry)) - for candidate in remaining_calls: + # quicker genotype comparison - needs to be refactored + m_collap.genotype_mask = m_collap.make_genotype_mask( + m_collap.entry, matcher.gt) + + # Sort based on size difference to current call + for candidate in sorted(remaining_calls, key=partial(relative_size_sorter, m_collap.entry)): mat = matcher.build_match(m_collap.entry, candidate, m_collap.match_id, @@ -109,26 +139,23 @@ def collapse_chunk(chunk, matcher): short_circuit=True) if matcher.hap and not hap_resolve(m_collap.entry, candidate): mat.state = False - if mat.state and gt_conflict(m_collap, candidate, matcher.gt): + if mat.state and m_collap.gt_conflict(candidate, matcher.gt): mat.state = False if mat.state: m_collap.matches.append(mat) - else: - unmatched.append(candidate) # Does this collap need to go into a previous collap? 
if not matcher.chain or not chain_collapse(m_collap, ret, matcher): ret.append(m_collap) - remaining_calls = unmatched # If hap, only allow the best match if matcher.hap and m_collap.matches: mats = sorted(m_collap.matches, reverse=True) m_collap.matches = [mats.pop(0)] - remaining_calls.extend(mat.comp for mat in mats) - # Sort based on the desired sorting to choose the next one - remaining_calls.sort(key=matcher.sorter) + # Remove everything that was used + to_rm = [_.comp for _ in m_collap.matches] + remaining_calls = [_ for _ in remaining_calls if _ not in to_rm] if matcher.no_consolidate: for val in ret: @@ -146,12 +173,14 @@ def collapse_chunk(chunk, matcher): ret.sort(key=cmp_to_key(lambda x, y: x.entry.pos - y.entry.pos)) return ret + def relative_size_sorter(base, comp): """ Sort calls based on the absolute size difference of base and comp """ return abs(truvari.entry_size(base) - truvari.entry_size(comp)) + def collapse_into_entry(entry, others, hap_mode=False): """ Consolidate information for genotypes where sample is unset @@ -214,6 +243,7 @@ def gt_conflict(cur_collapse, entry, which_gt): Return true if entry's genotypes conflict with any of the current collapse which_gt all prevents variants present in the same sample from being collapsed which_gt het only prevents two het variants from being collapsed. + Might be deprecated, now? """ if which_gt == 'off': return False @@ -245,6 +275,7 @@ def checker(base, comp): return False + def get_ac(gt): """ Helper method to get allele count. assumes only 1s as ALT diff --git a/truvari/comparisons.py b/truvari/comparisons.py index 332bbfd9..8dea1a47 100644 --- a/truvari/comparisons.py +++ b/truvari/comparisons.py @@ -10,6 +10,31 @@ import truvari +def coords_within(qstart, qend, rstart, rend, end_within): + """ + Returns if a span is within the provided [start, end). 
All coordinates assumed 0-based + + :param `qstart`: query start position + :type `qstart`: integer + :param `qend`: query end position + :type `qend`: integer + :param `start`: start of span + :type `start`: integer + :param `end`: end of span + :type `end`: integer + :param `end_within`: if true, qend <= rend, else qend < rend + :type `end_within`: bool + + :return: If the coordinates are within the span + :rtype: bool + """ + if end_within: + ending = qend <= rend + else: + ending = qend < rend + return qstart >= rstart and ending + + def create_pos_haplotype(a1, a2, ref, min_len=0): """ Create haplotypes of two allele's regions that are assumed to be overlapping @@ -459,6 +484,13 @@ def entry_variant_type(entry): return truvari.get_svtype(mat.groupdict()["SVTYPE"]) return truvari.get_svtype("UNK") +def entry_within(entry, rstart, rend): + """ + Extract entry boundaries and type to call `coords_within` + """ + qstart, qend = truvari.entry_boundaries(entry) + end_within = truvari.entry_variant_type(entry) != truvari.SV.INS + return coords_within(qstart, qend, rstart, rend, end_within) def overlap_percent(astart, aend, bstart, bend): """ diff --git a/truvari/make_ga4gh.py b/truvari/make_ga4gh.py new file mode 100644 index 00000000..3df8a789 --- /dev/null +++ b/truvari/make_ga4gh.py @@ -0,0 +1,194 @@ +""" +Consolidate truvari by truth/query and annotate with GA4GH intermediates tags +""" +import os +import sys +import json +import logging +import argparse +from collections import defaultdict + +import pysam +import pandas as pd +from intervaltree import IntervalTree + +import truvari + +def parse_args(args): + """ + Argument parser + """ + parser = argparse.ArgumentParser(prog="ga4gh", description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("-i", "--input", required=True, + help="Truvari result directory") + parser.add_argument("-o", "--output", required=True, + help="Output suffix") + parser.add_argument("-w", 
"--with-refine", action="store_true", + help="Consolidate with `truvari refine` output") + parser.add_argument("-B", "--buffer", type=int, default=100, + help="Buffer length used by phab during refine (100)") + parser.add_argument("-b", "--bSample", default=0, + help="Sample name to annotate in truth VCF (default first)") + parser.add_argument("-c", "--cSample", default=0, + help="Sample name to annotate in query VCF (default first)") + + args = parser.parse_args(args) + return args + +def edit_header(header): + """ + Add INFO for new fields to vcf + """ + header.add_line(('##INFO=')) + header.add_line(('##FORMAT=')) + header.add_line(('##FORMAT=')) + return header + +def build_tree(regions, buffer=0): + """ + Build tree from regions + """ + tree = defaultdict(IntervalTree) + for _, i in regions.iterrows(): + tree[i['chrom']].addi(i['start'] - buffer, i['end'] + buffer + 1) + for i in tree: + tree[i].merge_overlaps() + return tree + +def get_truvari_filenames(in_dir): + """ + Join in_dir to relevant filenames + """ + return {'tp-base': os.path.join(in_dir, 'tp-base.vcf.gz'), + 'tp-comp': os.path.join(in_dir, 'tp-comp.vcf.gz'), + 'fn': os.path.join(in_dir, 'fn.vcf.gz'), + 'fp': os.path.join(in_dir, 'fp.vcf.gz'), + 'params': os.path.join(in_dir, 'params.json')} + + +def move_record(entry, out_vcf, sample): + """ + Move the new entry (with the single specified sample) + to be writeable to the out_vcf + """ + ret = out_vcf.new_record(contig=entry.contig, start=entry.start, stop=entry.stop, alleles=entry.alleles, + id=entry.id, qual=entry.qual, filter=entry.filter, info=entry.info) + for k,v in entry.samples[sample].items(): + ret.samples[0][k] = v + return ret + +def parse_bench_dir(in_dir, t_vcf, q_vcf, tree, is_refined): + """ + Pull relevant entries from relevant files + """ + in_vcfs = get_truvari_filenames(in_dir) + params = None + with open(in_vcfs['params'], 'r') as fh: + params = json.load(fh) + bsamp = params['bSample'] + csamp = params['cSample'] + ops_to_do 
= [("tp-base", "TP", t_vcf, bsamp), ("fn", "FN", t_vcf, bsamp), + ("tp-comp", "TP", q_vcf, csamp), ("fp", "FP", q_vcf, csamp)] + for filename, bdkey, out_vcf, samp in ops_to_do: + m_vcf = pysam.VariantFile(in_vcfs[filename]) + m_iter = m_vcf if tree is None else truvari.region_filter(m_vcf, tree, is_refined) + for entry in m_iter: + n_entry = move_record(entry, out_vcf, samp) + n_entry.info["IsRefined"] = is_refined + n_entry.samples[0]["BD"] = bdkey + if bdkey.startswith('T'): + n_entry.samples[0]["BK"] = 'gm' if entry.info["GTMatch"] == 0 else 'am' + else: + n_entry.samples[0]["BK"] = '.' if entry.info["TruScore"] is None else 'lm' + out_vcf.write(n_entry) + +def check_bench_dir(dirname): + """ + Make sure all the files are there + """ + check_fail = False + b_files = get_truvari_filenames(dirname) + for k, v in b_files.items(): + if not os.path.exists(v): + logging.error("%s bench file doesn't exist", k) + check_fail = True + return check_fail + + +def check_args(args): + """ + Ensure everything we're looking for is available + return True if check failed + """ + check_fail = False + if not os.path.isdir(args.input): + logging.error("input is not a directory") + check_fail = True + else: + check_fail |= check_bench_dir(args.input) + + if os.path.exists(args.output + '_truth.vcf.gz'): + logging.error("%s_truth.vcf.gz already exists", args.output) + check_fail = True + if os.path.exists(args.output + '_truth.vcf.gz.tbi'): + logging.error("%s_truth.vcf.gz.tbi already exists", args.output) + check_fail = True + if os.path.exists(args.output + '_query.vcf.gz'): + logging.error("%s_query.vcf.gz already exists", args.output) + check_fail = True + if os.path.exists(args.output + '_query.vcf.gz.tbi'): + logging.error("%s_query.vcf.gz.tbi already exists", args.output) + check_fail = True + + if not args.with_refine: + return check_fail + + pdir = os.path.join(args.input, 'phab_bench') + if not os.path.exists(pdir): + logging.error("phab_bench dir doesn't exist") + 
check_fail = True + else: + check_fail |= check_bench_dir(pdir) + + return check_fail + +def make_ga4gh_main(args): + """ + Main entrypoint + """ + args = parse_args(args) + truvari.setup_logging() + + if check_args(args): + logging.error("Unable to do conversion") + sys.exit(1) + + logging.info("Consolidating VCFs") + + bench_vcfs = get_truvari_filenames(args.input) + + b_header = edit_header(pysam.VariantFile(bench_vcfs['tp-base'], 'r').header.copy()) + t_vcf_fn = args.output + '_truth.vcf' + t_vcf = pysam.VariantFile(t_vcf_fn, 'w', header=b_header) + + c_header = edit_header(pysam.VariantFile(bench_vcfs['tp-comp'], 'r').header.copy()) + q_vcf_fn = args.output + '_query.vcf' + q_vcf = pysam.VariantFile(q_vcf_fn, 'w', header=c_header) + + refine_tree = None + if args.with_refine: + regions = pd.read_csv(os.path.join(args.input, "refine.regions.txt"), sep='\t') + refine_tree = build_tree(regions[regions['refined']], args.buffer) + parse_bench_dir(os.path.join(args.input, 'phab_bench'), t_vcf, q_vcf, + tree=refine_tree, is_refined=True) + + parse_bench_dir(args.input, t_vcf, q_vcf, tree=refine_tree, is_refined=False) + t_vcf.close() + q_vcf.close() + truvari.compress_index_vcf(t_vcf_fn) + truvari.compress_index_vcf(q_vcf_fn) + logging.info("Finished") diff --git a/truvari/matching.py b/truvari/matching.py index 4dc99112..68827a43 100644 --- a/truvari/matching.py +++ b/truvari/matching.py @@ -90,6 +90,8 @@ def __init__(self, args=None): self.reference = None if self.params.reference is not None: + #sys.stderr.write("WARNING `--reference` is no longer recommended for use with bench/collapse ") + #sys.stderr.write("results will be slower and less accurate.\n") self.reference = pysam.FastaFile(self.params.reference) @staticmethod @@ -170,11 +172,11 @@ def filter_call(self, entry, base=False): or (not base and size < self.params.sizefilt): return True - samp = self.params.bSample if base else self.params.cSample prefix = 'b' if base else 'c' - if (self.params.no_ref in 
["a", prefix] or self.params.pick == 'ac') \ - and not truvari.entry_is_present(entry, samp): - return True + if self.params.no_ref in ["a", prefix] or self.params.pick == 'ac': + samp = self.params.bSample if base else self.params.cSample + if not truvari.entry_is_present(entry, samp): + return True return False diff --git a/truvari/msatovcf.py b/truvari/msatovcf.py index 6224d9fc..c4b18619 100644 --- a/truvari/msatovcf.py +++ b/truvari/msatovcf.py @@ -142,7 +142,7 @@ def msa2vcf(msa, anchor_base='N'): >>> import truvari >>> from truvari.phab import fasta_reader >>> msa_dir = "repo_utils/test_files/external/fake_mafft/lookup/" - >>> msa_file = "fm_ca43b50e2a5d770bb34202d8a7b62421.msa" + >>> msa_file = "fm_7bb50c57d657828978076072c80f8a1f.msa" >>> seqs = open(msa_dir + msa_file).read() >>> fasta = dict(fasta_reader(seqs)) >>> m_entries_str = truvari.msa2vcf(fasta) diff --git a/truvari/phab.py b/truvari/phab.py index c7bb42ae..c5c32437 100644 --- a/truvari/phab.py +++ b/truvari/phab.py @@ -108,6 +108,8 @@ def make_consensus(data, ref_fn): vcf_fn, sample, prefix = data reference = pysam.FastaFile(ref_fn) vcf = pysam.VariantFile(vcf_fn) + # could swap these fors with data structures and more memory.. 
+ # or I could do the work to iter chroms and pretty much -T it o_samp = 'p:' + sample if prefix else sample ret = {} for ref in list(reference.references): @@ -119,7 +121,7 @@ def make_consensus(data, ref_fn): correction = [-start, -start] for entry in vcf.fetch(chrom, start, end): # Variant must be within boundaries - if entry.start < start or entry.stop > end: + if not truvari.entry_within(entry, start, end): continue if entry.samples[sample]['GT'][0] == 1: correction[0] = incorporate(haps[0], entry, correction[0]) diff --git a/truvari/refine.py b/truvari/refine.py index 2076de7a..cadc7132 100644 --- a/truvari/refine.py +++ b/truvari/refine.py @@ -81,7 +81,7 @@ def resolve_regions(params, args): reeval_trees, count = truvari.build_anno_tree(args.regions, idxfmt="") logging.info("%d --regions loaded", count) - return [[chrom, intv.begin, intv.end] + return [[chrom, intv.begin, intv.end - 1] for chrom, all_intv in reeval_trees.items() for intv in sorted(all_intv)] @@ -143,7 +143,8 @@ def refined_stratify(outdir, to_eval_coords, regions, threads=1): """ update regions in-place with the output variant counts """ - counts = truvari.benchdir_count_entries(outdir, to_eval_coords, True, threads) + counts = truvari.benchdir_count_entries( + outdir, to_eval_coords, True, threads) counts.index = regions[regions['refined']].index counts.columns = ["out_tpbase", "out_tp", "out_fn", "out_fp"] regions = regions.join(counts) @@ -173,33 +174,30 @@ def recount_variant_report(orig_dir, phab_dir, regions): Count original variants not in refined regions and consolidate with the refined counts. 
""" - def falls_in_count(fn, no_count): - """ count number of variants that don't start in no_count """ - vcf = pysam.VariantFile(fn) - count = 0 - for entry in vcf: - if entry.chrom in no_count.index: - chrom = no_count.loc[entry.chrom] - if ((chrom['start'] <= entry.start) & (entry.start <= chrom['end'])).any(): - continue - count += 1 - return count + tree = defaultdict(IntervalTree) + n_regions = regions[regions["refined"]].copy() + for _, row in n_regions.iterrows(): + tree[row['chrom']].addi(row['start'], row['end'] + 1) summary = truvari.StatsBox() with open(os.path.join(phab_dir, "summary.json")) as fh: summary.update(json.load(fh)) - # if the variant starts in a refined region, skip it - no_count = regions[regions["refined"]].copy().set_index('chrom') - no_count['start'] -= PHAB_BUFFER - no_count['end'] += PHAB_BUFFER - tpb = falls_in_count(os.path.join(orig_dir, 'tp-base.vcf.gz'), no_count) + # Adding the original counts to the updated phab counts + vcf = pysam.VariantFile(os.path.join(orig_dir, 'tp-base.vcf.gz')) + tpb = len(list(truvari.region_filter(vcf, tree, False))) summary["TP-base"] += tpb - tpc = falls_in_count(os.path.join(orig_dir, 'tp-comp.vcf.gz'), no_count) + + vcf = pysam.VariantFile(os.path.join(orig_dir, 'tp-comp.vcf.gz')) + tpc = len(list(truvari.region_filter(vcf, tree, False))) summary["TP-comp"] += tpc - fp = falls_in_count(os.path.join(orig_dir, 'fp.vcf.gz'), no_count) + + vcf = pysam.VariantFile(os.path.join(orig_dir, 'fp.vcf.gz')) + fp = len(list(truvari.region_filter(vcf, tree, False))) summary["FP"] += fp - fn = falls_in_count(os.path.join(orig_dir, 'fn.vcf.gz'), no_count) + + vcf = pysam.VariantFile(os.path.join(orig_dir, 'fn.vcf.gz')) + fn = len(list(truvari.region_filter(vcf, tree, False))) summary["FN"] += fn summary["comp cnt"] += tpc + fp @@ -431,7 +429,6 @@ def refine_main(cmdargs): args.benchdir, 'refine.variant_summary.json')) report = make_region_report(regions) - regions['end'] -= 1 # Undo IntervalTree's correction 
regions.to_csv(os.path.join( args.benchdir, 'refine.regions.txt'), sep='\t', index=False) with open(os.path.join(args.benchdir, "refine.region_summary.json"), 'w') as fout: diff --git a/truvari/region_vcf_iter.py b/truvari/region_vcf_iter.py index c300970a..7de75c11 100644 --- a/truvari/region_vcf_iter.py +++ b/truvari/region_vcf_iter.py @@ -4,12 +4,11 @@ import sys import copy import logging -from collections import defaultdict +from collections import defaultdict, deque from intervaltree import IntervalTree import truvari - class RegionVCFIterator(): """ Helper class to specify include regions of the genome when iterating a VCF @@ -94,14 +93,13 @@ def include(self, entry): # Filter these early so we don't have to keep checking overlaps if self.max_span is not None and aend - astart > self.max_span: return False - if astart == aend - 1: - return self.tree[entry.chrom].overlaps(astart) + m_ovl = self.tree[entry.chrom].overlap(astart, aend) if len(m_ovl) != 1: return False m_ovl = list(m_ovl)[0] - # Edge case - the variant spans the entire include region - return astart >= m_ovl.begin and aend <= m_ovl.end + end_within = truvari.entry_variant_type(entry) != truvari.SV.INS + return truvari.coords_within(astart, aend, m_ovl.begin, m_ovl.end - 1, end_within) def extend(self, pad): """ @@ -140,6 +138,8 @@ def build_anno_tree(filename, chrom_col=0, start_col=1, end_col=2, one_based=Fal :type `end_col`: integer, optional :param `one_based`: True if coordinates are one-based :type `one_based`: bool, optional + :param `is_pyintv`: add 1 to end position to correct pyintervaltree.overlap behavior + :type `is_pyintv`: bool, optional :param `comment`: ignore lines if they start with this string :type `comment`: string, optional :param `idxfmt`: Index of column in file with chromosome @@ -165,3 +165,70 @@ def build_anno_tree(filename, chrom_col=0, start_col=1, end_col=2, one_based=Fal tree[chrom].addi(start, end + 1, data=m_idx) idx += 1 return tree, idx + +def 
region_filter(vcf, tree, inside=True, with_region=False): + """ + Given a VariantRecord iter and defaultdict(IntervalTree), + yield variants which are inside/outside the tree regions + The region associated with the entry can be retuned also when using with_region. + with_region returns (entry, (chrom, Interval)) + """ + ret_type = (lambda x,y,z: (x,(y,z))) if with_region else (lambda x,y,z: x) + for chrom, cur_tree in tree.items(): + cur_tree = deque(sorted(cur_tree)) + try: + cur_intv = cur_tree.popleft() + except IndexError: + # region-less chromosome + if not inside: + try: + for cur_entry in vcf.fetch(chrom): + yield ret_type(cur_entry, chrom, cur_intv) + except ValueError: + pass # region on chromosome not in vcf + continue + + try: + cur_iter = vcf.fetch(chrom) + except ValueError: + continue # region on chromosome not in vcf + try: + cur_entry = next(cur_iter) + except StopIteration: + # variant-less chromosome + continue + cur_start, cur_end = truvari.entry_boundaries(cur_entry) + + while True: + # if start is after this interval, we need the next interval + if cur_start > cur_intv.end: + try: + cur_intv = cur_tree.popleft() + except IndexError: + if not inside: + yield ret_type(cur_entry, chrom, cur_intv) # pass this back before flush after the while + break + # well before, we need the next entry + elif cur_end < cur_intv.begin: + if not inside: + yield ret_type(cur_entry, chrom, cur_intv) + try: + cur_entry = next(cur_iter) + cur_start, cur_end = truvari.entry_boundaries(cur_entry) + except StopIteration: + break + else: + end_within = truvari.entry_variant_type(cur_entry) != truvari.SV.INS + is_within = truvari.coords_within(cur_start, cur_end, cur_intv.begin, cur_intv.end - 1, end_within) + if is_within == inside: + yield ret_type(cur_entry, chrom, cur_intv) + try: + cur_entry = next(cur_iter) + cur_start, cur_end = truvari.entry_boundaries(cur_entry) + except StopIteration: + break + + # if we finished the intervals first, need to flush the rest of 
the outside entries + if not inside: + for cur_entry in cur_iter: + yield ret_type(cur_entry, chrom, cur_intv) diff --git a/truvari/stratify.py b/truvari/stratify.py index b259cfaa..e1bdbab6 100644 --- a/truvari/stratify.py +++ b/truvari/stratify.py @@ -6,10 +6,12 @@ import argparse import multiprocessing from functools import partial +from collections import defaultdict import pysam import numpy as np import pandas as pd +from intervaltree import IntervalTree import truvari @@ -45,16 +47,18 @@ def count_entries(vcf, chroms, regions, within): """ if isinstance(vcf, str): vcf = pysam.VariantFile(vcf) + tree = defaultdict(IntervalTree) + counts_idx = {} counts = [0] * len(regions) for idx, row in enumerate(zip(chroms, regions)): chrom, coords = row start, end = coords - for entry in vcf.fetch(chrom, start, end): - if within: - ent_start, ent_end = truvari.entry_boundaries(entry) - if not (start <= ent_start and ent_end <= end): - continue - counts[idx] += 1 + end += 1 + tree[chrom].addi(start, end) + counts_idx[(chrom, start, end)] = idx + for _, location in truvari.region_filter(vcf, tree, within, True): + key = (location[0], location[1].begin, location[1].end) + counts[counts_idx[key]] += 1 return counts