Skip to content

Commit

Permalink
fix links, cron, bam dl script
Browse files Browse the repository at this point in the history
  • Loading branch information
samwachspress committed Apr 15, 2021
1 parent c1ab67b commit 78d3286
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 43 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,4 @@ invoke

.vscode/launch.json
cloud_functions/heritability_run/strain_data.tsv
base/bam_bai_signed_download_script.sh
2 changes: 0 additions & 2 deletions base/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,6 @@ def register_extensions(app):
csrf.exempt(maintenance_bp)
app.config['csrf'] = csrf
jwt.init_app(app)
CSRFProtect(app)
app.config['csrf'] = CSRFProtect(app)

def register_blueprints(app):
"""Register blueprints with the Flask application."""
Expand Down
2 changes: 2 additions & 0 deletions base/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
STRAIN_PHOTO_PATH = 'photos/Celegans/'

USER_ROLES = [('user', 'User'), ('admin', 'Admin')]
BAM_BAI_DOWNLOAD_SCRIPT_NAME = "bam_bai_signed_download_script.sh"


class PRICES:
DIVERGENT_SET = 160
Expand Down
2 changes: 1 addition & 1 deletion base/static/content/help/FAQ.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ __[See our filter optimization report for further details](/static/reports/filte

### How are strains grouped by isotype? <a name='strain-groups'></a>

In 2012, we [published](http://dx.doi.org/10.1038/ng.1050) genome-wide variant data from reduced representation sequencing of approximately 10% of the C. elegans genome (RAD-seq). Using these data, we grouped strains into isotypes. We also found many strains that were mislabeled as wild isolates but were instead N2 derivatives, recombinants from laboratory experiments, and mutagenesis screen isolates (detailed in <a href="#strain-issues">Strain issues</a>). These strains were not characterized further. For the isotypes, we chose one strain to be the isotype reference strain. This strain can be ordered through CeNDR [here]({{ url_for('strains.strains_catalog') }}).
In 2012, we [published](http://dx.doi.org/10.1038/ng.1050) genome-wide variant data from reduced representation sequencing of approximately 10% of the C. elegans genome (RAD-seq). Using these data, we grouped strains into isotypes. We also found many strains that were mislabeled as wild isolates but were instead N2 derivatives, recombinants from laboratory experiments, and mutagenesis screen isolates (detailed in [Strain issues]({{ url_for('strains.strains_issues') }})). These strains were not characterized further. For the isotypes, we chose one strain to be the isotype reference strain. This strain can be ordered through CeNDR [here]({{ url_for('strains.strains_catalog') }}).

After 2012, with advances in genome sequencing, we transitioned our sequencing to whole-genome short-read sequencing.
All isotype reference strains were resequenced whole-genome. The other strains within an isotype were not,
Expand Down
2 changes: 1 addition & 1 deletion base/static/content/help/Variant-Prediction.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Variant Prediction

We use [SnpEff](https://snpeff.sourceforge.net/) to perform variant prediction. Variants are labeled as having a LOW, MODERATE, or HIGH impact based on the location and nature of the variant. We do not include MODIFIER annotations provided by SnpEff. Variant impacts are detailed within [The Sequence Ontology Project](https://www.sequenceontology.org/).
We use [SnpEff](https://pcingola.github.io/SnpEff/) to perform variant prediction. Variants are labeled as having a LOW, MODERATE, or HIGH impact based on the location and nature of the variant. We do not include MODIFIER annotations provided by SnpEff. Variant impacts are detailed within [The Sequence Ontology Project](http://www.sequenceontology.org/).

<table class="table table-striped table-hover">
<thead>
Expand Down
2 changes: 1 addition & 1 deletion base/templates/browser.html
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ <h5>Tracks <a href="{{ url_for('primary.help_item', filename = 'Variant-Predicti
</div>

<div class="checkbox">
<label><input type="checkbox" class="track-select normal-track" value="Divergent Regions" />Divergent Regions Strain<br /> (<a href="">Lee <em>et al.</em> 2020)</a></label>
<label><input type="checkbox" class="track-select normal-track" value="Divergent Regions" />Divergent Regions Strain<br /> (<a href="https://andersenlab.org/publications/2020LeebioRxiv.pdf">Lee <em>et al.</em> 2020</a>)</label>
</div>

<h5>Variant Impact <a href="{{ url_for('primary.help_item', filename = 'Variant-Browser') }}#variant-effects"><span class="glyphicon glyphicon-question-sign"></span></a></h5>
Expand Down
42 changes: 5 additions & 37 deletions base/views/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

from datetime import timedelta
from simplejson.errors import JSONDecodeError
from flask import make_response, render_template, Blueprint
from flask import make_response, render_template, Blueprint, send_file

from base.constants import GOOGLE_CLOUD_BUCKET
from base.constants import BAM_BAI_DOWNLOAD_SCRIPT_NAME, GOOGLE_CLOUD_BUCKET
from base.config import config
from base.extensions import cache
from base.models import Strain
Expand Down Expand Up @@ -123,25 +123,16 @@ def strain_issues(selected_release=None):
@cache.cached(timeout=60*60*24)
@jwt_required()
def download_script(selected_release):
script_content = generate_bam_download_script(release=selected_release)
download_page = render_template('download_script.sh', **locals())
response = make_response(download_page)
response.headers["Content-Type"] = "text/plain"
return response
return send_file(BAM_BAI_DOWNLOAD_SCRIPT_NAME, as_attachment=True)



@data_bp.route('/release/latest/download/download_strain_bams.sh')
@data_bp.route('/release/<string:selected_release>/download/download_strain_bams.sh')
@cache.cached(timeout=60*60*24)
@jwt_required()
def download_script_strain_v2(selected_release=None):
if selected_release is None:
selected_release = config['DATASET_RELEASE']
script_content = generate_bam_download_script(release=selected_release)
download_page = render_template('download_script.sh', **locals())
response = make_response(download_page)
response.headers["Content-Type"] = "text/plain"
return response
return send_file(BAM_BAI_DOWNLOAD_SCRIPT_NAME, as_attachment=True)


@data_bp.route('/download/files/<string:blob_name>')
Expand All @@ -153,29 +144,6 @@ def download_bam_url(blob_name=''):
return render_template('download.html', **locals())


@cache.memoize(timeout=60*60*24)
def generate_bam_download_script(release):
''' Generates signed downloads urls for every sequenced strain and creates a script to download them '''
script_content = ''
expiration = timedelta(days=7)
strain_listing = query_strains(release=release, is_sequenced=True)

for strain in strain_listing:
script_content += f'\n\n# Strain: {strain}'

bam_path = 'bam/{}.bam'.format(strain)
bam_signed_url = generate_download_signed_url_v4(bam_path, expiration=expiration)
if bam_signed_url:
script_content += '\nwget "{}"'.format(bam_signed_url)

bai_path = 'bam/{}.bam.bai'.format(strain)
bai_signed_url = generate_download_signed_url_v4(bai_path, expiration=expiration)
if bai_signed_url:
script_content += '\nwget "{}"'.format(bai_signed_url)

return script_content


# ============= #
# Browser #
# ============= #
Expand Down
40 changes: 39 additions & 1 deletion base/views/maintenance.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,52 @@
from base.utils.gcloud import generate_download_signed_url_v4
import os
import time

from datetime import timedelta
from flask import jsonify, Blueprint

from base.config import config
from base.constants import BAM_BAI_DOWNLOAD_SCRIPT_NAME
from base.views.api.api_strain import query_strains
from base.utils.cache import delete_expired_cache

maintenance_bp = Blueprint('maintenance',
__name__)


@maintenance_bp.route('/cleanup_cache', methods=['POST'])
@maintenance_bp.route('/cleanup_cache', methods=['GET'])
def cleanup_cache():
result = delete_expired_cache()
response = jsonify({"result": result})
response.status_code = 200
return response

@maintenance_bp.route('/create_bam_bai_download_script', methods=['GET'])
def create_bam_bai_download_script():
    ''' Regenerates the shell script that downloads the BAM/BAI files of every sequenced strain.

    Queries the sequenced strains of the configured ``DATASET_RELEASE``, requests a
    signed download URL (7-day expiration) for each strain's ``.bam`` and ``.bam.bai``
    blob, and writes one ``wget`` line per URL that could be generated to
    ``base/<BAM_BAI_DOWNLOAD_SCRIPT_NAME>``, replacing any previous script.

    Returns:
        An empty JSON response with HTTP status 200.
    '''
    filename = f'base/{BAM_BAI_DOWNLOAD_SCRIPT_NAME}'
    expiration = timedelta(days=7)

    # Query before touching the file so a failing strain lookup leaves the
    # previously generated script in place instead of an empty one.
    strain_listing = query_strains(release=config["DATASET_RELEASE"], is_sequenced=True)

    # Mode "w" truncates an existing script (no exists/remove dance needed) and
    # the context manager closes the handle even if URL signing raises.
    with open(filename, "w") as f:
        for strain in strain_listing:
            f.write(f'\n\n# Strain: {strain}')

            # The BAM and its index are handled identically; a falsy signed URL
            # means no wget line is emitted for that blob.
            for blob_path in (f'bam/{strain}.bam', f'bam/{strain}.bam.bai'):
                signed_url = generate_download_signed_url_v4(blob_path, expiration=expiration)
                if signed_url:
                    f.write(f'\nwget "{signed_url}"')

    response = jsonify({})
    response.status_code = 200
    return response
4 changes: 4 additions & 0 deletions cron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,8 @@ cron:

- description: delete_expired_cache_entries
url: /tasks/cleanup_cache
schedule: every 24 hours

- description: generate_bam_bai_signed_download_script
url: /tasks/create_bam_bai_download_script
schedule: every 24 hours

0 comments on commit 78d3286

Please sign in to comment.