fix links, cron, bam dl script

AndersenLab · Apr 15, 2021 · 78d3286 · 78d3286
1 parent c1ab67b
commit 78d3286
Show file tree

Hide file tree

Showing 9 changed files with 54 additions and 43 deletions.
diff --git a/.gitignore b/.gitignore
@@ -100,3 +100,4 @@ invoke
 
 .vscode/launch.json
 cloud_functions/heritability_run/strain_data.tsv
+base/bam_bai_signed_download_script.sh
diff --git a/base/application.py b/base/application.py
@@ -127,8 +127,6 @@ def register_extensions(app):
     csrf.exempt(maintenance_bp)
     app.config['csrf'] = csrf
     jwt.init_app(app)
-    CSRFProtect(app)
-    app.config['csrf'] = CSRFProtect(app)
 
 def register_blueprints(app):
     """Register blueprints with the Flask application."""

diff --git a/base/constants.py b/base/constants.py
@@ -13,6 +13,8 @@
 STRAIN_PHOTO_PATH = 'photos/Celegans/'
 
 USER_ROLES = [('user', 'User'), ('admin', 'Admin')]
+BAM_BAI_DOWNLOAD_SCRIPT_NAME = "bam_bai_signed_download_script.sh"
+
 
 class PRICES:
   DIVERGENT_SET = 160

diff --git a/base/static/content/help/FAQ.md b/base/static/content/help/FAQ.md
@@ -45,7 +45,7 @@ __[See our filter optimization report for further details](/static/reports/filte
 
 ### How are strains grouped by isotype? <a name='strain-groups'></a>
 
-In 2012, we [published](http://dx.doi.org/10.1038/ng.1050) genome-wide variant data from reduced representation sequencing of approximately 10% of the C. elegans genome (RAD-seq). Using these data, we grouped strains into isotypes. We also found many strains that were mislabeled as wild isolates but were instead N2 derivatives, recombinants from laboratory experiments, and mutagenesis screen isolates (detailed in <a href="#strain-issues">Strain issues</a>). These strains were not characterized further. For the isotypes, we chose one strain to be the isotype reference strain. This strain can be ordered through CeNDR [here]({{ url_for('strains.strains_catalog') }}).
+In 2012, we [published](http://dx.doi.org/10.1038/ng.1050) genome-wide variant data from reduced representation sequencing of approximately 10% of the C. elegans genome (RAD-seq). Using these data, we grouped strains into isotypes. We also found many strains that were mislabeled as wild isolates but were instead N2 derivatives, recombinants from laboratory experiments, and mutagenesis screen isolates (detailed in [Strain issues]({{ url_for('strains.strains_issues') }})). These strains were not characterized further. For the isotypes, we chose one strain to be the isotype reference strain. This strain can be ordered through CeNDR [here]({{ url_for('strains.strains_catalog') }}).
 
 After 2012, with advances in genome sequencing, we transitioned our sequencing to whole-genome short-read sequencing.
 All isotype reference strains were resequenced whole-genome.  The other strains within an isotype were not,

diff --git a/base/static/content/help/Variant-Prediction.md b/base/static/content/help/Variant-Prediction.md
@@ -1,6 +1,6 @@
 # Variant Prediction
 
-We use [SnpEff](https://snpeff.sourceforge.net/) to perform variant prediction. Variants are labeled as having a LOW, MODERATE, or HIGH impact based on the location and nature of the variant. We do not include MODIFIER annotations provided by SnpEff. Variant impacts are detailed within [The Sequence Ontology Project](https://www.sequenceontology.org/).
+We use [SnpEff](https://pcingola.github.io/SnpEff/) to perform variant prediction. Variants are labeled as having a LOW, MODERATE, or HIGH impact based on the location and nature of the variant. We do not include MODIFIER annotations provided by SnpEff. Variant impacts are detailed within [The Sequence Ontology Project](http://www.sequenceontology.org/).
 
 <table class="table table-striped table-hover">
 <thead>

diff --git a/base/templates/browser.html b/base/templates/browser.html
@@ -116,7 +116,7 @@ <h5>Tracks <a href="{{ url_for('primary.help_item', filename = 'Variant-Predicti
         </div>
 
         <div class="checkbox">
-          <label><input type="checkbox" class="track-select normal-track" value="Divergent Regions" />Divergent Regions Strain<br /> (<a href="">Lee <em>et al.</em> 2020)</a></label>
+          <label><input type="checkbox" class="track-select normal-track" value="Divergent Regions" />Divergent Regions Strain<br /> (<a href="https://andersenlab.org/publications/2020LeebioRxiv.pdf">Lee <em>et al.</em> 2020</a>)</label>
         </div>
 
         <h5>Variant Impact <a href="{{ url_for('primary.help_item', filename = 'Variant-Browser') }}#variant-effects"><span class="glyphicon glyphicon-question-sign"></span></a></h5>

diff --git a/base/views/data.py b/base/views/data.py
@@ -2,9 +2,9 @@
 
 from datetime import timedelta
 from simplejson.errors import JSONDecodeError
-from flask import make_response, render_template, Blueprint
+from flask import make_response, render_template, Blueprint, send_file
 
-from base.constants import GOOGLE_CLOUD_BUCKET
+from base.constants import BAM_BAI_DOWNLOAD_SCRIPT_NAME, GOOGLE_CLOUD_BUCKET
 from base.config import config
 from base.extensions import cache
 from base.models import Strain
@@ -123,25 +123,16 @@ def strain_issues(selected_release=None):
 @cache.cached(timeout=60*60*24)
 @jwt_required()
 def download_script(selected_release):
-  script_content = generate_bam_download_script(release=selected_release)
-  download_page = render_template('download_script.sh', **locals())
-  response = make_response(download_page)
-  response.headers["Content-Type"] = "text/plain"
-  return response
+  return send_file(BAM_BAI_DOWNLOAD_SCRIPT_NAME, as_attachment=True)
+
 
 
 @data_bp.route('/release/latest/download/download_strain_bams.sh')
 @data_bp.route('/release/<string:selected_release>/download/download_strain_bams.sh')
 @cache.cached(timeout=60*60*24)
 @jwt_required()
 def download_script_strain_v2(selected_release=None):
-  if selected_release is None:
-      selected_release = config['DATASET_RELEASE']
-  script_content = generate_bam_download_script(release=selected_release)
-  download_page = render_template('download_script.sh', **locals())
-  response = make_response(download_page)
-  response.headers["Content-Type"] = "text/plain"
-  return response
+  return send_file(BAM_BAI_DOWNLOAD_SCRIPT_NAME, as_attachment=True)
 
 
 @data_bp.route('/download/files/<string:blob_name>')
@@ -153,29 +144,6 @@ def download_bam_url(blob_name=''):
   return render_template('download.html', **locals())
 
 
-@cache.memoize(timeout=60*60*24)
-def generate_bam_download_script(release):
-  ''' Generates signed downloads urls for every sequenced strain and creates a script to download them ''' 
-  script_content = ''
-  expiration = timedelta(days=7)
-  strain_listing = query_strains(release=release, is_sequenced=True)
-
-  for strain in strain_listing:
-    script_content += f'\n\n# Strain: {strain}'
-
-    bam_path = 'bam/{}.bam'.format(strain)
-    bam_signed_url = generate_download_signed_url_v4(bam_path, expiration=expiration)
-    if bam_signed_url:
-      script_content += '\nwget "{}"'.format(bam_signed_url)
-
-    bai_path = 'bam/{}.bam.bai'.format(strain)
-    bai_signed_url = generate_download_signed_url_v4(bai_path, expiration=expiration)
-    if bai_signed_url:
-      script_content += '\nwget "{}"'.format(bai_signed_url)
-
-  return script_content
-
-
 # ============= #
 #   Browser     #
 # ============= #

diff --git a/base/views/maintenance.py b/base/views/maintenance.py
@@ -1,14 +1,52 @@
+from base.utils.gcloud import generate_download_signed_url_v4
+import os
 import time
+
+from datetime import timedelta
 from flask import jsonify, Blueprint
+
+from base.config import config
+from base.constants import BAM_BAI_DOWNLOAD_SCRIPT_NAME
+from base.views.api.api_strain import query_strains
 from base.utils.cache import delete_expired_cache
 
 maintenance_bp = Blueprint('maintenance',
                      __name__)
 
 
-@maintenance_bp.route('/cleanup_cache', methods=['POST'])
+@maintenance_bp.route('/cleanup_cache', methods=['GET'])
 def cleanup_cache():
   result = delete_expired_cache()
   response = jsonify({"result": result})
   response.status_code = 200
   return response
+
+@maintenance_bp.route('/create_bam_bai_download_script', methods=['GET'])
+def create_bam_bai_download_script():
+  ''' Regenerates the BAM/BAI download script for the configured dataset release.
+
+  Writes one '# Strain' header per sequenced strain, followed by a wget line
+  for each of its .bam and .bam.bai blobs that yields a signed URL (7-day expiry).
+  Returns an empty JSON object with status 200.
+  '''
+  filename = f'base/{BAM_BAI_DOWNLOAD_SCRIPT_NAME}'
+  expiration = timedelta(days=7)
+  # Query before opening the file so a failed lookup leaves the previous script untouched.
+  strain_listing = query_strains(release=config["DATASET_RELEASE"], is_sequenced=True)
+
+  # Mode "w" truncates any previous script; the with-block closes the handle
+  # even if URL signing raises part-way through the loop.
+  with open(filename, "w") as f:
+    for strain in strain_listing:
+      f.write(f'\n\n# Strain: {strain}')
+
+      # Blob paths in order: the BAM first, then its index.
+      for blob_path in (f'bam/{strain}.bam', f'bam/{strain}.bam.bai'):
+        signed_url = generate_download_signed_url_v4(blob_path, expiration=expiration)
+        # Skip blobs for which no signed URL could be generated.
+        if signed_url:
+          f.write(f'\nwget "{signed_url}"')
+
+  response = jsonify({})
+  response.status_code = 200
+  return response
diff --git a/cron.yaml b/cron.yaml
@@ -5,4 +5,8 @@ cron:
 
 - description: delete_expired_cache_entries
   url: /tasks/cleanup_cache
+  schedule: every 24 hours
+
+- description: generate_bam_bai_signed_download_script
+  url: /tasks/create_bam_bai_download_script
   schedule: every 24 hours
Original file line number	Diff line number	Diff line change
Expand Up		@@ -100,3 +100,4 @@ invoke

		.vscode/launch.json
		cloud_functions/heritability_run/strain_data.tsv
		base/bam_bai_signed_download_script.sh