diff --git a/.envrc b/.envrc
index 740d348d..a6e8f579 100644
--- a/.envrc
+++ b/.envrc
@@ -1,6 +1,7 @@
 export FLASK_APP=main:app
 export GAE_VERSION=development-`cat .travis.yml | grep 'VERSION_NUM=' | cut -f 2 -d '='`
-export GOOGLE_APPLICATION_CREDENTIALS=env_config/client-secret.json
+export CLOUD_CONFIG=1
+export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/env_config/client-secret.json
 export PYTHONPATH=$(pwd)
 export WERKZEUG_DEBUG_PIN=off
 export TEMPLATES_AUTO_RELOAD=1
diff --git a/.gitignore b/.gitignore
index 533f72d4..7490bb8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,7 +61,7 @@ target/
 
 #Random
 *.DS_Store
-*.json
+package-lock.json
 
 tmp/
 
@@ -72,6 +72,10 @@ mapping_worker/.envrc
 
 env_config/
 env_config.zip
+env_config_mti/
+env_config_mti.zip
+env_config_al/
+env_config_al.zip
 
 .python-version
 
@@ -92,4 +96,10 @@ photos/*
 
 *.done
 # Heritability run go tool
-invoke
\ No newline at end of file
+invoke
+
+.vscode/launch.json
+cloud_functions/heritability_run/strain_data.tsv
+base/bam_bai_signed_download_script.sh
+
+uploads/
diff --git a/.gitmodules b/.gitmodules
index e69de29b..484b7775 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,18 @@
+[submodule "external/generate-thumbnails"]
+	path = external/generate-thumbnails
+	url = https://github.com/AndersenLab/generate-thumbnails
+[submodule "external/h2calc"]
+	path = external/h2calc
+	url = https://github.com/AndersenLab/h2calc
+[submodule "external/nscalc"]
+	path = external/nscalc
+	url = https://github.com/AndersenLab/nscalc
+[submodule "external/ipcalc"]
+	path = external/ipcalc
+	url = https://github.com/AndersenLab/ipcalc
+[submodule "external/dockerfile"]
+	path = external/dockerfile
+	url = https://github.com/AndersenLab/dockerfile
+[submodule "external/NemaScan"]
+	path = external/NemaScan
+	url = https://github.com/AndersenLab/NemaScan
diff --git a/.travis-al.yml b/.travis-al.yml
new file mode 100644
index 00000000..56497392
--- /dev/null
+++ b/.travis-al.yml
@@ -0,0 +1,29 @@
+language: bash
+
+env:
+  - GOOGLE_CLOUD_BUCKET="elegansvariation.org" GOOGLE_CLOUD_PROJECT_ID="andersen-lab"
+
+install:
+- openssl aes-256-cbc -K $encrypted_f7a2e30d9b29_key -iv $encrypted_f7a2e30d9b29_iv -in env_config_al.zip.enc -out env_config.zip -d
+- unzip -qo env_config.zip
+- mv env_config_al env_config
+- export VERSION_NUM=1-5-9
+- export APP_CONFIG=master
+- export CLOUD_CONFIG=1
+- if [ "${TRAVIS_BRANCH}" != "master" ]; then export APP_CONFIG=development; fi;
+- export GAE_VERSION=${APP_CONFIG}-${VERSION_NUM}
+- export GOOGLE_APPLICATION_CREDENTIALS=env_config/client-secret.json
+- export GOOGLE_CLOUD_BUCKET=${GOOGLE_CLOUD_BUCKET}
+- export GOOGLE_CLOUD_PROJECT_ID=${GOOGLE_CLOUD_PROJECT_ID}
+
+
+deploy:
+  provider: gae
+  version: "${GAE_VERSION}"
+  project: "${GOOGLE_CLOUD_PROJECT_ID}"
+  keyfile: env_config/client-secret.json
+  on:
+    all_branches: true
+  no_promote: true
+  no_stop_previous_version: true
+  skip_cleanup: true
diff --git a/.travis-mti.yml b/.travis-mti.yml
new file mode 100644
index 00000000..5481654f
--- /dev/null
+++ b/.travis-mti.yml
@@ -0,0 +1,28 @@
+language: bash
+
+env:
+  - GOOGLE_CLOUD_BUCKET="elegansvariation" GOOGLE_CLOUD_PROJECT_ID="andersen-lab-302418"
+
+install:
+- openssl aes-256-cbc -K $encrypted_eb81f51f2e9b_key -iv $encrypted_eb81f51f2e9b_iv -in env_config_mti.zip.enc -out env_config.zip -d
+- unzip -qo env_config.zip
+- mv env_config_mti env_config
+- export VERSION_NUM=1-5-9
+- export APP_CONFIG=master
+- export CLOUD_CONFIG=1
+- if [ "${TRAVIS_BRANCH}" != "master" ]; then export APP_CONFIG=development; fi;
+- export GAE_VERSION=${APP_CONFIG}-${VERSION_NUM}
+- export GOOGLE_APPLICATION_CREDENTIALS=env_config/client-secret.json
+- export GOOGLE_CLOUD_BUCKET=${GOOGLE_CLOUD_BUCKET}
+- export GOOGLE_CLOUD_PROJECT_ID=${GOOGLE_CLOUD_PROJECT_ID}
+
+deploy:
+  provider: gae
+  version: "${GAE_VERSION}"
+  project: "${GOOGLE_CLOUD_PROJECT_ID}"
+  keyfile: env_config/client-secret.json
+  on:
+    all_branches: true
+  no_promote: true
+  no_stop_previous_version: true
+  skip_cleanup: true
diff --git a/.travis.yml b/.travis.yml
index 9d3e3e97..817b1625 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,18 +1,24 @@
 language: bash
 
+env:
+  - GOOGLE_CLOUD_BUCKET="elegansvariation.org" GOOGLE_CLOUD_PROJECT_ID="andersen-lab"
+
 install:
-- openssl aes-256-cbc -K $encrypted_53077b9a3e95_key -iv $encrypted_53077b9a3e95_iv -in env_config.zip.enc -out env_config.zip -d
+- openssl aes-256-cbc -K $encrypted_86f5a1ab1ccf_key -iv $encrypted_86f5a1ab1ccf_iv -in env_config.zip.enc -out env_config.zip -d
 - unzip -qo env_config.zip
-- export VERSION_NUM=1-5-3
-- export APP_CONFIG=master
+- export VERSION_NUM=9-9-9-9
+- export APP_CONFIG=development
+- export CLOUD_CONFIG=1
 - if [ "${TRAVIS_BRANCH}" != "master" ]; then export APP_CONFIG=development; fi;
 - export GAE_VERSION=${APP_CONFIG}-${VERSION_NUM}
 - export GOOGLE_APPLICATION_CREDENTIALS=env_config/client-secret.json
+- export GOOGLE_CLOUD_BUCKET=${GOOGLE_CLOUD_BUCKET}
+- export GOOGLE_CLOUD_PROJECT_ID=${GOOGLE_CLOUD_PROJECT_ID}
 
 deploy:
   provider: gae
   version: "${GAE_VERSION}"
-  project: andersen-lab
+  project: "${GOOGLE_CLOUD_PROJECT_ID}"
   keyfile: env_config/client-secret.json
   on:
     all_branches: true
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 3a474210..3e6e89b0 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,6 +1,6 @@
 ## Making Changes
 
-* Propose an issue or change you would like ot make using the issue tracker.
+* Propose an issue or change you would like to make using the issue tracker.
 * Create a fork of the development branch. Once you have completed your work, create a pull request.
 * Rebase the latest development branch changes if updates were made in the interim.
-* email danielecook@gmail.com if you have further questions.
+* email danielecook@gmail.com or sam.wachspress@gmail.com if you have further questions.
diff --git a/Dockerfile b/Dockerfile
index 3312cf15..0a52cb5e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -16,6 +16,11 @@ tabix \
 graphviz \
 libgraphviz-dev \
 pkg-config \
+libxml2 \
+xmlsec1 \
+libxml2-dev \
+libxmlsec1-dev \
+libxmlsec1-openssl \
 && rm -rf /var/lib/apt/lists/*
 
 ENV BCFTOOLS_BIN="bcftools-1.10.tar.bz2" \
@@ -56,6 +61,6 @@ ADD . /app
 RUN FLASK_APP=main:app GAE_VERSION=blank-blank flask
 # Download the database; GAE_VERSION set as dummy variable
-RUN FLASK_APP=main:app GAE_VERSION=blank-blank flask download_db
+# RUN FLASK_APP=main:app GAE_VERSION=blank-blank flask download_db
 
 CMD gunicorn -b :$PORT main:app
\ No newline at end of file
diff --git a/app.yaml b/app.yaml
index 8add486c..688f422d 100644
--- a/app.yaml
+++ b/app.yaml
@@ -1,6 +1,10 @@
+beta_settings:
+  cloud_sql_instances: andersen-lab:us-central1:cendr
+
 runtime: custom
+service: dev
 env: flex
-entrypoint: gunicorn -b :$PORT main:app
+entrypoint: gunicorn -b :$PORT main:app --ssl-version TLSv1_2
 
 runtime_config:
     python_version: 3
@@ -32,15 +36,15 @@ resources:
 
 liveness_check:
   path: "/liveness_check"
-  check_interval_sec: 30
-  timeout_sec: 4
+  check_interval_sec: 60
+  timeout_sec: 10
   failure_threshold: 2
   success_threshold: 2
 
 readiness_check:
   path: "/readiness_check"
-  check_interval_sec: 5
-  timeout_sec: 4
+  check_interval_sec: 120
+  timeout_sec: 10
   failure_threshold: 2
   success_threshold: 2
   app_start_timeout_sec: 300
\ No newline at end of file
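The relaxed health-check intervals above point at routes served by the `check_bp` blueprint that `base/application.py` registers at the root prefix; the blueprint itself is not part of this diff, so the following is only a sketch of what such GAE flex liveness/readiness endpoints typically look like (names assumed from the paths in `app.yaml`):

```python
from flask import Blueprint

check_bp = Blueprint('check', __name__)

@check_bp.route('/liveness_check')
def liveness_check():
    # GAE flex restarts the instance if this stops returning 2xx
    return 'ok', 200

@check_bp.route('/readiness_check')
def readiness_check():
    # GAE flex only routes traffic to instances that pass this check
    return 'ok', 200
```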
diff --git a/base/application.py b/base/application.py
index 5de26672..84db0220 100644
--- a/base/application.py
+++ b/base/application.py
@@ -1,31 +1,40 @@
+from datetime import datetime
 import os
 import json
 import requests
+
 from os.path import basename
-from base.config import config
 from flask import Flask, render_template
 from flask_wtf.csrf import CSRFProtect
-from base.utils.text_utils import render_markdown
 from werkzeug.middleware.proxy_fix import ProxyFix
 from werkzeug.exceptions import HTTPException
+
+from base.constants import GOOGLE_CLOUD_BUCKET
+from base.config import config
+from base.utils.text_utils import render_markdown
 from base.manage import (initdb,
                          update_strains,
                          update_credentials,
-                         decrypt_credentials,
-                         download_db)
+                         decrypt_credentials)
 
 # --------- #
 #  Routing  #
 # --------- #
 from base.views.about import about_bp
 from base.views.primary import primary_bp
-from base.views.strains import strain_bp
+from base.views.strains import strains_bp
 from base.views.order import order_bp
 from base.views.data import data_bp
 from base.views.mapping import mapping_bp
 from base.views.gene import gene_bp
 from base.views.user import user_bp
+from base.views.maintenance import maintenance_bp
+from base.views.admin.admin import admin_bp
+from base.views.admin.users import users_bp
+from base.views.admin.data import data_admin_bp
+
+
 
 # Tools
 from base.views.tools import (tools_bp,
@@ -42,9 +51,9 @@
 from base.views.api.api_data import api_data_bp
 
 # Auth
-from base.auth import (auth_bp,
-                       google_bp,
-                       github_bp)
+from base.views.auth import (auth_bp,
+                             google_bp,
+                             saml_bp)
 
 # ---- End Routing ---- #
 
@@ -54,7 +63,8 @@
                              cache,
                              debug_toolbar,
                              sslify,
-                             sqlalchemy)
+                             sqlalchemy,
+                             jwt)
 
 # Template filters
 from base.filters import (comma, format_release)
@@ -86,7 +96,7 @@ def configure_ssl(app):
         # Running on server
         app.debug = False
         # Ignore leading slash of urls; skips must use start of path
-        sslify(app)
+        sslify(app, skips=['tasks'])
     elif app.config['DEBUG']:
         debug_toolbar(app)
         app.config['PRESERVE_CONTEXT_ON_EXCEPTION'] = True
@@ -97,8 +107,7 @@ def register_commands(app):
     for command in [initdb,
                     update_strains,
                     update_credentials,
-                    decrypt_credentials,
-                    download_db]:
+                    decrypt_credentials]:
         app.cli.add_command(command)
 
 
@@ -110,20 +119,26 @@ def register_template_filters(app):
 def register_extensions(app):
     markdown(app)
     cache.init_app(app, config={'CACHE_TYPE': 'base.utils.cache.datastore_cache'})
-    sqlalchemy(app)
-    CSRFProtect(app)
-    app.config['csrf'] = CSRFProtect(app)
-
+    sqlalchemy.init_app(app)
+    # protect all routes (except the ones listed) from cross site request forgery
+    csrf = CSRFProtect(app)
+    csrf.exempt(auth_bp)
+    csrf.exempt(saml_bp)
+    csrf.exempt(maintenance_bp)
+    app.config['csrf'] = csrf
+    jwt.init_app(app)
 
 def register_blueprints(app):
     """Register blueprints with the Flask application."""
     app.register_blueprint(primary_bp, url_prefix='')
     app.register_blueprint(about_bp, url_prefix='/about')
-    app.register_blueprint(strain_bp, url_prefix='/strain')
+    app.register_blueprint(strains_bp, url_prefix='/strains')
     app.register_blueprint(order_bp, url_prefix='/order')
     app.register_blueprint(data_bp, url_prefix='/data')
    app.register_blueprint(mapping_bp, url_prefix='')
     app.register_blueprint(gene_bp, url_prefix='/gene')
+
+    # User
     app.register_blueprint(user_bp, url_prefix='/user')
 
     # Tools
@@ -138,33 +153,51 @@ def register_blueprints(app):
     app.register_blueprint(api_data_bp, url_prefix='/api')
 
     # Auth
-    app.register_blueprint(auth_bp, url_prefix='')
+    app.register_blueprint(auth_bp, url_prefix='/auth')
+    app.register_blueprint(saml_bp, url_prefix='/saml')
     app.register_blueprint(google_bp, url_prefix='/login')
-    app.register_blueprint(github_bp, url_prefix='/login')
 
-    # Healthchecks
+    # Admin
+    app.register_blueprint(admin_bp, url_prefix='/admin')
+    app.register_blueprint(users_bp, url_prefix='/admin/users')
+    app.register_blueprint(data_admin_bp, url_prefix='/admin/data')
+
+    # Healthchecks/Maintenance
+    app.register_blueprint(maintenance_bp, url_prefix='/tasks')
     app.register_blueprint(check_bp, url_prefix='')
 
 
 def gs_static(url, prefix='static'):
-    return f"https://storage.googleapis.com/elegansvariation.org/{prefix}/{url}"
+    return f"https://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/{prefix}/{url}"
 
 
 def configure_jinja(app):
     # Injects "contexts" into templates
     @app.context_processor
     def inject():
-        return dict(version=os.environ.get("GAE_VERSION", "-9-9-9").split("-", 1)[1].replace("-", "."),
-                    json=json,
-                    list=list,
-                    str=str,
-                    int=int,
-                    len=len,
-                    gs_static=gs_static,
-                    basename=basename,
-                    render_markdown=render_markdown)
-
-
+        return dict(version=os.environ.get("GAE_VERSION", "-9-9-9").split("-", 1)[1].replace("-", "."),
+                    json=json,
+                    list=list,
+                    str=str,
+                    int=int,
+                    len=len,
+                    gs_static=gs_static,
+                    basename=basename,
+                    render_markdown=render_markdown)
+
+    # Datetime filters for Jinja
+    @app.template_filter('date_format')
+    def _jinja2_filter_datetime(date, fmt=None):
+        if fmt:
+            return date.strftime(fmt)
+        else:
+            return date.strftime('%c')
+
+'''
+2021-04-14 17:26:51.348674+00:00
+
+'%Y-%m-%d %H:%M:%S.%f+%z'
+'''
 
 def register_errorhandlers(app):
 
     def render_error(e="generic"):
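The `date_format` filter added in `configure_jinja()` can be exercised outside the full app; a minimal sketch using plain `jinja2` and a hypothetical template string:

```python
from datetime import datetime
from jinja2 import Environment

def date_format(date, fmt=None):
    # Same logic as the filter registered in base/application.py above
    return date.strftime(fmt) if fmt else date.strftime('%c')

env = Environment()
env.filters['date_format'] = date_format

template = env.from_string("{{ created_on | date_format('%Y-%m-%d %H:%M') }}")
print(template.render(created_on=datetime(2021, 4, 14, 17, 26, 51)))
# -> 2021-04-14 17:26
```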
diff --git a/base/auth.py b/base/auth.py
deleted file mode 100644
index d05f09c5..00000000
--- a/base/auth.py
+++ /dev/null
@@ -1,97 +0,0 @@
-import arrow
-import os
-from flask import (redirect,
-                   render_template,
-                   url_for,
-                   session,
-                   request,
-                   flash)
-from functools import wraps
-from base.models import user_ds
-from base.utils.data_utils import unique_id
-from slugify import slugify
-from logzero import logger
-
-from flask_dance.contrib.google import make_google_blueprint, google
-from flask_dance.contrib.github import make_github_blueprint, github
-from flask_dance.consumer import oauth_authorized
-
-from flask import Blueprint
-auth_bp = Blueprint('auth',
-                    __name__,
-                    template_folder='')
-
-google_bp = make_google_blueprint(scope=["https://www.googleapis.com/auth/userinfo.profile",
-                                         "https://www.googleapis.com/auth/userinfo.email"],
-                                  offline=True)
-github_bp = make_github_blueprint(scope="user:email")
-# dropbox_bp = make_dropbox_blueprint()
-
-
-@auth_bp.route("/login/select", methods=['GET'])
-def choose_login(error=None):
-    # Relax scope for Google
-    if not session.get("login_referrer", "").endswith("/login/select"):
-        session["login_referrer"] = request.referrer
-    os.environ['OAUTHLIB_RELAX_TOKEN_SCOPE'] = "true"
-    VARS = {'page_title': 'Choose Login'}
-    if error:
-        flash(error, 'danger')
-    return render_template('login.html', **VARS)
-
-
-@oauth_authorized.connect
-def authorized(blueprint, token):
-    if google.authorized:
-        user_info = google.get("/oauth2/v2/userinfo")
-        assert user_info.ok
-        user_info = {'google': user_info.json()}
-        user_email = user_info['google']['email'].lower()
-    elif github.authorized:
-        user_emails = github.get("/user/emails")
-        user_email = [x for x in user_emails.json() if x['primary']][0]["email"].lower()
-        user_info = {'github': github.get('/user').json()}
-        user_info['github']['email'] = user_email
-    else:
-        flash("Error logging in!")
-        return redirect(url_for("auth.choose_login"))
-
-    # Create or get existing user.
-    user = user_ds(user_email)
-    if not user._exists:
-        user.user_email = user_email
-        user.user_info = user_info
-        user.email_confirmation_code = unique_id()
-        user.user_id = unique_id()[0:8]
-        user.username = slugify("{}_{}".format(user_email.split("@")[0], unique_id()[0:4]))
-
-    user.last_login = arrow.utcnow().datetime
-    user.save()
-
-    session['user'] = user.to_dict()
-    logger.debug(session)
-
-    flash("Successfully logged in!", 'success')
-    return redirect(session.get("login_referrer", url_for('primary.primary')))
-
-
-def login_required(f):
-    @wraps(f)
-    def func(*args, **kwargs):
-        if not session.get('user'):
-            logger.info(session)
-            with app.app_context():
-                session['redirect_url'] = request.url
-            return redirect(url_for('auth.choose_login'))
-        return f(*args, **kwargs)
-    return func
- """ - session.clear() - flash("Successfully logged out", "success") - return redirect(request.referrer) diff --git a/base/cloud_config.py b/base/cloud_config.py new file mode 100644 index 00000000..5998467d --- /dev/null +++ b/base/cloud_config.py @@ -0,0 +1,203 @@ +# Application Cloud Configuration for Site Static Content hosted externally +import os +import shutil +import json + +from os import path +from logzero import logger +from google.oauth2 import service_account +from google.cloud import datastore, storage + +from base.constants import REPORT_V1_FILE_LIST, REPORT_V2_FILE_LIST, GOOGLE_CLOUD_BUCKET +from base.utils.data_utils import dump_json, unique_id + +class CloudConfig: + + ds_client = None + storage_client = None + kind = 'cloud-config' + default_cc = { 'releases' : [{'dataset': '20210121', 'wormbase': 'WS276', 'version': 'v2'}, + {'dataset': '20200815', 'wormbase': 'WS276', 'version': 'v2'}, + {'dataset': '20180527', 'wormbase': 'WS263', 'version': 'v1'}, + {'dataset': '20170531', 'wormbase': 'WS258', 'version': 'v1'}, + {'dataset': '20160408', 'wormbase': 'WS245', 'version': 'v1'}] } + + def __init__(self, name, cc=default_cc, kind_prefix='', local=True): + self.kind = '{}{}'.format(kind_prefix, self.kind) + self.name = name + self.filename = f"{name}.txt" + self.cc = cc + self.local = local + + def get_ds_client(self): + if not self.ds_client: + self.ds_client = datastore.Client(credentials=service_account.Credentials.from_service_account_file('env_config/client-secret.json')) + return self.ds_client + + def get_storage_client(self): + if not self.storage_client: + self.storage_client = storage.Client(credentials=service_account.Credentials.from_service_account_file('env_config/client-secret.json')) + return self.storage_client + + def download_file(self, name, fname): + client = self.get_storage_client() + bucket = client.get_bucket(GOOGLE_CLOUD_BUCKET) + blob = bucket.blob(name) + blob.download_to_file(open(fname, 'wb')) + + def ds_save(self): + data = {'cloud_config': self.cc} + m = datastore.Entity(key=self.get_ds_client().key(self.kind, self.name)) + for key, value in data.items(): + if isinstance(value, dict): + m[key] = 'JSON:' + dump_json(value) + else: + m[key] = value + logger.debug(f"store: {self.kind} - {self.name}") + self.get_ds_client().put(m) + + def ds_load(self): + """ Retrieves a cloud config object from datastore """ + result = self.get_ds_client().get(self.get_ds_client().key(self.kind, self.name)) + logger.debug(f"get: {self.kind} - {self.name}") + try: + result_out = {'_exists': True} + for k, v in result.items(): + if isinstance(v, str) and v.startswith("JSON:"): + result_out[k] = json.loads(v[5:]) + elif v: + result_out[k] = v + self.cc = result_out.get('cloud_config') + except AttributeError: + return None + + def file_load(self): + """ Retrieves a cloud config object from a local file """ + if path.exists(self.filename): + with open(self.filename) as json_file: + data = json.load(json_file) + cc = data.get('cloud_config') if data else None + self.cc = cc + + def file_save(self): + """ Saves a cloud config object to a local file """ + with open(self.filename, 'w') as outfile: + data = {'cloud_config': self.cc} + json.dump(data, outfile) + + def save(self): + if self.local: + self.file_save() + else: + self.ds_save() + + def load(self): + if self.local: + self.file_load() + else: + self.ds_load() + + def remove_release(self, dataset): + ''' Removes a data release from the cloud config object ''' + releases = self.cc['releases'] + for i, r in 
diff --git a/base/config.py b/base/config.py
index 5c513097..c9e82dac 100644
--- a/base/config.py
+++ b/base/config.py
@@ -1,57 +1,79 @@
 # Application Configuration
 import os
 import yaml
+
+from logzero import logger
 from base.utils.data_utils import json_encoder
+from base.constants import DEFAULT_CLOUD_CONFIG
+from base.cloud_config import CloudConfig
+
+# Whether or not to load config properties from cloud datastore
+try:
+    CLOUD_CONFIG = os.environ['CLOUD_CONFIG']
+except:
+    CLOUD_CONFIG = 0
 
 # CeNDR Version
 APP_CONFIG, CENDR_VERSION = os.environ['GAE_VERSION'].split("-", 1)
 if APP_CONFIG not in ['development', 'master']:
-    APP_CONFIG = 'development'
+    APP_CONFIG = 'development'
 CENDR_VERSION = CENDR_VERSION.replace("-", '.')
 
 # BUILDS AND RELEASES
 # The first release is the current release
 # (RELEASE, ANNOTATION_GENOME)
-RELEASES = [("20200815", "WS276"),
-            ("20180527", "WS263"),
-            ("20170531", "WS258"),
-            ("20160408", "WS245")]
+RELEASES = [("20210121", "WS276"), ("20200815", "WS276"), ("20180527", "WS263"), ("20170531", "WS258"), ("20160408", "WS245")]
 
 # The most recent release
 DATASET_RELEASE, WORMBASE_VERSION = RELEASES[0]
 
-# SQLITE DATABASE
-SQLITE_PATH = f"base/cendr.{DATASET_RELEASE}.{WORMBASE_VERSION}.db"
-
-
 def load_yaml(path):
-    return yaml.load(open(path), Loader=yaml.SafeLoader)
-
+    return yaml.load(open(path), Loader=yaml.SafeLoader)
 
 # CONFIG
 def get_config(APP_CONFIG):
-    """Load all configuration information including
-    constants defined above.
-
-    (BASE_VARS are the same regardless of whether we are debugging or in production)
-    """
-    config = dict()
-    BASE_VARS = load_yaml("env_config/base.yaml")
-    APP_CONFIG_VARS = load_yaml(f"env_config/{APP_CONFIG}.yaml")
-    config.update(BASE_VARS)
-    config.update(APP_CONFIG_VARS)
-    # Add configuration variables
-    # Remove base prefix for SQLAlchemy as it is loaded
-    # from application folder
-    config["SQLALCHEMY_DATABASE_URI"] = f"sqlite:///{SQLITE_PATH}".replace("base/", "")
-    config['json_encoder'] = json_encoder
-    config.update({"CENDR_VERSION": CENDR_VERSION,
-                   "APP_CONFIG": APP_CONFIG,
-                   "DATASET_RELEASE": DATASET_RELEASE,
-                   "WORMBASE_VERSION": WORMBASE_VERSION,
-                   "RELEASES": RELEASES})
-    return config
-
-
-# Generate the configuration
+    """Load all configuration information including
+    constants defined above.
+
+    (BASE_VARS are the same regardless of whether we are debugging or in production)
+    """
+    config = dict()
+    BASE_VARS = load_yaml("env_config/base.yaml")
+    APP_CONFIG_VARS = load_yaml(f"env_config/{APP_CONFIG}.yaml")
+
+    logger.info(f'APP_CONFIG: {APP_CONFIG}')
+    DB_USER = APP_CONFIG_VARS['PSQL_DB_USERNAME']
+    DB_PASS = APP_CONFIG_VARS['PSQL_DB_PASSWORD']
+    CONNECTION = APP_CONFIG_VARS['PSQL_DB_CONNECTION_NAME']
+    DB = APP_CONFIG_VARS['PSQL_DB_NAME']
+
+
+    config.update(BASE_VARS)
+    config.update(APP_CONFIG_VARS)
+
+    config['json_encoder'] = json_encoder
+    config.update({"CENDR_VERSION": CENDR_VERSION,
+                   "APP_CONFIG": APP_CONFIG,
+                   "DATASET_RELEASE": DATASET_RELEASE,
+                   "WORMBASE_VERSION": WORMBASE_VERSION,
+                   "RELEASES": RELEASES})
+
+    config['DS_PREFIX'] = ''
+    if APP_CONFIG == 'development':
+        config['DS_PREFIX'] = 'DEV_'
+
+    cc = None
+    local = True if CLOUD_CONFIG == 1 else False
+    # Add configuration variables from cloud
+    cc = CloudConfig(DEFAULT_CLOUD_CONFIG, kind_prefix=config['DS_PREFIX'], local=local)
+    cc.load()
+    cc.get_external_content()
+    props = cc.get_properties()
+    config.update(props)
+    config['cloud_config'] = cc
+
+    config['SQLALCHEMY_DATABASE_URI'] = f'postgres+psycopg2://{DB_USER}:{DB_PASS}@/{DB}?host=/cloudsql/{CONNECTION}'
+
+    return config
+
 
 config = get_config(APP_CONFIG)
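The URI assembled at the end of `get_config()` targets a Cloud SQL unix socket. With hypothetical credentials (the connection name matches `cloud_sql_instances` in `app.yaml` above), it comes out as:

```python
# Placeholder values for illustration only; real values come from the
# env_config YAML files, which are encrypted and not part of this diff.
DB_USER, DB_PASS, DB = 'admin', 'password', 'cendr'
CONNECTION = 'andersen-lab:us-central1:cendr'

uri = f'postgres+psycopg2://{DB_USER}:{DB_PASS}@/{DB}?host=/cloudsql/{CONNECTION}'
print(uri)
# -> postgres+psycopg2://admin:password@/cendr?host=/cloudsql/andersen-lab:us-central1:cendr
```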
diff --git a/base/constants.py b/base/constants.py
index 1111a850..4e0478ad 100644
--- a/base/constants.py
+++ b/base/constants.py
@@ -6,15 +6,29 @@
 Author: Daniel E. Cook (danielecook@gmail.com)
 
 """
-from base.config import WORMBASE_VERSION
+import os
+
+WORMBASE_VERSION = 'WS276'
+
+STRAIN_PHOTO_PATH = 'photos/Celegans/'
+
+USER_ROLES = [('user', 'User'), ('admin', 'Admin')]
+BAM_BAI_DOWNLOAD_SCRIPT_NAME = "bam_bai_signed_download_script.sh"
 
 class PRICES:
-    DIVERGENT_SET = 160
-    STRAIN_SET = 640
-    STRAIN = 15
-    SHIPPING = 65
+    DIVERGENT_SET = 160
+    STRAIN_SET = 640
+    STRAIN = 15
+    SHIPPING = 65
+
+
+SHIPPING_OPTIONS = [('UPS', 'UPS'),
+                    ('FEDEX', 'FEDEX'),
+                    ('Flat Rate Shipping', '${} Flat Fee'.format(PRICES.SHIPPING))]
+
+PAYMENT_OPTIONS = [('check', 'Check'),
+                   ('credit_card', 'Credit Card')]
 
 # Maps chromosome in roman numerals to integer
 CHROM_NUMERIC = {"I": 1,
@@ -25,6 +39,12 @@ class PRICES:
                  "X": 6,
                  "MtDNA": 7}
 
+
+
+GOOGLE_CLOUD_BUCKET = 'elegansvariation.org'
+GOOGLE_CLOUD_PROJECT_ID = 'andersen-lab'
+GOOGLE_CLOUD_LOCATION = 'us-central1'
+
 # WI Strain Info Dataset
 GOOGLE_SHEETS = {"orders": "1BCnmdJNRjQR3Bx8fMjD_IlTzmh3o7yj8ZQXTkk6tTXM",
                  "WI": "1V6YHzblaDph01sFDI8YK_fP0H7sVebHQTXypGdiQIjI"}
@@ -39,35 +59,31 @@ class URLS:
     URLs are stored here so they can be easily integrated into the database
     for provenance purposes.
     """
-    #
-    # AWS URLS
+    # BAMs are now hosted on google cloud buckets
     #
-    BAM_URL_PREFIX = "https://s3.us-east-2.amazonaws.com/elegansvariation.org/bam"
+    BAM_URL_PREFIX = f"https://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/bam"
+
+    # Variant Annotation CSV
+    STRAIN_VARIANT_ANNOTATION_URL = "https://storage.googleapis.com/elegansvariation.org/db/WI.20210121.strain-annotation.bcsq.20210401.csv"
 
     """
     Wormbase URLs
     """
-
     # Gene GTF
-    GENE_GTF_URL = f"ftp://ftp.wormbase.org/pub/wormbase/releases/{WORMBASE_VERSION}/species/c_elegans/PRJNA13758/c_elegans.PRJNA13758.{WORMBASE_VERSION}.canonical_geneset.gtf.gz"
-
+    GENE_GTF_URL = "ftp://ftp.wormbase.org/pub/wormbase/releases/{WB}/species/c_elegans/PRJNA13758/c_elegans.PRJNA13758.{WB}.canonical_geneset.gtf.gz"
     # GENE GFF_URL
-    GENE_GFF_URL = f"ftp://ftp.wormbase.org/pub/wormbase/releases/{WORMBASE_VERSION}/species/c_elegans/PRJNA13758/c_elegans.PRJNA13758.{WORMBASE_VERSION}.annotations.gff3.gz"
-
+    GENE_GFF_URL = "ftp://ftp.wormbase.org/pub/wormbase/releases/{WB}/species/c_elegans/PRJNA13758/c_elegans.PRJNA13758.{WB}.annotations.gff3.gz"
     # Maps wormbase ID to locus name
     GENE_IDS_URL = "ftp://ftp.wormbase.org/pub/wormbase/species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758.current.geneIDs.txt.gz"
-
     # Lists C. elegans orthologs
     ORTHOLOG_URL = "ftp://ftp.wormbase.org/pub/wormbase/species/c_elegans/PRJNA13758/annotation/orthologs/c_elegans.PRJNA13758.current_development.orthologs.txt"
 
     #
     # Ortholog URLs
     #
-
     # Homologene
     HOMOLOGENE_URL = 'https://ftp.ncbi.nih.gov/pub/HomoloGene/current/homologene.data'
-
     # Taxon IDs
     TAXON_ID_URL = 'ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz'
 
@@ -91,4 +107,11 @@ class URLS:
 
 TABLE_COLORS = {"LOW": 'success',
                 "MODERATE": 'warning',
-                "HIGH": 'danger'}
\ No newline at end of file
+                "HIGH": 'danger'}
+
+
+DEFAULT_CLOUD_CONFIG = 'default'
+
+REPORT_VERSIONS = ['', 'v1', 'v2']
+REPORT_V1_FILE_LIST = ['methods.md']
+REPORT_V2_FILE_LIST = ['alignment_report.html', 'concordance_report.html', 'gatk_report.html', 'methods.md', 'reads_mapped_by_strain.tsv', 'release_notes.md']
\ No newline at end of file
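Note that `GENE_GTF_URL` and `GENE_GFF_URL` are now plain `{WB}` templates rather than f-strings pinned to a single `WORMBASE_VERSION`, so callers choose the release at download time, exactly as `download_external_data()` in `base/database/__init__.py` below does:

```python
from base.constants import URLS

gtf_url = URLS.GENE_GTF_URL.format(WB='WS276')
gff_url = URLS.GENE_GFF_URL.format(WB='WS276')
```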
diff --git a/base/database/__init__.py b/base/database/__init__.py
index bc094675..3ae82224 100644
--- a/base/database/__init__.py
+++ b/base/database/__init__.py
@@ -1,179 +1,238 @@
 import os
 import arrow
 import pickle
-
 from rich.console import Console
+
 from base import constants
-from base.constants import URLS
+from base.constants import URLS, GOOGLE_CLOUD_BUCKET
+from base.config import config
 from base.utils.data_utils import download
-from base.utils.gcloud import upload_file
-from base.models import (db,
+from base.utils.decorators import timeit
+from base.models import (StrainAnnotatedVariants, db,
                          Strain,
                          Homologs,
                          Metadata,
                          WormbaseGene,
                          WormbaseGeneSummary)
-from base.config import (CENDR_VERSION,
-                         APP_CONFIG,
-                         DATASET_RELEASE,
-                         WORMBASE_VERSION,
-                         RELEASES)
 
 # ETL Pipelines - fetch and format data for
-# input into the sqlite database
+# input into the postgres database
 from base.database.etl_homologene import fetch_homologene
 from base.database.etl_strains import fetch_andersen_strains
 from base.database.etl_wormbase import (fetch_gene_gff_summary,
                                         fetch_gene_gtf,
                                         fetch_orthologs)
+from base.database.etl_variant_annot import fetch_strain_variant_annotation_data
 
-console = Console()
 DOWNLOAD_PATH = ".download"
-
+console = Console()
 
 def download_fname(download_path: str, download_url: str):
-    return os.path.join(download_path,
-                        download_url.split("/")[-1])
-
+    return os.path.join(download_path,
+                        download_url.split("/")[-1])
 
-def initialize_sqlite_database(sel_wormbase_version,
+@timeit
+def initialize_postgres_database(sel_wormbase_version,
                                strain_only=False):
-    """Create a static sqlite database
-    Args:
-        sel_wormbase_version - e.g. WS245
-
-    Generate an sqlite database
-    """
-    start = arrow.utcnow()
-    console.log("Initializing Database")
-
-    SQLITE_PATH = f"base/cendr.{DATASET_RELEASE}.{sel_wormbase_version}.db"
-    SQLITE_BASENAME = os.path.basename(SQLITE_PATH)
-
-    # Download wormbase files
-    if strain_only is False:
-        if os.path.exists(SQLITE_PATH):
-            os.remove(SQLITE_PATH)
-
-        if not os.path.exists(DOWNLOAD_PATH):
-            os.makedirs(DOWNLOAD_PATH)
-
-        # Parallel URL download
-        console.log("Downloading Wormbase Data")
-        download([URLS.GENE_GFF_URL,
-                  URLS.GENE_GTF_URL,
-                  URLS.GENE_IDS_URL,
-                  URLS.HOMOLOGENE_URL,
-                  URLS.ORTHOLOG_URL,
-                  URLS.TAXON_ID_URL],
-                 DOWNLOAD_PATH)
-
-        gff_fname = download_fname(DOWNLOAD_PATH, URLS.GENE_GFF_URL)
-        gtf_fname = download_fname(DOWNLOAD_PATH, URLS.GENE_GTF_URL)
-        gene_ids_fname = download_fname(DOWNLOAD_PATH, URLS.GENE_IDS_URL)
-        homologene_fname = download_fname(DOWNLOAD_PATH, URLS.HOMOLOGENE_URL)
-        ortholog_fname = download_fname(DOWNLOAD_PATH, URLS.ORTHOLOG_URL)
-
-    from base.application import create_app
-    app = create_app()
-    app.config['SQLALCHEMY_DATABASE_URI'] = f"sqlite:///{SQLITE_BASENAME}"
-    app.app_context().push()
-
-    if strain_only is True:
-        db.metadata.drop_all(bind=db.engine, checkfirst=True, tables=[Strain.__table__])
-        db.metadata.create_all(bind=db.engine, tables=[Strain.__table__])
-    else:
-        db.create_all(app=app)
-    db.session.commit()
-
-    console.log(f"Created {SQLITE_PATH}")
-
-    ################
-    # Load Strains #
-    ################
-    console.log('Loading strains...')
-    db.session.bulk_insert_mappings(Strain, fetch_andersen_strains())
-    db.session.commit()
-    console.log(f"Inserted {Strain.query.count()} strains")
-
-    if strain_only is True:
-        console.log('Finished loading strains')
-        return
-
-    ################
-    # Set metadata #
-    ################
-    console.log('Inserting metadata')
-    metadata = {}
-    metadata.update(vars(constants))
-    metadata.update({"CENDR_VERSION": CENDR_VERSION,
-                     "APP_CONFIG": APP_CONFIG,
-                     "DATASET_RELEASE": DATASET_RELEASE,
-                     "WORMBASE_VERSION": sel_wormbase_version,
-                     "RELEASES": RELEASES,
-                     "DATE": arrow.utcnow()})
-    for k, v in metadata.items():
-        if not k.startswith("_"):
-            # For nested constants:
-            if type(v) == type:
-                for name in [x for x in dir(v) if not x.startswith("_")]:
-                    key_val = Metadata(key="{}/{}".format(k, name),
-                                       value=getattr(v, name))
-                    db.session.add(key_val)
-            else:
-                key_val = Metadata(key=k, value=str(v))
-                db.session.add(key_val)
-
-    db.session.commit()
-
-    ##############
-    # Load Genes #
-    ##############
-    console.log('Loading summary gene table')
-    genes = fetch_gene_gff_summary(gff_fname)
-    db.session.bulk_insert_mappings(WormbaseGeneSummary, genes)
-    db.session.commit()
-
-    console.log('Loading gene table')
-    db.session.bulk_insert_mappings(WormbaseGene, fetch_gene_gtf(gtf_fname, gene_ids_fname))
-    gene_summary = db.session.query(WormbaseGene.feature,
-                                    db.func.count(WormbaseGene.feature)) \
-                             .group_by(WormbaseGene.feature) \
-                             .all()
-    gene_summary = '\n'.join([f"{k}: {v}" for k, v in gene_summary])
-    console.log(f"============\nGene Summary\n------------\n{gene_summary}\n============")
-
-    ###############################
-    # Load homologs and orthologs #
-    ###############################
-    console.log('Loading homologs from homologene')
-    db.session.bulk_insert_mappings(Homologs, fetch_homologene(homologene_fname))
-    db.session.commit()
-
-    console.log('Loading orthologs from WormBase')
-    db.session.bulk_insert_mappings(Homologs, fetch_orthologs(ortholog_fname))
-    db.session.commit()
-
-    #############
-    # Upload DB #
-    #############
-
-    # Upload the file using todays date for archiving purposes
-    console.log(f"Uploading Database ({SQLITE_BASENAME})")
-    upload_file(f"db/{SQLITE_BASENAME}", SQLITE_PATH)
-
-    diff = int((arrow.utcnow() - start).total_seconds())
-    console.log(f"{diff} seconds")
-
-    # =========================== #
-    #   Generate gene id dict     #
-    # =========================== #
-    # Create a gene dictionary to match wormbase IDs to either the locus name
-    # or a sequence id
-    gene_dict = {x.gene_id: x.locus or x.sequence_name for x in WormbaseGeneSummary.query.all()}
-    pickle.dump(gene_dict, open("base/static/data/gene_dict.pkl", 'wb'))
-
-
-def download_sqlite_database():
-    SQLITE_PATH = f"base/cendr.{DATASET_RELEASE}.{WORMBASE_VERSION}.db"
-    SQLITE_BASENAME = os.path.basename(SQLITE_PATH)
-    download([f"https://storage.googleapis.com/elegansvariation.org/db/{SQLITE_BASENAME}"], "base")
+    """Create a postgres database
+    Args:
+        sel_wormbase_version - e.g. WS276
+
+    Generate a postgres database
+    """
+    console.log("Initializing Database")
+    DATASET_RELEASE = config['DATASET_RELEASE']
+
+    # Download wormbase files
+    if strain_only is False:
+        f = download_external_data(sel_wormbase_version)
+
+    from base.application import create_app
+    app = create_app()
+    app.app_context().push()
+
+    app.config['SQLALCHEMY_DATABASE_URI'] = f'postgresql://admin:password@localhost/cendr'
+
+
+    if strain_only is True:
+        reset_tables(app, db, tables=[Strain.__table__])
+    else:
+        reset_tables(app, db)
+
+    load_strains(db)
+    if strain_only is True:
+        console.log('Finished loading strains')
+        return
+
+    load_metadata(db, sel_wormbase_version)
+    load_genes_summary(db, f)
+    load_genes_table(db, f)
+    load_homologs(db, f)
+    load_orthologs(db, f)
+    load_variant_annotation(db, f)
+    generate_gene_dict()
+
+
+##########################
+# Download external data #
+##########################
+@timeit
+def download_external_data(sel_wormbase_version):
+    console.log('Downloading External Data...')
+    if not os.path.exists(DOWNLOAD_PATH):
+        os.makedirs(DOWNLOAD_PATH)
+
+    # Parallel URL download
+    console.log("Downloading Wormbase Data")
+    GENE_GFF_URL = URLS.GENE_GFF_URL.format(WB=sel_wormbase_version)
+    GENE_GTF_URL = URLS.GENE_GTF_URL.format(WB=sel_wormbase_version)
+    download([URLS.STRAIN_VARIANT_ANNOTATION_URL,
+              GENE_GFF_URL,
+              GENE_GTF_URL,
+              URLS.GENE_IDS_URL,
+              URLS.HOMOLOGENE_URL,
+              URLS.ORTHOLOG_URL,
+              URLS.TAXON_ID_URL],
+             DOWNLOAD_PATH)
+
+    fnames = {
+        "sva": download_fname(DOWNLOAD_PATH, URLS.STRAIN_VARIANT_ANNOTATION_URL),
+        "gff": download_fname(DOWNLOAD_PATH, GENE_GFF_URL),
+        "gtf": download_fname(DOWNLOAD_PATH, GENE_GTF_URL),
+        "gene_ids": download_fname(DOWNLOAD_PATH, URLS.GENE_IDS_URL),
+        "homologene": download_fname(DOWNLOAD_PATH, URLS.HOMOLOGENE_URL),
+        "ortholog": download_fname(DOWNLOAD_PATH, URLS.ORTHOLOG_URL)
+    }
+    return fnames
+
+
+################
+# Reset Tables #
+################
+@timeit
+def reset_tables(app, db, tables = None):
+    if tables is None:
+        console.log('Dropping all tables...')
+        db.drop_all(app=app)
+        console.log('Creating all tables...')
+        db.create_all(app=app)
+    else:
+        console.log(f'Dropping tables: ${tables}')
+        db.metadata.drop_all(bind=db.engine, checkfirst=True, tables=tables)
+        console.log(f'Creating tables: ${tables}')
+        db.metadata.create_all(bind=db.engine, tables=tables)
+
+    db.session.commit()
+
+
+
+################
+# Load Strains #
+################
+@timeit
+def load_strains(db):
+    console.log('Loading strains...')
+    andersen_strains = fetch_andersen_strains()
+    db.session.bulk_insert_mappings(Strain, andersen_strains)
+    db.session.commit()
+    console.log(f"Inserted {Strain.query.count()} strains")
+
+
+################
+# Set metadata #
+################
+@timeit
+def load_metadata(db, sel_wormbase_version):
+    start = arrow.utcnow()
+    console.log('Inserting metadata')
+    metadata = {}
+    metadata.update(vars(constants))
+    metadata.update({"CENDR_VERSION": config['CENDR_VERSION'],
+                     "APP_CONFIG": config['APP_CONFIG'],
+                     "DATASET_RELEASE": config['DATASET_RELEASE'],
+                     "WORMBASE_VERSION": sel_wormbase_version,
+                     "RELEASES": config['RELEASES'],
+                     "DATE": arrow.utcnow()})
+
+    for k, v in metadata.items():
+        if not k.startswith("_"):
+            # For nested constants:
+            if type(v) == type:
+                for name in [x for x in dir(v) if not x.startswith("_")]:
+                    key_val = Metadata(key="{}/{}".format(k, name),
+                                       value=getattr(v, name))
+                    db.session.add(key_val)
+            else:
+                key_val = Metadata(key=k, value=str(v))
+                db.session.add(key_val)
+
+    db.session.commit()
+
+
+##############
+# Load Genes #
+##############
+@timeit
+def load_genes_summary(db, f):
+    console.log('Loading summary gene table')
+    gene_summary = fetch_gene_gff_summary(f['gff'])
+    db.session.bulk_insert_mappings(WormbaseGeneSummary, gene_summary)
+    db.session.commit()
+
+
+@timeit
+def load_genes_table(db, f):
+    console.log('Loading gene table')
+    genes = fetch_gene_gtf(f['gtf'], f['gene_ids'])
+    db.session.bulk_insert_mappings(WormbaseGene, genes)
+    db.session.commit();
+
+    results = db.session.query(WormbaseGene.feature, db.func.count(WormbaseGene.feature)) \
+                        .group_by(WormbaseGene.feature) \
+                        .all()
+    result_summary = '\n'.join([f"{k}: {v}" for k, v in results])
+    console.log(f"============\nGene Summary\n------------\n{result_summary}\n============\n")
+
+
+###############################
+# Load homologs               #
+###############################
+@timeit
+def load_homologs(db, f):
+    console.log('Loading homologs from homologene')
+    homologene = fetch_homologene(f['homologene'])
+    db.session.bulk_insert_mappings(Homologs, homologene)
+    db.session.commit()
+
+
+###############################
+# Load Orthologs              #
+###############################
+@timeit
+def load_orthologs(db, f):
+    console.log('Loading orthologs from WormBase')
+    orthologs = fetch_orthologs(f['ortholog'])
+    db.session.bulk_insert_mappings(Homologs, orthologs)
+    db.session.commit()
+
+
+######################################
+# Load Strain Variant Annotated Data #
+######################################
+@timeit
+def load_variant_annotation(db, f):
+    console.log('Loading strain variant annotated csv')
+    sva_data = fetch_strain_variant_annotation_data(f['sva'])
+    db.session.bulk_insert_mappings(StrainAnnotatedVariants, sva_data)
+    db.session.commit()
+
+
+# =========================== #
+#   Generate gene id dict     #
+# =========================== #
+# Create a gene dictionary to match wormbase IDs to either the locus name
+# or a sequence id
+@timeit
+def generate_gene_dict():
+    console.log('Generating gene_dict.pkl')
+    gene_dict = {x.gene_id: x.locus or x.sequence_name for x in WormbaseGeneSummary.query.all()}
+    pickle.dump(gene_dict, open("base/static/data/gene_dict.pkl", 'wb'))
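The `timeit` decorator applied throughout is imported from `base.utils.decorators`, which is not part of this diff; a minimal sketch of what such a decorator could look like (an assumption, not the repo's actual implementation):

```python
import time
from functools import wraps

def timeit(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        # Time the wrapped ETL step and report its duration
        start = time.time()
        result = func(*args, **kwargs)
        print(f"{func.__name__} took {time.time() - start:.1f}s")
        return result
    return wrapper
```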
diff --git a/base/database/etl_homologene.py b/base/database/etl_homologene.py
index bb71d8bf..2798e527 100644
--- a/base/database/etl_homologene.py
+++ b/base/database/etl_homologene.py
@@ -8,11 +8,15 @@
 import re
 import tarfile
 import csv
+
+from logzero import logger
 from urllib.request import urlretrieve
 from tempfile import NamedTemporaryFile
 from base.models import WormbaseGeneSummary
 from base.constants import URLS
 
+C_ELEGANS_PREFIX = 'CELE_'
+C_ELEGANS_HOMOLOG_ID = 6239
 
 def fetch_taxon_ids():
     """
@@ -57,19 +61,31 @@ def fetch_homologene(homologene_fname: str):
     taxon_ids = fetch_taxon_ids()
 
     # First, fetch records with a homolog ID that possesses a C. elegans gene.
-    elegans_set = dict([[int(x[0]), x[3]] for x in response_csv if x[1] == '6239'])
+    elegans_set = dict([[int(x[0]), x[3]] for x in response_csv if x[1] == str(C_ELEGANS_HOMOLOG_ID)])
+
+    # Remove CELE_ prefix from some gene names
+    for k, v in elegans_set.items():
+        elegans_set[k] = v.replace(C_ELEGANS_PREFIX, '')
 
+    idx = 0
+    count = 0
     for line in response_csv:
-        tax_id = int(line[1])
-        homolog_id = int(line[0])
-        if homolog_id in elegans_set.keys() and tax_id != 6239:
-            # Try to resolve the wormbase WB ID if possible.
-            gene_name = elegans_set[homolog_id]
-            gene_id = WormbaseGeneSummary.resolve_gene_id(gene_name) or line[2]
-            yield {'gene_id': gene_id,
-                   'gene_name': gene_name,
-                   'homolog_species': taxon_ids[tax_id],
-                   'homolog_taxon_id': tax_id,
-                   'homolog_gene': line[3],
-                   'homolog_source': "Homologene",
-                   'is_ortholog': False}
+        idx += 1
+        tax_id = int(line[1])
+        homolog_id = int(line[0])
+        if homolog_id in elegans_set.keys() and tax_id != int(C_ELEGANS_HOMOLOG_ID):
+            # Try to resolve the wormbase WB ID if possible.
+            gene_name = elegans_set[homolog_id]
+            gene_id = WormbaseGeneSummary.resolve_gene_id(gene_name)
+            ref = WormbaseGeneSummary.query.filter(WormbaseGeneSummary.gene_id == gene_id).first()
+            if idx % 10000 == 0:
+                logger.info(f'Processed {idx} records yielding {count} inserts')
+            if ref:
+                count += 1
+                yield {'gene_id': gene_id,
+                       'gene_name': gene_name,
+                       'homolog_species': taxon_ids[tax_id],
+                       'homolog_taxon_id': tax_id,
+                       'homolog_gene': line[3],
+                       'homolog_source': "Homologene",
+                       'is_ortholog': False }
diff --git a/base/database/etl_strains.py b/base/database/etl_strains.py
index 7f45c04b..48c1fd06 100644
--- a/base/database/etl_strains.py
+++ b/base/database/etl_strains.py
@@ -15,6 +15,7 @@
 from logzero import logger
 from base.config import config
 
+NULL_VALS = ["None", "", "NA", None]
 
 def elevation_cache(func):
     """quick and simple cache for lat/lon"""
@@ -71,13 +72,13 @@ def fetch_andersen_strains():
     WI = get_google_sheet(config['ANDERSEN_LAB_STRAIN_SHEET'])
     strain_records = WI.get_all_records()
     # Only take records with a release reported
-    strain_records = list(filter(lambda x: x.get('release') not in ['', None, 'NA'], strain_records))
+    strain_records = list(filter(lambda x: x.get('release') not in NULL_VALS, strain_records))
     results = []
     for n, record in enumerate(strain_records):
         record = {k.lower(): v for k, v in record.items()}
         for k, v in record.items():
             # Set NA to None
-            if v in ["NA", '']:
+            if v in NULL_VALS:
                 v = None
                 record[k] = v
             if k in ['sampling_date'] and v:
@@ -95,12 +96,12 @@ def fetch_andersen_strains():
             record["issues"] = record["issues"] == "TRUE"
 
         # Set isotype_ref_strain = FALSE if no isotype is assigned.
-        if record['isotype'] in [None, "", "NA"]:
+        if record['isotype'] in NULL_VALS:
             record['isotype_ref_strain'] = False
             record['wgs_seq'] = False
 
         # Skip strains that lack an isotype
-        if record['isotype'] in [None, "", "NA"] and record['issues'] is False:
+        if record['isotype'] in NULL_VALS and record['issues'] is False:
             continue
diff --git a/base/database/etl_variant_annot.py b/base/database/etl_variant_annot.py
new file mode 100644
index 00000000..9ba2a37c
--- /dev/null
+++ b/base/database/etl_variant_annot.py
@@ -0,0 +1,67 @@
+# -*- coding: utf-8 -*-
+"""
+Loads the Strain Variant Annotated CSV into the SQLite DB
+
+Author: Sam Wachspress
+"""
+import csv
+import re
+
+from logzero import logger
+from sqlalchemy.sql.expression import null
+from base.models import StrainAnnotatedVariants
+
+def fetch_strain_variant_annotation_data(sva_fname: str):
+    """
+    Load strain variant annotation table data:
+
+    CHROM,POS,REF,ALT,CONSEQUENCE,WORMBASE_ID,TRANSCRIPT,BIOTYPE,
+    STRAND,AMINO_ACID_CHANGE,DNA_CHANGE,Strains,BLOSUM,Grantham,
+    Percent_Protein,GENE,VARIANT_IMPACT,DIVERGENT
+
+    """
+    with open(sva_fname) as csv_file:
+        csv_reader = csv.reader(csv_file, delimiter=',')
+
+        line_count = -1
+        for row in csv_reader:
+            if line_count == -1:
+                print(f'Column names are {", ".join(row)}')
+                line_count += 1
+            else:
+                line_count += 1
+                if line_count % 100000 == 0:
+                    logger.info(f"Processed {line_count} lines;")
+
+                target_consequence = None
+                consequence = row[4] if row[4] else None
+                pattern = '^@[0-9]*$'
+                alt_target = re.match(pattern, consequence)
+                if alt_target:
+                    target_consequence = int(consequence[1:])
+                    consequence = None
+
+                yield {
+                    'id': line_count,
+                    'chrom': row[0],
+                    'pos': int(row[1]),
+                    'ref_seq': row[2] if row[2] else None,
+                    'alt_seq': row[3] if row[3] else None,
+                    'consequence': consequence,
+                    'target_consequence': target_consequence,
+                    'gene_id': row[5] if row[5] else None,
+                    'transcript': row[6] if row[6] else None,
+                    'biotype': row[7] if row[7] else None,
+                    'strand': row[8] if row[8] else None,
+                    'amino_acid_change': row[9] if row[9] else None,
+                    'dna_change': row[10] if row[10] else None,
+                    'strains': row[11] if row[11] else None,
+                    'blosum': int(row[12]) if row[12] else None,
+                    'grantham': int(row[13]) if row[13] else None,
+                    'percent_protein': float(row[14]) if row[14] else None,
+                    'gene': row[15] if row[15] else None,
+                    'variant_impact': row[16] if row[16] else None,
+                    'divergent': True if row[17] == 'D' else False,
+                }
+
+    print(f'Processed {line_count} lines.')
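In the CONSEQUENCE column, a value such as `@123` points at another row rather than naming a consequence; the generator above splits that into `target_consequence`. A standalone sketch of that branch (with an extra guard for empty values, which the generator itself assumes cannot occur since `re.match` requires a string):

```python
import re

def split_consequence(raw):
    # Mirrors the '@N' handling in fetch_strain_variant_annotation_data
    if raw and re.match(r'^@[0-9]*$', raw):
        return None, int(raw[1:])
    return raw or None, None

print(split_consequence('@123'))      # -> (None, 123)
print(split_consequence('missense'))  # -> ('missense', None)
```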
diff --git a/base/database/etl_wormbase.py b/base/database/etl_wormbase.py
index 0cd02e02..d6648239 100644
--- a/base/database/etl_wormbase.py
+++ b/base/database/etl_wormbase.py
@@ -8,6 +8,7 @@
 Author: Daniel E. Cook (danielecook@gmail.com)
 
 """
+from base.models import WormbaseGeneSummary
 import csv
 import gzip
 from logzero import logger
@@ -46,7 +47,11 @@ def fetch_gene_gtf(gtf_fname: str, gene_ids_fname: str):
     gene_gtf.frame = gene_gtf.frame.apply(lambda x: x if x != "." else None)
     gene_gtf.exon_number = gene_gtf.exon_number.apply(lambda x: x if x != "" else None)
     gene_gtf['arm_or_center'] = gene_gtf.apply(lambda row: arm_or_center(row['chrom'], row['pos']), axis=1)
+    idx = 0
     for row in gene_gtf.to_dict('records'):
+        idx += 1
+        if idx % 100000 == 0:
+            logger.info(f"Processed {idx} lines")
         yield row
 
 
@@ -98,17 +103,25 @@ def fetch_orthologs(orthologs_fname: str):
     """
     csv_out = list(csv.reader(open(orthologs_fname, 'r'), delimiter='\t'))
 
+    idx = 0
+    count = 0
     for line in csv_out:
-        size_of_line = len(line)
-        if size_of_line < 2:
-            continue
-        elif size_of_line == 2:
-            wb_id, locus_name = line
-        else:
-            yield {'gene_id': wb_id,
-                   'gene_name': locus_name,
-                   'homolog_species': line[0],
-                   'homolog_taxon_id': None,
-                   'homolog_gene': line[2],
-                   'homolog_source': line[3],
-                   'is_ortholog': line[0] == 'Caenorhabditis elegans'}
+        idx += 1
+        size_of_line = len(line)
+        if size_of_line < 2:
+            continue
+        elif size_of_line == 2:
+            wb_id, locus_name = line
+        else:
+            ref = WormbaseGeneSummary.query.filter(WormbaseGeneSummary.gene_id == wb_id).first()
+            if idx % 10000 == 0:
+                logger.info(f'Processed {idx} records yielding {count} inserts')
+            if ref:
+                count += 1
+                yield {'gene_id': wb_id,
+                       'gene_name': locus_name,
+                       'homolog_species': line[0],
+                       'homolog_taxon_id': None,
+                       'homolog_gene': line[2],
+                       'homolog_source': line[3],
+                       'is_ortholog': line[0] == 'Caenorhabditis elegans'}
diff --git a/base/database/readme.md b/base/database/readme.md
new file mode 100644
index 00000000..87d0c4eb
--- /dev/null
+++ b/base/database/readme.md
@@ -0,0 +1,13 @@
+# CeNDR Database
+
+This directory contains the scripts to perform the 'initdb' flask action.
+It requires a local PostgreSQL instance to be running.
+
+The database can then be dumped with:
+
+```
+pg_dump -U admin --format=plain --no-owner --no-acl cendr > cendr.sql
+```
+
+The .sql file can then be uploaded to Google Cloud Buckets and batch imported
+to the Cloud SQL instance.
diff --git a/base/extensions.py b/base/extensions.py
index ed5042e3..c846102e 100644
--- a/base/extensions.py
+++ b/base/extensions.py
@@ -5,9 +5,12 @@
 from flask_sslify import SSLify
 from flask_debugtoolbar import DebugToolbarExtension
 from flask_sqlalchemy import SQLAlchemy
+from flask_jwt_extended import JWTManager
 
-sqlalchemy = SQLAlchemy
+
+sqlalchemy = SQLAlchemy()
 markdown = Markdown
 cache = Cache(config={'CACHE_TYPE': 'base.utils.cache.datastore_cache'})
 sslify = SSLify
 debug_toolbar = DebugToolbarExtension
+jwt = JWTManager()
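The module now exports instantiated extensions (`SQLAlchemy()`, `JWTManager()`) instead of bare classes, enabling the usual create-once, bind-later `init_app` pattern that `register_extensions()` in `base/application.py` above relies on:

```python
from flask import Flask
from base.extensions import sqlalchemy, jwt

app = Flask(__name__)
# Bind the shared extension instances to this app
sqlalchemy.init_app(app)
jwt.init_app(app)
```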
diff --git a/base/forms.py b/base/forms.py
index 09bbe586..6e20e02e 100644
--- a/base/forms.py
+++ b/base/forms.py
@@ -2,20 +2,34 @@
 import pandas as pd
 import numpy as np
 
-from flask_wtf import Form, RecaptchaField
+from flask_wtf import FlaskForm, RecaptchaField, Form
 from wtforms import (StringField,
+                     DateField,
+                     BooleanField,
                      TextAreaField,
                      IntegerField,
                      SelectField,
+                     SelectMultipleField,
+                     widgets,
                      FieldList,
                      HiddenField,
                      RadioField)
-from wtforms.validators import Required, Length, Email, DataRequired
-from wtforms.validators import ValidationError
+from wtforms.fields.simple import PasswordField
+from wtforms.validators import (Required,
+                                Length,
+                                Email,
+                                DataRequired,
+                                EqualTo,
+                                Optional,
+                                ValidationError)
+
+from wtforms.fields.html5 import EmailField
+
+from base.constants import PRICES, USER_ROLES, SHIPPING_OPTIONS, PAYMENT_OPTIONS
 from base.utils.gcloud import query_item
-from base.constants import PRICES
+from base.models import user_ds
 from base.views.api.api_strain import query_strains
 from base.utils.data_utils import is_number, list_duplicates
 from slugify import slugify
@@ -23,24 +37,87 @@
 from logzero import logger
 
 
-
-class donation_form(Form):
-    """
-    The donation form
-    """
-    name = StringField('Name', [Required(), Length(min=3, max=100)])
-    address = TextAreaField('Address', [Length(min=10, max=200)])
-    email = StringField('Email', [Email(), Length(min=3, max=100)])
-    total = IntegerField('Donation Amount')
-    recaptcha = RecaptchaField()
+class MultiCheckboxField(SelectMultipleField):
+    widget = widgets.ListWidget(prefix_label=False)
+    option_widget = widgets.CheckboxInput()
+
+
+class file_upload_form(FlaskForm):
+    pass
+
+
+class basic_login_form(FlaskForm):
+    """
+    The simple username/password login form
+    """
+    username = StringField('Username', [Required(), Length(min=5, max=30)])
+    password = PasswordField('Password', [Required(), Length(min=5, max=30)])
+    recaptcha = RecaptchaField()
+
+
+class markdown_form(FlaskForm):
+    """
+    markdown editing form
+    """
+    title = StringField('Title', [Optional()])
+    content = StringField('Content', [Optional()])
+    date = DateField('Date (mm-dd-YYYY)', [Optional()], format='%m-%d-%Y')
+    type = StringField('Type', [Optional()])
+    publish = BooleanField('Publish', [Optional()])
+
+
+class user_register_form(FlaskForm):
+    """
+    Register as a new user with username/password
+    """
+    username = StringField('Username', [Required(), Length(min=5, max=30)])
+    full_name = StringField('Full Name', [Required(), Length(min=5, max=50)])
+    email = EmailField('Email Address', [Required(), Email(), Length(min=6, max=50)])
+    password = PasswordField('Password', [Required(), EqualTo('confirm_password', message='Passwords must match'), Length(min=5, max=30)])
+    confirm_password = PasswordField('Confirm Password', [Required(), EqualTo('password', message='Passwords must match'), Length(min=5, max=30)])
+    recaptcha = RecaptchaField()
+
+    def validate_username(form, field):
+        user = user_ds(field.data)
+        if user._exists:
+            raise ValidationError("Username already exists")
+
+
+class user_update_form(FlaskForm):
+    """
+    Modifies an existing users profile
+    """
+    full_name = StringField('Full Name', [Required(), Length(min=5, max=50)])
+    email = EmailField('Email Address', [Required(), Email(), Length(min=6, max=50)])
+    password = PasswordField('Password', [Optional(), EqualTo('confirm_password', message='Passwords must match'), Length(min=5, max=30)])
+    confirm_password = PasswordField('Confirm Password', [Optional(), EqualTo('password', message='Passwords must match'), Length(min=5, max=30)])
+
+
+class admin_edit_user_form(FlaskForm):
+    """
+    A form for one or more roles
+    """
+    roles = MultiCheckboxField('User Roles', choices=USER_ROLES)
+
+
+class data_report_form(FlaskForm):
+    """
+    A form for creating a data release
+    """
+    dataset = SelectField('Release Dataset', validators=[Required()])
+    wormbase = StringField('Wormbase Version WS:', validators=[Required()])
+    version = SelectField('Report Version', validators=[Required()])
 
 
-SHIPPING_OPTIONS = [('UPS', 'UPS'),
-                    ('FEDEX', 'FEDEX'),
-                    ('Flat Rate Shipping', '${} Flat Fee'.format(PRICES.SHIPPING))]
-
-PAYMENT_OPTIONS = [('check', 'Check'),
-                   ('credit_card', 'Credit Card')]
+class donation_form(Form):
+    """
+    The donation form
+    """
+    name = StringField('Name', [Required(), Length(min=3, max=100)])
+    address = TextAreaField('Address', [Length(min=10, max=200)])
+    email = StringField('Email', [Email(), Length(min=3, max=100)])
+    total = IntegerField('Donation Amount')
+    recaptcha = RecaptchaField()
 
 
 class order_form(Form):
@@ -100,8 +177,10 @@ class heritability_form(Form):
 
 
 #
-# Perform Mapping Form
+# Variant Browser Forms
 #
+class vbrowser_form(FlaskForm):
+    pass
 
 
 class TraitData(HiddenField):
diff --git a/base/manage.py b/base/manage.py
index eef1eded..cb5b72f8 100644
--- a/base/manage.py
+++ b/base/manage.py
@@ -12,8 +12,7 @@
 from click import secho
 from base.utils.gcloud import get_item
 from base.utils.data_utils import zipdir
-from base.database import (initialize_sqlite_database,
-                           download_sqlite_database)
+from base.database import initialize_postgres_database
 from base import constants
 
 from subprocess import Popen, PIPE
@@ -23,19 +22,21 @@
 @click.command(help="Initialize the database")
 @click.argument("wormbase_version", default=constants.WORMBASE_VERSION)
 def initdb(wormbase_version=constants.WORMBASE_VERSION):
-    initialize_sqlite_database(wormbase_version)
+    initialize_postgres_database(wormbase_version)
 
 
 @click.command(help="Updates the strain table of the database")
 @click.argument("wormbase_version", default=constants.WORMBASE_VERSION)
 def update_strains(wormbase_version):
-    initialize_sqlite_database(wormbase_version, strain_only=True)
+    initialize_postgres_database(wormbase_version, strain_only=True)
 
 
-@click.command(help="Download the database (used in docker container)")
-def download_db():
+# Todo: allow downloading postgres dump/local db in docker container
+# or just link to .sql in cloud storage (even better!)
+#@click.command(help="Download the database (used in docker container)")
+#def download_db():
     # Downloads the latest SQLITE database
-    download_sqlite_database()
+    #download_sqlite_database()
 
 
 @click.command(help="Update credentials")
@@ -46,6 +47,7 @@ def update_credentials():
     from base.application import create_app
     app = create_app()
     app.app_context().push()
+
     click.secho("Zipping env_config", fg='green')
     zipdir('env_config/', 'env_config.zip')
     zip_creds = get_item('credential', 'travis-ci-cred')
@@ -73,6 +75,7 @@ def decrypt_credentials():
     from base.application import create_app
     app = create_app()
     app.app_context().push()
+
     click.secho("Decrypting env_config.zip.enc", fg='green')
     zip_creds = get_item('credential', 'travis-ci-cred')
     comm = ['travis',
diff --git a/base/models.py b/base/models.py
index fd1264cf..da51d248 100644
--- a/base/models.py
+++ b/base/models.py
@@ -1,23 +1,27 @@
 import os
+import re
 import arrow
 import json
 import pandas as pd
 import numpy as np
 import datetime
 import requests
+
 from io import StringIO
 from flask import Markup, url_for
 from flask_sqlalchemy import SQLAlchemy
 from sqlalchemy import or_, func
-from logzero import logger
+from werkzeug.security import safe_str_cmp
 
-from base.constants import URLS
+from base.config import config
+from base.constants import GOOGLE_CLOUD_BUCKET, STRAIN_PHOTO_PATH
+from base.extensions import sqlalchemy
 from base.utils.gcloud import get_item, store_item, query_item, get_cendr_bucket, check_blob
 from base.utils.aws import get_aws_client
+from base.utils.data_utils import hash_password, unique_id
 from gcloud.datastore.entity import Entity
 from collections import defaultdict
 from botocore.exceptions import ClientError
-from base.config import DATASET_RELEASE
 
 db = SQLAlchemy()
@@ -83,6 +87,7 @@ class trait_ds(datastore_model):
     If a task is re-run the report will only display the latest version.
     """
     kind = 'trait'
+    kind = '{}{}'.format(config['DS_PREFIX'], kind)
 
     def __init__(self, *args, **kwargs):
         """
""" kind = 'trait' + kind = '{}{}'.format(config['DS_PREFIX'], kind) def __init__(self, *args, **kwargs): """ @@ -233,9 +238,9 @@ def gs_base_url(self): The URL schema changed from REPORT_VERSION v1 to v2. """ if self.REPORT_VERSION == 'v2': - return f"https://storage.googleapis.com/elegansvariation.org/reports/{self.gs_path}" + return f"https://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/reports/{self.gs_path}" elif self.REPORT_VERSION == 'v1': - return f"https://storage.googleapis.com/elegansvariation.org/reports/{self.gs_path}" + return f"https://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/reports/{self.gs_path}" def get_gs_as_dataset(self, fname): """ @@ -260,7 +265,7 @@ def list_report_files(self): cendr_bucket = get_cendr_bucket() items = cendr_bucket.list_blobs(prefix=f"reports/{self.gs_path}") - return {os.path.basename(x.name): f"https://storage.googleapis.com/elegansvariation.org/{x.name}" for x in items} + return {os.path.basename(x.name): f"https://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/{x.name}" for x in items} def file_url(self, fname): """ @@ -276,6 +281,7 @@ class mapping_ds(datastore_model): The mapping/peak interval model """ kind = 'mapping' + kind = '{}{}'.format(config['DS_PREFIX'], kind) def __init__(self, *args, **kwargs): super(mapping_ds, self).__init__(*args, **kwargs) @@ -287,14 +293,35 @@ class user_ds(datastore_model): information on users. """ kind = 'user' + kind = '{}{}'.format(config['DS_PREFIX'], kind) def __init__(self, *args, **kwargs): super(user_ds, self).__init__(*args, **kwargs) + + def set_properties(self, **kwargs): + if 'username' in kwargs: + self.username = kwargs.get('username') + if 'full_name' in kwargs: + self.full_name = kwargs.get('full_name') + if 'password' in kwargs: + self.set_password(kwargs.get('password'), kwargs.get('salt')) + if 'email' in kwargs: + self.set_email(kwargs.get('email')) + if 'roles' in kwargs: + self.roles = kwargs.get('roles') + + def save(self, *args, **kwargs): + now = arrow.utcnow().datetime + if not self._exists: + self.created_on = now + self.modified_on = now + super(user_ds, self).save(*args, **kwargs) + def reports(self): - filters = [('user_id', '=', self.user_id)] + filters = [('user_id', '=', self.name)] # Note this requires a composite index defined very precisely. 
- results = query_item('trait', filters=filters, order=['user_id', '-created_on']) + results = query_item(self.kind, filters=filters, order=['user_id', '-created_on']) results = sorted(results, key=lambda x: x['created_on'], reverse=True) results_out = defaultdict(list) for row in results: @@ -302,17 +329,226 @@ def reports(self): # Generate report objects return results_out + def get_all(self, keys_only=False): + results = query_item(self.kind, keys_only=keys_only) + return results + + def set_password(self, password, salt): + # calling set_password with self.password + if hasattr(self, 'password'): + if (len(password) > 0) and (password != self.password): + self.password = hash_password(password + salt) + else: + self.password = hash_password(password + salt) + + def set_email(self, email): + if hasattr(self, 'email'): + if not safe_str_cmp(email, self.email): + self.email = email + self.email_confirmation_code = unique_id() + self.verified_email = False + else: + self.email = email + self.email_confirmation_code = unique_id() + self.verified_email = False + + def check_password(self, password, salt): + return safe_str_cmp(self.password, hash_password(password + salt)) + + +class markdown_ds(datastore_model): + """ + The Markdown model - for creating and retrieving + documents uploaded to the site + """ + kind = 'markdown' + kind = '{}{}'.format(config['DS_PREFIX'], kind) + + def __init__(self, *args, **kwargs): + super(markdown_ds, self).__init__(*args, **kwargs) + + def get_all(self, keys_only=False): + results = query_item(self.kind, keys_only=keys_only) + return results + + def query_by_type(self, type, keys_only=False): + filters = [('type', '=', type)] + results = query_item(self.kind, filters=filters, keys_only=keys_only) + return results + + def save(self, *args, **kwargs): + now = arrow.utcnow().datetime + self.modified_on = now + if not self._exists: + self.created_on = now + super(markdown_ds, self).save(*args, **kwargs) + + +class ns_calc_ds(datastore_model): + """ + The NemaScan Task Model - metadata for NemaScan nextflow pipeline + execution tasks executed by Google Life Sciences + """ + kind = 'ns_calc' + kind = '{}{}'.format(config['DS_PREFIX'], kind) + + + def __init__(self, *args, **kwargs): + super(ns_calc_ds, self).__init__(*args, **kwargs) + + def query_by_username(self, username, keys_only=False): + filters = [('username', '=', username)] + results = query_item(self.kind, filters=filters, keys_only=keys_only) + return results + + def save(self, *args, **kwargs): + now = arrow.utcnow().datetime + self.modified_on = now + if not self._exists: + self.created_on = now + super(ns_calc_ds, self).save(*args, **kwargs) + + +class gls_op_ds(datastore_model): + """ + The Google Lifesciences Operation Model - metadata for pipeline + task executed by Google Life Sciences + """ + kind = 'gls_operation' + + def __init__(self, *args, **kwargs): + super(gls_op_ds, self).__init__(*args, **kwargs) + + +class h2calc_ds(datastore_model): + """ + The Heritability Calculation Task Model - for creating and retrieving + data and status information about a heritability calculation task + executed in Google Cloud Run + """ + kind = 'h2calc' + kind = '{}{}'.format(config['DS_PREFIX'], kind) + + + def __init__(self, *args, **kwargs): + super(h2calc_ds, self).__init__(*args, **kwargs) + + def query_by_username(self, username, keys_only=False): + filters = [('username', '=', username)] + results = query_item(self.kind, filters=filters, keys_only=keys_only) + return results + + def save(self, 
*args, **kwargs): + now = arrow.utcnow().datetime + self.modified_on = now + if not self._exists: + self.created_on = now + super(h2calc_ds, self).save(*args, **kwargs) + + + +class ip_calc_ds(datastore_model): + """ + The Indel Primer Calculation Task Model - for creating and retrieving + data and status information about an indel primer calculation task + executed in Google Cloud Run + """ + kind = 'ip_calc' + kind = '{}{}'.format(config['DS_PREFIX'], kind) + + + def __init__(self, *args, **kwargs): + super(ip_calc_ds, self).__init__(*args, **kwargs) + + def query_by_username(self, username, keys_only=False): + filters = [('username', '=', username)] + results = query_item(self.kind, filters=filters, keys_only=keys_only) + return results + + def save(self, *args, **kwargs): + now = arrow.utcnow().datetime + self.modified_on = now + if not self._exists: + self.created_on = now + super(ip_calc_ds, self).save(*args, **kwargs) + + +class data_report_ds(datastore_model): + """ + The Data Report model - for creating and retrieving + releases of genomic data + """ + kind = 'data-report' + kind = '{}{}'.format(config['DS_PREFIX'], kind) + + def init(self): + self.dataset = '' + self.wormbase = '' + self.version = '' + self.initialized = False + self.published_on = '' + self.publish = False + self.created_on = arrow.utcnow().datetime + self.report_synced_on = '' + self.db_synced_on = '' + + def __init__(self, *args, **kwargs): + super(data_report_ds, self).__init__(*args, **kwargs) + + def get_all(self, keys_only=False): + results = query_item(self.kind, keys_only=keys_only) + return results + + def list_bucket_dirs(): + """ + Lists 'directories' in GCP Bucket 'data_reports' (unique blob prefixes matching date format) + """ + cendr_bucket = get_cendr_bucket() + items = cendr_bucket.list_blobs(prefix=f"data_reports/") + dirs = [] + pattern = r"^(data_reports\/)([0-9]{8})/" + for i in items: + match = re.search(pattern, i.name) + if match: + dir = match.group(2) + if not dir in dirs: + dirs.append(dir) + + return dirs + + def save(self, *args, **kwargs): + now = arrow.utcnow().datetime + self.modified_on = now + super(data_report_ds, self).save(*args, **kwargs) + + +class config_ds(datastore_model): + """ + The Data Config model - Config stored in the cloud + for the site's data sources + """ + kind = 'config' + kind = '{}{}'.format(config['DS_PREFIX'], kind) + + def __init__(self, *args, **kwargs): + super(config_ds, self).__init__(*args, **kwargs) + + def save(self, *args, **kwargs): + now = arrow.utcnow().datetime + self.modified_on = now + if not self._exists: + self.created_on = now + super(config_ds, self).save(*args, **kwargs) class DictSerializable(object): - def _asdict(self): - result = {} - for key in self.__mapper__.c.keys(): - result[key] = getattr(self, key) - return result + def _asdict(self): + result = {} + for key in self.__mapper__.c.keys(): + result[key] = getattr(self, key) + return result # --------- Break datastore here ---------# - class Metadata(DictSerializable, db.Model): """ Table for storing information about other tables @@ -322,6 +558,48 @@ class Metadata(DictSerializable, db.Model): value = db.Column(db.String) +class WormbaseGeneSummary(DictSerializable, db.Model): + """ + This is a condensed version of the WormbaseGene model; + It is constructed out of convenience and only defines the genes + (not exons/introns/etc.) 
+ """ + __tablename__ = "wormbase_gene_summary" + id = db.Column(db.Integer, primary_key=True) + chrom = db.Column(db.String(7), index=True) + chrom_num = db.Column(db.Integer(), index=True) + start = db.Column(db.Integer(), index=True) + end = db.Column(db.Integer(), index=True) + locus = db.Column(db.String(30), index=True) + gene_id = db.Column(db.String(25), unique=True, index=True) + gene_id_type = db.Column(db.String(15), index=False) + sequence_name = db.Column(db.String(30), index=True) + biotype = db.Column(db.String(30), nullable=True) + gene_symbol = db.column_property(func.coalesce(locus, sequence_name, gene_id)) + interval = db.column_property(func.format("%s:%s-%s", chrom, start, end)) + arm_or_center = db.Column(db.String(12), index=True) + + __gene_id_constraint__ = db.UniqueConstraint(gene_id) + + + def to_json(self): + return {k: v for k, v in self._asdict().items() if not k.startswith("_")} + + + @classmethod + def resolve_gene_id(cls, query): + """ + query - a locus name or transcript ID + output - a wormbase gene ID + + Example: + WormbaseGene.resolve_gene_id('pot-2') --> WBGene00010195 + """ + result = cls.query.filter(or_(cls.locus == query, cls.sequence_name == query)).first() + if result: + return result.gene_id + + class Strain(DictSerializable, db.Model): __tablename__ = "strain" species_id_method = db.Column(db.String(50), nullable=True) @@ -365,12 +643,19 @@ def __repr__(self): return self.strain def to_json(self): - return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} + return {k: v for k, v in self._asdict().items() if not k.startswith("_")} def strain_photo_url(self): # Checks if photo exists and returns URL if it does try: - return check_blob(f"photos/isolation/{self.strain}.jpg").public_url + return check_blob(f"{STRAIN_PHOTO_PATH}{self.strain}.jpg").public_url + except AttributeError: + return None + + def strain_thumbnail_url(self): + # Checks if thumbnail exists and returns URL if it does + try: + return check_blob(f"{STRAIN_PHOTO_PATH}{self.strain}.thumb.jpg").public_url except AttributeError: return None @@ -378,13 +663,16 @@ def strain_bam_url(self): """ Return bam / bam_index url set """ - + bam_file=self.strain + '.bam' + bai_file=self.strain + '.bam.bai' + bam_download_link = url_for('data.download_bam_url', blob_name=bam_file) + bai_download_link = url_for('data.download_bam_url', blob_name=bai_file) url_set = Markup(f""" - + BAM / - + bai """.strip()) @@ -404,13 +692,16 @@ def isotype_bam_url(self): """ Return bam / bam_index url set """ - + bam_file=self.isotype + '.bam' + bai_file=self.isotype + '.bam.bai' + bam_download_link = url_for('data.download_bam_url', blob_name=bam_file) + bai_download_link = url_for('data.download_bam_url', blob_name=bai_file) url_set = Markup(f""" - + BAM / - + bai """.strip()) @@ -452,19 +743,25 @@ def cum_sum_strain_isotype(cls): @classmethod def release_summary(cls, release): - """ - Returns isotype and strain count for a data release. - - Args: - release - the data release - """ - counts = {'strain_count': cls.query.filter((cls.release <= release) & (cls.issues == False)).count(), - 'strain_count_sequenced': cls.query.filter((cls.release <= release) & (cls.issues == False) & (cls.sequenced == True)).count(), - 'isotype_count': cls.query.filter((cls.release <= release) & (cls.issues == False) & (cls.isotype != None)).group_by(cls.isotype).count()} - return counts + """ + Returns isotype and strain count for a data release. 
+ + Args: + release - the data release + """ + release = int(release) + strain_count = cls.query.filter((cls.release <= release) & (cls.issues == False)).count() + strain_count_sequenced = cls.query.filter((cls.release <= release) & (cls.issues == False) & (cls.sequenced == True)).count() + isotype_count = cls.query.with_entities(cls.isotype).filter((cls.isotype != None), (cls.release <= release), (cls.issues == False)).group_by(cls.isotype).count() + + return { + 'strain_count': strain_count, + 'strain_count_sequenced': strain_count_sequenced, + 'isotype_count': isotype_count + } def as_dict(self): - return {c.name: getattr(self, c.name) for c in self.__table__.columns} + return {c.name: getattr(self, c.name) for c in self.__table__.columns} class WormbaseGene(DictSerializable, db.Model): @@ -487,45 +784,15 @@ class WormbaseGene(DictSerializable, db.Model): protein_id = db.Column(db.String(30), nullable=True, index=True) arm_or_center = db.Column(db.String(12), index=True) - gene_summary = db.relationship("WormbaseGeneSummary", backref='gene_components') + __gene_summary__ = db.relationship("WormbaseGeneSummary", backref='wormbase_gene', lazy='joined') - def __repr__(self): - return f"{self.gene_id}:{self.feature} [{self.seqname}:{self.start}-{self.end}]" + def to_json(self): + return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} -class WormbaseGeneSummary(DictSerializable, db.Model): - """ - This is a condensed version of the WormbaseGene model; - It is constructed out of convenience and only defines the genes - (not exons/introns/etc.) - """ - __tablename__ = "wormbase_gene_summary" - id = db.Column(db.Integer, primary_key=True) - chrom = db.Column(db.String(7), index=True) - chrom_num = db.Column(db.Integer(), index=True) - start = db.Column(db.Integer(), index=True) - end = db.Column(db.Integer(), index=True) - locus = db.Column(db.String(30), index=True) - gene_id = db.Column(db.String(25), index=True) - gene_id_type = db.Column(db.String(15), index=False) - sequence_name = db.Column(db.String(30), index=True) - biotype = db.Column(db.String(30), nullable=True) - gene_symbol = db.column_property(func.coalesce(locus, sequence_name, gene_id)) - interval = db.column_property(func.printf("%s:%s-%s", chrom, start, end)) - arm_or_center = db.Column(db.String(12), index=True) - - @classmethod - def resolve_gene_id(cls, query): - """ - query - a locus name or transcript ID - output - a wormbase gene ID - Example: - WormbaseGene.resolve_gene_id('pot-2') --> WBGene00010195 - """ - result = cls.query.filter(or_(cls.locus == query, cls.sequence_name == query)).first() - if result: - return result.gene_id + def __repr__(self): + return f"{self.gene_id}:{self.feature} [{self.seqname}:{self.start}-{self.end}]" class Homologs(DictSerializable, db.Model): @@ -534,23 +801,149 @@ class Homologs(DictSerializable, db.Model): """ __tablename__ = "homologs" id = db.Column(db.Integer, primary_key=True) - gene_id = db.Column(db.ForeignKey('wormbase_gene_summary.gene_id'), nullable=False, index=True) - gene_name = db.Column(db.String(40), index=True) - homolog_species = db.Column(db.String(50), index=True) + gene_id = db.Column(db.ForeignKey('wormbase_gene_summary.gene_id'), nullable=True, index=True) + gene_name = db.Column(db.String(60), index=True) + homolog_species = db.Column(db.String(60), index=True) homolog_taxon_id = db.Column(db.Integer, index=True, nullable=True) # If available - homolog_gene = db.Column(db.String(50), index=True) - homolog_source = db.Column(db.String(40)) + 
homolog_gene = db.Column(db.String(60), index=True) + homolog_source = db.Column(db.String(60)) + is_ortholog = db.Column(db.Boolean(), index=True, nullable=True) + + __gene_summary__ = db.relationship("WormbaseGeneSummary", backref='homologs', lazy='joined') + + + def to_json(self): + return {k: v for k, v in self.__dict__.items() if not k.startswith("_")} - gene_summary = db.relationship("WormbaseGeneSummary", backref='homologs', lazy='joined') def unnest(self): """ Used with the gene API - returns an unnested homolog datastructure combined with the wormbase gene summary model. """ - self.__dict__.update(self.gene_summary.__dict__) - self.__dict__['gene_summary'] = None + self.__dict__.update(self.__gene_summary__.__dict__) return self def __repr__(self): return f"homolog: {self.gene_name} -- {self.homolog_gene}" + + +class StrainAnnotatedVariants(DictSerializable, db.Model): + """ + The Strain Annotated Variant table combines several features linked to variants: + Genetic location, base pairs affected, consequences of reading, gene information, + strains affected, and severity of impact + + """ + __tablename__ = 'variant_annotation' + id = db.Column(db.Integer, primary_key=True) + chrom = db.Column(db.String(7), index=True) + pos = db.Column(db.Integer(), index=True) + ref_seq = db.Column(db.String(), nullable=True) + alt_seq = db.Column(db.String(), nullable=True) + consequence = db.Column(db.String(), nullable=True) + target_consequence = db.Column(db.Integer(), nullable=True) + gene_id = db.Column(db.ForeignKey('wormbase_gene_summary.gene_id'), index=True, nullable=True) + transcript = db.Column(db.String(), index=True, nullable=True) + biotype = db.Column(db.String(), nullable=True) + strand = db.Column(db.String(1), nullable=True) + amino_acid_change = db.Column(db.String(), nullable=True) + dna_change = db.Column(db.String(), nullable=True) + strains = db.Column(db.String(), nullable=True) + blosum = db.Column(db.Integer(), nullable=True) + grantham = db.Column(db.Integer(), nullable=True) + percent_protein = db.Column(db.Float(), nullable=True) + gene = db.Column(db.String(), index=True, nullable=True) + variant_impact = db.Column(db.String(), nullable=True) + divergent = db.Column(db.Boolean(), nullable=True) + + __gene_summary__ = db.relationship("WormbaseGeneSummary", backref='variant_annotation', lazy='joined') + + + column_details = [ + {'id': 'chrom', 'name': 'Chromosome'}, + {'id': 'pos', 'name': 'Position'}, + {'id': 'ref_seq', 'name': 'Ref Sequence'}, + {'id': 'alt_seq', 'name': 'Alt Sequence'}, + {'id': 'consequence', 'name': 'Consequence'}, + {'id': 'target_consequence', 'name': 'Target Consequence'}, + {'id': 'gene_id', 'name': 'Gene ID'}, + {'id': 'transcript', 'name': 'Transcript'}, + {'id': 'biotype', 'name': 'Biotype'}, + {'id': 'strand', 'name': 'Strand'}, + {'id': 'amino_acid_change', 'name': 'Amino Acid Change'}, + {'id': 'dna_change', 'name': 'DNA Change'}, + {'id': 'strains', 'name': 'Strains'}, + {'id': 'blosum', 'name': 'BLOSUM'}, + {'id': 'grantham', 'name': 'Grantham'}, + {'id': 'percent_protein', 'name': 'Percent Protein'}, + {'id': 'gene', 'name': 'Gene'}, + {'id': 'variant_impact', 'name': 'Variant Impact'}, + {'id': 'divergent', 'name': 'Divergent'} + ] + + @classmethod + def generate_interval_sql(cls, interval): + interval = interval.replace(',','') + chrom = interval.split(':')[0] + range = interval.split(':')[1] + start = int(range.split('-')[0]) + stop = int(range.split('-')[1]) + + q = f"SELECT * FROM {cls.__tablename__} WHERE chrom='{chrom}' AND 
pos > {start} AND pos < {stop};" + return q + + + ''' TODO: implement input checks here and in the browser form''' + @classmethod + def verify_interval_query(cls, q): + query_regex = "^(I|II|III|IV|V|X|MtDNA):[0-9,]+-[0-9,]+$" + match = re.search(query_regex, q) + return True if match else False + + + @classmethod + def run_interval_query(cls, q): + q = cls.generate_interval_sql(q) + df = pd.read_sql_query(q, db.engine) + + try: + result = df[['id', 'chrom', 'pos', 'ref_seq', 'alt_seq', 'consequence', 'target_consequence', 'gene_id', 'transcript', 'biotype', 'strand', 'amino_acid_change', 'dna_change', 'strains', 'blosum', 'grantham', 'percent_protein', 'gene', 'variant_impact', 'divergent']].dropna(how='all') \ + .fillna(value="") \ + .agg(list) \ + .to_dict() + except ValueError: + result = {} + return result + + + @classmethod + def generate_position_sql(cls, pos): + pos = pos.replace(',','') + chrom = pos.split(':')[0] + pos = int(pos.split(':')[1]) + + q = f"SELECT * FROM {cls.__tablename__} WHERE chrom='{chrom}' AND pos = {pos};" + return q + + + @classmethod + def verify_position_query(cls, q): + query_regex = "^(I|II|III|IV|V|X|MtDNA):[0-9,]+$" + match = re.search(query_regex, q) + return True if match else False + + + @classmethod + def run_position_query(cls, q): + q = cls.generate_position_sql(q) + df = pd.read_sql_query(q, db.engine) + + try: + result = df[['id', 'chrom', 'pos', 'ref_seq', 'alt_seq', 'consequence', 'target_consequence', 'gene_id', 'transcript', 'biotype', 'strand', 'amino_acid_change', 'dna_change', 'strains', 'blosum', 'grantham', 'percent_protein', 'gene', 'variant_impact', 'divergent']].dropna(how='all') \ + .fillna(value="") \ + .agg(list) \ + .to_dict() + except ValueError: + result = {} + return result diff --git a/base/static/content/help/Change-Log.md b/base/static/content/help/Change-Log.md index f0fc719c..bc1b7b02 100644 --- a/base/static/content/help/Change-Log.md +++ b/base/static/content/help/Change-Log.md @@ -2,12 +2,12 @@ --- -##### v1.5.2 (2020-09-07) +#### v1.5.2 (2020-09-07) * A divergent region summmary track has been added to the primer indel tool. * Sweep haplotypes have been added to the latest release. -##### v1.5.1 (2020-08-30) +#### v1.5.1 (2020-08-30) * The [primer indel tool](/tools/pairwise_indel_finder) has been released. diff --git a/base/static/content/help/FAQ.md b/base/static/content/help/FAQ.md index a9c74e67..7ecdc88b 100644 --- a/base/static/content/help/FAQ.md +++ b/base/static/content/help/FAQ.md @@ -31,7 +31,7 @@ Or use this bibtex entry ### What are hyper-divergent regions? How should I use variants that fall within these regions? -Hyper-divergent regions are genomic intervals that contain sequences not found in the N2 reference strain. They were identified by high levels of variation and low coverage from read alignments. For a more full description, please read [this paper](https://andersenlab.org/publications/2020LeebioRxiv.pdf). We highly recommend that you use the variant browser and view the BAM files for strains of interest. We also released a genomic view track to see where we have classified divergent regions. If you find that your region of interest overlaps with a hyper-divergent region, then we recommend taking any variants as preliminary. Long-read sequencing is required to identify the actual genomic sequences in this region. +Hyper-divergent regions are genomic intervals that contain sequences not found in the N2 reference strain. 
They were identified by high levels of variation and low coverage from read alignments. For a fuller description, please read [this paper](https://andersenlab.org/publications/2020LeebioRxiv.pdf). We highly recommend that you use the genome browser and view the BAM files for strains of interest. We also released a genomic view track to see where we have classified divergent regions. If you find that your region of interest overlaps with a hyper-divergent region, then we recommend taking any variants as preliminary. Long-read sequencing is required to identify the actual genomic sequences in this region.
### How much confidence do we have in the indel variants?
@@ -45,7 +45,7 @@ __[See our filter optimization report for further details](/static/reports/filte
### How are strains grouped by isotype?
-In 2012, we [published](http://dx.doi.org/10.1038/ng.1050) genome-wide variant data from reduced representation sequencing of approximately 10% of the C. elegans genome (RAD-seq). Using these data, we grouped strains into isotypes. We also found many strains that were mislabeled as wild isolates but were instead N2 derivatives, recombinants from laboratory experiments, and mutagenesis screen isolates (detailed in Strain issues). These strains were not characterized further. For the isotypes, we chose one strain to be the isotype reference strain. This strain can be ordered through CeNDR [here]({{ url_for('strain.strain_catalog') }}).
+In 2012, we [published](http://dx.doi.org/10.1038/ng.1050) genome-wide variant data from reduced representation sequencing of approximately 10% of the C. elegans genome (RAD-seq). Using these data, we grouped strains into isotypes. We also found many strains that were mislabeled as wild isolates but were instead N2 derivatives, recombinants from laboratory experiments, and mutagenesis screen isolates (detailed in [Strain issues]({{ url_for('strains.strains_issues') }})). These strains were not characterized further. For the isotypes, we chose one strain to be the isotype reference strain. This strain can be ordered through CeNDR [here]({{ url_for('strains.strains_catalog') }}).
 After 2012, with advances in genome sequencing, we transitioned our sequencing to whole-genome short-read sequencing. All isotype reference strains were resequenced whole-genome.
The other strains within an isotype were not,
diff --git a/base/static/content/help/Variant-Browser.md b/base/static/content/help/Variant-Browser.md
index 3a857c7f..3a384816 100644
--- a/base/static/content/help/Variant-Browser.md
+++ b/base/static/content/help/Variant-Browser.md
@@ -1,4 +1,4 @@
-# Variant Browser
+# Genome Browser
 northwestern
\ No newline at end of file
diff --git a/base/static/img/northwestern-university.svg b/base/static/img/northwestern-university.svg
new file mode 100644
index 00000000..3b74ac8f
--- /dev/null
+++ b/base/static/img/northwestern-university.svg
@@ -0,0 +1,79 @@
+[SVG markup omitted]
diff --git a/base/static/img/pin-drop.svg b/base/static/img/pin-drop.svg
new file mode 100644
index 00000000..9780ff61
--- /dev/null
+++ b/base/static/img/pin-drop.svg
@@ -0,0 +1,7 @@
+[SVG markup omitted]
diff --git a/base/static/img/social-media-icons.png b/base/static/img/social-media-icons.png
new file mode 100644
index 00000000..93b350f3
Binary files /dev/null and b/base/static/img/social-media-icons.png differ
diff --git a/base/static/reports/20170531/pipelines.md b/base/static/reports/20160408/methods.md
similarity index 98%
rename from base/static/reports/20170531/pipelines.md
rename to base/static/reports/20160408/methods.md
index 36280ca3..992b9566 100644
--- a/base/static/reports/20170531/pipelines.md
+++ b/base/static/reports/20160408/methods.md
@@ -1,4 +1,4 @@
-# Methods / Pipelines
+# Methods
 __Note__: These methods operated on sequence data at the isotype level.
diff --git a/base/static/reports/20180527/pipelines.md b/base/static/reports/20170531/methods.md
similarity index 98%
rename from base/static/reports/20180527/pipelines.md
rename to base/static/reports/20170531/methods.md
index 36280ca3..992b9566 100644
--- a/base/static/reports/20180527/pipelines.md
+++ b/base/static/reports/20170531/methods.md
@@ -1,4 +1,4 @@
-# Methods / Pipelines
+# Methods
 __Note__: These methods operated on sequence data at the isotype level.
diff --git a/base/static/reports/20160408/pipelines.md b/base/static/reports/20180527/methods.md
similarity index 98%
rename from base/static/reports/20160408/pipelines.md
rename to base/static/reports/20180527/methods.md
index 36280ca3..992b9566 100644
--- a/base/static/reports/20160408/pipelines.md
+++ b/base/static/reports/20180527/methods.md
@@ -1,4 +1,4 @@
-# Methods / Pipelines
+# Methods
 __Note__: These methods operated on sequence data at the isotype level.
diff --git a/base/static/reports/20200815/pipelines.md b/base/static/reports/20200815/methods.md
similarity index 99%
rename from base/static/reports/20200815/pipelines.md
rename to base/static/reports/20200815/methods.md
index 311a92c0..464ca73f 100644
--- a/base/static/reports/20200815/pipelines.md
+++ b/base/static/reports/20200815/methods.md
@@ -1,4 +1,4 @@
-# Methods / Pipelines
+# Methods
 This tab links to the nextflow pipelines used to process wild isolate sequence data.
diff --git a/base/static/reports/20200815/release_notes.md b/base/static/reports/20200815/release_notes.md
index 13c4acd4..fa59e884 100644
--- a/base/static/reports/20200815/release_notes.md
+++ b/base/static/reports/20200815/release_notes.md
@@ -1,5 +1,5 @@
 The 20200815 release includes genotypes from whole-genome sequences and reduced representation (RAD) sequencing. Genotypes are compared for concordance, and strains that are 99.95% identical to each other are [grouped into isotypes]({{ url_for("primary.help_item", filename="FAQ", _anchor="strain-groups") }}).
-One strain within each isotype is the reference strain for that isotype. To look up isotype assignment, see Alignment Data tab.
-All isotype reference strains are [available on CeNDR]({{ url_for("strain.strain_catalog") }}).
+One strain within each isotype is the reference strain for that isotype.
+All isotype reference strains are [available on CeNDR]({{ url_for("strains.strains_catalog") }}).
diff --git a/base/static/reports/20210121/alignment_report.html b/base/static/reports/20210121/alignment_report.html
new file mode 100644
index 00000000..7a94a49a
--- /dev/null
+++ b/base/static/reports/20210121/alignment_report.html
@@ -0,0 +1,2926 @@
+[HTML head and widget boilerplate omitted; recovered text content follows]
+Overview
+
+* Total strains: 1238
+* Sequenced libraries: 2152
+* Median mapped reads: 36 million
+* Median coverage: 32x
+
+Reads Mapped by Strain
+[interactive plot omitted]
+
+Alignment Metrics
+[interactive plot omitted]
diff --git a/base/static/reports/20210121/concordance_report.html b/base/static/reports/20210121/concordance_report.html
new file mode 100644
index 00000000..5c93f031
--- /dev/null
+++ b/base/static/reports/20210121/concordance_report.html
@@ -0,0 +1,2995 @@
+[HTML head and widget boilerplate omitted; recovered text content follows]
+Overview
+
+Concordance analysis allows us to group strains that are genetically almost identical into an isotype. The following table summarizes the number of isotypes from previous and current releases, and the number of strains that belong to those isotypes.
+
+| | Isotypes | Strains Included | Strains with WGS data | Strains with RAD-seq data |
+| --- | --- | --- | --- | --- |
+| Isotypes from Previous Release | 400* | 910 | 770 | 140 |
+| New Isotypes from Current Release | 140 | 468 | 468 | 0 |
+| Total | 540 | 1378 | 1238 | 140 |
+
+*Four strains were reduced to a single isotype group, so this number was reduced from 403 to 400 (see below for details).
+
+Concordance score distribution and cutoff
+
+We examined the pairwise concordance scores of all strains. Concordance values for every pair of strains were calculated as the number of shared variant sites divided by the total number of variants called for each pair. If the concordance score was more than 0.9997, the strain pair is grouped into the same isotype.
+[plot omitted]
+
+Search for concordance for strain pairs
+
+Strain comparisons are listed in the table below. Only concordance scores > 0.999 are shown.
+[interactive table omitted]
+
+* ECA2649 had high concordance to two distinct isotype groups. However, upon investigating the relationships to each group, we chose to manually place ECA2649 with isotype ECA2551, not ECA2672.
+
+Changes from previous release
+
+* This release used only SNVs for isotype assignment.
+* Four strains (ECA2677, ECA2678, ECA2679, and ECA2686) were removed because they were frozen as “dirty” strains and have now been cleaned, frozen, and re-sequenced. Because these four strains were isotype reference strains, a new isotype reference strain was assigned. It appears that the other six strains in these isotype groups changed isotypes, but they remain in the same group as before with a new, clean isotype reference strain. Details can be found below.
+
+| Dirty Strain (Old) | Clean Strain (New) | Previous Isotype | New Isotype | Other Strains in Isotype Group |
+| --- | --- | --- | --- | --- |
+| ECA2677 | ECA1202 | ECA2677 | ECA1202 | ECA1201 |
+| ECA2678 | ECA1206 | ECA2678 | ECA1206 | ECA1973, ECA1979, ECA1983 |
+| ECA2679 | ECA1211 | ECA2679 | ECA1212 | ECA1209, ECA1211 |
+| ECA2686 | NA* | ECA2686 | ECA1243 | NA |
+
+*The clean strain for ECA2686 is ECA2803 but has not been sequenced yet.
+
+* Strains ECA1465, ECA1467, ECA1493, and ECA1515 were each their own isotype in the 20200815 release. They were grouped into the same isotype in this release, which reduced the count of previous isotypes from 403 to 400. Their pairwise concordance values in this release (top) and in the 20200815 release (bottom): [plot omitted]
diff --git a/base/static/reports/20210121/gatk_report.html b/base/static/reports/20210121/gatk_report.html
new file mode 100644
index 00000000..462df631
--- /dev/null
+++ b/base/static/reports/20210121/gatk_report.html
@@ -0,0 +1,3898 @@
+[HTML head and widget boilerplate omitted; recovered text content follows]
+Overview
+
+* Total strains: 1,238
+* Total SNVs before filter: 5,065,794
+* Total SNVs after filter: 2,970,933
+* Total indels before filter: 1,908,754
+* Total indels after filter: 611,052
+
+Site-level quality filters
+[plot omitted]
+
+Number of variant sites removed by each filter
+
+Each bar shows counts of variant sites removed by the combinations of filters indicated by the dots below.
+[plot omitted]
+
+Pre-filter statistics
+
+Variant counts for each strain based on the VCF containing all variant sites called by GATK (“soft-filter.vcf”).
+[interactive table omitted]
+
+Post-filter statistics
+
+Variant counts for each strain based on the VCF containing only sites that pass all filters (“hard-filter.vcf”). All heterozygous sites on the main chromosomes were converted to either homozygous or missing. The remaining heterozygous sites are all located on the mitochondrial chromosome.
+[interactive table omitted]
+
+Relationship between heterozygous SNVs and total SNVs
+
+The number of variants versus the number of heterozygous calls shows strains that might have mixed genotypes or low-quality calls (high het but low variation).
+[plot omitted]
diff --git a/base/static/reports/20210121/methods.md b/base/static/reports/20210121/methods.md
new file mode 100644
index 00000000..3b7c1f6e
--- /dev/null
+++ b/base/static/reports/20210121/methods.md
@@ -0,0 +1,113 @@
+# Methods / Pipelines
+
+This tab links to the nextflow pipelines used to process wild isolate sequence data.
+
+![](/static/img/overview.drawio.svg)
+
+### FASTQ QC and Trimming
+__[andersenlab/trim-fq-nf](https://github.com/andersenlab/trim-fq-nf) -- (Latest [d637d0b](https://github.com/AndersenLab/trim-fq-nf/tree/d637d0b))__
+
+Adapters and low quality sequences were trimmed off of raw reads using [fastp (0.20.0)](https://github.com/OpenGene/fastp) and default parameters. Reads shorter than 20 bp after trimming were discarded.
+
+### __Alignment__
+
+__[andersenlab/alignment-nf](https://github.com/andersenlab/alignment-nf) -- ([1c96b4a](https://github.com/AndersenLab/alignment-nf/tree/1c96b4a))__
+
+Trimmed reads were aligned to the _C. elegans_ reference genome (project PRJNA13758, version WS276 from [WormBase](https://wormbase.org/)) using `bwa mem` [BWA (0.7.17)](http://bio-bwa.sourceforge.net/). Libraries of the same strain were merged together and indexed by [sambamba (0.7.0)](https://lomereiter.github.io/sambamba/). Duplicates were flagged with [Picard (2.21.3)](https://broadinstitute.github.io/picard/).
+
+Strains with less than 14x coverage were not included in the alignment report and subsequent analyses.
+
+### __Variant Calling__
+
+__[andersenlab/wi-gatk](https://github.com/andersenlab/wi-gatk) -- ([a84ba4f](https://github.com/AndersenLab/wi-gatk/tree/a84ba4f))__
+
+Variants for each strain were called using `gatk HaplotypeCaller`. After the initial variant calling, variants were combined and then recalled jointly using `gatk GenomicsDBImport` and `gatk GenotypeGVCFs` [GATK (4.1.4.0)](https://gatk.broadinstitute.org/hc/en-us/sections/360007279452-4-1-4-0?page=6#articles).
+
+The variants were further processed and filtered with custom-written scripts for [heterozygous SNV polarization](https://github.com/AndersenLab/wi-gatk/blob/master/env/het_polarization.nim), GATK (4.1.4.0), and [bcftools (1.10)](http://samtools.github.io/bcftools/bcftools.html).
+
+> __Warning__: Heterozygous polarization and filtering thresholds were optimized for single nucleotide variants (SNVs).
+>
+> Additionally, insertion or deletion (indel) variants less than 50 bp are more reliably called than indel variants greater than this size. In general, indel variants should be considered less reliable than SNVs.
+
+#### Site-level filtering and annotation
+
+__[andersenlab/post-gatk-nf](https://github.com/andersenlab/post-gatk-nf) -- ([84d3a28](https://github.com/AndersenLab/post-gatk-nf/tree/add_annotation/84d3a28))__
+
+1. __Heterozygous SNV polarization__: Because _C. elegans_ is a selfing species, heterozygous SNV sites are most likely errors. Biallelic heterozygous SNVs were converted to homozygous REF or ALT if we had sufficient evidence for conversion. Only biallelic SNVs that are not on mitochondrial DNA were included in this step. Specifically, the SNV was converted if the normalized Phred-scaled likelihoods (PL) met the following criteria (a smaller PL means more confidence); this rule is sketched in code below. Any heterozygous SNVs that did not meet these criteria were left unchanged.
+    * If PL-ALT/PL-REF <= 0.5 and PL-ALT <= 200, convert to homozygous ALT
+    * If PL-REF/PL-ALT <= 0.5 and PL-REF <= 200, convert to homozygous REF
+
+2. __Soft filtering__: Low quality sites were flagged but not modified or removed.
+
+    For the __site-level__ soft filter, variant sites that meet the following conditions were flagged as PASS. These stats were computed across all samples for each site.
+
+    * Variant quality (QUAL) > 30 (this filter is very lenient; only three sites failed)
+    * Variant quality normalized by read depth (QD) > 20
+    * Strand bias of ALT calls: strand odds ratio (SOR) < 5
+    * Strand bias of ALT calls: FisherStrand (FS) < 100
+    * Fraction of samples with missing genotype < 95%
+    * Fraction of samples with heterozygous genotype after heterozygous site polarization < 10%
+
+    For the __sample-level__ soft filter, genotypes that meet the following filters were flagged as PASS for each site in each sample:
+
+    * Read depth (DP) > 5
+    * Site is not heterozygous
+
+3. __SnpEff Annotation__: The predicted impact of each variant site was annotated with [SnpEff (4.3.1t)](https://pcingola.github.io/SnpEff/SnpEff.html).
+
+4. For the hard-filtered VCF, low quality sites were modified or removed using the following criteria.
+
+    * For the __site-level__ hard filter, variant sites not flagged as PASS were removed.
+    * For the __sample-level__ hard filter, genotypes not flagged as PASS were converted to missing (`./.`), with the exception that heterozygous sites on the mitochondrial chromosome were kept unchanged.
+
+    After the steps above, sites that are invariant (`0/0` or `1/1` across all samples, not counting missing `./.`) were removed.
+
+5. __BCSQ Annotation__: Variant impacts were then annotated using `bcftools csq`, which takes into consideration nearby variants and annotates variant impacts based on haplotypes.
+
+#### Determination of filter thresholds
+
+We re-examined our filter thresholds for this release. A variant simulation pipeline was used as part of this process:
+
+* __Variant Simulations__ - [andersenlab/variant-simulations-nf](https://github.com/andersenlab/variant-simulations-nf)
+
+Please see the [filter optimization report](/static/reports/filter_optimization/20200803_optimization_report.html) for further details.
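A minimal Python sketch of the polarization rule from step 1 above, assuming the normalized PLs for the homozygous REF and ALT genotypes have already been parsed out of the VCF record; the function and argument names are illustrative, and the production implementation is the Nim script linked under Variant Calling:

```python
def polarize_het(pl_ref: int, pl_alt: int) -> str:
    """Decide whether a biallelic heterozygous SNV should be converted.

    pl_ref / pl_alt are normalized Phred-scaled likelihoods for the
    homozygous REF and homozygous ALT genotypes (smaller = more confident).
    Returns 'ALT', 'REF', or 'HET' (leave the call unchanged).
    """
    if pl_ref > 0 and pl_alt / pl_ref <= 0.5 and pl_alt <= 200:
        return 'ALT'  # strong evidence for homozygous ALT
    if pl_alt > 0 and pl_ref / pl_alt <= 0.5 and pl_ref <= 200:
        return 'REF'  # strong evidence for homozygous REF
    return 'HET'      # insufficient evidence; leave heterozygous


# Example: PL(hom-REF)=500, PL(hom-ALT)=120 -> converted to homozygous ALT.
assert polarize_het(500, 120) == 'ALT'
```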
+### __Isotype Assignment__
+
+__[andersenlab/concordance-nf](https://github.com/andersenlab/concordance-nf) -- ([5160f9f](https://github.com/andersenlab/concordance-nf/tree/5160f9f))__
+
+Isotype groups contain strains that are likely identical to each other and were sampled from the same isolation locations. For any phenotypic assay, only the isotype reference strain needs to be scored. Users interested in individual strain genotypes can use the strain-level data.
+
+Strains were grouped into isotypes using the following steps (a code sketch of steps 1 and 2 appears at the end of these methods):
+
+1. Using all high quality variants (only SNPs from the hard-filtered VCF) and `bcftools gtcheck`, concordance for each pair of strains was calculated as a fraction of shared variants over the total variants in each pair.
+
+2. Strain pairs with concordance > 0.9997 were grouped into the same isotype group. The threshold 0.9997 was determined by:
+
+    * Examining the distribution of concordance scores.
+    * Capturing similarity between strains to minimize the number of strains that get assigned to multiple isotype groups.
+    * Agreement with the isotype groups in previous releases.
+
+3. The following issues, which were rare, were resolved on a case-by-case basis:
+
+    * If one strain was assigned to multiple isotypes.
+    * If one isotype from previous releases matches to multiple new isotype groups.
+    * If one new isotype group contains strains from multiple isotypes from previous releases.
+
+When issues arose, the pairwise concordance between all strains within an isotype was examined manually. Strains and isotypes may be re-assigned with the goal that strains within the same isotype group should have high concordance with each other, and strains from different isotype groups should have lower concordance.
+
+### __Tree Generation__
+
+__[andersenlab/post-gatk-nf](https://github.com/andersenlab/post-gatk-nf) -- ([84d3a28](https://github.com/AndersenLab/post-gatk-nf/tree/add_annotation/84d3a28))__
+
+Trees were generated by converting the hard-filtered VCF to Phylip format using [vcf2phylip (030b8d)](https://github.com/edgardomortiz/vcf2phylip/tree/030b8d). Then, the Phylip format was converted to Stockholm format using [Bioconvert (0.3.0)](https://bioconvert.readthedocs.io/en/master/index.html), which was then used to construct a tree with [QuickTree (2.5)](https://github.com/tseemann/quicktree) using default settings. The trees were plotted with [FigTree (1.4.4)](http://tree.bio.ed.ac.uk/software/figtree/) rooting on the most diverse strain XZ1516.
+
+### __Imputation__
+
+Imputation was not done for this release.
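A minimal sketch of the isotype grouping in steps 1 and 2 above, assuming each strain's hard-filtered SNVs have already been reduced to a set of `(chrom, pos, alt)` keys and reading "total variants in each pair" as the union of the pair's sites; the real pipeline computes concordance with `bcftools gtcheck`, and all names here are illustrative:

```python
from itertools import combinations

CUTOFF = 0.9997  # concordance threshold from step 2

def concordance(a: set, b: set) -> float:
    """Shared variant sites divided by the total variant sites in the pair."""
    total = len(a | b)
    return len(a & b) / total if total else 0.0

def group_isotypes(strains: dict) -> list:
    """strains maps strain name -> set of variant keys; returns isotype groups."""
    parent = {name: name for name in strains}  # union-find over strain names

    def find(name):
        while parent[name] != name:
            parent[name] = parent[parent[name]]  # path compression
            name = parent[name]
        return name

    for s1, s2 in combinations(strains, 2):
        if concordance(strains[s1], strains[s2]) > CUTOFF:
            parent[find(s1)] = find(s2)  # merge the two candidate groups

    groups = {}
    for name in strains:
        groups.setdefault(find(name), []).append(name)
    return list(groups.values())
```

Because grouping is transitive, any chain of pairs above the cutoff collapses into one isotype group; borderline cases (such as ECA2649 in the concordance report above) still require the manual review described in step 3.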
diff --git a/base/static/reports/20210121/reads_mapped_by_strain.tsv b/base/static/reports/20210121/reads_mapped_by_strain.tsv new file mode 100644 index 00000000..05bdc06f --- /dev/null +++ b/base/static/reports/20210121/reads_mapped_by_strain.tsv @@ -0,0 +1,1239 @@ +Sample raw_total_sequences reads_mapped reads_mapped_percent coverage +AB1 85.02 84.5 99.4 64 +AB4 71.15 70.56 99.2 52 +BRC20067 38.8 35.9 92.5 30 +BRC20113 43.18 41.42 95.9 38 +BRC20231 56.18 47.58 84.7 43 +BRC20263 51.1 48.94 95.8 45 +CB4852 204.11 203.16 99.5 144 +CB4854 51.09 50.83 99.5 38 +CB4856 86.55 85.21 98.5 61 +CB4932 60.81 60.53 99.5 45 +CX11254 63.1 62.09 98.4 46 +CX11262 49.89 49.3 98.8 38 +CX11264 71.94 71.13 98.9 52 +CX11271 50.57 50.32 99.5 39 +CX11276 60.66 59.95 98.8 45 +CX11285 109.76 108.51 98.9 82 +CX11292 54.06 53.65 99.2 39 +CX11307 70.35 69.66 99 51 +CX11314 71.66 70.94 99 53 +CX11315 61.66 60.93 98.8 49 +DL200 44.16 43.85 99.3 35 +DL226 97.65 96.7 99 71 +DL238 66.56 65.36 98.2 49 +ECA1069 74.79 74.43 99.5 51 +ECA1070 22 21.84 99.3 25 +ECA1071 59.23 58.9 99.4 34 +ECA1072 34.69 34.53 99.5 20 +ECA1073 36.52 36.22 99.2 24 +ECA1074 65.67 65.29 99.4 39 +ECA1184 38.71 38.28 98.9 34 +ECA1185 33.35 32.82 98.4 26 +ECA1186 32.73 32.21 98.4 25 +ECA1187 42.37 42.05 99.2 36 +ECA1188 36.13 35.84 99.2 33 +ECA1189 25.32 25.09 99.1 23 +ECA1190 35.58 35.28 99.2 32 +ECA1191 27.19 26.83 98.7 22 +ECA1192 43.2 42.82 99.1 38 +ECA1193 29.44 28.35 96.3 23 +ECA1194 23.62 23.42 99.2 20 +ECA1195 38.17 35.32 92.5 28 +ECA1196 16.59 16.44 99.1 16 +ECA1197 30.04 29.78 99.1 27 +ECA1198 37.19 36.73 98.8 34 +ECA1199 35.22 34.9 99.1 32 +ECA1200 35.4 35.04 99 32 +ECA1201 28.31 27.85 98.4 22 +ECA1202 33.26 32.43 97.5 30 +ECA1203 29.92 29.48 98.5 24 +ECA1204 31.5 30.78 97.7 27 +ECA1205 29.34 28.63 97.6 22 +ECA1206 37.45 35.77 95.5 32 +ECA1207 34.88 34.33 98.4 30 +ECA1208 31.3 30.94 98.9 26 +ECA1209 29.27 29.02 99.1 27 +ECA1210 33.7 33.42 99.2 30 +ECA1211 23.6 23.3 98.8 22 +ECA1212 39.75 38.93 97.9 31 +ECA1213 26.66 26.14 98 22 +ECA1214 32.23 31.8 98.7 26 +ECA1215 27.19 26.66 98 23 +ECA1216 36.07 35.74 99.1 29 +ECA1217 37.84 36.67 96.9 29 +ECA1218 22.34 22.03 98.6 19 +ECA1220 49.25 48.76 99 39 +ECA1221 16.44 16.31 99.2 15 +ECA1222 20.68 20.05 96.9 17 +ECA1223 45.01 43.73 97.2 35 +ECA1224 34.71 33.96 97.8 27 +ECA1225 22.41 21.83 97.4 18 +ECA1226 18.86 18.68 99.1 16 +ECA1227 28.07 27.79 99 25 +ECA1228 22.21 22.01 99.1 19 +ECA1229 38.85 38.35 98.7 30 +ECA1230 33.28 32.71 98.3 30 +ECA1231 25.08 24.82 98.9 23 +ECA1232 28.52 28.21 98.9 22 +ECA1233 26.48 26.23 99.1 21 +ECA1234 27.43 27.21 99.2 22 +ECA1235 38.4 38.07 99.1 34 +ECA1236 18.98 18.78 98.9 16 +ECA1237 38.12 37.83 99.2 31 +ECA1238 53.65 53.22 99.2 42 +ECA1239 36.28 36 99.2 29 +ECA1240 32.69 32.43 99.2 26 +ECA1241 27.94 27.69 99.1 23 +ECA1242 20.38 20.2 99.1 17 +ECA1243 24.61 24.45 99.4 21 +ECA1244 27.98 27.82 99.4 23 +ECA1245 23.42 23.27 99.4 19 +ECA1246 23.77 23.57 99.2 19 +ECA1247 38.58 38.22 99.1 31 +ECA1249 37.26 36.95 99.2 33 +ECA1250 18.19 17.87 98.2 15 +ECA1251 23.79 23.45 98.6 19 +ECA1252 40.5 40.07 98.9 36 +ECA1253 39.76 39.34 98.9 31 +ECA1254 33.21 32.93 99.2 31 +ECA1255 36.77 35.99 97.9 29 +ECA1256 31.2 30.57 98 24 +ECA1257 28.03 27.68 98.7 24 +ECA1258 24.8 24.49 98.7 21 +ECA1259 25.91 25.72 99.3 21 +ECA1260 34.17 33.89 99.2 26 +ECA1261 21.32 21.14 99.1 18 +ECA1262 29.11 28.83 99 23 +ECA1263 26.3 26.04 99 20 +ECA1264 25.22 24.88 98.6 21 +ECA1265 19.88 19.48 98 16 +ECA1266 50.19 49.5 98.6 38 +ECA1267 22.53 22.26 98.8 19 +ECA1268 28.01 27.5 98.2 21 +ECA1269 28.2 27.13 96.2 22 
+ECA1270 22.72 21.8 96 19 +ECA1271 24.43 24.24 99.2 20 +ECA1272 30.09 29.84 99.1 24 +ECA1273 37.22 36.86 99 29 +ECA1274 21.12 20.89 98.9 18 +ECA1275 26.47 26.04 98.4 21 +ECA1276 32.86 32.44 98.7 27 +ECA1277 28.93 28.7 99.2 23 +ECA1278 29.15 28.91 99.2 24 +ECA1279 37.17 36.88 99.2 29 +ECA1281 43.3 42.98 99.3 35 +ECA1282 29.06 28.86 99.3 24 +ECA1283 38.07 37.77 99.2 29 +ECA1284 35.75 35.47 99.2 28 +ECA1285 29.4 29.14 99.1 24 +ECA1286 40.76 40.41 99.1 32 +ECA1287 28.3 28.05 99.1 22 +ECA1288 51.62 51.04 98.9 40 +ECA1289 33.46 32.89 98.3 26 +ECA1290 26.86 26.51 98.7 21 +ECA1291 36.57 36.39 99.5 29 +ECA1292 36.06 35.86 99.4 28 +ECA1293 55.17 54.71 99.2 45 +ECA1294 29.34 29.09 99.2 23 +ECA1295 37.42 37.21 99.4 30 +ECA1296 34.8 34.6 99.4 27 +ECA1297 25.67 25.47 99.2 24 +ECA1298 49.6 49.34 99.5 38 +ECA1316 66.18 65.97 99.7 47 +ECA1385 26.97 26.71 99.1 21 +ECA1389 34.54 34.36 99.5 28 +ECA1391 30.3 30.12 99.4 23 +ECA1409 46.05 45.52 98.8 29 +ECA1413 36.27 35.84 98.8 28 +ECA1415 27.68 27.29 98.6 22 +ECA1441 28.26 27.95 98.9 23 +ECA1465 25.56 25.14 98.4 20 +ECA1467 28.23 27.76 98.3 22 +ECA1493 42.71 36.01 84.3 28 +ECA1515 35.46 34.11 96.2 26 +ECA1689 25.94 25.76 99.3 19 +ECA1691 33.81 33.56 99.3 28 +ECA1693 23.73 23.58 99.3 21 +ECA1695 39.7 39.41 99.3 32 +ECA1709 21.19 20.28 95.7 18 +ECA1711 29.63 29.07 98.1 25 +ECA1713 35.68 34.78 97.5 29 +ECA1715 37.1 33.06 89.1 29 +ECA1717 35.32 32.59 92.3 28 +ECA1718 25.78 25.49 98.9 24 +ECA1721 27.11 26.87 99.1 20 +ECA1723 41.15 40.52 98.5 34 +ECA1725 42.03 41.53 98.8 27 +ECA1727 33.87 33.35 98.5 27 +ECA1729 21.63 21.31 98.5 18 +ECA1731 45.75 37.16 81.2 25 +ECA1733 37.52 37.21 99.2 25 +ECA1735 39.78 39.38 99 28 +ECA1737 33.67 33.18 98.5 28 +ECA1739 35.81 35.33 98.7 24 +ECA1741 41.25 40.76 98.8 34 +ECA1743 16.23 16 98.6 15 +ECA1745 41.55 41.18 99.1 28 +ECA1747 47.45 46.57 98.1 39 +ECA1749 44.39 43.6 98.2 36 +ECA1751 50.84 50.18 98.7 41 +ECA1753 34.98 34.46 98.5 30 +ECA1755 33.33 33.14 99.4 23 +ECA1757 42.31 42.07 99.4 28 +ECA1759 27.04 26.86 99.3 18 +ECA1761 27.02 26.71 98.8 19 +ECA1763 46.41 45.79 98.7 35 +ECA1765 25.81 25.53 98.9 18 +ECA1767 23.01 22.71 98.7 19 +ECA1769 49.42 48.63 98.4 40 +ECA1771 39.53 38.99 98.7 33 +ECA1779 30.36 30.05 99 24 +ECA1781 59.92 59.52 99.3 43 +ECA1783 30.81 30.56 99.2 25 +ECA1785 27.88 27.68 99.3 20 +ECA1787 18.85 18.71 99.2 14 +ECA1789 19.52 19.35 99.1 16 +ECA1791 23.12 22.93 99.2 17 +ECA1793 25.15 24.8 98.6 22 +ECA1795 24.35 24.12 99.1 18 +ECA1797 24.65 24.44 99.1 17 +ECA1799 28.64 28.49 99.5 24 +ECA1801 44.76 44.5 99.4 42 +ECA1803 31.69 31.5 99.4 25 +ECA1805 39.45 39.24 99.5 34 +ECA1807 34.1 33.79 99.1 27 +ECA1809 30.27 30.02 99.2 24 +ECA1811 26.45 26.21 99.1 22 +ECA1813 32.66 32.46 99.4 28 +ECA1815 34.97 34.73 99.3 28 +ECA1817 35.07 34.78 99.2 27 +ECA1819 28.35 28.13 99.2 23 +ECA1821 45.27 43.07 95.2 36 +ECA1823 24.65 22.72 92.1 19 +ECA1825 34.02 30.06 88.4 24 +ECA1827 30.78 27.07 87.9 21 +ECA1829 56.58 51.18 90.5 41 +ECA1831 178.32 18.98 10.6 16 +ECA1833 31.37 29.9 95.3 24 +ECA1835 43.79 43.41 99.1 30 +ECA1837 26.33 25.82 98.1 22 +ECA1839 39.32 38 96.7 31 +ECA1841 122.4 63.48 51.9 45 +ECA1843 48.16 26.34 54.7 24 +ECA1845 22.65 22.35 98.7 16 +ECA1847 26.51 24.29 91.6 22 +ECA1849 29.26 27.71 94.7 23 +ECA1851 43.69 41.63 95.3 36 +ECA1853 88.25 45.67 51.7 39 +ECA1855 15.37 15.08 98.1 15 +ECA1857 17.02 16.59 97.5 16 +ECA1859 118.55 44.12 37.2 33 +ECA1861 63.31 37.21 58.8 30 +ECA1863 51.16 34.67 67.8 27 +ECA1865 35.71 34.63 97 27 +ECA1867 38.92 37.62 96.6 29 +ECA1869 20.2 19.88 98.4 17 +ECA1871 41.18 40.81 99.1 32 +ECA1873 31.16 
30.72 98.6 26 +ECA1875 44.71 44.4 99.3 35 +ECA1877 22.85 22.49 98.4 16 +ECA1878 22.02 21.78 98.9 20 +ECA1885 27.54 27.18 98.7 18 +ECA1887 40.38 39.82 98.6 26 +ECA1889 24.99 24.79 99.2 17 +ECA189 44.32 43.73 98.7 35 +ECA1891 26.16 25.98 99.3 17 +ECA1895 31.82 31.53 99.1 24 +ECA1897 46.84 46.48 99.2 34 +ECA190 57.19 56.47 98.8 44 +ECA1901 33.73 32.9 97.6 28 +ECA1903 41.78 40.11 96 32 +ECA1907 35.01 34.64 98.9 23 +ECA1909 28.71 28.28 98.5 19 +ECA191 47.12 46.1 97.8 42 +ECA1911 25.86 21.19 82 15 +ECA1913 22.71 22.49 99 15 +ECA1915 35.28 35.03 99.3 25 +ECA1917 57.11 56.55 99 51 +ECA1919 29.14 28.87 99.1 26 +ECA192 44.35 43.8 98.7 29 +ECA1921 26.35 26.15 99.3 20 +ECA1923 23.86 23.69 99.3 17 +ECA1925 46.75 45.98 98.4 36 +ECA1927 27.52 27.26 99.1 25 +ECA1929 21.79 21.61 99.2 17 +ECA193 49.83 49.17 98.7 41 +ECA1931 28.44 28.26 99.4 24 +ECA1933 32.61 32.4 99.4 26 +ECA1935 36.38 36.09 99.2 30 +ECA1937 31.57 31.36 99.3 27 +ECA1939 34.02 33.8 99.4 29 +ECA1943 40.66 40.4 99.4 33 +ECA1951 33.05 32.82 99.3 27 +ECA1953 36.33 36.08 99.3 28 +ECA1967 22.75 22.51 98.9 16 +ECA1969 44.99 44.58 99.1 30 +ECA1971 43.43 41.22 94.9 32 +ECA1973 29.22 28.6 97.9 24 +ECA1975 52.97 52.04 98.2 34 +ECA1977 29.93 29.68 99.2 20 +ECA1979 37.19 34.91 93.9 29 +ECA1981 31.97 29.1 91 20 +ECA1983 33.92 33.35 98.3 22 +ECA1985 25.99 24.33 93.6 17 +ECA1987 39.58 37.69 95.2 31 +ECA1989 28.34 27.79 98.1 19 +ECA1991 37.42 33.4 89.3 22 +ECA1995 36.72 36.33 98.9 25 +ECA1997 34.36 33.88 98.6 27 +ECA2041 39.88 39.63 99.4 32 +ECA2043 26.4 26.15 99.1 22 +ECA2065 28.87 28.64 99.2 23 +ECA2067 35.28 35.03 99.3 28 +ECA2069 28.88 28.65 99.2 23 +ECA2071 28.03 27.84 99.3 23 +ECA2073 39.56 39.28 99.3 33 +ECA2075 29.58 29.34 99.2 24 +ECA2079 20.45 20.26 99.1 17 +ECA2081 48.34 47.93 99.2 40 +ECA2085 37.56 37.25 99.2 31 +ECA2091 109.69 107.5 98 76 +ECA2095 40.02 39.23 98 31 +ECA2097 29.48 29.02 98.4 19 +ECA2099 34.42 34.01 98.8 29 +ECA2101 35.84 35.44 98.9 24 +ECA2103 40.95 40.5 98.9 27 +ECA2107 30.74 30.55 99.4 22 +ECA2109 24.72 24.42 98.8 18 +ECA2111 39.49 35.27 89.3 26 +ECA2117 37.44 37.11 99.1 25 +ECA2119 53.52 53.08 99.2 37 +ECA2121 34.46 34.07 98.9 22 +ECA2122 21.37 21.14 98.9 20 +ECA2125 30.36 29.98 98.8 20 +ECA2127 51.16 50.69 99.1 33 +ECA2131 25.47 25.24 99.1 19 +ECA2135 25.81 25.53 98.9 18 +ECA2139 23.66 23.4 98.9 16 +ECA2143 18.41 18.24 99.1 15 +ECA2147 20.61 20.41 99.1 15 +ECA2151 50.69 50.13 98.9 36 +ECA2155 45.17 44.76 99.1 32 +ECA2159 53.85 53.28 98.9 38 +ECA2163 21.27 21.04 98.9 16 +ECA2167 31.24 30.9 98.9 22 +ECA2171 19.3 19.11 99 14 +ECA2175 50.26 49.75 99 35 +ECA2179 26.8 26.46 98.7 19 +ECA2183 21.01 20.77 98.9 16 +ECA2187 30.08 29.76 98.9 20 +ECA2191 26.44 26 98.3 19 +ECA2195 32.39 31.92 98.5 22 +ECA2199 24.83 24.47 98.6 18 +ECA2203 26.45 26.08 98.6 18 +ECA2207 29.12 28.7 98.5 20 +ECA2247 30.52 30.28 99.2 26 +ECA2249 23.47 23.27 99.1 19 +ECA2251 34.95 34.7 99.3 28 +ECA2253 23.46 23.25 99.1 20 +ECA2255 22.37 22.19 99.2 18 +ECA2281 68.17 67.38 98.8 46 +ECA2283 19.46 19.22 98.8 14 +ECA2285 22.55 22.28 98.8 17 +ECA2287 29.28 28.92 98.8 21 +ECA2289 18.74 18.55 99 15 +ECA2291 31.33 30.98 98.9 24 +ECA2307 36.95 36.62 99.1 27 +ECA2309 46.13 45.56 98.8 41 +ECA2311 48.02 47.4 98.7 42 +ECA2313 18.29 18.09 98.9 14 +ECA2315 24.06 23.79 98.9 18 +ECA2319 35.25 34.89 99 24 +ECA2321 64.49 63.5 98.5 59 +ECA2322 20.11 19.96 99.3 17 +ECA2324 35.18 34.88 99.2 29 +ECA2326 28.47 28.27 99.3 24 +ECA2328 30.68 30.46 99.3 24 +ECA2330 19.7 19.54 99.2 17 +ECA2332 34.54 34.32 99.4 29 +ECA2334 30.93 30.5 98.6 25 +ECA2336 43.73 43.17 98.7 30 +ECA2338 34.11 33.65 
98.7 28 +ECA2340 26.5 26.09 98.5 20 +ECA2342 28.44 28.25 99.3 23 +ECA2344 52.88 52.03 98.4 42 +ECA2348 29.89 29.64 99.2 24 +ECA2350 31.93 31.69 99.3 26 +ECA2352 23.13 22.99 99.4 20 +ECA2354 27.09 26.81 99 22 +ECA2356 22.57 22.35 99 19 +ECA2358 30.05 29.79 99.1 26 +ECA2360 23.67 23.48 99.2 18 +ECA2362 41.48 41.09 99 33 +ECA2365 38.08 37.76 99.2 26 +ECA2367 27.95 27.68 99 18 +ECA2375 32.56 31.87 97.9 22 +ECA2377 34.27 33.74 98.5 24 +ECA2401 38.36 27.45 71.6 20 +ECA2403 31.99 31.6 98.8 23 +ECA2405 30.72 30.35 98.8 20 +ECA2413 33.7 33.17 98.4 25 +ECA2415 35.42 34.88 98.5 24 +ECA2417 59.72 58.78 98.4 41 +ECA2419 41.51 41.13 99.1 30 +ECA2421 36.47 34.2 93.8 24 +ECA2423 40.92 40.54 99.1 30 +ECA2429 36.87 34.94 94.8 23 +ECA243 78.84 77.43 98.2 56 +ECA2431 91.41 90.75 99.3 64 +ECA2433 24.48 24.21 98.9 19 +ECA2435 85.25 84.55 99.2 59 +ECA2437 38.05 37.81 99.4 34 +ECA2439 18.27 18.05 98.8 14 +ECA2443 40.17 39.78 99 30 +ECA2445 26.38 25.98 98.5 20 +ECA245 50.76 50.35 99.2 39 +ECA2452 19.97 19.75 98.9 18 +ECA246 54.9 54.44 99.2 44 +ECA2467 35.19 34.75 98.7 25 +ECA2473 78.61 78.1 99.4 57 +ECA2475 80.55 79.95 99.3 59 +ECA2477 47.36 40.88 86.3 37 +ECA2479 49.44 49.05 99.2 45 +ECA248 44.84 44.46 99.2 31 +ECA2481 55.95 55.53 99.2 52 +ECA2482 26.64 23.83 89.4 22 +ECA2485 43.81 43 98.2 40 +ECA2487 39.67 39.07 98.5 36 +ECA2489 109.1 108.41 99.4 76 +ECA249 66.66 66.13 99.2 51 +ECA250 46.35 45.96 99.2 34 +ECA251 84.22 83.46 99.1 66 +ECA2521 23.71 23.58 99.5 16 +ECA2522 26.61 26.48 99.5 20 +ECA2523 24.25 24.11 99.4 18 +ECA2524 20.77 20.65 99.4 15 +ECA2525 24.09 23.94 99.4 17 +ECA2526 29.88 29.77 99.6 21 +ECA2527 25.85 25.71 99.5 19 +ECA2528 25.23 25.1 99.5 17 +ECA2529 34.67 34.53 99.6 25 +ECA253 46.8 46.77 99.9 35 +ECA2532 26.5 26.36 99.5 19 +ECA2533 42.83 42.6 99.5 30 +ECA2534 26.57 26.42 99.5 19 +ECA2535 35.2 35.07 99.6 25 +ECA2536 40.01 39.85 99.6 28 +ECA2537 31.59 31.41 99.5 22 +ECA254 19.46 18.64 95.8 13 +ECA2546 31.92 31.78 99.6 23 +ECA2547 25.6 24.46 95.6 18 +ECA2548 22.14 22.06 99.6 16 +ECA2549 45.72 45.61 99.8 32 +ECA2550 26.11 26.02 99.6 19 +ECA2551 30.32 30.21 99.6 22 +ECA2552 26.24 26.15 99.7 19 +ECA2553 25.44 25.31 99.5 18 +ECA2554 22.03 21.98 99.8 16 +ECA2555 22.89 22.81 99.7 16 +ECA2556 29.37 29.29 99.7 20 +ECA2557 25.88 25.76 99.6 19 +ECA2558 19.47 19.37 99.5 14 +ECA2559 29.9 29.81 99.7 21 +ECA2560 28.89 28.33 98.1 22 +ECA2561 27.34 27.19 99.5 20 +ECA2562 33.27 33.2 99.8 24 +ECA2563 26.81 26.76 99.8 20 +ECA2564 33.7 33.63 99.8 24 +ECA2565 35.1 35.04 99.8 26 +ECA2566 31.11 31.05 99.8 23 +ECA2567 26.82 26.77 99.8 19 +ECA2568 20.59 20.55 99.8 15 +ECA2569 22.35 21.56 96.5 16 +ECA2570 19.35 19.24 99.4 15 +ECA2571 21.59 21.44 99.3 16 +ECA2572 26.07 25.89 99.3 18 +ECA2573 29.06 28.87 99.3 20 +ECA2574 26.05 26 99.8 18 +ECA2575 18.3 18.27 99.8 14 +ECA2576 27.57 27.42 99.5 20 +ECA2577 29.21 29.15 99.8 21 +ECA2578 25.37 25.33 99.8 18 +ECA2579 22.67 22.53 99.4 17 +ECA2580 50.97 50.65 99.4 46 +ECA2581 33.08 32.86 99.3 23 +ECA2582 26.48 24.66 93.2 18 +ECA2583 50.43 50.15 99.4 45 +ECA2584 31.33 31.21 99.6 22 +ECA2585 30.39 30.26 99.6 22 +ECA2586 28.72 28.57 99.5 20 +ECA2589 24.69 24.6 99.6 17 +ECA259 46.64 46.24 99.1 36 +ECA2590 35.83 35.53 99.2 29 +ECA2591 25.36 25.29 99.7 19 +ECA2592 28.96 28.87 99.7 21 +ECA2593 21.94 21.79 99.3 15 +ECA2594 34.88 34.78 99.7 24 +ECA2595 25.82 25.64 99.3 19 +ECA2596 25.75 25.66 99.6 19 +ECA2597 19.29 19.18 99.5 14 +ECA2598 26.18 26.11 99.7 19 +ECA2599 30.18 29.94 99.2 24 +ECA2600 20.03 19.88 99.2 17 +ECA2601 66.64 66.12 99.2 53 +ECA2602 30.93 30.84 99.7 22 +ECA2603 34.95 34.8 
99.6 24 +ECA2605 33.47 33.23 99.3 28 +ECA2606 42.01 41.84 99.6 34 +ECA2607 37.32 37.2 99.7 32 +ECA2608 39.1 38.9 99.5 31 +ECA2609 37.79 37.66 99.7 27 +ECA2610 34.27 34.14 99.6 25 +ECA2611 33.21 32.93 99.2 28 +ECA2612 37.29 37.01 99.2 29 +ECA2615 28.81 28.74 99.8 21 +ECA2641 55.06 54.86 99.6 36 +ECA2642 39.99 39.84 99.6 27 +ECA2643 29.1 28.95 99.5 24 +ECA2644 26.72 23.1 86.4 18 +ECA2648 37.1 36.97 99.6 31 +ECA2649 26.62 26.49 99.5 22 +ECA2650 21.3 21.25 99.7 18 +ECA2651 22.01 21.86 99.3 19 +ECA2652 30.01 29.88 99.6 25 +ECA2653 20.07 19.91 99.2 16 +ECA2654 30.64 30.52 99.6 26 +ECA2656 28.84 28.74 99.7 25 +ECA2657 26.4 26.25 99.4 23 +ECA2658 26.37 26.18 99.3 23 +ECA2659 37.98 37.67 99.2 31 +ECA2660 29.73 29.54 99.4 21 +ECA2672 36.39 36.26 99.6 26 +ECA2673 22.41 22.33 99.6 15 +ECA2674 25.84 25.75 99.6 19 +ECA2675 20.78 20.71 99.7 16 +ECA2676 29.62 29.48 99.5 21 +ECA347 42.94 42.22 98.3 43 +ECA348 51.96 49.24 94.8 50 +ECA349 42.7 42.21 98.9 38 +ECA350 20.28 20.13 99.3 23 +ECA36 41.14 40.31 98 32 +ECA363 54.68 52.7 96.4 47 +ECA369 24.47 24.09 98.4 27 +ECA372 54.51 53.32 97.8 48 +ECA393 29.48 29.25 99.2 33 +ECA394 48.8 48.25 98.9 48 +ECA395 22.6 22.27 98.6 25 +ECA396 38.15 37.72 98.9 40 +ECA397 34.03 33.64 98.9 37 +ECA398 25.13 24.84 98.8 28 +ECA399 37.35 36.84 98.6 39 +ECA551 38.44 38.11 99.1 16 +ECA552 71.03 70.54 99.3 47 +ECA571 85.16 84.55 99.3 56 +ECA572 68.45 67.98 99.3 47 +ECA589 86.18 85.62 99.4 72 +ECA592 68.17 67.15 98.5 45 +ECA593 66.84 65.76 98.4 45 +ECA594 75.56 74.35 98.4 47 +ECA615 26.17 25.95 99.2 29 +ECA616 57.6 57.09 99.1 51 +ECA640 40.15 39.87 99.3 38 +ECA694 25.35 25.21 99.4 22 +ECA695 16.01 15.91 99.4 15 +ECA701 31.34 30.5 97.3 30 +ECA702 31.45 30.15 95.8 29 +ECA703 30.75 30.46 99 29 +ECA704 29.77 29.5 99.1 30 +ECA705 37.11 36.71 98.9 36 +ECA706 35.66 35.23 98.8 35 +ECA707 34.55 34.22 99.1 33 +ECA708 31.64 31.3 98.9 31 +ECA709 37.15 36.79 99 36 +ECA710 39.82 39.39 98.9 38 +ECA711 31.56 31.27 99.1 31 +ECA712 36.82 36.43 98.9 35 +ECA713 28.33 28.08 99.1 27 +ECA714 31.07 30.79 99.1 31 +ECA715 36 35.63 99 35 +ECA716 35.12 34.75 98.9 34 +ECA717 36.01 33.76 93.8 33 +ECA718 36.45 33.05 90.7 32 +ECA719 31.7 31.38 99 31 +ECA720 35.44 35.08 99 34 +ECA721 34.17 27.31 79.9 26 +ECA722 41.19 39.92 96.9 38 +ECA723 41.39 40.28 97.3 38 +ECA724 36.54 34.78 95.2 34 +ECA725 29.81 28.95 97.1 29 +ECA726 31.39 31.01 98.8 30 +ECA727 31.84 31.4 98.6 32 +ECA728 32.6 32.19 98.7 31 +ECA729 39.47 38.73 98.1 37 +ECA730 32.91 32.48 98.7 31 +ECA731 35.23 34 96.5 33 +ECA732 32.98 32.19 97.6 32 +ECA733 35.52 35.15 99 34 +ECA734 33.04 32.75 99.1 32 +ECA735 28.82 28.53 99 29 +ECA736 30.91 30.52 98.8 30 +ECA737 34.95 34.56 98.9 34 +ECA738 38.14 37.69 98.8 36 +ECA739 37.61 37.18 98.9 37 +ECA740 44.07 33.47 75.9 31 +ECA741 34.31 33.8 98.5 33 +ECA742 38.53 37.96 98.5 36 +ECA743 33.26 32.89 98.9 33 +ECA744 30.33 29.98 98.9 30 +ECA745 44.64 44.06 98.7 41 +ECA746 35.24 34.76 98.7 35 +ECA747 30.65 30.28 98.8 30 +ECA748 37.95 37.6 99.1 36 +ECA749 33.67 33.29 98.9 33 +ECA750 35.33 34.96 99 34 +ECA751 34.43 33.97 98.7 34 +ECA752 36.58 36.26 99.1 35 +ECA753 32.04 31.5 98.3 31 +ECA754 36.14 34.19 94.6 33 +ECA755 32.14 31.43 97.8 31 +ECA756 35.89 27.95 77.9 27 +ECA757 35.02 34.7 99.1 34 +ECA758 38.34 34.54 90.1 33 +ECA759 32.53 31.89 98 32 +ECA760 39.69 39.23 98.8 38 +ECA761 37.64 36.23 96.3 36 +ECA762 39.52 39.12 99 37 +ECA763 34.09 33.85 99.3 33 +ECA764 33.11 32.63 98.5 32 +ECA765 35.73 33.77 94.5 34 +ECA766 32.3 31.3 96.9 31 +ECA767 33.78 30.48 90.2 30 +ECA768 38.01 33.88 89.1 32 +ECA769 38.12 37.08 97.3 36 +ECA770 34.74 
30.7 88.3 30 +ECA771 35.56 32.02 90 32 +ECA772 35.45 32.82 92.6 31 +ECA773 34.39 32.79 95.4 32 +ECA774 40.99 34.65 84.5 34 +ECA775 35.6 34.16 96 34 +ECA776 41.29 40.87 99 39 +ECA777 35.67 34.98 98.1 35 +ECA778 47.44 46.51 98.1 43 +ECA779 29.92 23.09 77.2 23 +ECA780 38.48 37.79 98.2 37 +ECA781 44.06 43.25 98.1 41 +ECA782 36.31 35.7 98.3 35 +ECA783 36.11 35.68 98.8 35 +ECA784 33.96 32.87 96.8 33 +ECA785 33.3 32.99 99.1 33 +ECA786 44.49 44 98.9 42 +ECA787 40.38 35.78 88.6 35 +ECA807 35.13 34.8 99.1 34 +ECA808 35.01 34.71 99.2 34 +ECA809 38.15 37.76 99 38 +ECA810 33.66 33.29 98.9 33 +ECA811 37.5 36.79 98.1 35 +ECA812 44.31 43.89 99 42 +ECA813 38.79 38.36 98.9 37 +ECA822 32.76 31.37 95.8 31 +ECA922 29.88 29.65 99.2 31 +ECA923 35.87 35.59 99.2 37 +ECA924 23.93 23.77 99.3 26 +ECA925 27.63 27.4 99.1 30 +ECA926 28.25 27.76 98.3 29 +ECA927 26.66 26.4 99.1 29 +ECA928 28.48 28.29 99.3 30 +ECA930 28.07 27.88 99.3 30 +ED3005 80.67 79.87 99 58 +ED3011 60.19 59.48 98.8 45 +ED3012 37.73 37.58 99.6 29 +ED3017 63.11 62.64 99.3 47 +ED3040 66.61 66.25 99.5 49 +ED3046 66.63 65.98 99 50 +ED3048 66.87 66.53 99.5 49 +ED3049 60.54 58.08 95.9 47 +ED3052 68.74 68.19 99.2 53 +ED3073 73.77 73.26 99.3 53 +ED3077 66.69 66.22 99.3 51 +EG4347 57.15 56.92 99.6 43 +EG4349 46.73 46.25 99 37 +EG4724 48.82 48.37 99.1 38 +EG4725 63.18 62.46 98.9 42 +EG4946 46.22 44.21 95.6 33 +GXW1 47.16 46.81 99.3 35 +JR4305 25.44 25.26 99.3 23 +JT11398 71.07 70.74 99.5 57 +JU1088 62.44 61.74 98.9 45 +JU1172 54.41 53.99 99.2 39 +JU1200 55.51 55.4 99.8 42 +JU1212 49.33 48.99 99.3 37 +JU1213 42.23 41.87 99.1 33 +JU1242 74.09 73.54 99.3 50 +JU1246 66.21 65.72 99.3 47 +JU1249 46.55 45.87 98.5 40 +JU1395 24.86 24.75 99.5 21 +JU1400 82.12 76.96 93.7 53 +JU1409 63.81 63.47 99.5 48 +JU1440 61.15 60.77 99.4 47 +JU1491 62.36 61.9 99.3 47 +JU1511 49.69 49.43 99.5 30 +JU1516 94.73 93.2 98.4 63 +JU1530 37.37 37.14 99.4 30 +JU1543 30.03 29.94 99.7 34 +JU1568 78.99 78.82 99.8 59 +JU1580 93.62 92.43 98.7 72 +JU1581 46.26 45.89 99.2 34 +JU1586 62.68 62.44 99.6 49 +JU1652 41.36 40.97 99 31 +JU1656 28.71 28.43 99 25 +JU1666 50.67 50.09 98.9 45 +JU1762 40.64 40.24 99 37 +JU1770 54.46 53.93 99 49 +JU1792 56.77 56.42 99.4 52 +JU1793 55.23 54.48 98.6 51 +JU1807 58.72 57.43 97.8 52 +JU1808 42.41 41.99 99 39 +JU1896 61.97 61.6 99.4 45 +JU1920 42.32 41.8 98.8 39 +JU1922 38.31 36.55 95.4 34 +JU1924 31.48 31.13 98.9 28 +JU1926 48.89 48.31 98.8 30 +JU1929 36.79 36.32 98.7 34 +JU1931 79.35 78.8 99.3 44 +JU1934 53.19 52.74 99.2 49 +JU1941 38.68 38.44 99.4 24 +JU1960 60.35 59.8 99.1 49 +JU2001 76.77 75.84 98.8 60 +JU2007 59.54 59.13 99.3 47 +JU2016 47.57 43.4 91.2 40 +JU2017 46.71 42.76 91.5 39 +JU2106 50.18 49.57 98.8 46 +JU2131 36.25 34.07 94 32 +JU2139 31.41 31.02 98.8 28 +JU2141 42.22 41.87 99.2 39 +JU2151 33.58 33.12 98.6 31 +JU2234 51.19 50.69 99 47 +JU2250 39.81 36.84 92.5 34 +JU2257 35.01 34.85 99.5 31 +JU2287 34.51 34.23 99.2 33 +JU2316 62.62 52.92 84.5 48 +JU2460 66.18 60.69 91.7 50 +JU2464 36.31 35.96 99 32 +JU2466 57.43 56.78 98.9 54 +JU2467 58.41 57.89 99.1 49 +JU2468 38.42 38.07 99.1 32 +JU2478 60.61 56.39 93 50 +JU2513 41.85 40.93 97.8 35 +JU2519 56.07 47.3 84.4 46 +JU2522 44.38 41.32 93.1 35 +JU2526 39.04 34.95 89.5 30 +JU2527 57.53 57.06 99.2 52 +JU2534 77.16 76.71 99.4 64 +JU2565 34.86 34.68 99.5 33 +JU2566 49.3 49.06 99.5 44 +JU2570 56.67 56.21 99.2 51 +JU2572 62.3 61.77 99.2 63 +JU2575 46.94 45.28 96.5 42 +JU2576 59.29 58.54 98.7 53 +JU2578 43.93 43.51 99 40 +JU258 77.62 76.48 98.5 56 +JU2581 35.23 34.8 98.8 31 +JU2586 45.73 45.16 98.8 42 +JU2587 58.31 
56.75 97.3 36 +JU2592 45.35 44.89 99 41 +JU2593 61.28 60.34 98.5 50 +JU2600 43.98 43.46 98.8 40 +JU2604 47.51 46.92 98.8 43 +JU2605 34.44 33.96 98.6 32 +JU2610 58.79 58.18 99 49 +JU2619 44.93 38.45 85.6 38 +JU2800 70.14 69.59 99.2 58 +JU2802 35.9 35.5 98.9 33 +JU2811 42.82 42.33 98.9 37 +JU2825 51.01 50.42 98.9 50 +JU2828 33.63 33.27 98.9 30 +JU2829 50.22 49.66 98.9 45 +JU2830 37.85 37.22 98.3 34 +JU2838 59.68 55.87 93.6 50 +JU2841 47.83 42.07 88 39 +JU2853 58.53 58.21 99.4 53 +JU2860 41.6 39.29 94.5 36 +JU2862 51.63 48.94 94.8 45 +JU2866 61.25 60.66 99 50 +JU2878 86.16 85.3 99 69 +JU2879 70.51 69.65 98.8 57 +JU2906 35.1 34.85 99.3 32 +JU2907 42.49 42.23 99.4 39 +JU2908 40.96 40.72 99.4 38 +JU310 61.46 61.1 99.4 43 +JU311 51.68 51.41 99.5 38 +JU312 51.41 51.01 99.2 45 +JU3125 34.89 34.69 99.4 38 +JU3127 25.19 24.89 98.8 28 +JU3128 25.58 25.34 99 28 +JU3131 17.85 17.71 99.2 21 +JU3132 54.95 54 98.3 57 +JU3133 25.18 25.07 99.6 28 +JU3134 45.39 45.12 99.4 49 +JU3135 23.52 23.22 98.7 26 +JU3136 31.51 31.35 99.5 35 +JU3137 32.03 31.87 99.5 36 +JU3138 29.4 28.62 97.3 32 +JU3139 17.39 17.22 99 20 +JU3140 50.04 49.64 99.2 49 +JU3141 45.32 43.92 96.9 44 +JU3142 22.32 22.16 99.3 25 +JU3144 23.86 23.64 99.1 26 +JU315 80.77 80.32 99.4 60 +JU3166 81.52 80.32 98.5 53 +JU3167 53.54 52.78 98.6 39 +JU3169 64.89 64.06 98.7 43 +JU3224 20.19 20.08 99.4 22 +JU3225 27.08 26.97 99.6 29 +JU3226 22.76 22.49 98.8 24 +JU3227 30.6 30.45 99.5 33 +JU3228 35.53 35.35 99.5 38 +JU323 70.96 70.48 99.3 52 +JU3271 37.94 37.65 99.2 37 +JU3280 31.11 30.87 99.2 31 +JU3282 35.39 35.15 99.3 35 +JU3291 35.68 35.37 99.1 35 +JU3318 23.49 23.31 99.2 27 +JU3398 50.78 50.19 98.8 31 +JU3399 80.07 79.57 99.4 49 +JU3400 47.26 46.94 99.3 32 +JU3401 57.67 57.18 99.2 35 +JU3402 78.9 78.41 99.4 50 +JU3403 54.83 54.44 99.3 37 +JU345 28.49 28.28 99.3 26 +JU346 60.57 59.3 97.9 44 +JU360 69.83 68.95 98.7 54 +JU363 80.34 78.49 97.7 62 +JU367 69.05 68.74 99.5 52 +JU3785 35.2 30.81 87.5 31 +JU3786 25.74 25.6 99.5 27 +JU3791 68.22 67.25 98.6 71 +JU3795 53.11 52.94 99.7 55 +JU393 51.46 51 99.1 38 +JU394 58.87 58.27 99 51 +JU397 73.79 73.33 99.4 54 +JU4047 44.58 44.3 99.4 32 +JU4048 28.12 27.99 99.5 18 +JU4054 35.15 35.02 99.6 24 +JU406 48.5 48.21 99.4 40 +JU4067 28.87 28.75 99.6 19 +JU4069 27.03 26.91 99.6 19 +JU4071 21.48 21.4 99.7 14 +JU4072 27.2 27.08 99.6 19 +JU4073 36.88 36.75 99.6 24 +JU4074 25.81 25.71 99.6 18 +JU4075 21.91 21.81 99.6 15 +JU4082 32.83 32.65 99.4 22 +JU4085 27.61 27.44 99.4 20 +JU4098 28.74 28.68 99.8 20 +JU440 55.89 55.68 99.6 42 +JU561 53.92 53.56 99.3 41 +JU642 59.1 58.74 99.4 41 +JU751 74.35 73.45 98.8 55 +JU755 37.44 36.34 97.1 24 +JU774 62.86 62.27 99.1 46 +JU775 53.01 52.25 98.6 40 +JU778 108.21 106.71 98.6 83 +JU782 85.47 84.35 98.7 68 +JU792 96.64 95.94 99.3 76 +JU830 75.55 74.73 98.9 56 +JU847 43.37 42.65 98.3 33 +KR314 71.48 70.86 99.1 52 +LKC34 60.25 59.91 99.4 45 +MY1 60.12 59.7 99.3 45 +MY10 52.71 52.1 98.8 41 +MY16 59.63 58.75 98.5 44 +MY18 72.15 71.31 98.8 51 +MY2001 56.87 56.46 99.3 53 +MY2004 54.39 53.95 99.2 44 +MY2011 31.92 31.67 99.2 22 +MY2014 61.15 60.66 99.2 49 +MY2022 46.18 45.83 99.2 35 +MY2024 27.22 26.96 99 25 +MY2042 49.68 49.32 99.3 39 +MY2050 49.44 49.04 99.2 43 +MY2051 28.32 28.04 99 26 +MY2054 47.96 47.55 99.2 34 +MY2078 24.21 23.96 99 23 +MY2079 50.45 50.05 99.2 30 +MY2097 52.79 52.44 99.3 50 +MY2099 21.98 21.73 98.9 21 +MY2109 39.86 39.37 98.8 27 +MY2121 38.81 38.48 99.1 25 +MY2137 43.91 43.44 98.9 40 +MY2138 46 45.68 99.3 36 +MY2142 42.95 42.67 99.4 31 +MY2143 43.56 43.26 99.3 37 +MY2144 30.05 
29.85 99.3 20 +MY2147 44.59 44.3 99.4 35 +MY2198 31.37 31.17 99.4 21 +MY2199 55.18 54.62 99 47 +MY2208 25.91 25.75 99.4 17 +MY2212 53.31 52.78 99 48 +MY2224 27.4 27.2 99.3 26 +MY2239 50.28 50 99.4 48 +MY2282 49.48 49.16 99.3 39 +MY2288 23.08 22.85 99 22 +MY2291 36.19 35.86 99.1 33 +MY2294 49.46 49.18 99.4 37 +MY23 87.63 86.52 98.7 64 +MY2338 37.89 37.61 99.3 35 +MY2339 57.76 57.28 99.2 46 +MY2344 29.56 29.37 99.4 20 +MY2347 24.91 24.7 99.2 24 +MY2373 46.45 46.06 99.2 42 +MY2406 55.95 55.57 99.3 46 +MY2434 48.19 47.92 99.4 38 +MY2443 45.91 45.62 99.4 33 +MY2453 93.27 92.32 99 80 +MY2479 50.47 50.12 99.3 47 +MY2481 31 30.77 99.3 24 +MY2491 37.47 37.24 99.4 26 +MY2502 49.25 48.85 99.2 39 +MY2530 38.2 37.83 99.1 35 +MY2532 54.35 54.01 99.4 44 +MY2535 57.99 57.65 99.4 46 +MY2541 37.09 36.78 99.2 26 +MY2573 50.53 50.19 99.3 43 +MY2579 38.01 37.69 99.2 27 +MY2585 40.59 40.28 99.2 28 +MY2622 50.85 50.46 99.2 48 +MY2623 49.38 49.02 99.3 37 +MY2630 44.86 44.51 99.2 38 +MY2635 40.1 39.74 99.1 31 +MY2636 54.08 53.66 99.2 46 +MY2640 46.63 46.25 99.2 39 +MY2679 47.01 46.57 99.1 37 +MY2681 36.07 35.78 99.2 24 +MY2684 46.88 46.41 99 42 +MY2685 48.06 47.54 98.9 43 +MY2688 70.14 69.63 99.3 58 +MY2689 37.67 37.29 99 25 +MY2691 55.7 55.23 99.2 46 +MY2692 42.63 41.96 98.4 39 +MY2693 65.87 65.29 99.1 51 +MY2713 64.18 63.62 99.1 53 +MY2719 24.55 24.29 98.9 23 +MY2741 46.26 45.67 98.7 35 +MY508 38.85 38.61 99.4 37 +MY518 41.32 40.97 99.2 29 +MY524 35.64 35.28 99 33 +MY538 31.19 30.88 99 22 +MY559 36.12 35.89 99.4 26 +MY561 57.42 56.8 98.9 51 +MY564 33.32 32.97 99 31 +MY570 54.24 53.8 99.2 45 +MY579 38.72 38.5 99.4 33 +MY589 46.29 45.99 99.3 36 +MY673 52.7 52.24 99.1 50 +MY679 31.15 30.83 99 29 +MY684 23.77 23.54 99 22 +MY710 38.22 38 99.4 32 +MY713 40.91 40.57 99.2 33 +MY741 32.23 31.99 99.3 22 +MY772 70.82 70.32 99.3 58 +MY792 50.41 50.01 99.2 40 +MY795 37.46 37.01 98.8 34 +MY803 37.13 36.71 98.9 34 +MY804 43.36 43.12 99.4 42 +MY819 63.15 62.64 99.2 53 +MY864 55.72 55.28 99.2 42 +MY881 46.06 45.76 99.4 37 +MY882 46.2 45.85 99.2 35 +MY887 26.23 25.99 99.1 24 +MY904 37.93 37.69 99.4 35 +MY920 74.34 73.84 99.3 56 +MY934 45.54 45.18 99.2 41 +MY965 46.22 45.85 99.2 37 +MY990 32.27 31.99 99.2 30 +MY991 27.07 26.85 99.2 18 +N2 35.13 35.12 99.9 28 +NIC1 54.59 45.27 82.9 35 +NIC1049 39.6 39.38 99.5 36 +NIC1107 65.14 64.8 99.5 59 +NIC1119 44.38 44.09 99.3 43 +NIC1604 34.54 34.4 99.6 34 +NIC166 48.02 45.82 95.4 35 +NIC1779 51.96 51.69 99.5 46 +NIC1780 64.38 63.92 99.3 57 +NIC1781 35.25 30.84 87.5 28 +NIC1782 24.9 24.77 99.5 23 +NIC1783 67.87 67.42 99.3 59 +NIC1785 58.36 58.03 99.4 50 +NIC1786 74.57 74.11 99.4 65 +NIC1787 109.46 65.81 60.1 59 +NIC1788 69 65.24 94.5 57 +NIC1789 77.39 76.77 99.2 67 +NIC1790 26.76 26.58 99.3 19 +NIC1791 36.18 35.96 99.4 24 +NIC1792 24.78 24.63 99.4 18 +NIC1793 76.7 76.11 99.2 64 +NIC1794 86.43 85.94 99.4 75 +NIC1795 56.33 55.94 99.3 48 +NIC1796 54.41 53.88 99 47 +NIC1797 53.15 52.25 98.3 42 +NIC1798 59.65 59.18 99.2 55 +NIC1799 70.71 70.17 99.2 61 +NIC1800 62.99 62.63 99.4 54 +NIC1801 57.54 57.04 99.1 48 +NIC1802 58.59 58.2 99.3 52 +NIC1803 58.06 54.94 94.6 50 +NIC1804 31.59 28.78 91.1 20 +NIC1805 59.76 59.48 99.5 53 +NIC1806 71.18 70.71 99.3 62 +NIC1807 27.05 26.87 99.3 18 +NIC1808 60.48 60.15 99.5 55 +NIC1809 77.5 77.18 99.6 67 +NIC1810 49.3 44.54 90.4 41 +NIC1811 62.46 49.4 79.1 44 +NIC1812 52.99 52.71 99.5 47 +NIC1832 74.76 74.22 99.3 65 +NIC195 58.59 56.59 96.6 42 +NIC196 64.84 57.38 88.5 43 +NIC197 53.39 47.87 89.7 36 +NIC1977 27.92 25.86 92.6 18 +NIC198 44.03 35.96 81.7 27 +NIC1980 
36.23 34.03 93.9 24 +NIC1985 40.04 39.73 99.2 28 +NIC199 47.81 47.08 98.5 36 +NIC2 67 49.33 73.6 39 +NIC200 56.73 54.43 95.9 39 +NIC2002 33.87 33.76 99.7 23 +NIC2004 25.5 25.41 99.7 18 +NIC2011 33.59 31.84 94.8 22 +NIC207 54.91 54.58 99.4 41 +NIC231 56.31 51.06 90.7 37 +NIC232 90.45 89.75 99.2 70 +NIC236 35.53 33.89 95.4 27 +NIC237 57.51 55.68 96.8 41 +NIC242 37.34 37.04 99.2 31 +NIC251 40.1 39.59 98.7 31 +NIC252 54.49 53.74 98.6 51 +NIC255 62.38 60.4 96.8 33 +NIC256 28.93 28.64 99 25 +NIC258 39.48 38.98 98.7 32 +NIC259 55.08 53.82 97.7 51 +NIC260 80.31 79.45 98.9 67 +NIC261 44.99 44.45 98.8 37 +NIC262 63.6 60.85 95.7 34 +NIC263 27.2 26.86 98.8 23 +NIC265 60.44 58.81 97.3 56 +NIC266 36.17 35.82 99 30 +NIC267 43.96 43.72 99.4 38 +NIC268 41.19 40.66 98.7 34 +NIC269 48.67 48.05 98.7 40 +NIC270 38.32 38.09 99.4 32 +NIC271 35.71 35.35 99 31 +NIC272 40.09 39.1 97.5 34 +NIC273 33.28 32.86 98.8 25 +NIC274 77.54 66.52 85.8 50 +NIC275 62.06 61.46 99 61 +NIC276 51.81 51.14 98.7 40 +NIC277 37.97 37.72 99.4 32 +NIC3 62.19 34.59 55.6 27 +NIC4 26.7 22.31 83.6 19 +NIC501 35.19 34.78 98.8 33 +NIC508 41.21 40.96 99.4 38 +NIC511 50.98 50.66 99.4 47 +NIC512 44.27 43.81 99 40 +NIC513 86.17 85.31 99 75 +NIC514 36.17 35.69 98.7 34 +NIC515 42.98 42.39 98.6 39 +NIC521 31.01 30.74 99.1 28 +NIC522 35.3 34.8 98.6 33 +NIC523 50.16 49.69 99.1 46 +NIC526 34.67 34.38 99.2 32 +NIC527 33.37 33.08 99.1 31 +NIC528 78.89 77.98 98.8 70 +NIC529 44.6 44.33 99.4 40 +PB303 47.3 46.93 99.2 36 +PS2025 55.84 55.14 98.8 41 +PX179 38.26 38.15 99.7 30 +QG2075 67.14 66.32 98.8 59 +QG2810 39.48 38.68 98 38 +QG2811 39.7 39.5 99.5 39 +QG2812 35.29 35.07 99.4 33 +QG2813 35 34.81 99.5 34 +QG2818 41.91 41.73 99.6 40 +QG2823 38.94 38.72 99.4 38 +QG2824 35.63 35.46 99.5 35 +QG2825 33.55 33.34 99.3 34 +QG2826 38.65 38.44 99.5 37 +QG2827 39.02 38.71 99.2 38 +QG2828 33.43 33.13 99.1 33 +QG2829 36.05 35.77 99.2 36 +QG2830 38.16 37.9 99.3 37 +QG2831 34.43 34.24 99.5 34 +QG2832 37.93 37.54 99 37 +QG2833 36.24 34.67 95.7 34 +QG2834 32.24 32 99.3 32 +QG2835 37.84 37.56 99.2 37 +QG2836 34.11 33.81 99.1 33 +QG2837 42.78 42.47 99.3 41 +QG2838 44.98 44.65 99.3 42 +QG2839 39.11 38.85 99.3 37 +QG2840 32.57 32.4 99.5 32 +QG2841 41.68 41.22 98.9 39 +QG2842 32.81 32.53 99.2 32 +QG2843 35.41 35.21 99.5 34 +QG2844 34.44 34.14 99.1 34 +QG2845 30.81 30.54 99.1 30 +QG2846 36.14 35.84 99.2 36 +QG2850 39.22 39.02 99.5 38 +QG2851 36.46 36.07 98.9 36 +QG2852 31.43 30.96 98.5 31 +QG2853 32.7 32.45 99.2 32 +QG2854 40.49 40.26 99.4 39 +QG2855 34.53 34.3 99.3 34 +QG2856 34.78 34.57 99.4 34 +QG2857 31.37 31.13 99.2 31 +QG2858 37.01 36.76 99.3 36 +QG2859 29.65 29.47 99.4 29 +QG2872 37.59 37.33 99.3 37 +QG2873 38.88 38.65 99.4 37 +QG2874 34.94 34.74 99.4 34 +QG2875 43.26 42.97 99.3 42 +QG2876 34.04 33.7 99 33 +QG2877 39.69 39.42 99.3 39 +QG2878 37.76 37.53 99.4 36 +QG2927 35.74 35.43 99.1 35 +QG2928 21.07 20.52 97.4 21 +QG2931 35.8 35.64 99.6 35 +QG2932 38.89 38.65 99.4 39 +QG4003 58.62 58.39 99.6 60 +QG4004 36.54 36.4 99.6 40 +QG4005 50.83 50.66 99.7 55 +QG4006 55.89 55.75 99.8 60 +QG4008 65.45 65.26 99.7 68 +QG4009 54.13 53.97 99.7 58 +QG4010 84.92 84.64 99.7 86 +QG4011 48.44 48.2 99.5 53 +QG4012 53.48 53.22 99.5 56 +QG4014 57.82 57.38 99.2 63 +QG4015 48.89 48.71 99.6 52 +QG4016 47.48 47.32 99.7 50 +QG4017 80.92 80.45 99.4 85 +QG4018 54.94 54.66 99.5 58 +QG4019 66.88 66.58 99.6 71 +QG4021 51.53 51.32 99.6 54 +QG4076 25.32 25.2 99.5 27 +QG4077 24.86 24.75 99.5 26 +QG4078 36.29 36.11 99.5 38 +QG4079 38.57 38.4 99.5 40 +QG4080 44.58 44.4 99.6 46 +QG4103 32.62 32.41 99.4 33 
+QG4134 37.24 37.13 99.7 38 +QG4135 40.12 39.95 99.6 40 +QG4137 32.43 32.32 99.6 33 +QG4138 36.85 36.71 99.6 38 +QG4139 33.84 33.71 99.6 34 +QG4151 39.56 39.38 99.5 40 +QG4158 28.81 28.69 99.6 29 +QG4159 35.4 35.31 99.7 37 +QG4160 29.33 29.27 99.8 29 +QG4161 28.53 28.46 99.8 29 +QG4162 27.48 27.36 99.6 27 +QG4163 28.08 27.92 99.4 28 +QG4166 27.59 27.47 99.6 28 +QG4182 39.58 39.45 99.7 42 +QG4184 30.2 30.11 99.7 30 +QG4185 27.83 27.71 99.6 28 +QG4186 39.33 39.21 99.7 39 +QG4193 41.6 41.45 99.6 41 +QG4194 25.9 25.8 99.6 27 +QG4226 32.28 32.13 99.5 32 +QG4228 26.79 26.68 99.6 26 +QG536 50.54 50.03 99 40 +QG537 67.07 66.49 99.1 48 +QG538 41.35 40.99 99.1 31 +QG556 57.78 57.09 98.8 45 +QG557 59.82 59.28 99.1 48 +QG558 40.83 40.35 98.8 31 +QW947 37.02 36.2 97.8 32 +QX1211 79.47 77.91 98 55 +QX1212 58.52 57.93 99 42 +QX1213 64.32 63.63 98.9 47 +QX1214 38.95 38.52 98.9 31 +QX1215 63.19 61.93 98 46 +QX1216 67.3 65.92 98 47 +QX1233 68.61 68.02 99.1 49 +QX1791 70.85 69.66 98.3 51 +QX1792 64.78 64.03 98.8 48 +QX1793 48.88 48.25 98.7 39 +QX1794 92.08 90.5 98.3 66 +RC301 38.08 37.82 99.3 29 +TWN2530 45.05 44.81 99.5 35 +TWN2542 64.92 64.47 99.3 57 +TWN2794 60.39 60.03 99.4 44 +TWN2803 47.17 46.87 99.4 40 +WN2001 42.2 41.77 99 29 +WN2002 57.16 56.69 99.2 43 +WN2010 54.44 53.77 98.8 39 +WN2011 60.33 59.63 98.8 45 +WN2013 65.53 64.77 98.8 46 +WN2014 50 49.46 98.9 37 +WN2016 75.65 74.76 98.8 55 +WN2017 68.8 67.6 98.2 46 +WN2018 50.23 49.34 98.2 37 +WN2019 62.5 61.68 98.7 48 +WN2020 53.45 52.72 98.6 39 +WN2021 28.52 28.14 98.7 20 +WN2033 39.87 39.55 99.2 41 +WN2035 33.77 33.39 98.9 31 +WN2039 57.04 56.36 98.8 50 +WN2050 38.22 37.82 98.9 35 +WN2056 44.47 43.98 98.9 40 +WN2059 33.35 26.91 80.7 29 +WN2060 80.25 79.62 99.2 48 +WN2061 45.45 45.27 99.6 30 +WN2062 59.63 58.88 98.7 44 +WN2063 81.95 81.2 99.1 49 +WN2064 87.56 86.76 99.1 44 +WN2065 70.51 69.99 99.3 49 +WN2066 85.22 84.63 99.3 54 +WN2067 42.31 41.88 99 46 +WN2068 39.16 38.95 99.5 42 +WN2069 37.97 37.73 99.4 41 +WN2070 21.46 21.35 99.5 25 +WN2071 50.47 50.24 99.5 31 +WN2073 42.94 42.8 99.7 27 +WN2074 24.48 24.35 99.5 28 +WN2075 34.58 34.46 99.6 38 +WN2076 19.55 19.47 99.6 22 +WN2077 26.6 26.51 99.7 18 +WN2078 30.53 30.47 99.8 22 +WN2079 41.19 41.02 99.6 27 +WN2080 41.94 39.45 94.1 26 +WN2081 25.05 24.99 99.7 17 +WN2082 31.38 31.29 99.7 22 +WN2083 25.79 25.69 99.6 18 +WN2086 28.25 27.87 98.7 20 +WN2088 34.03 33.86 99.5 23 +WN2089 23.17 23.04 99.4 17 +WN2090 34.32 34.18 99.6 23 +WN2091 26.7 26.59 99.6 19 +WN2092 27.13 27.02 99.6 18 +WN2093 25.25 25.16 99.6 18 +WN2095 20.27 20.22 99.8 14 +WN2096 24.51 24.26 99 16 +XZ1513 42.62 40.08 94 37 +XZ1514 47.3 42.96 90.8 40 +XZ1515 73.88 70.4 95.3 66 +XZ1516 42.31 39.82 94.1 35 +XZ1672 25.38 25.22 99.4 27 +XZ1734 34.42 34.12 99.1 36 +XZ1735 27.35 27.2 99.5 29 +XZ1756 28.42 28.23 99.3 30 +XZ2018 29.67 29.58 99.7 33 +XZ2019 34.81 34.42 98.9 37 +XZ2020 24.1 23.76 98.6 25 +XZ2210 69.47 61.96 89.2 53 +XZ2211 45.38 44.96 99.1 41 +XZ2212 63.26 62.1 98.2 55 +XZ2213 30.19 29.86 98.9 28 diff --git a/base/static/reports/20210121/release_notes.md b/base/static/reports/20210121/release_notes.md new file mode 100644 index 00000000..8f875977 --- /dev/null +++ b/base/static/reports/20210121/release_notes.md @@ -0,0 +1 @@ +The 20210121 release includes genotypes from whole-genome sequences and reduced representation (RAD) sequencing. Genotypes are compared for concordance, and strains that are 99.95% identical to each other are [grouped into isotypes]({{ url_for("primary.help_item", filename="FAQ", _anchor="strain-groups") }}). 
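To make the 99.95% rule concrete, here is a minimal, hypothetical sketch of concordance-based grouping — connected components over strain pairs at or above the cutoff. The function `group_isotypes` and its precomputed `concordance` input are illustrative assumptions, not CeNDR's actual pipeline code:

```python
from itertools import combinations

def group_isotypes(strains, concordance, threshold=0.9995):
    """Group strains into isotypes: any two strains whose genotype
    concordance is >= threshold end up in the same group (union-find)."""
    parent = {s: s for s in strains}

    def find(s):
        # Follow parent pointers to the group representative, halving paths as we go.
        while parent[s] != s:
            parent[s] = parent[parent[s]]
            s = parent[s]
        return s

    for a, b in combinations(strains, 2):
        # `concordance` maps a strain pair to the fraction of genotype
        # calls the two strains share; assume symmetric storage.
        if concordance.get((a, b), concordance.get((b, a), 0.0)) >= threshold:
            parent[find(a)] = find(b)

    groups = {}
    for s in strains:
        groups.setdefault(find(s), []).append(s)
    return list(groups.values())
```

Under this sketch grouping is transitive: strains connected through a chain of ≥99.95% matches fall into the same isotype.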
One strain within each isotype is the reference strain for that isotype. To look up isotype assignment, see [Isotype List]({{ url_for("strains.strains_list") }}). All isotype reference strains are [available on CeNDR]({{ url_for("strains.strains_catalog") }}). diff --git a/base/static/sass/custom-styles.scss b/base/static/sass/custom-styles.scss new file mode 100644 index 00000000..628d04c4 --- /dev/null +++ b/base/static/sass/custom-styles.scss @@ -0,0 +1,1001 @@ +/* Welcome to Compass. + * In this file you should write your main styles. (or centralize your imports) + * Import this file using the following HTML or equivalent: + * */ + +@import "compass/reset"; +@import url('https://fonts.googleapis.com/css2?family=Roboto:ital,wght@0,100;0,300;0,400;0,500;0,700;0,900;1,100;1,300;1,400;1,500;1,700;1,900&display=swap'); + +$black: #000; +$purple: #401F68; +$purple2: #4F2A84; +$white: #FFFFFF; +$yellow: #FFC400; + +h1 { + font-size: 2.5em; +} + +h2 { + font-size: 2.2em; + font-weight: 400; + margin-bottom: 10px; +} + +h3 { + font-size: 1.5em; + font-weight: 500; + margin: 20px 0; +} + +h4 { + font-size: 1.2em; + font-weight: 500; + margin: 20px 0; +} + +h5 { + font-size: 1.0em; + font-weight: 500; + margin: 15px 0; +} + +em { + font-style: italic; +} + +strong { + font-weight: 500; +} + +i { + font-style: italic; +} + +p { + margin-bottom: 15px; + line-height: 24px; +} + +body { + font-family: 'Roboto', sans-serif; +} + +a { + color: $purple; + transition: all 1.0s ease; + + &:hover { + transition: all .6s ease; + } +} + +.content { + ul { + li { + list-style: disc; + line-height: 24px; + margin-left: 50px; + } + } +} + +ol { + list-style: decimal; + margin-bottom: 25px; + + li { + line-height: 24px; + margin-left: 50px; + } +} + +address { + margin-bottom: 20px; + line-height: 20px; +} + +/* Header */ +header { + #top-nav { + width: 1170px; + margin: 0 auto; + padding: 15px 0; + + .NU-logo { + img { + height: 28px; + } + } + + .navbar-cendr-brand { + margin: 0 0 0 10px; + padding: 0px 0px 0px 14px; + font-size: 1.8em; + color: #000; + font-weight: 500; + letter-spacing: 0.03em; + + &:hover { + color: rgba($black, 0.5); + text-decoration: none; + } + } + + .login-topnav { + float: right; + width: 40%; + text-align: right; + margin: 8px 0 0 0; + + a.nav-link { + border: 2px solid #401F68; + padding: 8px 18px; + border-radius: 5px; + font-size: 1.1em; + + &:hover { + background: $purple; + color: $white; + text-decoration: none; + } + } + } + } + + .navbar-static-top { + background: $purple !important; + margin-bottom: 0; + border-bottom: 0; + } + + .navbar-nav { + + li.dropdown, + li.nav-item { + & > a { + color: $white; + font-size: 1.1em; + + &:hover { + color: rgba($white, 0.6); + } + } + } + } + + #latest-update-top { + background: $yellow; + padding: 15px 0; + margin: 0 0 25px; + + div { + width: 1170px; + margin: 0 auto; + color: $black; + + b { + font-weight: 500; + } + + span { + font-weight: 300; + } + + .update-title { + color: $black; + margin: 0 5px; + text-decoration: underline; + + &:hover { + text-decoration: none; + } + } + + .read-more-btn { + background: $purple2; + font-size: 0.7em; + text-transform: uppercase; + padding: 4px 10px; + color: $white; + border-radius: 5px; + margin: 0 0 0 10px; + + &:hover { + text-decoration: none; + background: $white; + color: $purple2; + box-shadow: 0px 1px 4px 0px; + } + } + } + } +} + + +/* Body */ + +.page-content { + h1.lead { + font-size: 2.5em !important; + font-weight: 400; + line-height: 48px; + } +} + +/* Footer */ + +#main-footer 
{ + background: #4e2a84; + color: #fff; + padding: 3rem 0; + width: 100%; + + .footer-widget { + color: $white; + width: 20.00%; + float: left; + margin-right: 5.0%; + min-height: 80px; + + a { + img { + max-width: 170px; + margin-bottom: 20px; + } + } + + .footer-content { + font-size: 0.8em; + + ul { + li { + padding-bottom: 1.0rem !important; + margin-bottom: unset !important; + + .hide-label { + position: absolute; + left: -10000px; + top: auto; + width: 1px; + height: 1px; + overflow: hidden; + } + } + } + + &.contact { + ul { + margin: 1rem 0 1rem 2rem; + position: relative; + + li { + &.footer-pin-icon { + background: url(/static/img/pin-drop.svg) no-repeat; + position: absolute !important; + top: 2px; + left: -2rem; + height: 24px; + width: 18px; + } + + &.footer-phone-icon { + background: url(/static/img/mobile-phone.svg) no-repeat; + position: absolute !important; + top: 2px; + left: -2rem; + height: 24px; + width: 18px; + } + } + + } + } + + p { + strong { + color: #FFF; + } + } + + .social-slide { + display: inline-block; + height: 39px; + width: 39px; + transition: all 0.3s ease 0s; + border: 1px solid #fff; + margin: 0 .6rem .5rem 0; + + &.facebook-hover { + background: url(/static/img/social-media-icons.png); + background-position: 0 0; + + &:hover { + background-position: 0 39px; + } + } + + &.twitter-hover { + background-image: url(/static/img/social-media-icons.png); + background-position: -39px 0; + + &:hover { + background-position: -39px 39px; + } + } + + &.instagram-hover { + background-image: url(/static/img/social-media-icons.png); + background-position: -78px 0; + + &:hover { + background-position: -78px 39px; + } + } + + &.youtube-hover { + background-image: url(/static/img/social-media-icons.png); + background-position: -156px 0; + + &:hover { + background-position: -156px 39px; + } + } + + &.futurity-hover { + background-image: url(/static/img/social-media-icons.png); + background-position: -273px 0; + + &:hover { + background-position: -273px 39px; + } + } + + &.rss-hover { + background-image: url(/static/img/social-media-icons.png); + background-position: -117px 0; + + &:hover { + background-position: -117px 39px; + } + } + + .hide-label { + position: absolute; + left: -10000px; + top: auto; + width: 1px; + height: 1px; + overflow: hidden; + } + } + } + + .custom-html-widget { + p strong { + margin-bottom: 20px; + display: block; + } + } + } + + + + a { + color: $white; + text-decoration: underline; + + &:hover { + text-decoration: none; + } + } +} + +.breadcrumb { + padding: 20px 25px; + margin-bottom: 30px; + margin-top: 20px; + + li { + line-height: unset; + margin-left: unset; + } +} + +.cendr-goals-list { + list-style: decimal; + + li { + line-height: 24px; + margin-left: 50px; + margin-bottom: 20px; + } +} + +.panel-container { + .panel-body { + line-height: 25px; + } +} + +.pub { + line-height: 25px; +} + +.nav-tabs, +.nav-pills, +.list-group { + li { + margin-left: unset !important; + } +} + +.tab-pane { + ul { + li { + margin-left: unset; + } + } + + .panel.panel-default { + ul { + li { + margin-left: 50px; + } + } + } +} + +/* admin dropdown */ +.dropdown-menu-left { + left: auto; + right: 0; + z-index: 99999; +} + +/* New Home UI/UX Styles */ +.home-main-container { + display: inline-block; + + .home-top-container { + background-color: #EBE8F2; + display: inline-block; + + .left-container { + width: 45%; + float: left; + padding: 0 20px 0 30px; + + .cendr-globe-logo { + margin: 0; + + img { + width: 130px !important; + margin-top: 40px; + 
margin-bottom: 20px; + } + } + + .welcome-message { + width: 100%; + float: left; + + h1 { + font-size: 1.6em !important; + line-height: 38px; + } + + .btn-get-started { + background: #FFC400; + padding: 18px 50px; + border-radius: 8px; + margin: 10px 0 0; + display: inline-block; + color: #000; + font-weight: 500; + text-decoration: none; + font-size: 1.2em; + + &:hover { + transition: all .6s ease; + background: rgba(255, 196, 0, 0.5); + } + } + } + } + + + .video-homepage { + width: 55%; + display: inline-block; + } + } + + /* Major Services section */ + .homepage-major-services { + text-align: center; + margin: 20px 0 60px; + + h2 { + font-weight: 500; + width: 80%; + margin: 0 auto 20px; + line-height: 38px; + font-size: 1.8em; + } + + .services-container { + li { + list-style: none; + width: 24%; + display: inline-block; + min-height: 100px; + margin: 0; + padding: 0 15px; + vertical-align: top; + + h3 { + font-size: 1.1em; + font-weight: 300; + } + + a { + background: $purple; + padding: 8px 30px; + border-radius: 8px; + margin: 10px 0 0; + display: inline-block; + color: $white; + font-weight: 500; + text-decoration: none; + font-size: 1.0em; + + &:hover { + transition: all .6s ease; + background: rgba($purple, 0.8); + } + } + } + } + } + + /* Cendr Publication */ + .cendr-publication-container { + background: #F0F0F0; + text-align: center; + padding: 40px 0; + + h2 { + margin: 0 0 40px; + } + + .pub { + width: 70%; + margin: 0 auto; + text-align: left; + line-height: 34px; + + .pub-img-small { + width: 160px; + margin-right: 30px; + } + + strong { + font-size: 1.2em; + } + + } + } + + /* Mailing List Container */ + .mailing-list-container { + background: $purple2; + text-align: center; + padding: 40px 0; + color: $white; + + h4 { + margin: 0; + font-size: 1.8em; + display: inline-block; + font-weight: 400; + width: 40%; + } + + #mc_embed_signup { + display: inline-block; + width: 50%; + text-align: left; + + .form-group { + width: 100%; + + input[type=email] { + border: none; + font-size: 1.2em; + padding: 7px 12px; + height: 49px; + width: 50%; + border-radius: 4px 0 0 4px; + display: inline-block; + border: unset; + } + + input[type=submit] { + display: inline-block; + background: #FFC400; + color: #000; + font-size: 1.3em; + font-weight: 500; + border-radius: 0 8px 8px 0; + padding: 10px 26px; + margin-left: -5px; + border: unset; + + &:hover { + transition: all .6s ease; + background: rgba(255, 196, 0, 0.8); + } + } + } + + } + } + + /* News Container */ + .news-homepage-container { + text-align: center; + padding: 40px 0; + + h4 { + margin: 0; + font-size: 1.8em; + display: inline-block; + font-weight: 400; + width: 40%; + } + + ul.news-listing { + columns: 2; + -webkit-columns: 2; + -moz-columns: 2; + width: 80%; + margin: 40px auto 0; + font-size: 1.2em; + + li { + vertical-align: top; + margin-bottom: 20px; + } + } + } +} + +.strain-data-container { + .panel { + box-shadow: 0px 5px 30px 0px rgba(0, 0, 0, 0.2); + border-radius: 8px; + padding: 12px; + border: none; + + .panel-heading { + background-color: unset; + border-color: unset; + font-size: 1.5em; + border-bottom: unset; + color: $black; + } + + .panel-body { + line-height: 22px; + } + + .btn { + background-color: $purple; + border: unset; + padding: 8px 18px; + border-radius: 5px; + font-size: 1.1em; + text-decoration: none; + + &:hover { + color: $white; + text-decoration: none; + transition: all .6s ease; + background: rgba($purple, 0.8); + } + } + } +} + +.strain-table { + box-shadow: 0px 0px 10px 0px rgba(0, 0, 
0, 0.1); + margin-top: 30px; + border-radius: 8px; + + thead { + background: $purple; + color: $white; + + tr { + th { + padding: 12px 10px; + font-weight: 500; + font-size: 1.0em; + } + } + } + + tbody { + tr { + &:nth-of-type(odd) { + background-color: unset; + } + + &:hover { + background-color: rgba(0, 0, 0, 0.02); + } + + td { + padding: 10px; + + input[type=checkbox] {} + + .view-strain-link { + text-align: left; + padding: 0; + + i { + background: #F2F2F2; + box-shadow: 2px 2px 4px 0 #999; + content: "+"; + display: inline-block; + width: 20px; + height: 20px; + margin: 0 10px 0 0; + border-radius: 4px; + + &:before { + content: "+"; + color: #000; + margin-left: 6px; + font-size: 1.1em; + } + } + } + + .view-strain-list { + columns: 3; + -webkit-columns: 3; + -moz-columns: 3; + margin-top: 25px; + + li { + margin-left: 10px; + width: 80%; + float: unset; + } + } + } + } + } +} + + +@media (max-width: 1199px) { + + .home-main-container { + margin-right: -70px; + margin-left: -70px; + transition: all .8s ease; + + div, + li { + transition: all .8s ease; + } + + .home-top-container { + width: 100%; + padding: 0 40px 15px; + + .left-container { + width: 100%; + float: unset; + + .cendr-globe-logo { + img { + width: 160px !important; + max-height: unset !important; + } + } + + .welcome-message { + width: 70%; + float: left; + margin: 80px 0 0 30px; + } + } + + .video-homepage { + width: 50%; + text-align: center; + } + } + + .homepage-major-services { + .services-container { + li { + width: 38%; + margin: 20px 0 40px; + padding: 0 30px; + } + } + } + } +} + + +.styled-checkbox { + position: absolute; // take it out of document flow + opacity: 0; // hide it + + & + label { + position: relative; + cursor: pointer; + padding: 0; + border-radius: 50%; + } + + // Box. + & + label:before { + content: ''; + margin-right: 10px; + display: inline-block; + vertical-align: text-top; + width: 20px; + height: 20px; + background: white; + border-radius: 50%; + border: 1px solid #CCC; + } + + // Box hover + &:hover + label:before { + background: #FFC400; + border-radius: 50%; + } + + // Box focus + &:focus + label:before { + box-shadow: 0 0 0 3px rgba(0, 0, 0, 0.12); + } + + // Box checked + &:checked + label:before { + background: #FFC400; + } + + // Disabled state label. + &:disabled + label { + color: #b8b8b8; + cursor: auto; + } + + // Disabled box. + &:disabled + label:before { + box-shadow: none; + background: #f8f8f8; + border: 1px solid #eee; + } + + // Checkmark. 
Could be replaced with an image + &:checked + label:after { + content: ''; + position: absolute; + left: 5px; + top: 9px; + background: white; + width: 2px; + height: 2px; + box-shadow: + 2px 0 0 white, + 4px 0 0 white, + 4px -2px 0 white, + 4px -4px 0 white, + 4px -6px 0 white, + 4px -8px 0 white; + transform: rotate(45deg); + } +} + +@media (max-width: 768px) { + + .home-main-container { + .home-top-container { + + .left-container { + .cendr-globe-logo { + width: 30%; + display: inline-block; + } + + .welcome-message { + width: 60%; + float: unset; + display: inline-block; + margin-top: 55px; + + .visible-xs { + text-align: left !important; + font-size: 1.8em !important; + } + } + } + } + + .homepage-major-services { + .services-container { + padding: 0 30px; + + li { + width: 100%; + margin: 0 0 40px; + padding: 0 20%; + + h3 { + margin-bottom: 0; + font-size: 1.4em; + } + } + } + } + + .mailing-list-container { + h4 { + width: 100%; + margin-bottom: 20px; + } + + #mc_embed_signup { + width: 100%; + text-align: center; + + .form-group { + input[type=submit] { + margin-top: -7px; + } + } + } + } + + .news-homepage-container { + ul.news-listing { + columns: 1; + -webkit-columns: 1; + -moz-columns: 1; + + li { + width: 100%; + } + } + } + } + + tbody { + tr { + td { + .view-strain-list { + columns: 1 !important; + -webkit-columns: 1 !important; + -moz-columns: 1 !important; + margin-top: 25px; + + li { + margin-left: 0 !important; + width: 100% !important; + } + } + } + } + } +} diff --git a/base/static/yaml/advisory-committee.yaml b/base/static/yaml/advisory-committee.yaml index 52724865..46e31eb3 100644 --- a/base/static/yaml/advisory-committee.yaml +++ b/base/static/yaml/advisory-committee.yaml @@ -18,7 +18,7 @@ last_name: Rockman photo: MatthewRockman.jpg title: New York University - website: https://biology.as.nyu.edu/object/MatthewRockman + website: https://as.nyu.edu/content/nyu-as/as/faculty/matthew-rockman.html - first_name: Ann last_name: Rougvie diff --git a/base/static/yaml/funding.yaml b/base/static/yaml/funding.yaml index 9f4a3cc1..0df70e60 100644 --- a/base/static/yaml/funding.yaml +++ b/base/static/yaml/funding.yaml @@ -1,10 +1,10 @@ - name: Northwestern url: https://www.northwestern.edu - image: NU.jpg + image: NU.jpeg - name: NSF url: https://www.nsf.gov image: nsf.png description: NSF - Collections in Support of Biological Research Grant (1930382) - name: Weinberg url: https://www.weinberg.northwestern.edu/ - image: weinberg.png \ No newline at end of file + image: weinberg.jpeg \ No newline at end of file diff --git a/base/static/yaml/protocols.yaml b/base/static/yaml/protocols.yaml index f1f21253..fcc56dd2 100644 --- a/base/static/yaml/protocols.yaml +++ b/base/static/yaml/protocols.yaml @@ -4,7 +4,7 @@ Nematode Isolation: link: SamplingIsolationC.elegansNaturalHabitat.pdf - name: Isolating wild strains in the laboratory - link: IsolatingWildIsolates.pdf + link: 20210430_IdentifyingWildIsolates.pdf - name: Shipping Strains link: ShippingStrains.pdf @@ -14,7 +14,10 @@ Maintenance: link: FreezingThawingWeb.pdf - name: Cleaning Worms - link: CleaningWorms.pdf + link: 2021_CleaningWorms.pdf - name: Chunking Worms - link: ChunkingWorms.pdf \ No newline at end of file + link: ChunkingWorms.pdf + + - name: Soft Agar Freezing Protocol + link: 20200620_SoftAgarFreezing&Thawing.pdf \ No newline at end of file diff --git a/base/static/yaml/staff.yaml b/base/static/yaml/staff.yaml index 62c49070..c868dc90 100644 --- a/base/static/yaml/staff.yaml +++ b/base/static/yaml/staff.yaml 
@@ -5,17 +5,20 @@ photo: Robyn.jpg email: Robyn.Tanny@northwestern.edu -- first_name: Daniel - last_name: Cook - title: Developer/Maintainer - pub_names: ["Cook DE"] - photo: Daniel_E_Cook.jpg - website: https://www.danielecook.com - email: Dec@u.northwestern.edu - github: danielecook +- first_name: Katie + last_name: Evans + title: Bioinformaticist + photo: Kathryn_Evans.jpeg + email: kathryn.evans@u.northwestern.edu -- first_name: Dan - last_name: Lu - title: Staff Bioinformaticist - photo: DanLu.jpg - email: dan.lu@northwestern.edu \ No newline at end of file +- first_name: Sophia + last_name: Gibson + title: Bioinformaticist + photo: SophieGibson.jpeg + email: sophia.gibson@northwestern.edu + +- first_name: Ryan + last_name: McKeown + title: Bioinformaticist + photo: Ryan.jpeg + email: RyanMcKeown2021@u.northwestern.edu diff --git a/base/templates/_includes/footer.html b/base/templates/_includes/footer.html index 56c9de39..20755145 100644 --- a/base/templates/_includes/footer.html +++ b/base/templates/_includes/footer.html @@ -1,27 +1,114 @@ -
+{# /container-fluid #}
+{# /container-fluid #}
+{# Hands on Table #}
@@ -24,4 +32,8 @@
-CeNDR | {% if page_title %}{{ page_title }}{% else %}{{ title }}{% endif %}{% if subtitle %} {{ subtitle }}{% endif %}
\ No newline at end of file
+{# Mapbox #}
+CeNDR | {% if page_title %}{{ page_title }}{% else %}{{ title }}{% endif %}{% if subtitle %} {{ subtitle }}{% endif %}
diff --git a/base/templates/_includes/navbar.html b/base/templates/_includes/navbar.html
index 5943244c..3a0093a9 100644
--- a/base/templates/_includes/navbar.html
+++ b/base/templates/_includes/navbar.html
@@ -1,99 +1,129 @@
-
\ No newline at end of file + + + + +
+
+ {% for category, msg in get_flashed_messages(with_categories=true) %} +
+

{{ msg }}

+
+ {% endfor %} +
+
+ + diff --git a/base/templates/_layouts/clean.html b/base/templates/_layouts/clean.html index 4aebee7c..b1b4bc8e 100644 --- a/base/templates/_layouts/clean.html +++ b/base/templates/_layouts/clean.html @@ -6,7 +6,7 @@ {% block style %}{% endblock %} - {% set user = session.get('user', None) %} +
@@ -14,20 +14,11 @@

-
-
- {% for category, msg in get_flashed_messages(with_categories=true) %} -
-

{{ msg }}

-
- {% endfor %} -
-
{# /messages-wrap #}
{% if request.blueprint %} {% if title %} -

{{ title }}{% if subtitle %} {{subtitle}}{% endif %}

+

{{ title }}{% if subtitle %} {{subtitle}}{% endif %}

{% endif %} {% endif %} {% if warning %} diff --git a/base/templates/_layouts/default.html b/base/templates/_layouts/default.html index c70b694c..59c48855 100644 --- a/base/templates/_layouts/default.html +++ b/base/templates/_layouts/default.html @@ -6,29 +6,19 @@ {% block style %}{% endblock %} - {% set user = session.get('user') or None %} {% include "_includes/navbar.html" %}
-
-
-
-
- {% for category, msg in get_flashed_messages(with_categories=true) %} -
-

{{ msg }}

-
- {% endfor %} -
-
{# /messages-wrap #} - +
+
+
{% if request.blueprint %} {% if title %} -

{{ title }} {% if subtitle %} {{ subtitle }}{% endif %}

+

{{ title }} {% if subtitle %} {{ subtitle }}{% endif %}

{% endif %} {% endif %} {% if warning %} @@ -39,27 +29,27 @@

{{ title }} {% if subtitle %} {{ subtitle }}{% endif %}

{# BREADCRUMB #} {% if request.blueprint and request.path != "/" %} + + {% endif %}
@@ -84,10 +74,10 @@

{{ title }} {% if subtitle %} {{ subtitle }}{% endif %}

); - {% include "_includes/footer.html" %}
- {% if config.DEBUG %}{{ user }}{% endif %} + {% if config.DEBUG %}{{ session }}{% endif %} + {% include "_includes/footer.html" %} \ No newline at end of file diff --git a/base/templates/about/about.html b/base/templates/about/about.html index ffc5535f..f680b350 100644 --- a/base/templates/about/about.html +++ b/base/templates/about/about.html @@ -1,76 +1,110 @@ {% extends "_layouts/default.html" %} +{% block custom_head %} -{% block content %} -
-
- - -
-Movie of C. elegans taken by the Goldstein Lab -
- -
-
- -{% filter markdown %} - -### What is _C. elegans_? - -_Caenorhabditis elegans_ is a non-parasitic nematode roundworm that lives in rotting material and eats bacteria and fungi. Because this species grows easily and quickly in the laboratory, it is a powerful model to learn about human development, complex behaviors, and evolutionary processes. For more information about _C. elegans_, [see this wikipedia page](https://en.wikipedia.org/wiki/Caenorhabditis_elegans), or to learn about its history at [wormclassroom.org](http://wormclassroom.org/short-history-c-elegans-research). - -{% endfilter %} -
-
-
- -
+ + +{% endblock %} -

Global Distribution of wild isolates

-

Most research groups that study C. elegans focus on the laboratory-adapted strain (called N2) isolated in Bristol, England in the 1950s. We have learned a great deal about basic biological processes from studies of this one strain.

-

-However, this species is found worldwide, and wild strains are as different from one another as humans are different from one another. These strains are isolated from a variety of environments in nature when researchers collect rotting materials, including fruits, flowers, nuts, berries, stems, leaves, and compost. We can use the natural diversity of these strains to learn about how populations of individuals are genetically different from another and how those differences might impact disease.

- -
- -
-
-
-
+{% block content %} -
-
-
-
- -
-
- -
-
+
+ +
+
+ +
Movie of C. elegans taken by the Goldstein Lab
+
+
{# /col-md-6 #} + +
+

What is C. elegans?

+

+ Caenorhabditis elegans is a non-parasitic nematode roundworm that lives in rotting material and eats bacteria and fungi. + Because this species grows easily and quickly in the laboratory, it is a powerful model to learn about human development, complex behaviors, + and evolutionary processes. For more information about C. elegans, + see this Wikipedia page or learn about its history at + wormclassroom.org.

+
{# /col-md-6 #} + +
{# /row #} + +
+ +
+
+
+
{# /col-md-6 #} + +
+

Global Distribution of wild isolates

+

+ Most research groups that study C. elegans focus on the laboratory-adapted strain (called N2) isolated in + Bristol, England in the 1950s. We have learned a great deal about basic biological processes from studies of this one strain. +

+

+ However, this species is found worldwide, and wild strains are as different from one another as humans are different from one another. + These strains are isolated from a variety of environments in nature when researchers collect rotting materials, including fruits, + flowers, nuts, berries, stems, leaves, and compost. We can use the natural diversity of these strains to learn about how populations + of individuals are genetically different from one another and how those differences might impact disease.

+
{# /col-md-6 #} + +
{# /row #} + +
+ +
+ +
{# /col-md-3 #} + +
+ +
{# /col-md-3 #} + +
+

CeNDR Goals

+

+ To facilitate the study of natural diversity by C. elegans research groups, we created the C. elegans + Natural Diversity Resource (CeNDR). We have three major goals: +

+ +
    +
+  1. To accept, organize, and distribute wild strains to research groups that want to investigate their favorite trait(s) across natural C. elegans strains. See Strains.
+  2. To sequence the whole genomes of wild C. elegans strains, provide the aligned sequence data, and facilitate discovery of genetic variation across the entire species. See Data.
+  3. To perform genome-wide association mappings to correlate genotype with phenotype and identify genetic variation underlying quantitative traits.
+ +

+ Please let us know what we can do to facilitate your discoveries! We are interested in + adding new resources and tools. +

+ +
{# /col-md-6 #} + +
{# /row #} - -
- -

CeNDR Goals

-

To facilitate the study of natural diversity by C. elegans research groups, we created the C. elegans Natural Diversity Resource (CeNDR). We have three major goals:

- -
    -
-  1. To accept, organize, and distribute wild strains to research groups that want to investigate their favorite trait(s) across natural C. elegans strains. See Strains.
-  2. To sequence the whole genomes of wild C. elegans strains, provide the aligned sequence data, and facilitate discovery of genetic variation across the entire species. See Data.
-  3. To perform genome-wide association mappings to correlate genotype with phenotype and identify genetic variation underlying quantitative traits.
- -

Please let us know what we can do to facilitate your discoveries! We are interested in adding new resources and tools.

- -
-
{% endblock %} @@ -78,66 +112,49 @@

CeNDR Goals

{% block script %} diff --git a/base/templates/about/donate.html b/base/templates/about/donate.html index 2bea82de..c9a7bc73 100644 --- a/base/templates/about/donate.html +++ b/base/templates/about/donate.html @@ -3,9 +3,11 @@ {% from "macros.html" import render_field %}
+
{{ render_markdown("donate.md") }} -
+
{# /col-md-6 #} +
Donate
@@ -18,10 +20,10 @@ {{ render_field(form.total, form_prefix="$", form_suffix=".00") }} {{ form.recaptcha }}
- - + + +
-
-
-
+
{# /col-md-6 #} +
{# /row #} {% endblock %} \ No newline at end of file diff --git a/base/templates/about/funding.html b/base/templates/about/funding.html index 5c9eea9f..99f3379e 100644 --- a/base/templates/about/funding.html +++ b/base/templates/about/funding.html @@ -1,12 +1,16 @@ {% extends "_layouts/default.html" %} {% block content %} +
{% for funder in funding_set %} -
- - {{ funder.name }} -
- {{ funder.description }} -
+ {% endfor %} +
{# /row #} {% endblock %} \ No newline at end of file diff --git a/base/templates/about/statistics.html b/base/templates/about/statistics.html index fa3fcebb..26af98b3 100644 --- a/base/templates/about/statistics.html +++ b/base/templates/about/statistics.html @@ -7,45 +7,42 @@ {% block content %}
-
-

Strains collected over time

- {{ strain_collection_plot|safe }} -
-
-
-
-

Numbers

-
- +
+

Strains collected over time

+ {{ strain_collection_plot|safe }} +
{# /col-md-6 #} + +
+
+
- - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + -
Strains{{ n_strains }}
Isotypes{{ n_isotypes }}
Unique Mapping Pipeline Users{{ n_users }}
Reports{{ n_reports }}
Traits{{ n_traits }}
Strains{{ n_strains }}
Isotypes{{ n_isotypes }}
Unique Mapping Pipeline Users{{ n_users }}
Reports{{ n_reports }}
Traits{{ n_traits }}
-
- +
+
{# /col-md-6 #} -
+
{# /row #}
diff --git a/base/templates/admin/admin.html b/base/templates/admin/admin.html new file mode 100644 index 00000000..f4451f85 --- /dev/null +++ b/base/templates/admin/admin.html @@ -0,0 +1,6 @@ +{% extends "_layouts/default.html" %} + +{% block content %} + + +{% endblock %} diff --git a/base/templates/admin/data_edit.html b/base/templates/admin/data_edit.html new file mode 100644 index 00000000..2b180ae6 --- /dev/null +++ b/base/templates/admin/data_edit.html @@ -0,0 +1,239 @@ +{% extends "_layouts/default.html" %} + +{% block custom_head %} + + + +{% endblock %} + + +{% block content %} +{% from "macros.html" import render_field %} + + +{% if report.initialized == True %} + +
+
+
+ +
+

+
+
{{ report.kind }} +
{{ report.name }} +
+
+

+
+
+
+
+
+ +
+
+
+ Dataset: +
+
+ {{ report.dataset }} +
+
+ +
+
+
+ Wormbase: +
+
+ {{ report.wormbase }} +
+
+ +
+
+
+ Version: +
+
+ {{ report.version }} +
+
+ +
+
+
+ Created On: +
+
+ {{ report.created_on }} +
+
+ +
+
+
+ Report Cloud Location: +
+
+ data-reports/{{report.dataset}}/ +
+
+ +
+
+
+ Report Last Synced: +
+
+ {{report.report_synced_on}} +
+
+ +
+
+
+
+
+ +
+
+ +
+
+
+ DB Cloud Location: +
+
+ db/cendr.{{report.dataset}}.{{report.wormbase}}.db +
+
+ +
+
+
+ DB Last Synced: +
+
+ {{ report.db_synced_on }} +
+
+ +
+
+
+
+
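Aside: the two cloud locations shown in this panel follow a simple naming convention. A hypothetical helper reproducing it — the function, and any bucket layout beyond these two patterns, are assumptions, not part of this PR:

```python
def report_cloud_paths(dataset, wormbase):
    """Mirror the locations displayed above: a per-dataset report
    prefix and the versioned database object name."""
    return {
        "report_prefix": f"data-reports/{dataset}/",
        "db_object": f"db/cendr.{dataset}.{wormbase}.db",
    }

# e.g. report_cloud_paths("20210121", "WS276")  # arguments are illustrative
# -> {'report_prefix': 'data-reports/20210121/',
#     'db_object': 'db/cendr.20210121.WS276.db'}
```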
+ +
+
+ +
+
+
+ Published On: +
+
+ {{ report.published_on }} +
+
+ +
+
+
+
+
+ + +
+
+ +
+
+
+ +{% else %} + +
+ {{ form.csrf_token }} + + +
+
+
+
+ +
+
+ +
+
+
+ {{ render_field(form.dataset) }} +
+
+ +
+
+
+ {{ render_field(form.wormbase) }} +
+
+ +
+
+
+ {{ render_field(form.version) }} +
+
+ +
+ + +{% endif %} + + +{% endblock %} + +{% block script %} + +{% endblock %} diff --git a/base/templates/admin/data_list.html b/base/templates/admin/data_list.html new file mode 100644 index 00000000..f9104ccd --- /dev/null +++ b/base/templates/admin/data_list.html @@ -0,0 +1,56 @@ +{% extends "_layouts/default.html" %} + +{% block content %} + +
+
+
+ +
+ +
+
+ + + + + + + + + + + + + {% for item in items %} + + {% if item %} + + + + + + + {% endif %} + + {% endfor %} + +
Dataset Wormbase Version Report Version ID Edit Delete
{{ item.dataset }} {{ item.wormbase }} {{ item.version }} {{ item.key.name }} + + Edit + + + + Delete + +
+
+
+ + +{% endblock %} diff --git a/base/templates/admin/google_sheet.html b/base/templates/admin/google_sheet.html new file mode 100644 index 00000000..26252300 --- /dev/null +++ b/base/templates/admin/google_sheet.html @@ -0,0 +1,11 @@ +{% extends "_layouts/default.html" %} + +{% block content %} + + + New Window + + + + +{% endblock %} diff --git a/base/templates/admin/users_edit.html b/base/templates/admin/users_edit.html new file mode 100644 index 00000000..90695488 --- /dev/null +++ b/base/templates/admin/users_edit.html @@ -0,0 +1,51 @@ +{% extends "_layouts/default.html" %} + +{% block content %} +{% from "macros.html" import render_field %} + +
+ {{ form.csrf_token }} + + +
+
+
+
+ +
+
+ +
+
+
+
+ {{ render_field(form.roles) }} +
+
+ +
+ +
+
+
+
+ + + + + + + + + +
Username{{ user.username }}
Full Name{{ user.full_name }}
Email{{ user.email }}
Verified{{ user.verified_email }}
Password************
Registered{{ user.created }}
Last Modified{{ user.modified_on }}
Last Login{{ user.last_login }}
+
+
+ +
+ + +{% endblock %} diff --git a/base/templates/admin/users_list.html b/base/templates/admin/users_list.html new file mode 100644 index 00000000..04954f15 --- /dev/null +++ b/base/templates/admin/users_list.html @@ -0,0 +1,53 @@ +{% extends "_layouts/default.html" %} + +{% block content %} + +
+
+ + + + + + + + + + + + + + + + + {% for user in users %} + + {% if user %} + + + + + + + + + + + {% endif %} + + {% endfor %} + +
ID Username Full Name Email Verified Email Roles Created On Last Login Edit Delete
{{ user.key.name }} {{ user.username }} {{ user.full_name }} {{ user.email }} {{ user.verified_email }} {{ user.roles }} {{ user.created_on }} {{ user.last_login }} + + Edit + + + + Delete + +
+
+
+ + +{% endblock %} diff --git a/base/templates/alignment.html b/base/templates/alignment.html new file mode 100644 index 00000000..9e7ac99d --- /dev/null +++ b/base/templates/alignment.html @@ -0,0 +1,18 @@ +{% extends "_layouts/default.html" %} + +{% block content %} + +

Bolded strains are isotype reference strains; indented, non-bold strains belong to an isotype group but are not the isotype reference strain.
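A hedged sketch of how that ordering could be produced server-side; `alignment_rows` is illustrative and not part of this PR, though the `strain`, `isotype`, and `isotype_ref_strain` fields mirror the attributes the strain map script reads from `strain_listing`:

```python
from itertools import groupby

def alignment_rows(strains):
    """Order strains for display: each isotype's reference strain first
    (rendered bold), then its member strains (indented, non-bold)."""
    rows = []
    by_isotype = sorted(strains, key=lambda s: s.isotype)
    for _, members in groupby(by_isotype, key=lambda s: s.isotype):
        # False sorts before True, so the reference strain leads its group.
        for s in sorted(members, key=lambda s: s.strain != s.isotype_ref_strain):
            is_ref = s.strain == s.isotype_ref_strain
            rows.append({"strain": s.strain, "bold": is_ref, "indent": not is_ref})
    return rows
```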

+

Only strains with whole-genome sequencing data have BAM files for download. If you do not see a strain, check the Strain Issues page.
+Some strains have been flagged and removed from distribution and analysis pipelines for a variety of reasons.

+ +{% include('releases/download_tab_strain_v2.html') %} + +{% endblock %} + +{% block script %} + + + +{% endblock %} diff --git a/base/templates/basic_login.html b/base/templates/basic_login.html new file mode 100644 index 00000000..eaec0936 --- /dev/null +++ b/base/templates/basic_login.html @@ -0,0 +1,23 @@ +{% extends "_layouts/default.html" %} +{% block content %} +{% from "macros.html" import render_field %} + +
+
+
+
+
Login
+
+
+ {{ form.csrf_token }} + {{ render_field(form.username) }} + {{ render_field(form.password) }} + {{ form.recaptcha }} +
+ +
+
+
+
+
+{% endblock %} \ No newline at end of file diff --git a/base/templates/browser.html b/base/templates/browser.html deleted file mode 100644 index 1745fdd1..00000000 --- a/base/templates/browser.html +++ /dev/null @@ -1,776 +0,0 @@ -{% extends "_layouts/default.html" %} - -{% block custom_head %} - - - - - - - - - - - - - - - - -{% endblock %} - - -{% block content %} -
-
-
-
-
-
Tracks
- {% for i in ["Genes", "Transcripts"] %} -
- -
- {% endfor %} - {% for i in ["phyloP", "phastCons", "Variants"] %} -
- -
- {% endfor %} - -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
Variant Impact
- - -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- - -
-
- -
Gene Search
- - - - - - - - - - - - - - - -
- -
- -
Isotype (reference strain)
- -
- -
- -
- - - -
- - - - - - {% for strain in strain_listing %} - - - - - - - - {% endfor %} - - - -
- {{ strain.isotype }} ({{ strain.strain }}) - - - - -
-
-
- - -
-
{# Close col #} -
{# Close row #} - - -
- -
- -
-
Reference   -
Alternate -    - - -
- -
- - -
-
-

Variants

- - - -

A Maximum of 1000 variants or 100kb will be queried and returned. Heterozygous calls are likely errors. When rows are yellow, it indicates the entire variant failed QC.

- -Hovering over a failing genotype will list the filter applied. Genotypes are shown as follows: -
-
-
- Passing REF  - Passing ALT   Note: The variant browser now only shows hard-filtered variants. - Help -
-
-
- - - - - - - - - - - - - - - - - - -
CHROM:POSREF / ALTFilterAFGene NameWormBase IDSequenceCoding ChangeBiotypeAnnotationImpact
- -
-
-{% endblock %} - -{% block script %} - - - - - - -{% endblock %} diff --git a/base/templates/contact.html b/base/templates/contact.html index fda3af95..310ea6df 100644 --- a/base/templates/contact.html +++ b/base/templates/contact.html @@ -1,49 +1,83 @@ {% extends "_layouts/default.html" %} -{% block title_right %} -{% endblock %} {% block content %}
-
-
-
Associate Professor Erik Andersen
-
- - Erik.Andersen@northwestern.edu
- phone - - -
-
- - -
Lab Phone
- phone - - -

- - -
{#/ col-xs-12 #} - -
-

Administrative Address

-
- Northwestern University
Department of Molecular Biosciences

- 2205 Tech Drive, Hogan 2-100
- Evanston, IL. 60208-3500
-
- -

Shipping Address

-
- Erik Andersen
Northwestern University
Department of Molecular Biosciences

- 2205 Tech Dr, Hogan 1-500
- Evanston, IL. 60208-3500
-
-
-
+
+
+ +
+ +
+
Associate Professor Erik Andersen
+
+ + + Erik.Andersen@northwestern.edu + +
+ phone + + +
+
+
{# /col-md-12#} + + +
+ +
+
Lab Phone
+
+ phone + + +
+
+
{#/ col-xs-12 #} + + + + + + +
{# /row #} +
{# /col-lg-4 #} + +
+ +
+
Administrative Address
+
+
+ Northwestern University
Department of Molecular Biosciences

+ 2205 Tech Drive, Hogan 2-100
+ Evanston, IL. 60208-3500
+
+
+
+
{#/ col-md-12 #} + + +
+ +
+
Shipping Address
+
+
+ Erik Andersen
Northwestern University
Department of Molecular Biosciences

+ 2205 Tech Dr, Hogan 1-500
+ Evanston, IL. 60208-3500
+
+
+
+
{#/ col-xs-12 #} + + + +
{# /row #} +
- +
+
{% endblock %} diff --git a/base/templates/strain/strains.html b/base/templates/strain/strains.html new file mode 100644 index 00000000..e0e2fa29 --- /dev/null +++ b/base/templates/strain/strains.html @@ -0,0 +1,81 @@ +{% extends "_layouts/default.html" %} + +{% block content %} + + + {# /row #} + + + +{% endblock %} + diff --git a/base/templates/strain/strains_list.html b/base/templates/strain/strains_list.html new file mode 100644 index 00000000..486aaac5 --- /dev/null +++ b/base/templates/strain/strains_list.html @@ -0,0 +1,90 @@ +{% extends "_layouts/default.html" %} + +{% block content %} + +
+
+
+ +
{# /col-md-4 #} +
{# /row #} + +
+ + + + + + + + + + + + + {% for isotype, strains in strain_listing|groupby('isotype') %} + + + + + + {% if strains[0].previous_names %} + + {% else %} + + {% endif %} + + {% endfor %} + +
+ Reference Strain + + Isotype + + Strains + + Release + + Alternative Names +
+ {% set isotype_loop_index = loop.index %} + + + {{ isotype }} + + {{ strains|join(", ") }} + + {{ strains[0]['release'] }} + {{ strains[0].previous_names.replace(',', '|').split('|') | join(', ') }}
+
{# /row #} + + +{% endblock %} + + +{% block script %} + + + +{% endblock %} diff --git a/base/templates/strain/global_strain_map.html b/base/templates/strain/strains_map.html similarity index 50% rename from base/templates/strain/global_strain_map.html rename to base/templates/strain/strains_map.html index 2b5f113b..9649ca9b 100644 --- a/base/templates/strain/global_strain_map.html +++ b/base/templates/strain/strains_map.html @@ -1,54 +1,60 @@ {% extends "_layouts/default.html" %} - {% block content %} -
-
+
+
-
- Hover over or click a pin to see information about a C. elegans wild isolate -
{# /text-center #} -
{# /col-md-8 #} -
-
-
Strain Information
-
    - -
  • - - - Isotype - - -
  • - -
  • - - - Strain - - -
  • - - -
  • - - - Reference Strain - -
  • -
  • Release
  • -
  • Isolation Date
  • -
  • Latitude, Longitude
  • -
  • Elevation
  • -
  • Landscape
  • -
  • Substrate
  • -
  • Sampled By
  • -
-
- Submit Strains

-
{# /col-md-4 #} -
{# /col-md-8 #} +
+ Hover over or click a pin to see information about a C. elegans wild isolate +
{# /text-center #} +
{# /col-md-8 #} + +
+
+
+ + Strain Information +
+
    + +
  • + + Isotype + +
    +
    +
  • + +
  • + + Strain + +
    +
    +
  • + +
  • + + Reference Strain + +
    +
    +
  • + +
  • Release
  • +
  • Isolation Date
  • +
  • Latitude, Longitude
  • +
  • Elevation
  • +
  • Landscape
  • +
  • Substrate
  • +
  • Sampled By
  • +
+
{# /panel #} + Submit Strains

+
{# /col-md-4 #} + +
{# /row #} + {% endblock %} @@ -97,7 +103,7 @@ click_s.target.setIcon(icon_norm) s = m.layer.options $(".strain").text(s.strain); - $(".isotype").html("" + s.isotype + ""); + $(".isotype").html("" + s.isotype + ""); $(".isotype_ref_strain").text(s.isotype_ref_strain); $(".release").text(s.release); $(".isolation_date").text(s.isolation_date); @@ -121,7 +127,7 @@ elevation = s.elevation latlng = m.latlng.lat + comma + m.latlng; $(".strain").text(s.strain); - $(".isotype").html("" + s.isotype + ""); + $(".isotype").html("" + s.isotype + ""); $(".release").text(s.release); $(".isotype_ref_strain").text(s.isotype_ref_strain); $(".isolation_date").text(s.isolation_date); @@ -182,29 +188,32 @@ } - data = {{ strain_listing|safe }} + data = {{ strain_listing|tojson|safe }} data.sort(naturalCompare) strain_info = data; data.forEach(function(d) { - strain_names.push(d.strain); - m = L.marker([d.latitude, d.longitude], { icon: icon_norm, - strain: d.strain, - title: d.strain, - isotype: d.isotype, - isotype_ref_strain: d.isotype_ref_strain, - search_field: `${d.strain} (${d.isotype})`, - isolation_date : d.isolation_date, - release: String(d.release).replace(/(\d{4})(\d{2})(\d{2})/, "$1-$2-$3"), - elevation: d.elevation + " m" , - landscape: d.landscape, - substrate: d.substrate, - comma: ", ", - sampled_by: d.sampled_by }).addTo(map) - .on("click", set_click_locked_content) - .on('mouseover', set_panel_content) - .on('mouseout', restore_click_locked_content) - markers.push(m); - }); + if (d.latitude) { + strain_names.push(d.strain); + m = L.marker([d.latitude, d.longitude], { icon: icon_norm, + strain: d.strain, + title: d.strain, + isotype: d.isotype, + isotype_ref_strain: d.isotype_ref_strain, + search_field: `${d.strain} (${d.isotype})`, + isolation_date : d.isolation_date, + release: String(d.release).replace(/(\d{4})(\d{2})(\d{2})/, "$1-$2-$3"), + elevation: d.elevation + " m" , + landscape: d.landscape, + substrate: d.substrate, + comma: ", ", + sampled_by: d.sampled_by }).addTo(map) + .on("click", set_click_locked_content) + .on('mouseover', set_panel_content) + .on('mouseout', restore_click_locked_content) + markers.push(m); + } +}); + var strain_layer = L.layerGroup(markers); var search_control = new L.Control.Search({layer: strain_layer, propertyName: 'search_field', @@ -222,17 +231,26 @@ map.addControl(search_control); $('.search-input').width(100); - $(document).ready(function() { -$(function () { - $('[data-toggle="tooltip"]').tooltip() -}) - + (function($) { + $('[data-toggle="tooltip"]').tooltip() + + var patterns = []; + $('#filter').keyup(function() { + $('.searchable tr').hide(); + console.log($(this).val()); + $(this).val().split(',').forEach(function(r) { + var rex = new RegExp(r, "i"); + $('.searchable tr').filter(function() { + return rex.test($(this).text()); + }).show(); + }) + }) + + }(jQuery)); }); - - {% endblock %} diff --git a/base/templates/strain_issues.html b/base/templates/strain_issues.html new file mode 100644 index 00000000..45cfb9bb --- /dev/null +++ b/base/templates/strain_issues.html @@ -0,0 +1,14 @@ +{% extends "_layouts/default.html" %} + +{% block content %} + +{% include('releases/download_tab_strain_v2_issues.html') %} + +{% endblock %} + +{% block script %} + + + +{% endblock %} diff --git a/base/templates/tools/h2_result_list.html b/base/templates/tools/h2_result_list.html new file mode 100644 index 00000000..32e70dae --- /dev/null +++ b/base/templates/tools/h2_result_list.html @@ -0,0 +1,106 @@ +{% extends "_layouts/default.html" %} + +{% block 
custom_head %} + + +{% endblock %} + +{% block style %} + +{% endblock %} + +{% block content %} + +{% from "macros.html" import render_dataTable_top_menu %} +{{ render_dataTable_top_menu() }} + +
+ +
+ + + + + + + + + + + + + + {% for item in items %} + + {% if item %} + + + + {% endif %} + + + {% endfor %} + + +
Label Trait Status Date
{{ item.label }} {{ item.trait }} + {% if item.status == 'COMPLETE' %} + + {{ item.status }} + + {% else %} + {{ item.status }} + {% endif %} + {{ item.created_on|date_format }}
+ +
{# /col #} +
{# /row #} + + +{% endblock %} + +{% block script %} + + + + +{% endblock %} diff --git a/base/templates/tools/heritability_calculator.html b/base/templates/tools/heritability_calculator.html index 12ad4bcf..4e07fb4d 100644 --- a/base/templates/tools/heritability_calculator.html +++ b/base/templates/tools/heritability_calculator.html @@ -4,74 +4,120 @@ {% endblock %} - -{% block style %}{% endblock %} - {% block content %} -
-
+
+ +
+
+

+ This tool will calculate the broad-sense heritability for your trait of interest using a set of C. elegans wild + isolates. The broad-sense heritability is the amount of trait variance that comes from genetic differences in the + assayed group of strains. Generally, it is the ratio of genetic variance to total (genetic plus environmental) variance. +

+

+ To obtain the best estimate of heritability, please measure a set of at least five wild strains in three + independent assays. These assays should use different nematode growths, synchronizations, bacterial food + preparations, and any other experimental condition. You should measure trait variance across as many + different experimental conditions (in one block) as you would typically encounter in a large experiment. +

+

+ Please organize your data in a long format, where each row is an independent observation of one strain in one trait. The columns of the data set should be: +

+
    +
  1. AssayNumber - a numeric indicator of independent assays.
  2. +
  3. Strain - one of the CeNDR isotype reference strain names.
  4. +
  5. TraitName - a user-supplied name of a trait with no spaces (e.g. BroodSize).
  6. +
  7. Replicate - independent measures of a trait within one independent assay. You + can think of this column as a numerical value for a technical replicate.
  8. +
  9. Value - the measured output of the trait (e.g. 297 for BroodSize).
  10. +
-

- This tool will calculate the broad-sense heritability for your trait of interest using a set of C. elegans wild - isolates. The broad-sense heritability is the amount of trait variance that comes from genetic differences in the - assayed group of strains. Generally, it is the ratio of genetic variance to total (genetic plus environmental) - variance. -

- -

- To obtain the best estimate of heritability, please measure a set of at least five wild strains in three - independent assays. These assays should use different nematode growths, synchronizations, bacterial food - preparations, and any other experimental condition. You should measure trait variance across as many - different experimental conditions (in one block) as you would typically encounter in a large experiment. -

- -

- Please organize your data in a long format, where each row is an independent observation of one strain in one - trait. The columns of the data set should be: -

- -
    -
  1. AssayNumber - a numeric indicator of independent assays.
  2. -
  3. Strain - one of the CeNDR isotype reference strain names.
  4. -
  5. TraitName - a user-supplied name of a trait with no spaces (e.g. BroodSize).
  6. -
  7. Replicate - independent measures of a trait within one independent assay. You - can think of this column as a numerical value for a technical replicate.
  8. -
  9. Value - the measured output of the trait (e.g. 297 for BroodSize).
  10. -
- -
Use example data

NA values will not be used in broad-sense heritability calculations.

- -
{# /col-md-8 #} +
{# /col-md-12 #} +
{# /row #} + + +{% if hide_form == True %} + +
+ {# /col-md-3 #} + {# /col-md-3 #} +
{# /row #} +
{# /well #} + +{% else %} +
{# /well #} + +
-
+ + {# /col-md-3 #} + + {# /col-md-3 #} -
-
- -
-
- - Prepare your data according to the column headers (described above). Data should be pasted in the table below. - -
-
-
-
- -
{# /col-md-12 #} -
{# /row #} - -
- -
{#/ col-md-12 #} +
{# /row #} + +
+
+
+ + +
+
+
+ +
+
+
+
+
+
+
+
+ Prepare your data according to the column headers (described below). Data should be pasted in the table below. +
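To make the expected input and the heritability ratio concrete, here is a minimal sketch (assuming pandas is available; the table contents and the helper name broad_sense_H2 are illustrative, not part of this codebase) of a five-column long-format table and a naive ratio of between-strain to total variance. The production calculation runs server-side and is not shown in this diff.

import pandas as pd

# Minimal sketch only -- NOT the production CeNDR pipeline. It illustrates the
# long data format described above and the ratio H2 = Vg / (Vg + Ve).
example = pd.DataFrame({
    "AssayNumber": [1, 1, 2, 2, 3, 3],
    "Strain":      ["N2", "CB4856", "N2", "CB4856", "N2", "CB4856"],
    "TraitName":   ["BroodSize"] * 6,
    "Replicate":   [1, 1, 1, 1, 1, 1],
    "Value":       [297.0, 251.0, 310.0, 240.0, 288.0, None],
})

def broad_sense_H2(df):
    """Naive estimate: between-strain variance over total variance."""
    df = df.dropna(subset=["Value"])           # NA values are excluded, as noted above
    strain_means = df.groupby("Strain")["Value"].mean()
    v_genetic = strain_means.var()             # Vg: variance between strain means
    v_env = df.groupby("Strain")["Value"].var().mean()  # Ve: mean within-strain variance
    return v_genetic / (v_genetic + v_env)

print(round(broad_sense_H2(example), 2))

A real assay would include at least five strains and several replicates per assay, as described above; the sketch is trimmed only to show the table shape.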
+
{# /col-md-12 #} +
{# /row #} + +
{# /col-md-12 #}
{# /row #} + + + + +{% endif %} + + {% endblock %} {% block script %} @@ -82,28 +128,62 @@ ]; var noNAs = 0; function dataValidator(instance, td, row, col, prop, value, cellProperties) { - Handsontable.renderers.TextRenderer.apply(this, arguments); - if (row === 0) { - cellProperties.readOnly = true; + Handsontable.renderers.TextRenderer.apply(this, arguments); + if (row === 0) { + cellProperties.readOnly = true; } - if (row === 0) { - td.style.fontWeight = 'bold'; - td.style.backgroundColor = '#EAEAEA'; - } + if (row === 0) { + td.style.fontWeight = 'bold'; + td.style.backgroundColor = '#EAEAEA'; + } - if (['NA',].indexOf(String(value).trim()) >= 0) { - td.style.background = '#FC6666'; - td.style.fontWeight = 'bold'; - } + if (['NA',].indexOf(String(value).trim()) >= 0) { + td.style.background = '#FC6666'; + td.style.fontWeight = 'bold'; + } if (duplicate_rows.indexOf(row) > -1) { - td.style.background = '#0CEF13'; + td.style.background = 'RED'; } + + if (trait_names.includes(value)) { + td.style.background = 'RED'; + } + + if (unknown_strains.includes(value)) { + td.style.background = 'RED'; + } } -var isValid = false +{% autoescape off %} +var strain_list = {{ strain_list }}; +{% endautoescape %} + +var isValid = false; var duplicate_rows = []; +var trait_names = []; +var unknown_strains = []; + + +var trim_data = function(data) { + for (let i = 0; i < data.length; i++) { + for (let j = 0; j < data[i].length; j++) { + if (data[i][j]) { + data[i][j] = data[i][j].replace("\r\n", "").replace("\r", "").replace("\n", ""); + data[i][j] = data[i][j].trim() + } + } + var stringData = JSON.stringify(data[i]) + if(JSON.stringify(['','','','','']) == stringData || "[null,null,null,null,null]" == stringData) { + data.splice(i, 1); + i--; + } + } + return data; +} + + var validate_data = function(data){ // Performs error checks: // 1) Duplicates @@ -111,36 +191,67 @@ // 3) Trait Name pass = true + data = trim_data(data); // Check for duplicates - dup_search = _.map(data, (item, idx) => item.toString()); - dup_values = _.chain(dup_search).groupBy().filter(x => x.length > 1 && x[0] !== ",,,,").flatten().uniq().value() - duplicate_rows = _.map(dup_search, (item, idx) => _.indexOf(dup_values, item) > -1 ? idx : null).filter(x => x !== null) + occurences = [] + duplicate_rows = [] + for (let i = 0; i < data.length; i++) { + item = JSON.stringify(data[i]); + if (occurences[item]) { + duplicate_rows.push(i) + } + occurences[item] = true; + }; // if dups are present alert user $("#duplicate_error").text("") if (duplicate_rows.length > 0) { - $("#duplicate_error").text("Please check the data because duplicate rows are present. The duplicate rows are shown in green.") + $("#duplicate_error").text("Please check the data because duplicate rows are present. 
The duplicate rows are shown in red.") pass = false } - // Count number of strains - n_strains = _.uniq(_.pluck(data, 1).filter((x, idx) => idx > 0 && x !== null)).length + // Strain names + var strain_names = _.uniq(_.pluck(data, 1).filter((x, idx) => idx > 0 && x !== null && x.length > 0)); + unknown_strains = _.difference(strain_names, strain_list); + $("#strain_name_error").text(""); + if (unknown_strains.length > 0) { + $("#strain_name_error").text("Please check the data - Some of the strains were not recognized") + pass = false + } - $("#strain_count_error").text("") + // Count number of strains + n_strains = strain_names.length; + $("#strain_count_error").text(""); if (n_strains < 5) { $("#strain_count_error").text("Please check the data because fewer than five strains are present. Please measure trait values for at least five wild strains in at least three independent assays.") pass = false } // Trait Name - trait_count = _.uniq(_.pluck(data, 2).filter((x, idx) => idx > 0 && x !== null)).length + trait_names = _.uniq(_.pluck(data, 2).filter((x, idx) => idx > 0 && x !== null && x.length > 0)); + trait_count = trait_names.length; $("#trait_name_error").text("") - if (trait_count > 1) { + if (trait_count >= 2) { + pass = false $("#trait_name_error").text("Please check the data. The TraitName has multiple unique values. Only data for a single trait allowed.") + var most_common_trait = _.chain(_.pluck(data, 2)).countBy().pairs().max(_.last).head().value(); + const index = trait_names.indexOf(most_common_trait); + if (index !== -1) { + trait_names.splice(index, 1); + } + } else { + trait_names = [] + } + + // Data label + label_len = $("#calcLabel").val().length + $("#calc_label_error").text("") + if (label_len == 0) { + $("#calc_label_error").text("Please include a brief description of the data.") pass = false } - + return pass } @@ -168,43 +279,32 @@ var cellProperties = {}; cellProperties.renderer = dataValidator; return cellProperties; - } + }, + beforePaste: (data, coords) => { + disableScrolling() + } }); + +$("#calcLabel").on('change', function() { + onFormChange() +}) + hot.addHook("afterChange", function() { - isValid = validate_data(hot.getData()); + onFormChange() +}) + +function onFormChange() { + isValid = validate_data(hot.getData()); hot.render(); if (isValid) { document.getElementById('hcalc').disabled = false; - - // Fetch dataset statistics - $.ajax({type: "POST", - url: "{{ url_for('heritability.check_data') }}", - data: JSON.stringify(hot.getData()), - contentType: "application/json; charset=utf-8", - dataType: 'json', - success:function(result) { - $("#trait_summary").html(` -
- Input data summary: -
    -
  • Minimum: ${result['minimum']}
  • -
  • Maximum: ${result['maximum']}
  • -
  • 25% Quartile: ${result['25']}
  • -
  • 50% Quartile: ${result['50']}
  • -
  • 75% Quartile: ${result['75']}
  • -
  • Variance: ${result['variance']}
  • - `) - } - }); - } else { $("#trait_summary").html("") document.getElementById('hcalc').disabled = true; } - -}) +} // Enable setting example data $("#set-example").on('click', function() { @@ -212,6 +312,7 @@ .then(response => response.text()) .then(function(data) { ex_data = _.map(data.split("\n"), x => x.split("\t")) + disableScrolling(); hot.loadData(ex_data); document.documentElement.scrollTo({ top: document.documentElement.scrollHeight - document.documentElement.clientHeight, @@ -225,16 +326,24 @@ // submit result $("#hcalc").on("click", function(e) { + $("#hcalc").addClass("disabled") if (isValid) { + var data = new FormData($('form#form-submit')[0]); + data.append('table_data', JSON.stringify(hot.getData())); + data.set('label', $("#calcLabel").val()); $.ajax({ type: "POST", + processData: false, + contentType: false, + dataType: 'json', url: "{{ url_for('heritability.submit_h2') }}", - data: JSON.stringify(hot.getData()), - contentType: "application/json; charset=utf-8", - dataType: 'json', - success:function(result){ - window.location = `heritability/h2/${result.data_hash}` - } + data: data, + success:function(result) { + window.location = `../heritability/h2/${result.id}` + }, + error:function(error) { + $("#hcalc").removeClass("disabled") + } }) } }); @@ -247,6 +356,19 @@ } }); +function disableScrolling() { + var x=window.scrollX; + var y=window.scrollY; + window.onscroll = function() { + window.scrollTo(x, y); + }; + setTimeout(enableScrolling, 500); +} + +function enableScrolling(){ + window.onscroll=function(){}; +} + {% endblock %} diff --git a/base/templates/tools/heritability_results.html b/base/templates/tools/heritability_results.html index 5293c734..1f45c098 100644 --- a/base/templates/tools/heritability_results.html +++ b/base/templates/tools/heritability_results.html @@ -1,7 +1,6 @@ {% extends "_layouts/default.html" %} {% block custom_head %} - @@ -36,85 +35,115 @@ {% block content %} -{% if data and result %} -
    - - -
    - -{% endif %} - -
    -
    - -

    Heritability Calculator

    - -

    - This tool will calculate the broad-sense heritability for your trait of interest using a set of C. elegans wild - isolates. The broad-sense heritability is the amount of trait variance that comes from genetic differences in the - assayed group of strains. Generally, it is the ratio of genetic variance to total (genetic plus environmental) - variance. -

    - -
    +
    +

    + {{ trait }} +

    +
    {# /row #} -
    {# /col-md-8 #} +
    +

    + {{ hr.label }} +

    {# /row #} -{% if not result %} +{% if data and not result %}
    -
    -

    +

    +
    +

    The heritability calculation is currently being run. Please check back in a few minutes for results. This page will reload automatically.

    -
    -
    +
    {# /col-md-8 #} +
    {# /row #} + +{% else %} -{% endif %} + +
    +
    +
    +
    {# /col-md-12 #} +
    {# /row #} + +
    +
    + {% if data %} +
    + {% endif %} +
    +
    {# /row #}
    -
    - {% if data %} -
    - {% endif %} - {% if result %} - -
    - {% endif %} + {% if result %} + + + + {% endif %} +
    {# /row #} -
    {# /col-md-8 #} +{# /row #}
    -

    Data

    - Download Table - +
    +
    +
    - - - - - - - + + + + + + + - - {% for row in data %} - - - - - - + + {% for row in data %} + + + + + + + {% endfor %} - -
    AssayNumberStrainTraitNameReplicateValue
    AssayNumberStrainTraitNameReplicateValue
    {{ row['AssayNumber'] }} {{ row['Strain'] }} {{ row['TraitName'] }} {{ row['Replicate'] }} {{ row['Value']|string|truncate(8, end= "") }}
    {{ row['AssayNumber'] }} {{ row['Strain'] }} {{ row['TraitName'] }} {{ row['Replicate'] }} {{ row['Value']|string|truncate(8, end= "") }}
    -
    + + +
+
+
{# /row #} + + + +{% endif %} + {% endblock %} {% block script %} @@ -190,71 +219,88 @@

Data

for (var cc=0; cc { - - image.onload = function () { - var ctx = canvas.getContext("2d"); - ctx.fillStyle = "#FFFFFF"; - ctx.fillRect(0, 0, w, h); - ctx.drawImage(image, 0, 0, w, h); - var png = canvas.toDataURL("image/jpg",1); - imag = png - document.getElementById('png-container').innerHTML = ''; - DOMURL.revokeObjectURL(png); - if (this.complete){ - imag = this.src - - } - resolve('resolved'); - } - + image.onload = function () { + var ctx = canvas.getContext("2d"); + ctx.fillStyle = "#FFFFFF"; + ctx.fillRect(0, 0, w, h); + ctx.drawImage(image, 0, 0, w, h); + var png = canvas.toDataURL("image/png"); + imag = png + document.getElementById('png-container').innerHTML = ''; + DOMURL.revokeObjectURL(png); + if (this.complete){ + imag = this.src + } + resolve('resolved'); + } }); - - } + } var loadCanvas = function(){ var svgString = new XMLSerializer().serializeToString(document.querySelector('svg')); var canvas = document.getElementById("canvas"); canvas.width = 1000; - canvas.height = 480; + canvas.height = 480; var ctx = canvas.getContext("2d"); var DOMURL = self.URL || self.webkitURL || self; image = document.createElement("IMG"); var svg = new Blob([svgString], {type: "image/svg+xml;charset=utf-8"}); var url = DOMURL.createObjectURL(svg); - drawImg(canvas, image, DOMURL, 1000, 480).then(() => { + drawImg(canvas, image, DOMURL, canvas.width, canvas.height).then(() => { + - var doc = new jsPDF('p', 'px', 'a4'); var source = document.getElementById('dataContainer'); + const options = { + orientation: 'landscape', + format: 'a4', + unit: 'px', + + } + var doc = new jsPDF(options); + margins = { top: 40, bottom: 40, left: 40, - width: 522 + right: 40 }; + page_height = doc.internal.pageSize.getHeight(); + page_width = doc.internal.pageSize.getWidth(); + /*doc.addImage(document.getElementById('svg'), 'SVG', 0, 0, page_width, page_height, NaN, 'NONE', 90);*/ + doc.setFontSize(10); + doc.fromHTML('Trait: {{ trait }}', 120, 10) + doc.fromHTML('Broad-sense heritability ( H', 120, 20) + doc.setFontSize(4); + doc.fromHTML('2', 225, 15) + doc.setFontSize(10); + doc.fromHTML(') = {{ (result['H2']*100)|round(2) }}% (range {{ (result['ci_l']*100)|round(2) }}% to {{ (result['ci_r']*100)|round(2) }}%)', 230, 20) + + doc.addImage(document.getElementById('img1').src, 'PNG', margins.left, margins.top, + page_width - (margins.left + margins.right), + page_height - (margins.top + margins.bottom), + NaN, 'NONE', 0); + + doc.addPage(); doc.setFontSize(16); doc.text(doc.internal.pageSize.getWidth()/2, 50, 'Heritability Calculator', {'align': 'center'}); doc.fromHTML('The broad-sense heritability is the amount of trait variance that comes from genetic differences in the assayed group ', 20, 85, {'align': 'justify', 'maxWidth': doc.internal.pageSize.getWidth()-40},function(){},margins) doc.fromHTML('of strains. 
Generally, it is the ratio of genetic variance to total (genetic plus environmental) variance.', 20, 100, {'align': 'justify', 'maxWidth': doc.internal.pageSize.getWidth()-40},function(){},margins) - doc.fromHTML('Broad-sense heritability ( H', 20, 120) - doc.setFontSize(4); - doc.fromHTML('2', 123, 115) - doc.setFontSize(10); - doc.fromHTML(') = {{ (result['H2']*100)|round(2) }}% (range {{ (result['ci_l']*100)|round(2) }}% to {{ (result['ci_r']*100)|round(2) }}%)', 130, 120) - doc.fromHTML('Trait: {{ trait }}', 20, 140) - - doc.addImage(document.getElementById('img1').src, 'JPEG', 10, 170, 430,200, NaN, 'FAST'); - + var a = document.createElement('a'); var pdfblob = new Blob([ doc.output('blob') ], { type : 'application/pdf'}); a.href = window.URL.createObjectURL(pdfblob); @@ -262,52 +308,48 @@

Data

a.click(); }) image.src = url; - document.getElementById("svgchartarea").remove(); - document.getElementById('canvas').remove(); } var getPDF = function(){ - loadCanvas(); + loadCanvas(); } $(document).ready(function(){ - if (chartJson.length != 0){ - var node= document.getElementById("svgchartarea"); - $("#svgchartarea").html("") - //node.querySelectorAll('*').forEach(n => n.remove()); - newPt = document.createElement("p") - newPt.innerHTML = "Broad-sense heritability ( H2 ) = {{ (result['H2']*100)|round(2) }}% (range {{ (result['ci_l']*100)|round(2) }}% to {{ (result['ci_r']*100)|round(2) }}%)"; - document.getElementById("htresarea").appendChild(newPt); - newPt = document.createElement("p") - newPt.appendChild(document.createTextNode("Trait: " + "{{ trait }}")); - document.getElementById("htresarea").appendChild(newPt); - - // chart(data,aes) - // aesthetic : - // y : point's value on y axis Value - // group : how to group data on x axis Strain - // color : color of the point / boxplot AssayNumber - // label : displayed text in toolbox AssayNumber_Strain_Replicate : Value - console.log(chartJson) - - chartJson.sort(sortByProperty("Strain")); - chartJson.forEach(function(d,i){ - if (strn.indexOf(d['Strain']) == -1) { - strn.push(d['Strain']);} - }); - maxP = ((strn.length % 25) > 0) ? parseInt(strn.length / 25)+1 : parseInt(strn.length / 25); - - loadPlot(1); - - } else { - newPt.appendChild(document.createTextNode("Chart data is empty!")); - } - }) + if (chartJson.length != 0){ + var node= document.getElementById("svgchartarea"); + $("#svgchartarea").html("") + //node.querySelectorAll('*').forEach(n => n.remove()); + newPt = document.createElement("p") + newPt.innerHTML = "Broad-sense heritability ( H2 ) = {{ (result['H2']*100)|round(2) }}% (range {{ (result['ci_l']*100)|round(2) }}% to {{ (result['ci_r']*100)|round(2) }}%)"; + document.getElementById("htresarea").appendChild(newPt); + newPt = document.createElement("p") + newPt.appendChild(document.createTextNode("Trait: " + "{{ trait }}")); + document.getElementById("htresarea").appendChild(newPt); + + // chart(data,aes) + // aesthetic : + // y : point's value on y axis Value + // group : how to group data on x axis Strain + // color : color of the point / boxplot AssayNumber + // label : displayed text in toolbox AssayNumber_Strain_Replicate : Value + console.log(chartJson) + chartJson.sort(sortByProperty("Strain")); + chartJson.forEach(function(d,i){ + if (strn.indexOf(d['Strain']) == -1) { + strn.push(d['Strain']);} + }); + maxP = ((strn.length % 25) > 0) ? parseInt(strn.length / 25)+1 : parseInt(strn.length / 25); + + loadPlot(1); + } else { + newPt.appendChild(document.createTextNode("Chart data is empty!")); + } - $( document ).ready(function() { - $("a[href^='http://'], a[href^='https://'], a[href$='pdf']").attr("target","_blank"); - }); + $("a[href^='http://'], a[href^='https://'], a[href$='pdf']").attr("target","_blank"); + + }) -{% endif %} +{% endif %} + {% endblock %} \ No newline at end of file diff --git a/base/templates/tools/indel_primer.html b/base/templates/tools/indel_primer.html index d8a0c49c..440d6e45 100644 --- a/base/templates/tools/indel_primer.html +++ b/base/templates/tools/indel_primer.html @@ -11,45 +11,73 @@ {% block content %} {% from "macros.html" import render_field %} -
-
-

- This web-based tool is designed to compare any two wild C. elegans strains for insertion-deletion (indel) variants that can be genotyped using PCR. - These molecular markers can be used to follow each respective genetic background in crosses. -

- -

- Enter a specific genomic region. The browser will show indel variants between these two strains. - The table will show regions to search for primers. Primers might not be found flanking some indel sites. - Click on additional indel sites to find one that has good quality primers. - For each of these indels, you can search for primers to genotype them in the two wild strains using differential PCR product sizes. - Primers are designed to avoid natural variants in either strain to ensure that the PCR works in both genetic backgrounds. -

- -

- The browser also shows divergent regions, where the reference genome and some wild isolates have sequences with many variants. - These regions should be avoided because indel calls are less reliable and high levels of variation will make primer searches more error-prone. -

-
-
+
+
+
+

+ This web-based tool is designed to compare any two wild C. elegans strains for insertion-deletion (indel) variants that can be genotyped using PCR. + These molecular markers can be used to follow each respective genetic background in crosses. +

+

+ Enter a specific genomic region. The browser will show indel variants between your two chosen strains. + The table will show regions to search for primers. Primers might not be found flanking some indel sites. + Click on additional indel sites to find one that has good quality primers. + For each of these indels, you can search for primers to genotype them between your two chosen strains using differential PCR product sizes. + Primers are designed to avoid natural variants in either strain to ensure that the PCR works in both genetic backgrounds. +

+

+ The browser also shows divergent regions, where the reference genome and some wild isolates have sequences with many variants. + These regions should be avoided because indel calls are less reliable and high levels of variation will make primer searches more error-prone. +
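The genotyping logic behind "differential PCR product sizes" can be sketched as follows (a hypothetical helper for illustration only; expected_product_sizes does not exist in this codebase). Primers flank the indel, so the strain carrying an insertion yields a longer product and a deletion yields a shorter one, letting the two backgrounds be distinguished on a gel.

# Hypothetical sketch of differential PCR product sizes at an indel site.
# flank_bp is the combined distance from the two primers to the indel, i.e.
# the product size when the indel sequence is absent. The SVTYPE values
# INS/DEL mirror those used by the browser tracks in this page's script.
def expected_product_sizes(flank_bp, indel_bp, svtype):
    """Return (reference-like product, variant-strain product) in base pairs."""
    if svtype == "INS":
        return flank_bp, flank_bp + indel_bp   # insertion strain runs longer
    if svtype == "DEL":
        return flank_bp + indel_bp, flank_bp   # deletion strain runs shorter
    raise ValueError(f"unsupported SVTYPE: {svtype}")

# e.g. a 200 bp deletion with ~300 bp of combined flanking sequence:
print(expected_product_sizes(300, 200, "DEL"))  # (500, 300)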

+
{# /col-lg-10 #} +
{# /row #} + + {# /row #} + + +
{{ form.csrf_token }} -
{{ render_field(form.strain_1) }}
-
{{ render_field(form.strain_2) }}
-
{{ render_field(form.chromosome) }}
-
{{ render_field(form.start, placeholder="2,028,824") }}
-
{{ render_field(form.stop, placeholder="2,029,217") }}
-
+
{{ render_field(form.strain_1) }}
+
{{ render_field(form.strain_2) }}
+
{{ render_field(form.chromosome) }}
+
{{ render_field(form.start, placeholder="2,028,824") }}
+
{{ render_field(form.stop, placeholder="2,029,217") }}
+

- +
-
-
-
-
-
+ +
{# /container #} + + + +
+ +
+
+
+
{# /col #} +
{# /row #} + +
+
+
+
{# /col #} +
{# /row #} + +
{# /container-fluid #} + + {% endblock %} @@ -65,8 +93,8 @@ showNavigation: true, showKaryo: false, reference: { - id: "WS245", - fastaURL: "//storage.googleapis.com/elegansvariation.org/browser_tracks/c_elegans.PRJNA13758.WS245.genomic.fa", + id: "WS276", + fastaURL: "//storage.googleapis.com/elegansvariation.org/browser_tracks/c_elegans.PRJNA13758.WS276.genomic.fa", }, }; @@ -81,9 +109,11 @@ } var browser +var results var query_running = false var loaded = false var table + $(document).ready(function(){ let COLORS = { @@ -91,6 +121,67 @@ DEL: 'red' } + var form = document.getElementById('form-submit'); + + form.addEventListener('change', function() { + if (loaded) { + set_position(); + } + }); + + generate_data_table = function() { + table = $("#results").DataTable({ + data: results, + paging: false, + ordering: true, + searching: false, + order: [[1, "asc"]], + columnDefs: [ + { targets: [0], orderData: [1,2] } + ], + columns: [ + { data: "site", title: "Site" }, + { data: "START", title: "Start", visible: false }, + { data: "END", title: "End", visible: false }, + { data: "STRAIN", title: "Strain" }, + { data: "generate_primers", renderer: "html" } + ] + }); + } + + load_tracks = function() { + // Load tracks + rowset = {} + rowset[$("#strain_1").val()] = [] + rowset[$("#strain_2").val()] = [] + tracks = [] + + // Restructure results by strain + for(idx in results) { + row = results[idx] + rowset[row["STRAIN"]].push(row) + } + for(strain in rowset) { + track = rowset[strain] + items = _.map(track, function(row) { + result = { + chr: row["CHROM"], + start: row["START"], + end: row["END"], + name: `${row["site"]}`, + color: COLORS[row["SVTYPE"]], + } + return(result) + }) + igvTrack = { + name: strain, + type: "annotation", + features: items, + } + browser.loadTrack(igvTrack) + } + } + set_position = function() { // Sets browser position based on input browser = igv.getBrowser() @@ -134,107 +225,59 @@ } else { if (loaded == false) { // Create genome browser - browser = igv.createBrowser(browserDiv, options) - .then(function(browser) { - browser.loadTrack(divergent_track_summary) - set_position() - browser.on('locuschange', function(reference_frame, label) { - $("#chromosome").val(reference_frame["chr"]); - $("#start").val(reference_frame["start"]); - $("#stop").val(reference_frame["end"]); - setTimeout(submitForm, 500) - }) - }) - - table = $("#results").DataTable({ - data: results, - paging: false, - ordering: true, - searching: false, - order: [[1, "asc"]], - columnDefs: [ - { targets: [0], orderData: [1,2] } - ], - columns: [ - { data: "site", title: "Site" }, - { data: "START", title: "Start", visible: false }, - { data: "END", title: "End", visible: false }, - { data: "STRAIN", title: "Strain" }, - { data: "generate_primers", renderer: "html" } - ] + browser = igv.createBrowser(browserDiv, options) + .then(function(browser) { + browser.loadTrack(divergent_track_summary); + set_position(); + browser.on('locuschange', function(reference_frame, label) { + $("#chromosome").val(reference_frame["chr"]); + $("#start").val(reference_frame["start"]); + $("#stop").val(reference_frame["end"]); + setTimeout(submitForm, 500) }); - - loaded = true; + generate_data_table(); + load_tracks(browser, results); + loaded = true; + }); } else { table.clear() table.rows.add(results); table.draw(); - // Remove strain tracks - var remove_tracks = _.filter(_.map(browser.findTracks(), x => x.name), y => ["", "Divergent Regions"].includes(y) == false) + load_tracks(); + // Remove strain tracks + var 
remove_tracks = _.filter(_.map(browser.findTracks(), x => x.name), y => ["", "Divergent Regions"].includes(y) == false) _.each(remove_tracks, x => browser.removeTrackByName(x)) } - - // Load tracks - rowset = {} - rowset[$("#strain_1").val()] = [] - rowset[$("#strain_2").val()] = [] - tracks = [] - - // Restructure results by strain - for(idx in results) { - row = results[idx] - rowset[row["STRAIN"]].push(row) - } - for(strain in rowset) { - track = rowset[strain] - items = _.map(track, function(row) { - result = { - chr: row["CHROM"], - start: row["START"], - end: row["END"], - name: `${row["site"]}`, - color: COLORS[row["SVTYPE"]], - } - return(result) - }) - igvTrack = { - name: strain, - type: "annotation", - features: items, - } - browser.loadTrack(igvTrack) - } - - // Activate primer generation links - $(".generate-primers").on("click", function(e) { - console.log("Submitted") - - $(this).text("Submitted") - - var data = { - site: $(this).attr("site"), - strain_1: $("#strain_1").val(), - strain_2: $("#strain_2").val(), - size: $(this).attr("size"), - svtype: $(this).attr("svtype") - } - $.ajax({ - type: "POST", - url: "{{ url_for('indel_primer.submit_indel_primer') }}", - data: JSON.stringify(data), - contentType: "application/json; charset=utf-8", - dataType: 'json', - success:function(result){ - window.open(`indel_primer/result/${result.data_hash}`, "_blank") - } - }) - }); + + // Activate primer generation links + $(".generate-primers").on("click", function(e) { + console.log("Submitted") + $(this).text("Submitted") + + var data = { + site: $(this).attr("site"), + strain_1: $("#strain_1").val(), + strain_2: $("#strain_2").val(), + size: $(this).attr("size"), + svtype: $(this).attr("svtype") + } + $.ajax({ + type: "POST", + url: "{{ url_for('indel_primer.submit_indel_primer') }}", + data: JSON.stringify(data), + contentType: "application/json; charset=utf-8", + dataType: 'json', + success:function(result){ + window.open(`indel_primer/result/${result.id}`, "_blank") + } + }) + }); } - query_running = false; + query_running = false; } - }); + }); }); diff --git a/base/templates/tools/indel_primer_results.html b/base/templates/tools/indel_primer_results.html index 7d62afa5..00bc8687 100644 --- a/base/templates/tools/indel_primer_results.html +++ b/base/templates/tools/indel_primer_results.html @@ -69,19 +69,17 @@ {% if data and ready %} {% if empty %} -
+
-

No Results

-

- Unfortunately, no primers could be found for this site. -

+

No Results

+ Unfortunately, no primers could be found for this site.
{# /col-md-4 #}
{# /row #} {% else %}
- PDF - TSV + PDF + TSV
{# /row #} diff --git a/base/templates/tools/ip_result_list.html b/base/templates/tools/ip_result_list.html new file mode 100644 index 00000000..ed2a75f6 --- /dev/null +++ b/base/templates/tools/ip_result_list.html @@ -0,0 +1,112 @@ +{% extends "_layouts/default.html" %} + +{% block custom_head %} + + +{% endblock %} + +{% block style %} + +{% endblock %} + +{% block content %} + +{% from "macros.html" import render_dataTable_top_menu %} +{{ render_dataTable_top_menu() }} + +
+
+ + + + + + + + + + + + + + {% for item in items %} + + {% if item %} + + + + {% if item.empty %} + + {% else %} + + {% endif %} + + {% endif %} + + + {% endfor %} + + +
Site Strain 1 Strain 2 Empty Status Date
{{ item.site }} {{ item.strain1 }} {{ item.strain2 }} + {% if item.status == 'COMPLETE' %} + + {{ item.status }} + + {% else %} + {{ item.status }} + {% endif %} + {{ item.created_on|date_format }}
+ +
+
+ + +{% endblock %} + +{% block script %} + + + + +{% endblock %} \ No newline at end of file diff --git a/base/templates/tools/tools.html b/base/templates/tools/tools.html index ab06cdbb..9a7e4f80 100644 --- a/base/templates/tools/tools.html +++ b/base/templates/tools/tools.html @@ -5,26 +5,34 @@ {% endblock %} {% block content %} -
-
-
- -
-

Pairwise Indel Finder

-

Design primers for regions with indels.

-
-
-
-
-
- -
-

Heritability Calculator

-

Calculate broad-sense heritability.

-
-
-
-
-{% endblock %} + {# /row #} +{% endblock %} diff --git a/base/templates/user.html b/base/templates/user.html deleted file mode 100644 index c9b28d7f..00000000 --- a/base/templates/user.html +++ /dev/null @@ -1,49 +0,0 @@ -{% extends "_layouts/default.html" %} - -{% block content %} -
-
- -

Submitted Mappings

- - - - - - - - - - - - - - - - {% for report_group, traits in user_obj.reports().items() %} - {% set report = list(traits)[0] %} - - - - - - - - - - {% endfor %} -
#Report[ Status ] TraitsReport VersionData ReleaseAvailabilityDescriptionSubmitted
{{ loop.index }}{{ report.report_slug }}{% for trait in list(traits) %} - {{ '[ {:20}]'.format(trait.status) }} - - {% if trait.is_significant %} - {{ trait.trait_name }} - {% else %} - {{ trait.trait_name }} - {% endif %} - -
- {% endfor %}
{{ report.REPORT_VERSION }}{{ report.DATASET_RELEASE }}{% if report.is_public %}Public{% else %}Private{% endif %}{{ report.description }}{{ report['created_on'].strftime("%Y-%m-%d %H:%m:%S")|safe }} -
-
-
-{% endblock %} \ No newline at end of file diff --git a/base/templates/user/profile.html b/base/templates/user/profile.html new file mode 100644 index 00000000..41f08e57 --- /dev/null +++ b/base/templates/user/profile.html @@ -0,0 +1,64 @@ +{% extends "_layouts/default.html" %} +{% from "macros.html" import render_field %} + +{% block content %} + +
+
+
+ + + + + + + + +
+ +
+ + + + + + + + + + +
Username{{ user.username }}
Full Name{{ user.full_name }}
Email{{ user.email }}
Verified{{ user.verified_email }}
Password************
Roles{{ user.roles }}
Registered{{ user.created_on }}
Last Modified{{ user.modified_on }}
Last Login{{ user.last_login }}
+ +
+
{# /col-md-3 >#} +
+ +
+
+ +
+ +
{# Tab Set #} + + +
+ + + + +{% endblock %} diff --git a/base/templates/user/register.html b/base/templates/user/register.html new file mode 100644 index 00000000..b1e6d3fc --- /dev/null +++ b/base/templates/user/register.html @@ -0,0 +1,26 @@ +{% extends "_layouts/default.html" %} +{% block content %} +{% from "macros.html" import render_field %} + +
+
+
+
+
Register
+
+
+ {{ form.csrf_token }} + {{ render_field(form.username) }} + {{ render_field(form.full_name) }} + {{ render_field(form.email) }} + {{ render_field(form.password) }} + {{ render_field(form.confirm_password) }} + {{ form.recaptcha }} +
+ +
+
+
+
+
+{% endblock %} \ No newline at end of file diff --git a/base/templates/user/update.html b/base/templates/user/update.html new file mode 100644 index 00000000..4ac55ec8 --- /dev/null +++ b/base/templates/user/update.html @@ -0,0 +1,33 @@ +{% extends "_layouts/default.html" %} +{% from "macros.html" import render_field %} + +{% block content %} + +
+
+
+
+
+ {{ form.csrf_token }} + + {{ render_field(form.full_name) }} + {{ render_field(form.email) }} + {{ render_field(form.password) }} + {{ render_field(form.confirm_password) }} +
+
+ +
+
+
+
+ +
+
+ +
+ +{% endblock %} diff --git a/base/templates/vbrowser.html b/base/templates/vbrowser.html new file mode 100644 index 00000000..769fa0fc --- /dev/null +++ b/base/templates/vbrowser.html @@ -0,0 +1,677 @@ +{% extends "_layouts/default.html" %} + + +{% block custom_head %} + + + + +{% endblock %} + + +{% block style %} + +{% endblock %} + +{% block content %} + +
+ +
+ {# Gene Search Block #} +
+ +

+
+
+ +
+
+ Search by WBGeneID, alphanumeric name (F37A4.8), or gene name (isw-1) + + + + + + + + + + + + + +
{# /col-lg-4 #} + + {# Interval Search Block #} +
+

+
+
+ +
+ Search using the format [chromosome:START-STOP] +
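Server-side handling of these search boxes is not part of this diff, but a sketch of how such a query could be classified and a [chromosome:START-STOP] interval parsed might look like the following (hypothetical helpers; the names below are illustrative).

import re

# Hypothetical sketch: classify a query from the two search boxes above and
# parse the chromosome:START-STOP interval format (commas tolerated).
INTERVAL_RE = re.compile(r"^([A-Za-z0-9_]+):([\d,]+)-([\d,]+)$")
WBGENE_RE = re.compile(r"^WBGene\d+$")

def parse_query(text):
    text = text.strip()
    m = INTERVAL_RE.match(text)
    if m:
        chrom = m.group(1)
        start = int(m.group(2).replace(",", ""))
        stop = int(m.group(3).replace(",", ""))
        if start > stop:
            start, stop = stop, start       # tolerate reversed coordinates
        return ("interval", (chrom, start, stop))
    if WBGENE_RE.match(text):
        return ("wbgene_id", text)
    return ("gene_name", text)              # e.g. "F37A4.8" or "isw-1"

print(parse_query("II:2,028,824-2,029,217"))  # ('interval', ('II', 2028824, 2029217))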
+ +
+
+
+ +
+
{# /col-lg-4 #} + + + {# Column select list block #} +
+

Columns

+ +
+
+
+
+
+ + +
+
+ {% for col in columns %} +
+ + +
+ {% endfor %} +
+
+
+
{# /col-list #} +
{# /col-md-2 #} + + {# strain select list block #} +
+

Strains

+ +
+ + + +
+
+
+
+ + +
+
+ {% for strain in strain_listing %} +
+ + +
+ {% endfor %} +
+
+
+
{# /strain-list #} +
{# /col-md-2 #} + +
{# /row #} + +
+
+ +

Variant Impact

+ +
+
+ +
+
+ + +
+
{# /form group #} + +
+
+ + +
+
{# /form group #} + +
{#/ row #} + +
+ +
{# /col #} + +
{# /row #} + + +
{# /container #} + +
+ + + + +
{# /row #} + + +{% endblock %} + +{% block script %} + +{% endblock %} + diff --git a/base/utils/cache.py b/base/utils/cache.py index 4932ac8a..7a7260b6 100644 --- a/base/utils/cache.py +++ b/base/utils/cache.py @@ -9,10 +9,11 @@ import pickle import base64 from cachelib import BaseCache -from base.utils.gcloud import get_item, store_item +from base.utils.gcloud import get_item, store_item, delete_items_by_query from time import time from base.config import config +kind = config['DS_PREFIX'] + 'cache' class DatastoreCache(BaseCache): def __init__(self, default_timeout=500): @@ -23,14 +24,14 @@ def set(self, key, value, timeout=None): expires = time() + timeout try: value = base64.b64encode(pickle.dumps(value)) - store_item('cache', self.key_prefix + "/" + key, value=value, expires=expires, exclude_from_indexes=['value', 'expires']) + store_item(kind, self.key_prefix + "/" + key, value=value, expires=expires, exclude_from_indexes=['value']) return True except: return False def get(self, key): try: - item = get_item('cache', self.key_prefix + "/" + key) + item = get_item(kind, self.key_prefix + "/" + key) value = item.get('value') value = pickle.loads(base64.b64decode(value)) expires = item.get('expires') @@ -46,14 +47,14 @@ def get_dict(self, *keys): results = {} for key in keys: try: - results.update({key: get_item('cache', key)}) + results.update({key: get_item(kind, key)}) except AttributeError: pass return results def has(self, key): try: - item = get_item('cache', key) + item = get_item(kind, key) expires = item.get('expires') if expires == 0 or expires > time(): return True @@ -62,8 +63,15 @@ def has(self, key): def set_many(self, mapping, timeout): for k, v in mapping.items(): - store_item('cache', k, value=v) + store_item(kind, k, value=v) def datastore_cache(app, config, args, kwargs): return DatastoreCache(*args, **kwargs) + + +def delete_expired_cache(): + epoch_time = int(time()) + filters = [("expires", "<", epoch_time)] + num_deleted = delete_items_by_query(kind, filters=filters, projection=['expires']) + return num_deleted \ No newline at end of file diff --git a/base/utils/data_utils.py b/base/utils/data_utils.py index 72587c60..6b103126 100644 --- a/base/utils/data_utils.py +++ b/base/utils/data_utils.py @@ -12,19 +12,17 @@ import os import uuid import zipfile -from collections import Counter -from datetime import datetime as dt - import pytz import yaml + +from collections import Counter +from datetime import datetime as dt from flask import g, json from gcloud import storage from logzero import logger - from concurrent.futures import ThreadPoolExecutor from typing import Iterable from urllib.request import urlretrieve - from rich.progress import ( BarColumn, DownloadColumn, @@ -35,6 +33,7 @@ TaskID, ) +from base.constants import GOOGLE_CLOUD_BUCKET, GOOGLE_CLOUD_PROJECT_ID def flatten_dict(d, max_depth=1): def expand(key, value): @@ -57,14 +56,13 @@ def load_yaml(yaml_file): def get_gs(): """ - Gets the elegansvariation.org google storage bucket which + Gets the google storage bucket which stores static assets and report data. 
""" if not hasattr(g, 'gs'): - g.gs = storage.Client(project='andersen-lab').get_bucket('elegansvariation.org') + g.gs = storage.Client(project=GOOGLE_CLOUD_PROJECT_ID).get_bucket(GOOGLE_CLOUD_BUCKET) return g.gs - class json_encoder(json.JSONEncoder): def default(self, o): if hasattr(o, "to_json"): @@ -102,6 +100,25 @@ def hash_it(object, length=10): return hashlib.sha1(str(object).encode('utf-8')).hexdigest()[0:length] +def hash_file_contents(filename, length=10): + ''' Computes the sha1 hash of a file's contents ''' + logger.debug(filename) + BLOCKSIZE = 65536 + hasher = hashlib.sha1() + with open(filename, 'rb') as afile: + buf = afile.read(BLOCKSIZE) + while len(buf) > 0: + hasher.update(buf) + buf = afile.read(BLOCKSIZE) + + return hasher.hexdigest()[0:length] + + +def hash_password(password): + h = hashlib.md5(password.encode()) + return h.hexdigest() + + def chicago_date(): return dt.now(pytz.timezone("America/Chicago")).date().isoformat() diff --git a/base/utils/decorators.py b/base/utils/decorators.py index 362db9de..5e86efa0 100644 --- a/base/utils/decorators.py +++ b/base/utils/decorators.py @@ -1,6 +1,9 @@ +import arrow +from rich.console import Console from functools import wraps from flask import request, jsonify +console = Console() def jsonify_request(func): """ @@ -21,3 +24,13 @@ def jsonify_the_request(*args, **kwargs): return jsonify(func(*args, **kwargs)) return func(*args, **kwargs) return jsonify_the_request + + +def timeit(method): + def timed(*args, **kw): + start = arrow.utcnow() + result = method(*args, **kw) + diff = int((arrow.utcnow() - start).total_seconds()) + console.log(f"{diff} seconds") + return result + return timed \ No newline at end of file diff --git a/base/utils/gcloud.py b/base/utils/gcloud.py index 2d7988b7..dd93cbf0 100644 --- a/base/utils/gcloud.py +++ b/base/utils/gcloud.py @@ -1,11 +1,17 @@ import json +import datetime +import googleapiclient.discovery + from flask import g -from base.utils.data_utils import dump_json from gcloud import datastore, storage from logzero import logger -import googleapiclient.discovery from google.oauth2 import service_account +from google.cloud import tasks_v2 +from google.protobuf import timestamp_pb2 +import requests +from base.constants import GOOGLE_CLOUD_BUCKET, GOOGLE_CLOUD_PROJECT_ID, GOOGLE_CLOUD_LOCATION +from base.utils.data_utils import dump_json def google_datastore(open=False): """ @@ -14,12 +20,12 @@ def google_datastore(open=False): Args: open - Return the client without storing it in the g object. """ - client = datastore.Client(project='andersen-lab') - if open: - return client - if not hasattr(g, 'ds'): - g.ds = client - return g.ds + if (g and open == False): + if not hasattr(g, 'ds'): + g.ds = datastore.Client(project=GOOGLE_CLOUD_PROJECT_ID) + return g.ds + + return datastore.Client(project=GOOGLE_CLOUD_PROJECT_ID) def delete_item(item): @@ -29,6 +35,42 @@ def delete_item(item): batch.commit() +def delete_by_ref(kind, id): + ds = google_datastore() + key = ds.key(kind, id) + batch = ds.batch() + batch.delete(key) + batch.commit() + + +def delete_items_by_query(kind, filters=None, projection=()): + """ + Deletes all items that are returned by a query. 
+ Items are deleted in page-sized batches as the results are being returned + Returns the number of items deleted + """ + # filters: + # [("var_name", "=", 1)] + ds = google_datastore() + query = ds.query(kind=kind, projection=projection) + deleted_items = 0 + if filters: + for var, op, val in filters: + query.add_filter(var, op, val) + + query = query.fetch() + while True: + data, more, cursor = query.next_page() + keys = [] + for entity in data: + keys.append(entity.key) + ds.delete_multi(keys) + deleted_items += len(keys) + if more is False: + break + return deleted_items + + def store_item(kind, name, **kwargs): ds = google_datastore() try: @@ -48,7 +90,7 @@ def store_item(kind, name, **kwargs): ds.put(m) -def query_item(kind, filters=None, projection=(), order=None, limit=None): +def query_item(kind, filters=None, projection=(), order=None, limit=None, keys_only=False): """ Filter items from google datastore using a query """ @@ -56,6 +98,8 @@ def query_item(kind, filters=None, projection=(), order=None, limit=None): # [("var_name", "=", 1)] ds = google_datastore() query = ds.query(kind=kind, projection=projection) + if keys_only: + query.keys_only() if order: query.order = order if filters: @@ -101,12 +145,13 @@ def google_storage(open=False): Args: open - Return the client without storing it in the g object. """ - client = storage.Client(project='andersen-lab') - if open: - return client - if not hasattr(g, 'gs'): - g.gs = client - return g.gs + if (g and open == False): + if not hasattr(g, 'gs'): + g.gs = storage.Client.from_service_account_json('env_config/client-secret.json') + return g.gs + + return storage.Client.from_service_account_json('env_config/client-secret.json') + def get_cendr_bucket(): @@ -114,24 +159,30 @@ def get_cendr_bucket(): Returns the CeNDR bucket """ gs = google_storage() - return gs.get_bucket("elegansvariation.org") + return gs.get_bucket(GOOGLE_CLOUD_BUCKET) -def upload_file(blob, obj, as_string = False): +def upload_file(blob, obj, as_string = False, as_file_obj = False): """ - Upload a file to the CeNDR bucket + Upload a file to the CeNDR bucket - Args: - blob - The name of the blob (server-side) - fname - The filename to upload (client-side) + Args: + blob - The name of the blob (server-side) + fname - The filename to upload (client-side) """ logger.info(f"Uploading: {blob} --> {obj}") cendr_bucket = get_cendr_bucket() blob = cendr_bucket.blob(blob) + if as_string: - blob.upload_from_string(obj) - else: - blob.upload_from_filename(obj) + blob.upload_from_string(obj) + return blob + + if as_file_obj: + blob.upload_from_file(obj) + return blob + + blob.upload_from_filename(obj) return blob @@ -158,15 +209,24 @@ def check_blob(fname): return cendr_bucket.get_blob(fname) +def list_files(prefix): + """ + Lists files with a given prefix + """ + cendr_bucket = get_cendr_bucket() + items = cendr_bucket.list_blobs(prefix=prefix) + return list(items) + + + def list_release_files(prefix): """ Lists files with a given prefix from the current dataset release """ - cendr_bucket = get_cendr_bucket() items = cendr_bucket.list_blobs(prefix=prefix) - return list([f"https://storage.googleapis.com/elegansvariation.org/{x.name}" for x in items]) + return list([f"https://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/{x.name}" for x in items]) def google_analytics(): @@ -177,3 +237,97 @@ def google_analytics(): scopes=['https://www.googleapis.com/auth/analytics.readonly']) return googleapiclient.discovery.build('analyticsreporting', 'v4', credentials=credentials) + +def 
generate_download_signed_url_v4(blob_path, expiration=datetime.timedelta(minutes=15)): + """Generates a v4 signed URL for downloading a blob. """ + cendr_bucket = get_cendr_bucket() + + try: + blob = cendr_bucket.blob(blob_path) + url = blob.generate_signed_url( + expiration=expiration, + method="GET" + ) + return url + + except Exception as inst: + print(type(inst)) + print(inst.args) + print(inst) + return None + + +def generate_upload_signed_url_v4(blob_name, content_type="application/octet-stream"): + """Generates a v4 signed URL for uploading a blob using HTTP PUT. """ + cendr_bucket = get_cendr_bucket() + try: + blob = cendr_bucket.blob(blob_name) + url = blob.generate_signed_url( + expiration=datetime.timedelta(minutes=15), + method="PUT", + content_type=content_type + ) + except: + return None + return url + + +def google_task(open=False): + """ + Fetch google datastore credentials + + Args: + open - Return the client without storing it in the g object. + """ + client = tasks_v2.CloudTasksClient() + if open: + return client + if g: + if not hasattr(g, 'tc'): + g.tc = client + return g.tc + return client + + +def add_task(queue, url, payload, delay_seconds=None, task_name=None): + client = google_task() + parent = client.queue_path(GOOGLE_CLOUD_PROJECT_ID, GOOGLE_CLOUD_LOCATION, queue) + + task = { + "http_request": { + "http_method": tasks_v2.HttpMethod.POST, + "url": url, + } + } + + if payload is not None: + if isinstance(payload, dict): + payload = json.dumps(payload) + task["http_request"]["headers"] = {"Content-type": "application/json"} + + converted_payload = payload.encode() + task["http_request"]["body"] = converted_payload + + + if delay_seconds is not None: + # Convert "seconds from now" into an rfc3339 datetime string then into a Timestamp protobuf. 
+ d = datetime.datetime.utcnow() + datetime.timedelta(seconds=delay_seconds) + timestamp = timestamp_pb2.Timestamp() + timestamp.FromDatetime(d) + task["schedule_time"] = timestamp + + if task_name is not None: + task["name"] = f"{parent}/tasks/{task_name}" + + try: + response = client.create_task(request={"parent": parent, "task": task}) + logger.debug("Created task {}".format(response.name)) + except Exception as e: + logger.error(f"Failed to create task {e}") + eType = str(type(e).__name__) + if eType == 'AlreadyExists': + response = 'SCHEDULED' + else: + response = None + + return response diff --git a/base/utils/jwt_utils.py b/base/utils/jwt_utils.py new file mode 100644 index 00000000..3d4bcf62 --- /dev/null +++ b/base/utils/jwt_utils.py @@ -0,0 +1,90 @@ +from functools import wraps +from flask import (request, + redirect, + abort, + url_for, + session, + make_response) +from flask_jwt_extended import (create_access_token, + create_refresh_token, + set_access_cookies, + set_refresh_cookies, + unset_jwt_cookies, + unset_access_cookies, + get_jwt, + get_jwt_identity, + get_current_user, + verify_jwt_in_request, + jwt_required) + +from base.models import user_ds +from base.extensions import jwt + +def assign_access_refresh_tokens(id, roles, url, refresh=True): + resp = make_response(redirect(url, 302)) + access_token = create_access_token(identity=str(id), additional_claims={'roles': roles}) + set_access_cookies(resp, access_token) + if refresh: + refresh_token = create_refresh_token(identity=str(id)) + set_refresh_cookies(resp, refresh_token) + session['is_logged_in'] = True + session['is_admin'] = ('admin' in roles) + return resp + + +def unset_jwt(): + resp = make_response(redirect('/', 302)) + session["is_logged_in"] = False + session["is_admin"] = False + unset_jwt_cookies(resp) + return resp + + +def admin_required(): + def wrapper(fn): + @wraps(fn) + def decorator(*args, **kwargs): + verify_jwt_in_request() + claims = get_jwt() + if claims["roles"] and ('admin' in claims["roles"]): + return fn(*args, **kwargs) + else: + return abort(401) + + return decorator + return wrapper + + +@jwt.user_identity_loader +def user_identity_lookup(sub): + return sub + + +@jwt.user_lookup_loader +def user_lookup_callback(_jwt_header, jwt_data): + id = jwt_data["sub"] + return user_ds(id) + + +@jwt.unauthorized_loader +def unauthorized_callback(reason): + return redirect(url_for('auth.choose_login')), 302 + + +@jwt.invalid_token_loader +def invalid_token_callback(callback): + # Invalid Fresh/Non-Fresh Access token in auth header + resp = make_response(redirect(url_for('auth.choose_login'))) + session["is_logged_in"] = False + session["is_admin"] = False + unset_jwt_cookies(resp) + return resp, 302 + + +@jwt.expired_token_loader +def expired_token_callback(_jwt_header, jwt_data): + # Expired auth header + session['login_referrer'] = request.base_url + resp = make_response(redirect(url_for('auth.refresh'))) + unset_access_cookies(resp) + return resp, 302 diff --git a/base/views/about.py b/base/views/about.py index c3577cad..0ea5e8f7 100644 --- a/base/views/about.py +++ b/base/views/about.py @@ -15,11 +15,13 @@ from base.config import config from base.models import Strain from base.forms import donation_form +from base.extensions import cache from base.views.api.api_strain import get_isotypes from base.utils.google_sheets import add_to_order_ws from base.utils.email import send_email, DONATE_SUBMISSION_EMAIL from base.utils.data_utils import load_yaml, chicago_date, hash_it from base.utils.plots 
import time_series_plot +from base.utils.jwt_utils import jwt_required, get_current_user about_bp = Blueprint('about', __name__, @@ -27,25 +29,30 @@ @about_bp.route('/') +@cache.memoize(50) def about(): """ About us Page - Gives an overview of CeNDR """ - title = "About" - - strain_listing = get_isotypes(known_origin=True) + title = "About CeNDR" + disable_parent_breadcrumb = True + isotypes = get_isotypes(known_origin=True) + strain_listing = [s.to_json() for s in isotypes] return render_template('about/about.html', **locals()) @about_bp.route('/getting_started/') +@cache.memoize(50) def getting_started(): """ Getting Started - provides information on how to get started with CeNDR """ - VARS = {"title": "Getting Started", - "strain_listing": get_isotypes(known_origin=True)} - return render_template('about/getting_started.html', **VARS) + title = "Getting Started" + isotypes = get_isotypes(known_origin=True) + strain_listing = [s.to_json() for s in isotypes] + disable_parent_breadcrumb = True + return render_template('about/getting_started.html', **locals()) @about_bp.route('/committee/') @@ -54,12 +61,14 @@ def committee(): Scientific Panel Page """ title = "Scientific Advisory Committee" + disable_parent_breadcrumb = True committee_data = load_yaml("advisory-committee.yaml") return render_template('about/committee.html', **locals()) @about_bp.route('/collaborators/') def collaborators(): title = "Collaborators" + disable_parent_breadcrumb = True collaborator_data = load_yaml("collaborators.yaml") return render_template('about/collaborators.html', **locals()) @@ -70,21 +79,25 @@ def staff(): Staff Page """ title = "Staff" + disable_parent_breadcrumb = True staff_data = load_yaml("staff.yaml") return render_template('about/staff.html', **locals()) @about_bp.route('/donate/', methods=['GET', 'POST']) +@jwt_required(optional=True) def donate(): """ Process donation """ title = "Donate" + disable_parent_breadcrumb = True form = donation_form(request.form) # Autofill email - if session.get('user') and not form.email.data: - form.email.data = session.get('user')['user_email'] + user = get_current_user() + if user and hasattr(user, 'email') and not form.email.data: + form.email.data = user.email if form.validate_on_submit(): # order_number is generated as a unique string @@ -110,6 +123,7 @@ def donate(): @about_bp.route('/funding/') def funding(): title = "Funding" + disable_parent_breadcrumb = True funding_set = load_yaml('funding.yaml') return render_template('about/funding.html', **locals()) @@ -165,6 +179,7 @@ def statistics(): n_users = get_unique_users() VARS = {'title': title, + 'disable_parent_breadcrumb': True, 'strain_collection_plot': strain_collection_plot, 'report_summary_plot': report_summary_plot, 'weekly_visits_plot': weekly_visits_plot, @@ -183,8 +198,12 @@ def publications(): List of publications that have referenced CeNDR """ title = "Publications" - req = requests.get( - "https://docs.google.com/spreadsheets/d/1ghJG6E_9YPsHu0H3C9s_yg_-EAjTUYBbO15c3RuePIs/export?format=csv&id=1ghJG6E_9YPsHu0H3C9s_yg_-EAjTUYBbO15c3RuePIs&gid=0") + disable_parent_breadcrumb = True + csv_prefix = config['GOOGLE_SHEET_PREFIX'] + sheet_id = config['CENDR_PUBLICATIONS_STRAIN_SHEET'] + csv_export_suffix = 'export?format=csv&id={}&gid=0'.format(sheet_id) + url = '{}/{}/{}'.format(csv_prefix, sheet_id, csv_export_suffix) + req = requests.get(url) df = pd.read_csv(StringIO(req.content.decode("UTF-8"))) df['pmid'] = df['pmid'].astype(int) df = df.sort_values(by='pmid', ascending=False) diff --git 
a/base/views/admin/__init__.py b/base/views/admin/__init__.py
new file mode 100644
index 00000000..a5e9d041
--- /dev/null
+++ b/base/views/admin/__init__.py
@@ -0,0 +1,4 @@
+from .admin import admin_bp
+from .data import data_admin_bp
+from .users import users_bp
+
diff --git a/base/views/admin/admin.py b/base/views/admin/admin.py
new file mode 100644
index 00000000..90e8fe2c
--- /dev/null
+++ b/base/views/admin/admin.py
@@ -0,0 +1,38 @@
+from flask import (render_template,
+                   Blueprint)
+from base.config import config
+from base.utils.jwt_utils import admin_required
+
+# Admin blueprint
+admin_bp = Blueprint('admin',
+                     __name__,
+                     template_folder='admin')
+
+
+@admin_bp.route('/')
+@admin_required()
+def admin():
+    VARS = {"title": "Admin"}
+    return render_template('admin/admin.html', **VARS)
+
+
+@admin_bp.route('/strain_sheet/')
+@admin_required()
+def admin_strain_sheet():
+    title = "Andersen Lab Strain Sheet"
+    id = config['ANDERSEN_LAB_STRAIN_SHEET']
+    prefix = config['GOOGLE_SHEET_PREFIX']
+    sheet_url = '{}/{}'.format(prefix, id)
+    return render_template('admin/google_sheet.html', **locals())
+
+
+@admin_bp.route('/publications/')
+@admin_required()
+def admin_publications_sheet():
+    title = "CeNDR Publications Sheet"
+    id = config['CENDR_PUBLICATIONS_STRAIN_SHEET']
+    prefix = config['GOOGLE_SHEET_PREFIX']
+    sheet_url = '{}/{}'.format(prefix, id)
+    return render_template('admin/google_sheet.html', **locals())
+
diff --git a/base/views/admin/data.py b/base/views/admin/data.py
new file mode 100644
index 00000000..7622bb58
--- /dev/null
+++ b/base/views/admin/data.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Author: Sam Wachspress
+
+Data Release administration
+
+"""
+import arrow
+
+from flask import abort, flash, request, render_template, Blueprint, redirect, url_for
+
+from base.constants import REPORT_V2_FILE_LIST, REPORT_V1_FILE_LIST, REPORT_VERSIONS
+from base.config import config
+from base.models import data_report_ds
+from base.forms import data_report_form
+from base.utils.jwt_utils import get_jwt, admin_required, get_current_user
+from base.utils.data_utils import unique_id
+from base.utils.gcloud import delete_by_ref
+
+
+data_admin_bp = Blueprint('data_admin',
+                          __name__,
+                          template_folder='admin')
+
+
+cloud_config = config['cloud_config']
+
+@data_admin_bp.route('/', methods=["GET"])
+@data_admin_bp.route('/<id>', methods=["GET"])
+@admin_required()
+def data_admin(id=None):
+    if id is None:
+        title = 'All'
+        items = data_report_ds().get_all()
+    else:
+        return redirect(url_for('data_admin.data_edit', id=id))
+
+    return render_template('admin/data_list.html', **locals())
+
+
+@data_admin_bp.route('/create/', methods=["GET"])
+@admin_required()
+def data_create(id=None):
+    user = get_current_user()
+    id = unique_id()
+    report = data_report_ds(id)
+    report.init()
+    report.save()
+    return redirect(url_for('data_admin.data_edit', id=id))
+
+
+@data_admin_bp.route('/<id>/edit/', methods=["GET", "POST"])
+@admin_required()
+def data_edit(id=None):
+    if id is None:
+        flash('Error: No Report ID Provided!')
+        abort(500)
+
+    title = "Edit"
+    jwt_csrf_token = (get_jwt() or {}).get("csrf")
+    form = data_report_form(request.form)
+
+    report = data_report_ds(id)
+    if not report._exists:
+        flash(f"Error: Report {id} does not exist!")
+        abort(500)
+
+    # Get content of cloud bucket
+    report_dirs = [''] + data_report_ds.list_bucket_dirs()
+    form.dataset.choices = [(f, f) for f in report_dirs]
+    form.version.choices = [(v, v) for v in REPORT_VERSIONS]
+
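+    # Pre-populate the form from the stored report entity on GET; the POST
+    # branch below persists the submitted fields and re-marks the report
+    # as initialized.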
+    if request.method == 'GET':
+        form.dataset.data = report.dataset if hasattr(report, 'dataset') else ''
+        form.wormbase.data = report.wormbase if hasattr(report, 'wormbase') else ''
+        form.version.data = report.version if hasattr(report, 'version') else ''
+
+    if request.method == 'POST' and form.validate():
+        # if changes then re-sync
+        report.dataset = request.form.get('dataset')
+        report.wormbase = request.form.get('wormbase')
+        report.version = request.form.get('version')
+        report.initialized = True
+        report.save()
+        return redirect(url_for('data_admin.data_admin'))
+    return render_template('admin/data_edit.html', **locals())
+
+
+@data_admin_bp.route('/<id>/delete/', methods=["GET"])
+@admin_required()
+def data_delete(id=None):
+    if id is None:
+        flash('Error: No Report ID Provided!')
+        abort(500)
+
+    report = data_report_ds(id)
+    if not report._exists:
+        flash(f"Error: Report {id} does not exist!")
+        abort(500)
+
+    if hasattr(report, 'dataset'):
+        dataset = str(report.dataset)
+        cloud_config.create_backup()
+        cloud_config.remove_release(dataset)
+        cloud_config.remove_release_files(dataset)
+        props = cloud_config.get_properties()
+        config.update(props)
+    delete_by_ref('data-report', id)
+
+    return redirect(url_for('data_admin.data_admin'))
+
+
+@data_admin_bp.route('/<id>/sync-report', methods=["GET"])
+@admin_required()
+def data_sync_report(id=None):
+    """
+    Fetches static content from a google cloud bucket and copies it locally to serve
+    """
+    if id is None:
+        flash('Error: No Report ID Provided!')
+        abort(500)
+
+    report = data_report_ds(id)
+    if not report._exists or not hasattr(report, 'dataset'):
+        flash(f"Error: Report {id} does not exist!")
+        abort(500)
+
+    dataset = report.dataset if hasattr(report, 'dataset') else None
+    wormbase = report.wormbase if hasattr(report, 'wormbase') else None
+    version = report.version if hasattr(report, 'version') else None
+    if dataset is None or wormbase is None or version is None:
+        flash(f"Error: Report {id} is missing required properties!")
+        abort(500)
+
+    files = []
+    if version == 'v1':
+        files = REPORT_V1_FILE_LIST
+    elif version == 'v2':
+        files = REPORT_V2_FILE_LIST
+
+    result = cloud_config.get_release_files(dataset, files, refresh=True)
+    if result is not None:
+        now = arrow.utcnow().datetime
+        report.report_synced_on = now
+        report.save()
+    else:
+        report.save()
+        flash(f"Failed to sync report: {id}!")
+        abort(500)
+
+    return redirect(url_for('data_admin.data_admin'))
+
+
+@data_admin_bp.route('/<id>/sync-db', methods=["GET"])
+@admin_required()
+def data_sync_db(id=None):
+    """
+    Fetches sqlite db file from google cloud bucket and copies it locally to serve
+    """
+    if id is None:
+        flash('Error: No Report ID Provided!')
+        abort(500)
+
+    report = data_report_ds(id)
+    if not report._exists or not hasattr(report, 'dataset'):
+        flash(f"Error: Report {id} does not exist!")
+        abort(500)
+
+    dataset = report.dataset if hasattr(report, 'dataset') else None
+    wormbase = report.wormbase if hasattr(report, 'wormbase') else None
+    if dataset is None or wormbase is None:
+        flash(f"Error: Report {id} is missing required properties!")
+        abort(500)
+
+    result = cloud_config.get_release_db(dataset, wormbase, refresh=True)
+    if result is not None:
+        now = arrow.utcnow().datetime
+        report.db_synced_on = now
+        report.save()
+    else:
+        report.save()
+        flash(f"Failed to sync report: {id}!")
+        abort(500)
+
+    return redirect(url_for('data_admin.data_admin'))
+
+
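+# The hide/publish handlers below mutate the shared cloud config: a backup is
+# written first, the release entry is removed or added, and the in-memory
+# config dict is refreshed from the stored properties.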
+@data_admin_bp.route('/<id>/hide', methods=["GET"])
+@admin_required()
+def data_hide_report(id=None):
+    """ Updates the cloud config to hide the release """
+    if id is None:
+        flash('Error: No Report ID Provided!')
+        abort(500)
+
+    report = data_report_ds(id)
+    if not report._exists or not hasattr(report, 'dataset'):
+        flash(f"Error: Report {id} does not exist or is missing required properties!")
+        abort(500)
+
+    # update the config
+    dataset = report.dataset
+    cloud_config.create_backup()
+    cloud_config.remove_release(dataset)
+    props = cloud_config.get_properties()
+    config.update(props)
+
+    # update the datastore report object
+    report.publish = False
+    report.published_on = ''
+    report.save()
+
+    return redirect(url_for('data_admin.data_admin'))
+
+
+@data_admin_bp.route('/<id>/publish', methods=["GET"])
+@admin_required()
+def data_publish_report(id=None):
+    """ Updates the cloud config to recognize the release """
+    if id is None:
+        flash('Error: No Report ID Provided!')
+        abort(500)
+
+    report = data_report_ds(id)
+    if not report._exists:
+        flash(f"Error: Report {id} does not exist!")
+        abort(500)
+
+    dataset = report.dataset if hasattr(report, 'dataset') else None
+    wormbase = report.wormbase if hasattr(report, 'wormbase') else None
+    version = report.version if hasattr(report, 'version') else None
+    if dataset is None or wormbase is None or version is None:
+        flash(f"Error: Report {id} is missing required properties!")
+        abort(500)
+
+    cloud_config.create_backup()
+    cloud_config.add_release(dataset, wormbase, version)
+    props = cloud_config.get_properties()
+    config.update(props)
+
+    # update the datastore report object
+    report.publish = True
+    report.published_on = arrow.utcnow().datetime
+    report.save()
+
+    return redirect(url_for('data_admin.data_admin'))
diff --git a/base/views/admin/users.py b/base/views/admin/users.py
new file mode 100644
index 00000000..ee3b247c
--- /dev/null
+++ b/base/views/admin/users.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Author: Sam Wachspress
+
+User administration
+
+"""
+import arrow
+from flask import request, render_template, Blueprint, redirect, url_for
+
+from base.models import user_ds
+from base.forms import admin_edit_user_form
+from base.utils.jwt_utils import jwt_required, get_jwt, admin_required
+from base.utils.gcloud import delete_by_ref
+
+
+users_bp = Blueprint('users',
+                     __name__,
+                     template_folder='admin')
+
+
+@users_bp.route('/', methods=["GET"])
+@users_bp.route('/<id>', methods=["GET"])
+@admin_required()
+def users(id=None):
+    if id is None:
+        title = 'All'
+        users = user_ds().get_all()
+        return render_template('admin/users_list.html', **locals())
+    else:
+        return redirect(url_for('users.users_edit', id=id))
+
+
+@users_bp.route('/<id>/edit/', methods=["GET", "POST"])
+@admin_required()
+def users_edit(id=None):
+    if id is None:
+        # todo: fix redirect
+        return render_template('500.html'), 500
+
+    title = "Edit"
+    jwt_csrf_token = (get_jwt() or {}).get("csrf")
+    form = admin_edit_user_form(request.form)
+    user = user_ds(id)
+
+    if request.method == 'GET':
+        form.roles.data = user.roles if hasattr(user, 'roles') else ['user']
+
+    if request.method == 'POST' and form.validate():
+        user.roles = request.form.getlist('roles')
+        user.modified = arrow.utcnow().datetime
+        user.save()
+        return redirect(url_for('users.users'))
+
+    # todo: fix redirect here
+    return render_template('admin/users_edit.html', **locals())
+
+
+@users_bp.route('/<id>/delete', methods=["GET"])
+@admin_required()
+def users_delete(id=None):
+    if id is None:
+        # todo: fix redirect
+        return render_template('500.html'), 500
+
+    delete_by_ref('user', id)
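+    # delete_by_ref removes the 'user' entity from Cloud Datastore outright;
+    # there is no soft-delete or confirmation step here.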
+ return redirect(url_for('users.users')) diff --git a/base/views/api/api_gene.py b/base/views/api/api_gene.py index 7e239201..791b17cc 100644 --- a/base/views/api/api_gene.py +++ b/base/views/api/api_gene.py @@ -1,11 +1,10 @@ -from flask import request +from flask import request, Blueprint from base.models import Homologs, WormbaseGeneSummary from base.utils.decorators import jsonify_request from sqlalchemy import or_, func from base.views.api.api_variant import variant_query from logzero import logger -from flask import Blueprint api_gene_bp = Blueprint('api_gene', __name__, @@ -28,10 +27,10 @@ def query_homolog(query=""): """ query = request.args.get('query') or query query = query.lower() - results = Homologs.query.filter(func.lower(Homologs.homolog_gene)==query) \ + results = Homologs.query.filter((func.lower(Homologs.homolog_gene)).startswith(query)) \ .limit(10) \ .all() - results = [x.unnest() for x in results] + results = [x.unnest().to_json() for x in results] return results @@ -50,16 +49,17 @@ def lookup_gene(query=""): """ query = request.args.get('query') or query + query = str(query).lower() # First identify exact match - result = WormbaseGeneSummary.query.filter(or_(WormbaseGeneSummary.locus == query, - WormbaseGeneSummary.sequence_name == query, - WormbaseGeneSummary.gene_id == query)) \ - .first() + result = WormbaseGeneSummary.query.filter(or_(func.lower(WormbaseGeneSummary.locus) == query, + func.lower(WormbaseGeneSummary.sequence_name) == query, + func.lower(WormbaseGeneSummary.gene_id) == query)) \ + .first() if not result: - result = WormbaseGeneSummary.query.filter(or_(WormbaseGeneSummary.locus.startswith(query), - WormbaseGeneSummary.sequence_name.startswith(query), - WormbaseGeneSummary.gene_id.startswith(query))) \ - .first() + result = WormbaseGeneSummary.query.filter(or_(func.lower(WormbaseGeneSummary.locus).startswith(query), + func.lower(WormbaseGeneSummary.sequence_name).startswith(query), + func.lower(WormbaseGeneSummary.gene_id).startswith(query))) \ + .first() return result @@ -79,11 +79,14 @@ def query_gene(query=""): """ query = request.args.get('query') or query - results = WormbaseGeneSummary.query.filter(or_(WormbaseGeneSummary.locus.startswith(query), - WormbaseGeneSummary.sequence_name.startswith(query), - WormbaseGeneSummary.gene_id.startswith(query))) \ - .limit(10) \ - .all() + query = str(query).lower() + results = WormbaseGeneSummary.query.filter(or_(func.lower(WormbaseGeneSummary.locus).startswith(query), + func.lower(WormbaseGeneSummary.sequence_name).startswith(query), + func.lower(WormbaseGeneSummary.gene_id).startswith(query))) \ + .limit(10) \ + .all() + + results = [x.to_json() for x in results] return results @@ -99,7 +102,7 @@ def combined_search(query=""): results (list): List of dictionaries describing the homolog. 
""" - return query_gene(query) + query_homolog(query) + return (query_gene(query) + query_homolog(query))[0:10] diff --git a/base/views/api/api_popgen.py b/base/views/api/api_popgen.py index 4ead6b6a..f767b5ac 100644 --- a/base/views/api/api_popgen.py +++ b/base/views/api/api_popgen.py @@ -1,17 +1,18 @@ from flask import jsonify -from base.application import app from subprocess import Popen, PIPE -from base.constants import DATASET_RELEASE + +from base.constants import GOOGLE_CLOUD_BUCKET +from base.config import config +from base.application import app from base.views.api.api_variant import variant_query from base.views.api.api_strain import get_isotypes from base.utils.decorators import jsonify_request -from logzero import logger @app.route('/api/popgen/tajima///') @app.route('/api/popgen/tajima////') @jsonify_request -def tajima(chrom, start, end, release = DATASET_RELEASE): +def tajima(chrom, start, end, release = config['DATASET_RELEASE']): """ Args: chrom @@ -30,7 +31,7 @@ def tajima(chrom, start, end, release = DATASET_RELEASE): # No tajima bedfile exists for 20160408 - so use next version. if release < 20170531: release = 20170531 - url = f"http://storage.googleapis.com/elegansvariation.org/releases/{release}/popgen/WI.{release}.tajima.bed.gz" + url = f"http://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/releases/{release}/popgen/WI.{release}.tajima.bed.gz" comm = ['tabix', url, "{chrom}:{start}-{end}".format(**locals())] out, err = Popen(comm, stdout=PIPE, stderr=PIPE).communicate() @@ -46,13 +47,15 @@ def tajima(chrom, start, end, release = DATASET_RELEASE): @app.route('/api/popgen/gt//') @app.route('/api/popgen/gt///') @jsonify_request -def get_allele_geo(chrom, pos, isotypes=None, release = DATASET_RELEASE): +def get_allele_geo(chrom, pos, isotypes=None, release=None): """ Args: chrom pos isotypes """ + if release == None: + release = config['DATASET_RELEASE'] try: variant = variant_query(f"{chrom}:{pos}-{pos+1}", list_all_strains=True, release=release)[0] except IndexError: diff --git a/base/views/api/api_strain.py b/base/views/api/api_strain.py index fc0dbf43..8754f61e 100644 --- a/base/views/api/api_strain.py +++ b/base/views/api/api_strain.py @@ -11,7 +11,7 @@ template_folder='api') -@api_strain_bp.route('/strain/query/') +@api_strain_bp.route('/strains/query/') @jsonify_request def search_strains(query): base_query = Strain.query.filter(Strain.isotype != None) @@ -27,11 +27,11 @@ def search_strains(query): return list([x.to_json() for x in results]) -@api_strain_bp.route('/strain/') -@api_strain_bp.route('/strain/') -@api_strain_bp.route('/strain/isotype/') +@api_strain_bp.route('/strains/') +@api_strain_bp.route('/strains/') +@api_strain_bp.route('/strains/isotype/') @jsonify_request -def query_strains(strain_name=None, isotype_name=None, release=None, all_strain_names=False, resolve_isotype=False, issues=False): +def query_strains(strain_name=None, isotype_name=None, release=None, all_strain_names=False, resolve_isotype=False, issues=False, is_sequenced=False): """ Return the full strain database set @@ -55,6 +55,9 @@ def query_strains(strain_name=None, isotype_name=None, release=None, all_strain_ else: query = query + if is_sequenced is True: + query = query.filter(Strain.sequenced == True) + if issues is False: query = query.filter(Strain.issues == False) query = query.filter(Strain.isotype != None) @@ -122,3 +125,16 @@ def get_isotypes(known_origin=False, list_only=False): if list_only: result = [x.isotype for x in result] return result + + + 
+@api_strain_bp.route('/distinct-isotypes') +@jsonify_request +def get_distinct_isotypes(): + """ + Returns a list of unique values in the isotype column of the Strains table + """ + result = Strain.query.with_entities(Strain.isotype).filter(Strain.isotype != None).distinct().all() + result = [x.isotype for x in result] + return result + diff --git a/base/views/api/api_variant.py b/base/views/api/api_variant.py index d5c9ce24..854d7dd8 100644 --- a/base/views/api/api_variant.py +++ b/base/views/api/api_variant.py @@ -5,18 +5,21 @@ """ import re import pickle + from cyvcf2 import VCF from flask import request, Response from tempfile import NamedTemporaryFile from subprocess import Popen, PIPE from collections import OrderedDict -from base.utils.decorators import jsonify_request -from base.config import config from collections import Counter from logzero import logger - from flask import Blueprint +from base.constants import GOOGLE_CLOUD_BUCKET +from base.config import config +from base.utils.decorators import jsonify_request + + api_variant_bp = Blueprint('api_variant', __name__, template_folder='api') @@ -42,7 +45,7 @@ def get_vcf(release=config["DATASET_RELEASE"], filter_type="hard"): - return "http://storage.googleapis.com/elegansvariation.org/releases/{release}/variation/WI.{release}.{filter_type}-filter.isotype.vcf.gz".format(release=release, filter_type=filter_type) + return f"http://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/releases/{release}/variation/WI.{release}.{filter_type}-filter.isotype.vcf.gz" gt_set_keys = ["SAMPLE", "GT", "FT", "TGT"] @@ -92,7 +95,7 @@ def variant_query(query=None, samples=None, list_all_strains=False, release=conf 'start': int(query['start']), 'end': int(query['end']), 'release': query['release'], - 'variant_impact': query['variant_impact'].split("_"), + 'variant_impact': ['ALL'], 'sample_list': query['sample_tracks'].split("_"), 'output': query['output'], 'list-all-strains': list_all_strains or query['list-all-strains'] == 'true', diff --git a/base/views/auth/__init__.py b/base/views/auth/__init__.py new file mode 100644 index 00000000..ecef2883 --- /dev/null +++ b/base/views/auth/__init__.py @@ -0,0 +1,5 @@ +from .auth import auth_bp +from .saml import saml_bp +from .oauth import google_bp + + diff --git a/base/views/auth/auth.py b/base/views/auth/auth.py new file mode 100644 index 00000000..fb0d4d83 --- /dev/null +++ b/base/views/auth/auth.py @@ -0,0 +1,84 @@ +import os +import arrow +from flask import (abort, + redirect, + render_template, + session, + request, + make_response, + flash, + jsonify, + Blueprint) +from slugify import slugify + +from base.models import user_ds +from base.forms import basic_login_form +from base.utils.jwt_utils import (create_access_token, + set_access_cookies, + get_jwt_identity, + jwt_required, + assign_access_refresh_tokens, + unset_jwt) +from base.config import config + +auth_bp = Blueprint('auth', + __name__, + template_folder='') + + +@auth_bp.route('/refresh', methods=['GET']) +@jwt_required(refresh=True) +def refresh(): + ''' Refreshing expired Access token ''' + username = get_jwt_identity() + user = user_ds(username) + if user._exists: + referrer = session.get('login_referrer', '/') + return assign_access_refresh_tokens(username, user.roles, referrer, refresh=False) + + return abort(401) + + +@auth_bp.route("/login/select", methods=['GET']) +def choose_login(error=None): + # Relax scope for Google + referrer = session.get("login_referrer") or "" + if not referrer.endswith("/login/select"): + 
session["login_referrer"] = request.referrer + os.environ['OAUTHLIB_RELAX_TOKEN_SCOPE'] = "true" + VARS = {'page_title': 'Choose Login'} + if error: + flash(error, 'danger') + return render_template('select_login.html', **VARS) + + +@auth_bp.route("/login/basic", methods=["GET", "POST"]) +def basic_login(): + title = "Login" + disable_parent_breadcrumb = True + form = basic_login_form(request.form) + if request.method == 'POST' and form.validate(): + username = slugify(request.form.get("username")) + password = request.form.get("password") + user = user_ds(username) + if user._exists: + if user.check_password(password, config['PASSWORD_SALT']): + user.last_login = arrow.utcnow().datetime + user.save() + referrer = session.get('login_referrer', '/') + flash('Logged In', 'success') + return assign_access_refresh_tokens(username, user.roles, referrer) + flash('Wrong username or password', 'error') + return redirect(request.referrer) + return render_template('basic_login.html', **locals()) + + +@auth_bp.route('/logout') +def logout(): + """ + Logs the user out. + """ + session.clear() + resp = unset_jwt() + flash("Successfully logged out", "success") + return resp diff --git a/base/views/auth/oauth.py b/base/views/auth/oauth.py new file mode 100644 index 00000000..9ee4b06d --- /dev/null +++ b/base/views/auth/oauth.py @@ -0,0 +1,56 @@ +import arrow +from flask import (redirect, + url_for, + session, + flash) +from flask_dance.contrib.google import make_google_blueprint, google +from flask_dance.consumer import oauth_authorized + +from base.config import config +from base.models import user_ds +from base.utils.data_utils import unique_id +from base.utils.jwt_utils import assign_access_refresh_tokens + + +google_bp = make_google_blueprint(client_id=config['GOOGLE_CLIENT_ID'], + client_secret=config['GOOGLE_CLIENT_SECRET'], + scope=["https://www.googleapis.com/auth/userinfo.profile", + "https://www.googleapis.com/auth/userinfo.email", + "openid"], + offline=True) + + +def create_or_update_google_user(user_info): + # Get up-to-date properties + user_id = user_info['google']['id'] + user_email = user_info['google']['email'] + user_name = user_info['google']['name'] + user = user_ds(user_id) + now = arrow.utcnow().datetime + if not user._exists: + user.roles = ['user'] + user.created_on = now + + # Save updated properties + user.modified_on = now + user.last_login = now + user.set_properties(username=user_email, password=unique_id(), salt=config['PASSWORD_SALT'], full_name=user_name, email=user_email.lower()) + user.verified_email = True; + user.user_type = 'OAUTH' + user.save() + return user + + +@oauth_authorized.connect +def authorized(blueprint, token): + if not google.authorized: + flash("Error logging in!") + return redirect(url_for("auth.choose_login")) + + user_info = google.get("/oauth2/v2/userinfo") + assert user_info.ok + user_info = {'google': user_info.json()} + user = create_or_update_google_user(user_info) + + flash("Successfully logged in!", 'success') + return assign_access_refresh_tokens(user.name, user.roles, session.get("login_referrer")) diff --git a/base/views/auth/saml.py b/base/views/auth/saml.py new file mode 100644 index 00000000..dfa0c7a2 --- /dev/null +++ b/base/views/auth/saml.py @@ -0,0 +1,169 @@ +import arrow + +from flask import (redirect, + url_for, + session, + request, + make_response, + flash, + Blueprint) +from slugify import slugify + +from base.config import config +from base.models import user_ds +from base.utils.jwt_utils import assign_access_refresh_tokens + 
+from urllib.parse import urlparse
+from logzero import logger
+from onelogin.saml2.auth import OneLogin_Saml2_Auth
+from onelogin.saml2.settings import OneLogin_Saml2_Settings
+from onelogin.saml2.utils import OneLogin_Saml2_Utils
+
+
+saml_bp = Blueprint('saml',
+                    __name__,
+                    template_folder='')
+
+# eduPerson/LDAP attribute OIDs used to map IdP assertions onto user fields
+attr_epp = 'urn:oid:1.3.6.1.4.1.5923.1.1.1.6'
+attr_mail = 'urn:oid:0.9.2342.19200300.100.1.3'
+attr_o = 'urn:oid:2.5.4.10'
+attr_name = 'urn:oid:2.16.840.1.113730.3.1.241'
+attr_uid = 'urn:oid:0.9.2342.19200300.100.1.1'
+
+
+def get_or_register_user(saml_auth):
+    try:
+        attributes = saml_auth.get_attributes()
+        logger.debug(attributes)
+        error_reason = saml_auth.get_last_error_reason()
+        logger.debug(error_reason)
+        username = attributes.get(attr_epp, [''])[0]
+        id = slugify(username)
+        if id is None:
+            return None
+
+        user = user_ds(id)
+        now = arrow.utcnow().datetime
+        if not user._exists:
+            user.created_on = now
+            user.roles = ['user']
+
+        user.username = username
+        user.email = attributes.get(attr_mail, [''])[0]
+        user.verified_email = True
+        user.o = attributes.get(attr_o, [''])[0]
+        user.name = attributes.get(attr_name, [''])[0]
+        user.uid = attributes.get(attr_uid, [''])[0]
+
+        # store the rest of the saml info
+        user.samlUserdata = attributes
+        user.samlNameId = saml_auth.get_nameid()
+        user.samlNameIdFormat = saml_auth.get_nameid_format()
+        user.samlNameIdNameQualifier = saml_auth.get_nameid_nq()
+        user.samlNameIdSPNameQualifier = saml_auth.get_nameid_spnq()
+        user.samlSessionIndex = saml_auth.get_session_index()
+        user.last_login = now
+        user.user_type = 'SAML'
+        user.save()
+        return user
+    except Exception:
+        return None
+
+def init_saml_auth(req):
+    """
+    Loads the saml config from settings.json
+    to generate the SAML XML
+    """
+    saml_auth = OneLogin_Saml2_Auth(req, custom_base_path=config['SAML_PATH'])
+    return saml_auth
+
+
+def prepare_flask_request(request):
+    """
+    Preprocessor for request data
+    """
+    # If server is behind proxies or balancers use the HTTP_X_FORWARDED fields
+    url_data = urlparse(request.url)
+    return {
+        'https': 'on' if request.scheme == 'https' else 'off',
+        'http_host': request.host,
+        'server_port': url_data.port,
+        'script_name': request.path,
+        'get_data': request.args.copy(),
+        # Uncomment if using ADFS as IdP, https://github.com/onelogin/python-saml/pull/144
+        # 'lowercase_urlencoding': True,
+        'post_data': request.form.copy()
+    }
+
+
+@saml_bp.route('/sso2', methods=['GET', 'POST'])
+def saml_sso2():
+    """
+    Single Sign On (2) route for SAML which includes user attributes
+    """
+    req = prepare_flask_request(request)
+    saml_auth = init_saml_auth(req)
+    return_to = session.get("login_referrer")
+    return redirect(saml_auth.login(return_to))
+
+
+@saml_bp.route('/acs', methods=['GET', 'POST'])
+def saml_acs():
+    """
+    Assertion Consumer Service route for SAML
+    """
+    req = prepare_flask_request(request)
+    saml_auth = init_saml_auth(req)
+    settings = saml_auth.get_settings()
+    errors = []
+    error_reason = None
+    is_auth = False
+
+    request_id = None
+    if 'AuthNRequestID' in session:
+        request_id = session['AuthNRequestID']
+
+    saml_auth.process_response(request_id=request_id)
+    errors = saml_auth.get_errors()
+    is_auth = saml_auth.is_authenticated()
+
+    if (len(errors) == 0) and is_auth:
+        if 'AuthNRequestID' in session:
+            del session['AuthNRequestID']
+
+        user = get_or_register_user(saml_auth)
+        if user is None:
+            flash('Failed to retrieve attributes from Identity Provider', 'error')
+            return redirect(url_for('auth.logout'))
+
+        self_url = OneLogin_Saml2_Utils.get_self_url(req)
session.get("login_referrer",'/') + if 'RelayState' in request.form and self_url != request.form['RelayState']: + referrer = request.form['RelayState'] + + return assign_access_refresh_tokens(user.name, user.roles, referrer) + + elif settings.is_debug_active(): + error_reason = saml_auth.get_last_error_reason() + + flash('Wrong username or password', 'error') + return redirect(request.referrer) + + +@saml_bp.route('/metadata/') +def saml_metadata(): + """ + Generates metadata.xml for SAML Service Provider from settings.json + """ + req = prepare_flask_request(request) + saml_auth = init_saml_auth(req) + settings = saml_auth.get_settings() + metadata = settings.get_sp_metadata() + errors = settings.validate_metadata(metadata) + + if len(errors) == 0: + resp = make_response(metadata, 200) + resp.headers['Content-Type'] = 'text/xml' + else: + resp = make_response(', '.join(errors), 500) + return resp + diff --git a/base/views/data.py b/base/views/data.py index 841e8cc7..aadd96d2 100644 --- a/base/views/data.py +++ b/base/views/data.py @@ -1,12 +1,21 @@ +import json +from flask import request, jsonify import requests +import os + +from datetime import timedelta from simplejson.errors import JSONDecodeError -from flask import make_response -from flask import render_template -from flask import Blueprint -from base.views.api.api_strain import get_isotypes, query_strains +from flask import make_response, render_template, Blueprint, send_file, url_for + +from base.constants import BAM_BAI_DOWNLOAD_SCRIPT_NAME, GOOGLE_CLOUD_BUCKET from base.config import config -from base.models import Strain -from base.utils.gcloud import list_release_files +from base.extensions import cache +from base.forms import vbrowser_form +from base.models import Strain, StrainAnnotatedVariants +from base.utils.gcloud import list_release_files, generate_download_signed_url_v4, download_file +from base.utils.jwt_utils import jwt_required +from base.views.api.api_strain import get_isotypes, query_strains, get_distinct_isotypes + data_bp = Blueprint('data', __name__, @@ -15,37 +24,87 @@ # ============= # # Data Page # # ============= # + +@cache.memoize(50) +def generate_v2_file_list(selected_release): + path = f'releases/{selected_release}' + prefix = f'https://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/{path}' + release_files = list_release_files(f"{path}/") + + f = dict() + + f['soft_filter_vcf_gz'] = f'{prefix}/variation/WI.{selected_release}.soft-filter.vcf.gz' + f['soft_filter_vcf_gz_tbi'] = f'{prefix}/variation/WI.{selected_release}.soft-filter.vcf.gz.tbi' + f['soft_filter_isotype_vcf_gz'] = f'{prefix}/variation/WI.{selected_release}.soft-filter.isotype.vcf.gz' + f['soft_filter_isotype_vcf_gz_tbi'] = f'{prefix}/variation/WI.{selected_release}.soft-filter.isotype.vcf.gz.tbi' + f['hard_filter_vcf_gz'] = f'{prefix}/variation/WI.{selected_release}.hard-filter.vcf.gz' + f['hard_filter_vcf_gz_tbi'] = f'{prefix}/variation/WI.{selected_release}.hard-filter.vcf.gz.tbi' + f['hard_filter_isotype_vcf_gz'] = f'{prefix}/variation/WI.{selected_release}.hard-filter.isotype.vcf.gz' + f['hard_filter_isotype_vcf_gz_tbi'] = f'{prefix}/variation/WI.{selected_release}.hard-filter.isotype.vcf.gz.tbi' + f['impute_isotype_vcf_gz'] = f'{prefix}/variation/WI.{selected_release}.impute.isotype.vcf.gz' + f['impute_isotype_vcf_gz_tbi'] = f'{prefix}/variation/WI.{selected_release}.impute.isotype.vcf.gz.tbi' + + f['hard_filter_min4_tree'] = f'{prefix}/tree/WI.{selected_release}.hard-filter.min4.tree' + f['hard_filter_min4_tree_pdf'] = 
+    f['hard_filter_min4_tree_pdf'] = f'{prefix}/tree/WI.{selected_release}.hard-filter.min4.tree.pdf'
+    f['hard_filter_isotype_min4_tree'] = f'{prefix}/tree/WI.{selected_release}.hard-filter.isotype.min4.tree'
+    f['hard_filter_isotype_min4_tree_pdf'] = f'{prefix}/tree/WI.{selected_release}.hard-filter.isotype.min4.tree.pdf'
+
+    f['haplotype_png'] = f'{prefix}/haplotype/haplotype.png'
+    f['haplotype_pdf'] = f'{prefix}/haplotype/haplotype.pdf'
+    f['sweep_pdf'] = f'{prefix}/haplotype/sweep.pdf'
+    f['sweep_summary_tsv'] = f'{prefix}/haplotype/sweep_summary.tsv'
+
+    for key, value in f.items():
+        if value not in release_files:
+            f[key] = None
+
+    return f
+
+
+@data_bp.route('/')
+def data_landing():
+    disable_parent_breadcrumb = True
+    return render_template('data_landing.html', **locals())
+
+
 @data_bp.route('/release/latest')
 @data_bp.route('/release/<selected_release>')
-@data_bp.route('/release/<int:selected_release>')
-def data(selected_release=config["DATASET_RELEASE"]):
+@cache.memoize(50)
+def data(selected_release=None):
     """
     Default data page - lists available releases.
     """
+    if selected_release is None:
+        selected_release = config['DATASET_RELEASE']
     # Pre-2020 releases used BAMs grouped by isotype.
     if int(selected_release) < 20200101:
         return data_v01(selected_release)

     # Post-2020 releases keep strain-level bams separate.
-    title = "Releases"
+    title = "Genomic Data"
+    alt_parent_breadcrumb = {"title": "Data", "url": url_for('data.data_landing')}
     sub_page = selected_release
     strain_listing = query_strains(release=selected_release)
-    strain_listing_issues = query_strains(release=selected_release, issues=True)
     release_summary = Strain.release_summary(selected_release)
     RELEASES = config["RELEASES"]
     DATASET_RELEASE, WORMBASE_VERSION = list(filter(lambda x: x[0] == selected_release, RELEASES))[0]
     REPORTS = ["alignment"]
+    f = generate_v2_file_list(selected_release)
     return render_template('data_v2.html', **locals())


+@cache.memoize(50)
 def data_v01(selected_release):
     # Legacy releases (Pre 20200101)
-    title = "Releases"
+    title = "Genomic Data"
+    alt_parent_breadcrumb = {"title": "Data", "url": url_for('data.data_landing')}
     subtitle = selected_release
     strain_listing = query_strains(release=selected_release)
     # Fetch variant data
-    url = "https://storage.googleapis.com/elegansvariation.org/releases/{selected_release}/multiqc_bcftools_stats.json".format(selected_release=selected_release)
+    url = f"https://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/releases/{selected_release}/multiqc_bcftools_stats.json"
     try:
         vcf_summary = requests.get(url).json()
     except JSONDecodeError:
@@ -60,42 +119,158 @@ def data_v01(selected_release):
     return render_template('data.html', **locals())


+# ======================= #
+#   Alignment Data Page   #
+# ======================= #
+@data_bp.route('/release/latest/alignment')
+@data_bp.route('/release/<selected_release>/alignment')
+@cache.memoize(50)
+def alignment_data(selected_release=None):
+    """
+    Alignment data page
+    """
+    if selected_release is None:
+        selected_release = config['DATASET_RELEASE']
+    # Pre-2020 releases don't have data organized the same way
+    if int(selected_release) < 20200101:
+        return
+
+    # Post-2020 releases
+    title = "Alignment Data"
+    alt_parent_breadcrumb = {"title": "Data", "url": url_for('data.data_landing')}
+    strain_listing = query_strains(release=selected_release)
+    RELEASES = config["RELEASES"]
+    DATASET_RELEASE, WORMBASE_VERSION = list(filter(lambda x: x[0] == selected_release, RELEASES))[0]
+    REPORTS = ["alignment"]
+    return render_template('alignment.html', **locals())
+
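+# config["RELEASES"] is assumed to hold (dataset_release, wormbase_version)
+# pairs, e.g. [("20210121", "WS276"), ...]; the list(filter(...))[0] lookups
+# above and below pick out the wormbase version paired with the selected
+# release.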
+# =========================== #
+#   Strain Issues Data Page   #
+# =========================== #
+@data_bp.route('/release/latest/strain_issues')
+@data_bp.route('/release/<selected_release>/strain_issues')
+@cache.memoize(50)
+def strain_issues(selected_release=None):
+    """
+    Strain Issues page
+    """
+    if selected_release is None:
+        selected_release = config['DATASET_RELEASE']
+    # Pre-2020 releases don't have data organized the same way
+    if int(selected_release) < 20200101:
+        return
+
+    # Post-2020 releases
+    title = "Strain Issues"
+    alt_parent_breadcrumb = {"title": "Data", "url": url_for('data.data_landing')}
+    strain_listing_issues = query_strains(release=selected_release, issues=True)
+    RELEASES = config["RELEASES"]
+    DATASET_RELEASE, WORMBASE_VERSION = list(filter(lambda x: x[0] == selected_release, RELEASES))[0]
+    return render_template('strain_issues.html', **locals())

 # =================== #
 #   Download Script   #
 # =================== #
+@data_bp.route('/release/<selected_release>/download/download_isotype_bams.sh')
+@jwt_required()
+# NB: jwt_required must wrap cache.cached -- if the cache were the outer
+# decorator, a cached response would be served without the JWT being checked.
+@cache.cached(timeout=60*60*24)
+def download_script(selected_release):
+    if not os.path.exists(f'base/{BAM_BAI_DOWNLOAD_SCRIPT_NAME}'):
+        download_file(f'bam/{BAM_BAI_DOWNLOAD_SCRIPT_NAME}', f'base/{BAM_BAI_DOWNLOAD_SCRIPT_NAME}')
+    return send_file(BAM_BAI_DOWNLOAD_SCRIPT_NAME, as_attachment=True)
+
+
+@data_bp.route('/release/latest/download/download_strain_bams.sh')
+@data_bp.route('/release/<selected_release>/download/download_strain_bams.sh')
+@jwt_required()
+@cache.cached(timeout=60*60*24)
+def download_script_strain_v2(selected_release=None):
+    if not os.path.exists(f'base/{BAM_BAI_DOWNLOAD_SCRIPT_NAME}'):
+        download_file(f'bam/{BAM_BAI_DOWNLOAD_SCRIPT_NAME}', f'base/{BAM_BAI_DOWNLOAD_SCRIPT_NAME}')
+    return send_file(BAM_BAI_DOWNLOAD_SCRIPT_NAME, as_attachment=True)

-
-@data_bp.route('/download/download_isotype_bams.sh')
-def download_script():
-    strain_listing = query_strains(release=config["DATASET_RELEASE"])
-    download_page = render_template('download_script.sh', **locals())
-    response = make_response(download_page)
-    response.headers["Content-Type"] = "text/plain"
-    return response
+@data_bp.route('/download/files/<blob_name>')
+@jwt_required()
+def download_bam_url(blob_name=''):
+    title = blob_name
+    blob_path = 'bam/' + blob_name
+    signed_download_url = generate_download_signed_url_v4(blob_path)
+    msg = 'download will begin shortly...'
+    if not signed_download_url:
+        msg = 'error fetching download link'
+        signed_download_url = ''
-
-@data_bp.route('/download/download_strain_bams.sh')
-def download_script_strain_v2():
-    v2 = True
-    strain_listing = query_strains(release=config["DATASET_RELEASE"])
-    download_page = render_template('download_script.sh', **locals())
-    response = make_response(download_page)
-    response.headers["Content-Type"] = "text/plain"
-    return response
+    return render_template('download.html', **locals())

 # ============= #
-#   Browser     #
+#   GBrowser    #
 # ============= #
-
-@data_bp.route('/browser')
-@data_bp.route('/browser/<release>')
-@data_bp.route('/browser/<release>/<region>')
-@data_bp.route('/browser/<release>/<region>/<query>')
-def browser(release=config["DATASET_RELEASE"], region="III:11746923-11750250", query=None):
-    VARS = {'title': "Variant Browser",
+@data_bp.route('/gbrowser')
+@data_bp.route('/gbrowser/<release>')
+@data_bp.route('/gbrowser/<release>/<region>')
+@data_bp.route('/gbrowser/<release>/<region>/<query>')
+def gbrowser(release=config["DATASET_RELEASE"], region="III:11746923-11750250", query=None):
+    VARS = {'title': "Genome Browser",
             'DATASET_RELEASE': int(release),
             'strain_listing': get_isotypes(),
             'region': region,
             'query': query,
+            'alt_parent_breadcrumb': {
+                "title": "Data",
+                "url": url_for('data.data_landing')
+            },
             'fluid_container': True}
-    return render_template('browser.html', **VARS)
+    return render_template('gbrowser.html', **VARS)
+
+
+# ============= #
+#   VBrowser    #
+# ============= #
+
+
+@data_bp.route('/vbrowser')
+def vbrowser():
+    title = 'Variant Annotation'
+    alt_parent_breadcrumb = {"title": "Data", "url": url_for('data.data_landing')}
+    form = vbrowser_form()
+    strain_listing = get_distinct_isotypes()
+    columns = StrainAnnotatedVariants.column_details
+    fluid_container = True
+    return render_template('vbrowser.html', **locals())
+
+
+@data_bp.route('/vbrowser/query/interval', methods=['POST'])
+def vbrowser_query_interval():
+    title = 'Variant Annotation'
+    payload = json.loads(request.data)
+
+    query = payload.get('query')
+
+    is_valid = StrainAnnotatedVariants.verify_interval_query(q=query)
+    if is_valid:
+        data = StrainAnnotatedVariants.run_interval_query(q=query)
+        return jsonify(data)
+
+    return jsonify({})
+
+
+@data_bp.route('/vbrowser/query/position', methods=['POST'])
+def vbrowser_query_position():
+    title = 'Variant Annotation'
+    payload = json.loads(request.data)
+
+    query = payload.get('query')
+
+    is_valid = StrainAnnotatedVariants.verify_position_query(q=query)
+    if is_valid:
+        data = StrainAnnotatedVariants.run_position_query(q=query)
+        return jsonify(data)
+
+    return jsonify({})
+
+
diff --git a/base/views/gene.py b/base/views/gene.py
index 90bc1310..8c214ae1 100644
--- a/base/views/gene.py
+++ b/base/views/gene.py
@@ -16,11 +16,9 @@ def gene(gene_name=""):
     if not gene_name:
         redirect(url_for('gene.gene', gene_name='pot-2'))

     gene_record = lookup_gene(gene_name)
     if gene_record is None:
-        return render_template('404.html'), 404
+        return render_template('errors/404.html'), 404

     # Gene Variants
     variants = gene_variants(gene_record.gene_id)
diff --git a/base/views/maintenance.py b/base/views/maintenance.py
new file mode 100644
index 00000000..76966667
--- /dev/null
+++ b/base/views/maintenance.py
@@ -0,0 +1,83 @@
+import os
+import time
+
+from threading import Thread
+from datetime import timedelta
+from flask import jsonify, Blueprint, request, flash, abort
+from logzero import logger
+
+from base.constants import BAM_BAI_DOWNLOAD_SCRIPT_NAME
+from base.config import config
+from base.utils.gcloud import generate_download_signed_url_v4, upload_file
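+# For reference, generate_download_signed_url_v4 (base/utils/gcloud.py, not
+# shown in this diff) presumably wraps the google-cloud-storage client along
+# these lines -- a hedged sketch, not the actual implementation:
+#
+#   from datetime import timedelta
+#   from google.cloud import storage
+#
+#   def generate_download_signed_url_v4(blob_path, expiration=timedelta(minutes=15)):
+#       client = storage.Client()
+#       blob = client.bucket(GOOGLE_CLOUD_BUCKET).blob(blob_path)
+#       if not blob.exists():
+#           return None  # callers treat a falsy return as failure
+#       return blob.generate_signed_url(version="v4", expiration=expiration, method="GET")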
+from base.views.api.api_strain import query_strains
+from base.utils.cache import delete_expired_cache
+
+maintenance_bp = Blueprint('maintenance',
+                           __name__)
+
+
+def verify_req_origin(request):
+    # App Engine strips the X-Appengine-Cron header from external requests,
+    # so its presence means the request genuinely came from the Cron service.
+    cron_header = request.headers.get('X-Appengine-Cron')
+    if cron_header:
+        return True
+    return False
+
+
+@maintenance_bp.route('/cleanup_cache', methods=['GET'])
+def cleanup_cache():
+    if verify_req_origin(request):
+        result = delete_expired_cache()
+        response = jsonify({"result": result})
+        response.status_code = 200
+        return response
+
+    flash('You do not have access to this page', 'error')
+    return abort(401)
+
+@maintenance_bp.route('/create_bam_bai_download_script', methods=['GET'])
+def create_bam_bai_download_script():
+    if verify_req_origin(request):
+        strain_listing = query_strains(is_sequenced=True)
+        joined_strain_list = ','.join(strain.strain for strain in strain_listing)
+
+        # args must be a tuple of positional arguments, not a dict
+        thread = Thread(target=generate_bam_bai_download_script, args=(joined_strain_list,))
+        thread.start()
+
+        response = jsonify({})
+        response.status_code = 200
+        return response
+
+    flash('You do not have access to this page', 'error')
+    return abort(401)
+
+
+def generate_bam_bai_download_script(joined_strain_list):
+    ''' Generates signed download urls for every sequenced strain and creates a script to download them '''
+    expiration = timedelta(days=7)
+    filename = f'{BAM_BAI_DOWNLOAD_SCRIPT_NAME}'
+    blobPath = f'bam/{BAM_BAI_DOWNLOAD_SCRIPT_NAME}'
+
+    if os.path.exists(filename):
+        os.remove(filename)
+
+    f = open(filename, "a")
+
+    strain_listing = joined_strain_list.split(',')
+    for strain in strain_listing:
+        f.write(f'\n\n# Strain: {strain}')
+
+        bam_path = 'bam/{}.bam'.format(strain)
+        bam_signed_url = generate_download_signed_url_v4(bam_path, expiration=expiration)
+        if bam_signed_url:
+            f.write('\nwget -O "{}.bam" "{}"'.format(strain, bam_signed_url))
+
+        bai_path = 'bam/{}.bam.bai'.format(strain)
+        bai_signed_url = generate_download_signed_url_v4(bai_path, expiration=expiration)
+        if bai_signed_url:
+            f.write('\nwget -O "{}.bam.bai" "{}"'.format(strain, bai_signed_url))
+
+    f.close()
+    upload_file(blobPath, f"{BAM_BAI_DOWNLOAD_SCRIPT_NAME}")
+
diff --git a/base/views/mapping.py b/base/views/mapping.py
index 6fdd32b2..7392c451 100644
--- a/base/views/mapping.py
+++ b/base/views/mapping.py
@@ -1,30 +1,35 @@
 import decimal
-import re
-import arrow
-import urllib
-import pandas as pd
+import csv
 import simplejson as json
+import os

-from base.constants import BIOTYPES, TABLE_COLORS
-from base.models import trait_ds
 from datetime import date
 from flask import render_template, request, redirect, url_for, abort
-from slugify import slugify
-from base.forms import mapping_submission_form
 from logzero import logger
 from flask import session, flash, Blueprint, g
-from base.utils.data_utils import unique_id
-from base.config import config
-from base.utils.gcloud import query_item, delete_item
-from base.utils.plots import pxg_plot, plotly_distplot

+from base.constants import GOOGLE_CLOUD_BUCKET
+from base.config import config
+from base.models import ns_calc_ds, gls_op_ds
+from base.forms import file_upload_form
+from base.utils.data_utils import unique_id, hash_file_contents
+from base.utils.gcloud import list_files, upload_file, add_task
+from base.utils.jwt_utils import jwt_required, get_jwt, get_current_user

 mapping_bp = Blueprint('mapping',
                        __name__,
                        template_folder='mapping')

+# Blob path templates; {data_hash} is filled with the 32-character digest of
+# the uploaded TSV computed in schedule_mapping() below.
+DATA_BLOB_PATH = 'reports/nemascan/{data_hash}/data.tsv'
+REPORT_BLOB_PATH = 'reports/nemascan/{data_hash}/results/Reports/NemaScan_Report_' +RESULT_BLOB_PATH = 'reports/nemascan/{data_hash}/results/' + + +# Create a directory in a known location to save files to. +uploads_dir = os.path.join('./', 'uploads') +os.makedirs(uploads_dir, exist_ok=True) class CustomEncoder(json.JSONEncoder): def default(self, o): @@ -34,231 +39,197 @@ def default(self, o): return str(o) return super(CustomEncoder, self).default(o) +def create_ns_task(data_hash, ds_id, ds_kind): + """ + Creates a Cloud Task to schedule the pipeline for execution + by the NemaScan service + """ + ns = ns_calc_ds(ds_id) + + # schedule nemascan request + queue = config['NEMASCAN_PIPELINE_TASK_QUEUE'] + url = config['NEMASCAN_PIPELINE_URL'] + data = {'hash': data_hash, 'ds_id': ds_id, 'ds_kind': ds_kind} + result = add_task(queue, url, data, task_name=ds_id) + + # Update report status + if result: + ns.status = 'SCHEDULED' + else: + ns.status = 'FAILED' + + return result + + +def is_data_cached(data_hash): + # Check if the file already exists in google storage (matching hash) + data_blob = DATA_BLOB_PATH.format(data_hash=data_hash) + data_exists = list_files(data_blob) + if len(data_exists) > 0: + return True + return False + +def is_result_cached(ns): + if ns.status == 'COMPLETE' and len(ns.report_path) > 0: + return True + + # Check the datastore entry for the GLS pipeline execution + glsOp = gls_op_ds(ns.data_hash) + if hasattr(glsOp, 'error'): + ns.status = 'ERROR' + ns.save() + return False + + # check if there is a report on GS, just to be sure + data_blob = REPORT_BLOB_PATH.format(data_hash=ns.data_hash) + result = list_files(data_blob) + if len(result) > 0: + for x in result: + if x.name.endswith('.html'): + report_path = GOOGLE_CLOUD_BUCKET + '/' + x.name + ns.report_path = report_path + ns.status = 'COMPLETE' + ns.save() + return True + else: + if hasattr(glsOp, 'done'): + ns.status = 'DONE' + ns.save() + return False + +@mapping_bp.route('/mapping/upload', methods = ['POST']) +@jwt_required() +def schedule_mapping(): + ''' + Uploads the users file and schedules the nemascan pipeline task + tracking metadata in an associated datastore entry + ''' + form = file_upload_form(request.form) + if not form.validate_on_submit(): + flash("You must include a description of your data and a TSV file to upload", "error") + return redirect(url_for('mapping.mapping')) + + # Store report metadata in datastore + user = get_current_user() + id = unique_id() + ns = ns_calc_ds(id) + ns.label = request.form.get('label') + ns.username = user.name + + # Save uploaded file to server temporarily + file = request.files['file'] + local_path = os.path.join(uploads_dir, f'{id}.tsv') + file.save(local_path) + + # Read first line from tsv + with open(local_path, 'r') as f: + csv_reader = csv.reader(f, delimiter='\t') + csv_headings = next(csv_reader) + + # Check first line for column headers (strain, {TRAIT}) + if csv_headings[0] != 'strain' or len(csv_headings) != 2 or len(csv_headings[1]) == 0: + os.remove(local_path) + flash("Please make sure that your data file exactly matches the sample format", 'error') + return redirect(url_for('mapping.mapping')) + + trait = csv_headings[1] + data_hash = hash_file_contents(local_path, length=32) + + # Update report status + ns.data_hash = data_hash + ns.trait = trait + ns.status = 'RECEIVED' + ns.save() + + if is_data_cached(data_hash): + flash('It looks like that data has already been uploaded - You will be redirected to the saved results', 'danger') + return 
redirect(url_for('mapping.mapping_report', id=id))
+
+    # Upload file to google storage
+    data_blob = DATA_BLOB_PATH.format(data_hash=data_hash)
+    result = upload_file(data_blob, local_path)
+    if not result:
+        ns.status = 'ERROR UPLOADING'
+        ns.save()
+        flash("There was an error uploading your data")
+        return redirect(url_for('mapping.mapping'))
+
+    # Schedule task
+    task_result = create_ns_task(data_hash, id, ns.kind)
+
+    # Delete copy stored locally on server
+    os.remove(local_path)
+
+    if not task_result:
+        flash("There was an error scheduling your calculations...")
+        return redirect(url_for('mapping.mapping'))
+
+    return redirect(url_for('mapping.mapping_report', id=id))
+
+
+@mapping_bp.route('/mapping/report/all', methods=['GET', 'POST'])
+@jwt_required()
+def mapping_result_list():
+    title = 'Genetic Mapping'
+    subtitle = 'Report List'
+    user = get_current_user()
+    items = ns_calc_ds().query_by_username(user.name)
+    # check for status changes
+    for x in items:
+        x = ns_calc_ds(x)
+        prevStatus = x.status
+        if prevStatus not in ('COMPLETE', 'ERROR', 'DONE'):
+            is_result_cached(x)
+
+    items = sorted(items, key=lambda x: x['created_on'], reverse=True)
+    return render_template('mapping_result_list.html', **locals())
+
+
+@mapping_bp.route('/mapping/report/<id>/', methods=['GET'])
+@jwt_required()
+def mapping_report(id):
+    title = 'Genetic Mapping Report'
+    user = get_current_user()
+    ns = ns_calc_ds(id)
+    fluid_container = True
+    subtitle = ns.label + ': ' + ns.trait
+    # check if DS entry has complete status
+    result = is_result_cached(ns)
+    if result:
+        report_path = ns.report_path
+
+    return render_template('mapping_result.html', **locals())
+
+
+@mapping_bp.route('/mapping/results/<id>/', methods=['GET'])
+@jwt_required()
+def mapping_results(id):
+    title = 'Genetic Mapping Results'
+    user = get_current_user()
+    ns = ns_calc_ds(id)
+    result = is_result_cached(ns)
+    subtitle = ns.label + ': ' + ns.trait
+
+    data_blob = RESULT_BLOB_PATH.format(data_hash=ns.data_hash)
+    blobs = list_files(data_blob)
+    file_list = []
+    for blob in blobs:
+        file_list.append({
+            "name": blob.name.rsplit('/', 2)[1] + '/' + blob.name.rsplit('/', 2)[2],
+            "url": blob.public_url
+        })
+
+    return render_template('mapping_result_files.html', **locals())
+

 @mapping_bp.route('/mapping/perform-mapping/', methods=['GET', 'POST'])
+@jwt_required()
 def mapping():
-    """
-    This is the mapping submission page.
- """ - form = mapping_submission_form(request.form) - - VARS = {'title': 'Perform Mapping', - 'form': form} - - user = session.get('user') - if form.validate_on_submit() and user: - transaction = g.ds.transaction() - transaction.begin() - - # Now generate and run trait tasks - report_name = form.report_name.data - report_slug = slugify(report_name) - trait_list = list(form.trait_data.processed_data.columns[2:]) - now = arrow.utcnow().datetime - trait_set = [] - secret_hash = unique_id()[0:8] - for trait_name in trait_list: - trait = trait_ds() - trait_data = form.trait_data.processed_data[['ISOTYPE', 'STRAIN', trait_name]].dropna(how='any') \ - .to_csv(index=False, - sep="\t", - na_rep="NA") - trait.__dict__.update({ - 'report_name': report_name, - 'report_slug': report_slug, - 'trait_name': trait_name, - 'trait_list': list(form.trait_data.processed_data.columns[2:]), - 'trait_data': trait_data, - 'n_strains': int(form.trait_data.processed_data.STRAIN.count()), - 'created_on': now, - 'status': 'queued', - 'is_public': form.is_public.data == 'true', - 'CENDR_VERSION': CENDR_VERSION, - 'REPORT_VERSION': REPORT_VERSION, - 'DATASET_RELEASE': DATASET_RELEASE, - 'WORMBASE_VERSION': WORMBASE_VERSION, - 'username': user['username'], - 'user_id': user['user_id'], - 'user_email': user['user_email'] - }) - if trait.is_public is False: - trait.secret_hash = secret_hash - trait.run_task() - trait_set.append(trait) - # Update the report to contain the set of the - # latest task runs - transaction.commit() - - flash("Successfully submitted mapping!", 'success') - return redirect(url_for('mapping.report_view', - report_slug=report_slug, - trait_name=trait_list[0])) - - return render_template('mapping.html', **VARS) - - -@mapping_bp.route("/report//") -@mapping_bp.route("/report//") -@mapping_bp.route("/report///") -def report_view(report_slug, trait_name=None, rerun=None): - """ - This view will handle logic of handling legacy reports - and v2 reports. - - """ - - trait_set = query_item('trait', filters=[('report_slug', '=', report_slug)]) - - # Get first report if available. - try: - trait = trait_set[0] - except IndexError: - try: - trait_set = query_item('trait', filters=[('secret_hash', '=', report_slug)]) - trait = trait_set[0] - except IndexError: - flash('Cannot find report', 'danger') - return abort(404) - - # Enable reruns - if rerun: - trait_set = [x for x in trait_set if x['trait_name'] == trait_name] - for n, existing_trait in enumerate(trait_set): - logger.info(n) - logger.info(existing_trait.key) - delete_item(existing_trait) - trait = trait_ds(trait_set[0]) - - mapping_items = query_item('mapping', filters=[('report_slug', '=', report_slug), ('trait_slug', '=', trait_name)]) - for existing_mapping in mapping_items: - delete_item(existing_mapping) - - trait.status = "Rerunning" - # Running the task will save it. 
- trait.run_task() - return redirect(url_for('mapping.report_view', - report_slug=report_slug, - trait_name=trait_name)) - - # Verify user has permission to view report - user = session.get('user') - if not trait.get('is_public'): - if user: - user_id = user.get('user_id') - else: - user_id = None - if trait['secret_hash'] != report_slug and user_id != trait['user_id']: - flash('You do not have access to that report', 'danger') - return abort(404) - - if not trait_name: - logger.error("Trait name not found") - # Redirect to the first trait - return redirect(url_for('mapping.report_view', - report_slug=report_slug, - trait_name=trait_set[0]['trait_name'])) - - try: - # Resolve REPORT --> TRAIT - # Fetch trait and convert to trait object. - cur_trait = [x for x in trait_set if x['trait_name'] == trait_name][0] - trait = trait_ds(cur_trait.key.name) - trait.__dict__.update(cur_trait) - logger.info(trait) - except IndexError: - return abort(404) - - VARS = { - 'title': trait.report_name, - 'subtitle': trait_name, - 'trait_name': trait_name, - 'report_slug': report_slug, - 'trait': trait, - 'trait_set': trait_set, - 'BIOTYPES': BIOTYPES, - 'TABLE_COLORS': TABLE_COLORS, - 'n_peaks': 0 - } - - # Set status to error if the container is stopped and status is not set to complete. - if trait.container_status() == 'STOPPED' and trait.status != "complete": - trait.status = 'error' - trait.save() - - if trait.status == 'complete': - if trait.REPORT_VERSION == 'v1': - """ - VERSION 1 - """ - phenotype_data = trait.get_gs_as_dataset("tables/phenotype.tsv") - isotypes = list(phenotype_data.iloc[:, 1].dropna().values) - phenotype_data = list(phenotype_data.iloc[:, 3].values) - VARS.update({'phenotype_data': phenotype_data, - 'isotypes': isotypes}) - if trait.is_significant: - interval_summary = trait.get_gs_as_dataset("tables/interval_summary.tsv.gz") \ - .rename(index=str, columns={'gene_w_variants': 'genes w/ variants'}) - try: - variant_correlation = trait.get_gs_as_dataset("tables/variant_correlation.tsv.gz") - max_corr = variant_correlation.groupby(['gene_id', 'interval']).apply(lambda x: max(abs(x.correlation))) - max_corr = max_corr.reset_index().rename(index=str, columns={0: 'max_correlation'}) - variant_correlation = pd.merge(variant_correlation, max_corr, on=['gene_id', 'interval']) \ - .sort_values(['max_correlation', 'gene_id'], ascending=False) - except (urllib.error.HTTPError, pd.errors.EmptyDataError): - variant_correlation = [] - peak_summary = trait.get_gs_as_dataset("tables/peak_summary.tsv.gz") - peak_summary['interval'] = peak_summary.apply(lambda row: f"{row.chrom}:{row.interval_start}-{row.interval_end}", axis=1) - first_peak = peak_summary.iloc[0] - VARS.update({'peak_summary': peak_summary, - 'first_peak': first_peak, - 'n_peaks': len(peak_summary), - 'variant_correlation': variant_correlation, - 'interval_summary': interval_summary}) - - elif trait.REPORT_VERSION == "v2": - """ - VERSION 2 - """ - # If the mapping is complete: - # Phenotype plot - - phenotype_plot = plotly_distplot(trait._trait_df, trait_name) - VARS.update({'phenotype_plot': phenotype_plot}) - # Fetch datafiles for complete runs - VARS.update({'n_peaks': 0}) - if trait.is_significant: - peak_summary = trait.get_gs_as_dataset("peak_summary.tsv.gz") - try: - first_peak = peak_summary.loc[0] - chrom, interval_start, interval_end = re.split(":|\-", first_peak['interval']) - first_peak.chrom = chrom - first_peak.pos = int(first_peak['peak_pos'].split(":")[1]) - first_peak.interval_start = int(interval_start) - 
first_peak.interval_end = int(interval_end) - except: - first_peak = None - - try: - variant_correlation = trait.get_gs_as_dataset("interval_variants.tsv.gz") - except (pd.errors.EmptyDataError): - variant_correlation = pd.DataFrame() - - interval_summary = trait.get_gs_as_dataset("interval_summary.tsv.gz") \ - .rename(index=str, columns={'gene_w_variants': 'genes w/ variants'}) - - peak_marker_data = trait.get_gs_as_dataset("peak_markers.tsv.gz") - peak_summary = trait.get_gs_as_dataset("peak_summary.tsv.gz") - VARS.update({'pxg_plot': pxg_plot(peak_marker_data, trait_name), - 'interval_summary': interval_summary, - 'variant_correlation': variant_correlation, - 'peak_summary': peak_summary, - 'n_peaks': len(peak_summary), - 'isotypes': list(trait._trait_df.ISOTYPE.values), - 'first_peak': first_peak}) - - # To handle report data, functions specific - # to the version will be required. - - report_template = f"reports/{trait.REPORT_VERSION}.html" - return render_template(report_template, **VARS) + """ + This is the mapping submission page. + """ + title = 'Perform Mapping' + jwt_csrf_token = (get_jwt() or {}).get("csrf") + form = file_upload_form() + return render_template('mapping.html', **locals()) diff --git a/base/views/order.py b/base/views/order.py index 629f56b6..d97c709d 100644 --- a/base/views/order.py +++ b/base/views/order.py @@ -7,13 +7,14 @@ """ import uuid +from flask import render_template, request, url_for, redirect, Blueprint, abort, flash + from base.forms import order_form from base.config import config from base.utils.email import send_email, ORDER_SUBMISSION_EMAIL from base.utils.google_sheets import add_to_order_ws, lookup_order -from flask import render_template, request, url_for, redirect, Blueprint, abort, flash, session -from datetime import datetime -from base.utils.data_utils import chicago_date, hash_it +from base.utils.data_utils import chicago_date +from base.utils.jwt_utils import jwt_required, get_current_user order_bp = Blueprint('order', __name__, @@ -31,18 +32,19 @@ @order_bp.route("/", methods=['GET', 'POST']) def order(): - return redirect(url_for('strain.strain_catalog')) - + return redirect(url_for('strains.strains_catalog')) @order_bp.route('/create', methods=['GET', 'POST']) +@jwt_required(optional=True) def order_page(): """ This view handles the order page. """ form = order_form() - if session.get('user') and not form.email.data: - form.email.data = session.get('user')['user_email'] + user = get_current_user() + if user and hasattr(user, 'email') and not form.email.data: + form.email.data = user.email # Fetch items items = form.items.data @@ -51,7 +53,7 @@ def order_page(): if (len(items) == 0): flash("You must select strains/sets from the catalog", 'error') - return redirect(url_for("strain.strain_catalog")) + return redirect(url_for("strains.strains_catalog")) # Is the user coming from the catalog? 
    user_from_catalog = request.form.get('from_catalog') == "true"
diff --git a/base/views/primary.py b/base/views/primary.py
index 7ac29337..f8b27c18 100644
--- a/base/views/primary.py
+++ b/base/views/primary.py
@@ -31,6 +31,7 @@ def primary():
     files = sorted_files("base/static/content/news/")
     VARS = {'page_title': page_title,
             'files': files,
+            'fluid_container': True,
             'latest_mappings': get_latest_public_mappings()}
     return render_template('primary/home.html', **VARS)
diff --git a/base/views/strains.py b/base/views/strains.py
index 7300b687..00b383d6 100644
--- a/base/views/strains.py
+++ b/base/views/strains.py
@@ -19,39 +19,71 @@
 from base.config import config
 from logzero import logger

-strain_bp = Blueprint('strain',
+strains_bp = Blueprint('strains',
                       __name__,
                       template_folder='strain')

-
 #
-# Global Strain Map
+# Strain List Page
 #
-@strain_bp.route('/')
-def strain():
+@strains_bp.route('/')
+@cache.memoize(50)
+def strains():
     """
-    Redirect base route to the global strain map
+    Render the strain list page at the base route
     """
-    return redirect(url_for('strain.map_page'))
+    disable_parent_breadcrumb = True
+    return render_template('strain/strains.html', **locals())

+@strains_bp.route('/map')
+@cache.memoize(50)
+def strains_map():
+    """
+    Render the global strain map page
+    """
+    title = 'Strain Map'
+    strains = get_strains()
+    strain_listing = [s.to_json() for s in strains]
+    return render_template('strain/strains_map.html', **locals())

-@strain_bp.route('/global-strain-map')
+@strains_bp.route('/isotype_list')
 @cache.memoize(50)
-def map_page():
+def strains_list():
     """
-    Global strain map shows the locations of all wild isolates
-    within the SQLite database.
+    Render the isotype list: a table of all wild isolate strains
+    within the SQLite database
     """
-    VARS = {'title': "Global Strain Map",
-            'strain_listing': dump_json(get_strains(known_origin=True))}
-    return render_template('strain/global_strain_map.html', **VARS)
+    VARS = {
+        'title': 'Isotype List',
+        'strain_listing': get_strains()}
+    return render_template('strain/strains_list.html', **VARS)
+
+@strains_bp.route('/issues')
+@cache.memoize(50)
+def strains_issues():
+    """
+    Render the table of strain issues for the latest data release
+    """
+    VARS = {
+        'title': 'Strain Issues',
+        'strain_listing_issues': get_strains(issues=True)}
+    return render_template('strain/strain_issues.html', **VARS)
+
+
+@strains_bp.route('/external-links')
+@cache.memoize(50)
+def external_links():
+    """
+    Render the table of external links
+    """
+    title = 'External Links'
+    return render_template('strain/external_links.html', **locals())

 #
 # Strain Data
 #
-
-@strain_bp.route('/CelegansStrainData.tsv')
-def strain_data_tsv():
+@strains_bp.route('/CelegansStrainData.tsv')
+def strains_data_tsv():
     """
     Dumps strain dataset; Normalizes lat/lon on the way out.
""" @@ -72,8 +104,8 @@ def generate(): # # Isotype View # -@strain_bp.route('/isotype//') -@strain_bp.route('/isotype//') +@strains_bp.route('/isotype//') +@strains_bp.route('/isotype//') @cache.memoize(50) def isotype_page(isotype_name, release=config['DATASET_RELEASE']): """ @@ -107,23 +139,23 @@ def isotype_page(isotype_name, release=config['DATASET_RELEASE']): # Strain Catalog # -@strain_bp.route('/catalog', methods=['GET', 'POST']) +@strains_bp.route('/catalog', methods=['GET', 'POST']) @cache.memoize(50) -def strain_catalog(): - flash(Markup("Strain mapping sets 7 and 8 will not be available until later this year."), category="warning") - VARS = {"title": "Strain Catalog", - "warning": request.args.get('warning'), - "strain_listing": get_strains(), - "strain_sets": Strain.strain_sets() } - return render_template('strain/strain_catalog.html', **VARS) +def strains_catalog(): + flash(Markup("Strain mapping sets 9 and 10 will not be available until later this year."), category="warning") + title = "Strain Catalog" + warning = request.args.get('warning') + strain_listing = get_strains() + strain_sets = Strain.strain_sets() + return render_template('strain/strain_catalog.html', **locals()) # # Strain Submission # -@strain_bp.route('/submit') -def strain_submission_page(): +@strains_bp.route('/submit') +def strains_submission_page(): """ Google form for submitting strains """ @@ -135,7 +167,7 @@ def strain_submission_page(): # Protocols # -@strain_bp.route("/protocols") +@strains_bp.route("/protocols") @cache.cached(timeout=50) def protocols(): title = "Protocols" diff --git a/base/views/tools/heritability.py b/base/views/tools/heritability.py index e61a4aab..b57d9aa6 100644 --- a/base/views/tools/heritability.py +++ b/base/views/tools/heritability.py @@ -1,23 +1,25 @@ import io -import requests -import statistics as st - -import numpy as np import pandas as pd +import json -from base.utils.data_utils import hash_it -from base.utils.gcloud import check_blob, upload_file -from base.config import config -from base.forms import heritability_form - -from flask import (request, +from flask import (flash, + request, + redirect, + url_for, jsonify, render_template, Blueprint, abort) from logzero import logger from datetime import datetime -from threading import Thread + +from base.config import config +from base.views.api.api_strain import get_strains +from base.utils.data_utils import hash_it, unique_id +from base.utils.jwt_utils import jwt_required, get_jwt, get_current_user +from base.utils.gcloud import check_blob, upload_file, add_task +from base.forms import heritability_form +from base.models import h2calc_ds # ================== # # heritability # @@ -28,113 +30,163 @@ __name__, template_folder='tools') -@heritability_bp.route('/heritability') -def heritability(): - VARS = {"title": "Heritability Calculator", - "form": heritability_form()} - return render_template('tools/heritability_calculator.html', **VARS) +def create_h2_task(data_hash, ds_id, ds_kind): + """ + This is designed to be run in the background on the server. + It will run a heritability analysis on google cloud run + """ + hr = h2calc_ds(ds_id) + + # Perform h2 request + queue = config['HERITABILITY_CALC_TASK_QUEUE'] + url = config['HERITABILITY_CALC_URL'] + data = {'hash': data_hash, 'ds_id': ds_id, 'ds_kind': ds_kind} + result = add_task(queue, url, data, task_name=data_hash) -def h2_task(data, data_hash): - """ - This is designed to be run in the background on the server. 
- It will run a heritability analysis on google cloud run - """ - # Perform h2 request - result = requests.post(config['HERITABILITY_URL'], data={'data': data, - 'hash': data_hash}) - logger.debug(result) + # Update report status + hr.status = 'SCHEDULED' if result else 'FAILED' + hr.save() +@heritability_bp.route('/heritability') +def heritability(): + title = "Heritability Calculator" + alt_parent_breadcrumb = {"title": "Tools", "url": url_for('tools.tools')} + form = heritability_form() + hide_form = True + strain_list = [] + return render_template('tools/heritability_calculator.html', **locals()) + + +@heritability_bp.route('/heritability/create', methods=["GET"]) +@jwt_required() +def heritability_create(): + """ + This endpoint is used to create a heritability job. + """ + title = "Heritability Calculator" + alt_parent_breadcrumb = {"title": "Tools", "url": url_for('tools.tools')} + jwt_csrf_token = (get_jwt() or {}).get("csrf") + form = heritability_form() + strain_data = get_strains() + strain_list = [] + for x in strain_data: + strain_list.append(x.strain) + + hide_form = False + id = unique_id() + return render_template('tools/heritability_calculator.html', **locals()) + + @heritability_bp.route('/heritability/submit', methods=["POST"]) +@jwt_required() def submit_h2(): - """ - This endpoint is used to submit a heritability job. - The endpoint request is executed as a background task to keep the job alive. - """ - data = request.get_json() - data = [x for x in data[1:] if x[0] is not None] - header = ["AssayNumber", "Strain", "TraitName", "Replicate", "Value"] - data = pd.DataFrame(data, columns=header) - data = data.to_csv(index=False, sep="\t") - - # Generate an ID for the data based on its hash - data_hash = hash_it(data, length=32) - logger.debug(data_hash) - - # Upload data immediately. - data_blob = f"reports/heritability/{data_hash}/data.tsv" - upload_file(data_blob, data, as_string=True) - - thread = Thread(target=h2_task, args=(data, data_hash,)) - thread.daemon = True - thread.start() - return jsonify({'thread_name': str(thread.name), + """ + This endpoint is used to submit a heritability job. + The endpoint request is executed as a background task to keep the job alive. + """ + user = get_current_user() + label = request.values['label'] + + # Process data into tsv + data = json.loads(request.values['table_data']) + data = [x for x in data[1:] if x[0] is not None] + header = ["AssayNumber", "Strain", "TraitName", "Replicate", "Value"] + data = pd.DataFrame(data, columns=header) + trait = data.values[0][2] + data = data.to_csv(index=False, sep="\t") + + # Generate an ID for the data based on its hash + data_hash = hash_it(data, length=32) + logger.debug(data_hash) + + # Store the report info for user in datastore + id = unique_id() + hr = h2calc_ds(id) + hr.label = label + hr.data_hash = data_hash + hr.username = user.name + hr.status = 'NEW' + hr.trait = trait + hr.save() + + # Check whether analysis has previously been run and if so - skip + result = check_blob(f"reports/heritability/{data_hash}/result.tsv") + if result: + hr.status = 'COMPLETE' + hr.save() + return jsonify({'thread_name': 'done', 'started': True, - 'data_hash': data_hash}) - - -@heritability_bp.route('/heritability', methods=["POST"]) -def check_data(): - """ - This check is used to report on the: - - Minimum - Maximum - Quartiles: 25, 50, 75 - Variance - - using an AJAX request - it appears at the bottom - before the user submits. 
- """ - data = request.get_json() - data = [x for x in data[1:] if x[0] is not None] - header = ["AssayNumber", "Strain", "TraitName", "Replicate", "Value"] - data = pd.DataFrame(data, columns=header) - - # filter missing - data = data[data.Value.apply(lambda x: x not in [None, "", "NA"])] - - # Convert to list - data = data.Value.astype(float).tolist() - - result = {} - result["variance"] = "{:.2f}".format(st.variance(data)) - result["sd"] = "{:.2f}".format(st.stdev(data)) - result["minimum"] = "{:.2f}".format(min(data)) - result["maximum"] = "{:.2f}".format(max(data)) - # Calculate quartiles - All_quartiles = np.percentile(data, [25, 50, 75]) - result["25"] = "{:.2f}".format(All_quartiles[0]) - result["50"] = "{:.2f}".format(All_quartiles[1]) - result["75"] = "{:.2f}".format(All_quartiles[2]) - return result - - -@heritability_bp.route("/heritability/h2/") -def heritability_result(data_hash): - title = "Heritability Results" - data = check_blob(f"reports/heritability/{data_hash}/data.tsv") - result = check_blob(f"reports/heritability/{data_hash}/result.tsv") - ready = False - - if data is None: - return abort(404, description="Heritability report not found") - data = data.download_as_string().decode('utf-8') - data = pd.read_csv(io.StringIO(data), sep="\t") - data['AssayNumber'] = data['AssayNumber'].astype(str) - data['label'] = data.apply(lambda x: f"{x['AssayNumber']}: {x['Value']}", 1) - data = data.to_dict('records') - trait = data[0]['TraitName'] - # Get trait and set title - title = f"Heritability Results: {trait}" - - if result: - result = result.download_as_string().decode('utf-8') - result = pd.read_csv(io.StringIO(result), sep="\t") - result = result.to_dict('records')[0] - - fnam=datetime.today().strftime('%Y%m%d.')+trait - ready = True - - return render_template("tools/heritability_results.html", **locals()) + 'data_hash': data_hash, + 'id': id}) + + # Upload data immediately. 
+    data_blob = f"reports/heritability/{data_hash}/data.tsv"
+    upload_file(data_blob, data, as_string=True)
+    hr.status = 'RECEIVED'
+    hr.save()
+
+    # Schedule the task
+    create_h2_task(data_hash, id, hr.kind)
+    return jsonify({'started': True,
+                    'data_hash': data_hash,
+                    'id': id})
+
+
+@heritability_bp.route("/heritability/h2/<id>")
+@jwt_required()
+def heritability_result(id):
+    title = "Heritability Results"
+    alt_parent_breadcrumb = {"title": "Tools", "url": url_for('tools.tools')}
+    user = get_current_user()
+    hr = h2calc_ds(id)
+    ready = False
+
+    if (not hr._exists) or (hr.username != user.name):
+        flash('You do not have access to that report', 'danger')
+        abort(401)
+
+    data_hash = hr.data_hash
+    data = check_blob(f"reports/heritability/{data_hash}/data.tsv")
+    result = check_blob(f"reports/heritability/{data_hash}/result.tsv")
+
+    if data is None:
+        hr.status = 'NOT FOUND'
+        hr.save()
+        return abort(404, description="Heritability report not found")
+    data = data.download_as_string().decode('utf-8')
+    data = pd.read_csv(io.StringIO(data), sep="\t")
+    data['AssayNumber'] = data['AssayNumber'].astype(str)
+    data['label'] = data.apply(lambda x: f"{x['AssayNumber']}: {x['Value']}", 1)
+    data = data.to_dict('records')
+    trait = data[0]['TraitName']
+    # Get trait and set title
+    subtitle = trait
+
+    if result:
+        hr.status = 'COMPLETE'
+        hr.save()
+        result = result.download_as_string().decode('utf-8')
+        result = pd.read_csv(io.StringIO(result), sep="\t")
+        result = result.to_dict('records')[0]
+
+    fnam=datetime.today().strftime('%Y%m%d.')+trait
+    ready = True
+
+    return render_template("tools/heritability_results.html", **locals())
+
+
+@heritability_bp.route("/heritability/h2/all")
+@jwt_required()
+def heritability_result_list():
+    title = "Heritability Results"
+    alt_parent_breadcrumb = {"title": "Tools", "url": url_for('tools.tools')}
+    user = get_current_user()
+    items = h2calc_ds().query_by_username(user.name)
+    items = sorted(items, key=lambda x: x['created_on'], reverse=True)
+    for x in items:
+        data_hash = x['data_hash']
+        if check_blob(f"reports/heritability/{data_hash}/result.tsv"):
+            x.status = 'COMPLETE'
+    return render_template('tools/h2_result_list.html', **locals())
diff --git a/base/views/tools/heritability_run/.envrc b/base/views/tools/heritability_run/.envrc
deleted file mode 100644
index b15726f9..00000000
--- a/base/views/tools/heritability_run/.envrc
+++ /dev/null
@@ -1,2 +0,0 @@
-export GOOGLE_APPLICATION_CREDENTIALS=../../../../env_config/client-secret.json
-
diff --git a/base/views/tools/heritability_run/.gcloudignore b/base/views/tools/heritability_run/.gcloudignore
deleted file mode 100644
index 552c4088..00000000
--- a/base/views/tools/heritability_run/.gcloudignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.tsv
-*.csv
-server
\ No newline at end of file
diff --git a/base/views/tools/heritability_run/Dockerfile b/base/views/tools/heritability_run/Dockerfile
deleted file mode 100644
index aff165f2..00000000
--- a/base/views/tools/heritability_run/Dockerfile
+++ /dev/null
@@ -1,35 +0,0 @@
-FROM continuumio/miniconda3
-RUN apt-get update && apt-get install -y procps && \
-    apt-get clean
-RUN conda config --add channels defaults && \
-    conda config --add channels bioconda && \
-    conda config --add channels conda-forge
-RUN conda create -n heritability \
-    conda-forge::go=1.13.15 \
-    r=3.6.0 \
-    r-lme4 \
-    r-dplyr \
-    r-boot \
-    r-data.table \
-    r-futile.logger \
-    && conda clean -a
-ENV PATH /opt/conda/envs/heritability/bin:$PATH
-LABEL Name="heritability" Author="Daniel Cook"
-
-# Create and change to the app directory.
-WORKDIR /app
-
-# Copy local code to the container image.
-COPY invoke.go ./
-
-# Install google cloud storage
-RUN CGO_ENABLED=0 GOOS=linux go get cloud.google.com/go/storage
-
-# Build the binary.
-# -mod=readonly ensures immutable go.mod and go.sum in container builds.
-RUN CGO_ENABLED=0 GOOS=linux go build -v -o server
-
-COPY ./H2_script.R ./
-
-# Run the web service on container startup.
-CMD ["/app/server"]
\ No newline at end of file
diff --git a/base/views/tools/heritability_run/H2_script.R b/base/views/tools/heritability_run/H2_script.R
deleted file mode 100644
index 3e37e1d0..00000000
--- a/base/views/tools/heritability_run/H2_script.R
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/env Rscript H2_script.R
-library(boot)
-library(lme4)
-library(dplyr)
-library(futile.logger)
-
-########################
-### define functions ###
-########################
-# Heritability
-# data is a data frame that contains Strain and Value columns
-# indices are used by the boot function to sample from the 'data' data.frame
-H2.test.boot <- function(data, indices){
-
-  d <- data[indices,]
-
-  pheno <- as.data.frame(dplyr::select(d, Value))[,1]
-
-  Strain <- as.factor(d$Strain)
-
-  reffMod <- lme4::lmer(pheno ~ 1 + (1|Strain))
-
-  Variances <- as.data.frame(lme4::VarCorr(reffMod, comp = "Variance"))
-
-  Vg <- Variances$vcov[1]
-  Ve <- Variances$vcov[2]
-  H2 <- Vg/(Vg+Ve)
-
-  # errors <- sqrt(diag(lme4::VarCorr(reffMod, comp = "Variance")$strain))
-
-  return(H2)
-}
-
-# data is a data frame that contains Strain and Value columns
-H2.test <- function(data){
-
-  pheno <- as.data.frame(dplyr::select(data, Value))[,1]
-  Strain <- as.factor(data$Strain)
-
-  reffMod <- lme4::lmer(pheno ~ 1 + (1|Strain))
-
-  Variances <- as.data.frame(lme4::VarCorr(reffMod, comp = "Variance"))
-
-  Vg <- Variances$vcov[1]
-  Ve <- Variances$vcov[2]
-  H2 <- Vg/(Vg+Ve)
-
-  # errors <- sqrt(diag(lme4::VarCorr(reffMod, comp = "Variance")$strain))
-
-  return(H2)
-}
-
-# data is a data frame that contains Strain and Value columns
-H2.calc <- function(data, boot = TRUE){
-  df <- dplyr::select(data, Strain, Value)
-
-  flog.info("Running bootstrapping")
-  if(boot == TRUE){
-    # bootstrapping with 10000 replications
-    # can reduce value to save time (500 is reasonable most of the time).
-    # if you get "Error in bca.ci(boot.out, conf, index[1L], L = L, t = t.o, t0 = t0.o, : estimated adjustment 'a' is NA", then you need to increase the R value.
-    results <- boot(data=df, statistic=H2.test.boot, R=10000)
-
-    # get 95% confidence interval
-    ci <- boot.ci(results, type="bca")
-
-    H2_errors <- data.frame(H2 = ci$t0,
-                            ci_l = ci$bca[4],
-                            ci_r = ci$bca[5])
-
-    return(H2_errors)
-
-  } else {
-    H2 <- data.frame(H2 = H2.test(data = df), ci_l = NA, ci_r = NA)
-    return(H2)
-  }
-
-}
-
-# RUN
-args = commandArgs(trailingOnly=TRUE)
-usage_cmd = "USAGE: Rscript H2_script.R input_file output_file hash_file version"
-if (length(args) < 2){
-  print(usage_cmd)
-}
-
-# Read in data
-input_data = args[1]
-output_fname = args[2]
-hash = args[3]
-heritability_version = args[4]
-data <- data.table::fread(input_data)
-hash <- readLines(hash)
-
-# Run H2 calculation
-result <- H2.calc(data, boot = T)
-
-result$hash <- hash
-result$trait_name <- data$TraitName[[1]]
-result$date <- Sys.time()
-result$heritability_version <- heritability_version
-
-# Write the result
-data.table::fwrite(result, output_fname, sep = '\t')
diff --git a/base/views/tools/heritability_run/README.md b/base/views/tools/heritability_run/README.md
deleted file mode 100644
index 0cafb5af..00000000
--- a/base/views/tools/heritability_run/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-This directory contains the code to start the Google Cloud Run Microservice for the Heritability Tool
-
-Build using:
-
-```bash
-gcloud builds submit --tag gcr.io/andersen-lab/h2 --timeout=3h
-gcloud run deploy --image gcr.io/andersen-lab/h2 --platform managed h2
-```
\ No newline at end of file
diff --git a/base/views/tools/heritability_run/invoke.go b/base/views/tools/heritability_run/invoke.go
deleted file mode 100644
index aa278445..00000000
--- a/base/views/tools/heritability_run/invoke.go
+++ /dev/null
@@ -1,108 +0,0 @@
-package main
-
-// VERSION v1
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"io"
-	"log"
-	"net/http"
-	"os"
-	"os/exec"
-
-	"cloud.google.com/go/storage"
-)
-
-const heritabilityVersion = "v1"
-const datasetName = "data.tsv"
-const resultName = "result.tsv"
-
-func check(e error) {
-	if e != nil {
-		panic(e)
-	}
-}
-
-func copyBlob(bucket string, source string, blob string) {
-	log.Printf("%s --> %s", source, blob)
-	ctx := context.Background()
-	client, err := storage.NewClient(ctx)
-	check(err)
-
-	// source
-	f, err := os.Open(source)
-	if err != nil {
-		log.Fatal(err)
-	}
-	defer f.Close()
-
-	wc := client.Bucket(bucket).Object(blob).NewWriter(ctx)
-	_, err = io.Copy(wc, f)
-	check(err)
-	err = wc.Close()
-	check(err)
-}
-
-func fileExists(filename string) bool {
-	info, err := os.Stat(filename)
-	if os.IsNotExist(err) {
-		return false
-	}
-	return !info.IsDir()
-}
-
-func runTask(dataHash string) {
-	// Run the heritability analysis. This is used to run it in the background.
-	// Execute R script
-	cmd := exec.Command("Rscript", "H2_script.R", datasetName, resultName, "hash.txt", heritabilityVersion)
-	cmd.Stderr = os.Stderr
-	_, err := cmd.Output()
-	check(err)
-
-	// Copy results to google storage.
-	resultBlob := fmt.Sprintf("reports/heritability/%s/%s", dataHash, resultName)
-	copyBlob("elegansvariation.org", resultName, resultBlob)
-}
-
-func runH2(w http.ResponseWriter, r *http.Request) {
-	/*
-	   Runs the heritability analysis
-	*/
-
-	// Get POST data and save as TSV
-	data := r.FormValue("data")
-	dataHash := r.FormValue("hash")
-	log.Printf("hash: %s", dataHash)
-	f, err := os.Create(datasetName)
-	check(err)
-	f.WriteString(data)
-	f.Close()
-
-	// Write the hash
-	h, err := os.Create("hash.txt")
-	check(err)
-	h.WriteString(dataHash)
-	h.Close()
-
-	runTask(dataHash)
-
-	if err := json.NewEncoder(w).Encode("submitted h2"); err != nil {
-		log.Printf("Error sending response: %v", err)
-	}
-
-}
-
-func main() {
-
-	http.HandleFunc("/", runH2)
-
-	port := os.Getenv("PORT")
-	if port == "" {
-		port = "8080"
-	}
-
-	log.Printf("listening on %s", port)
-	log.Fatal(http.ListenAndServe(fmt.Sprintf(":%s", port), nil))
-}
diff --git a/base/views/tools/indel_primer.py b/base/views/tools/indel_primer.py
index ae978221..28af50f8 100644
--- a/base/views/tools/indel_primer.py
+++ b/base/views/tools/indel_primer.py
@@ -8,6 +8,8 @@
 import pandas as pd
 from cyvcf2 import VCF
 from flask import (Blueprint,
+                   flash,
+                   url_for,
                    jsonify,
                    render_template,
                    request,
@@ -17,12 +19,14 @@
 from logzero import logger
 from wtforms import IntegerField, SelectField
 from wtforms.validators import Required, ValidationError
+from threading import Thread

+from base.constants import CHROM_NUMERIC, GOOGLE_CLOUD_BUCKET
 from base.config import config
-from base.utils.gcloud import check_blob, upload_file
-from base.utils.data_utils import hash_it
-from base.constants import CHROM_NUMERIC
-from threading import Thread
+from base.models import ip_calc_ds
+from base.utils.gcloud import add_task, check_blob, upload_file
+from base.utils.data_utils import hash_it, unique_id
+from base.utils.jwt_utils import jwt_required, get_jwt, get_current_user

 # Tools blueprint
 indel_primer_bp = Blueprint('indel_primer',
@@ -39,8 +43,9 @@
 # Initial load of strain list from sv_data
 # This is run when the server is started.
 # NOTE: Tabix cannot make requests over https!
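 # (hence the plain http:// scheme below, and the https -> http rewrite
 # applied to the VCF URL before it is handed to the remote worker)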
-SV_BED_URL = "http://storage.googleapis.com/elegansvariation.org/tools/pairwise_indel_primer/sv.20200815.bed.gz" -SV_VCF_URL = "https://storage.googleapis.com/elegansvariation.org/tools/pairwise_indel_primer/sv.20200815.vcf.gz" +SV_BED_URL = f"http://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/tools/pairwise_indel_primer/sv.20200815.bed.gz" +SV_VCF_URL = f"http://storage.googleapis.com/{GOOGLE_CLOUD_BUCKET}/tools/pairwise_indel_primer/sv.20200815.vcf.gz" + SV_STRAINS = VCF(SV_VCF_URL).samples SV_COLUMNS = ["CHROM", "START", @@ -100,17 +105,32 @@ class pairwise_indel_form(Form): stop = FlexIntegerField('Stop', default="2,039,217", validators=[Required()]) + +@indel_primer_bp.route("/pairwise_indel_finder/ip/all") +@jwt_required() +def indel_primer_result_list(): + title = "Indel Primer Results" + alt_parent_breadcrumb = {"title": "Tools", "url": url_for('tools.tools')} + user = get_current_user() + items = ip_calc_ds().query_by_username(user.name) + items = sorted(items, key=lambda x: x['created_on'], reverse=True) + return render_template('tools/ip_result_list.html', **locals()) + + @indel_primer_bp.route('/pairwise_indel_finder', methods=['GET']) +@jwt_required() def indel_primer(): """ Main view """ form = pairwise_indel_form(request.form) - VARS = {"title": "Pairwise Indel Finder", - "strains": SV_STRAINS, - "chroms": CHROM_NUMERIC.keys(), - "form": form} - return render_template('tools/indel_primer.html', **VARS) + title = "Pairwise Indel Finder" + strains = SV_STRAINS, + chroms = CHROM_NUMERIC.keys(), + fluid_container = True + alt_parent_breadcrumb = {"title": "Tools", "url": url_for('tools.tools')} + + return render_template('tools/indel_primer.html', **locals()) def overlaps(s1, e1, s2, e2): @@ -118,6 +138,7 @@ def overlaps(s1, e1, s2, e2): @indel_primer_bp.route("/pairwise_indel_finder/query_indels", methods=["POST"]) +@jwt_required() def pairwise_indel_finder_query(): form = pairwise_indel_form() if form.validate_on_submit(): @@ -154,61 +175,88 @@ def pairwise_indel_finder_query(): return jsonify(results=[]) return jsonify({"errors": form.errors}) - -def indel_primer_task(data_hash, site, strain1, strain2, vcf_url): - """ - This is designed to be run in the background on the server. - It will run a an indel_primer request using google cloud run - """ - # Perform indel_primer_request - logger.info("Submitting Primer Task") - result = requests.post(config['INDEL_PRIMER_URL'], data={'hash': data_hash, - 'site': site, - 'strain1': strain1, - 'strain2': strain2, - 'vcf_url': vcf_url.replace("https", "http")}) - logger.debug(result) +def create_ip_task(id, data_hash, site, strain1, strain2, vcf_url): + """ + This is designed to be run in the background on the server. + It will run a heritability analysis on google cloud run + """ + logger.debug("Submitting Indel Primer Job") + ip = ip_calc_ds(id) + + # Perform ip request + queue = config['INDEL_PRIMER_TASK_QUEUE'] + url = config['INDEL_PRIMER_URL'] + data = { 'hash': data_hash, + 'site': site, + 'strain1': strain1, + 'strain2': strain2, + 'vcf_url': vcf_url.replace("https", "http"), + 'ds_id': id, + 'ds_kind': ip.kind } + + result = add_task(queue, url, data, task_name=data_hash) + + # Update report status + ip.status = 'SCHEDULED' if result else 'FAILED' + ip.save() @indel_primer_bp.route('/pairwise_indel_finder/submit', methods=["POST"]) +@jwt_required() def submit_indel_primer(): """ This endpoint is used to submit an indel primer job. The endpoint request is executed as a background task to keep the job alive. 
""" data = request.get_json() + user = get_current_user() + id = unique_id() + ip = ip_calc_ds(id) + ip.username = user.name # Generate an ID for the data based on its hash data_hash = hash_it(data, length=32) data['date'] = str(arrow.utcnow()) + ip.data_hash = data_hash + ip.site = data.get('site') + ip.strain1 = data.get('strain_1') + ip.strain2 = data.get('strain_2') + ip.save() # Check whether analysis has previously been run and if so - skip result = check_blob(f"reports/indel_primer/{data_hash}/results.tsv") if result: - return jsonify({'thread_name': 'done', - 'started': True, - 'data_hash': data_hash}) + ip.status = 'COMPLETE' + ip.save() + return jsonify({'thread_name': 'done', + 'started': True, + 'data_hash': data_hash, + 'id': id}) - logger.debug("Submitting Indel Primer Job") # Upload query information data_blob = f"reports/indel_primer/{data_hash}/input.json" upload_file(data_blob, json.dumps(data), as_string=True) + create_ip_task(id=id, data_hash=data_hash, site=data.get('site'), strain1=data.get('strain_1'), strain2=data.get('strain_2'), vcf_url=SV_VCF_URL) + + return jsonify({'started': True, + 'data_hash': data_hash, + 'id': id }) + + +@indel_primer_bp.route("/indel_primer/result/") +@indel_primer_bp.route("/indel_primer/result//tsv/") +@jwt_required() +def pairwise_indel_query_results(id, filename = None): + alt_parent_breadcrumb = {"title": "Tools", "url": url_for('tools.tools')} + user = get_current_user() + ip = ip_calc_ds(id) + + if (not ip._exists) or (ip.username != user.name): + flash('You do not have access to that report', 'danger') + abort(401) + + data_hash = ip.data_hash - thread = Thread(target=indel_primer_task, args=(data_hash, - data['site'], - data['strain_1'], - data['strain_2'], - SV_VCF_URL)) - thread.daemon = True - thread.start() - return jsonify({'thread_name': str(thread.name), - 'started': True, - 'data_hash': data_hash}) - - -@indel_primer_bp.route("/indel_primer/result/") -@indel_primer_bp.route("/indel_primer/result//tsv/") -def pairwise_indel_query_results(data_hash, filename = None): title = "Indel Primer Results" data = check_blob(f"reports/indel_primer/{data_hash}/input.json") result = check_blob(f"reports/indel_primer/{data_hash}/results.tsv") @@ -216,7 +264,6 @@ def pairwise_indel_query_results(data_hash, filename = None): if data is None: return abort(404, description="Indel primer report not found") - logger.debug(data.download_as_string().decode("utf-8")) data = json.loads(data.download_as_string().decode('utf-8')) logger.info(data) # Get trait and set title @@ -235,6 +282,9 @@ def pairwise_indel_query_results(data_hash, filename = None): # Check for no results empty = True if len(result) == 0 else False ready = True + ip.status = 'COMPLETE' + ip.empty = empty + ip.save() if empty is False: # left primer result['left_primer_start'] = result.amplicon_region.apply(lambda x: x.split(":")[1].split("-")[0]).astype(int) diff --git a/base/views/tools/indel_primer_run/Dockerfile b/base/views/tools/indel_primer_run/Dockerfile deleted file mode 100644 index 8c15e3ff..00000000 --- a/base/views/tools/indel_primer_run/Dockerfile +++ /dev/null @@ -1,56 +0,0 @@ -FROM continuumio/miniconda3 -RUN apt-get update && apt-get install -y procps && \ - apt-get clean -RUN conda config --add channels defaults && \ - conda config --add channels bioconda && \ - conda config --add channels conda-forge -RUN conda create -n vcf-kit \ - conda-forge::go=1.13.15 \ - python=3.7 \ - pytest \ - pytest-cov \ - coveralls \ - biopython \ - logzero \ - curl \ - 
"matplotlib>=1.3.1" \ - "scipy>=0.13.3" \ - "numpy>=1.8.0" \ - "cython>=0.20.1" \ - "cyvcf2>=0.6.5" \ - "intervaltree>=2.1.0" \ - "networkx>=1.11" \ - "bwa>=0.7.12" \ - "samtools>=1.10" \ - "bcftools>=1.10" \ - "blast>=2.2.31" \ - "muscle>=3.8.31" \ - "primer3>=2.5.0" - -ENV PATH /opt/conda/envs/vcf-kit/bin:$PATH - -LABEL Name="vcf-kit" Author="Daniel Cook" - -# Now install VCF-Kit 0.2.8 -RUN git clone https://www.github.com/andersenlab/vcf-kit && \ - cd vcf-kit && \ - git checkout 25c7c032628ffd02f4c474758dec7402979c2b12 && \ - python setup.py install - -RUN vk genome wormbase --ref=WS276 - -# Create and change to the app directory. -WORKDIR /app - -# Copy local code to the container image. -COPY indel_primer_invoke.go ./ - -# Install google cloud storage -RUN CGO_ENABLED=0 GOOS=linux go get cloud.google.com/go/storage - -# Build the binary. -# -mod=readonly ensures immutable go.mod and go.sum in container builds. . -RUN CGO_ENABLED=0 GOOS=linux go build -v -o server - -# Run the web service on container startup. -CMD ["/app/server"] diff --git a/base/views/tools/indel_primer_run/README.md b/base/views/tools/indel_primer_run/README.md deleted file mode 100644 index 28f9e727..00000000 --- a/base/views/tools/indel_primer_run/README.md +++ /dev/null @@ -1,8 +0,0 @@ -This directory contains the code to start the Google Cloud Run Microservice for the Indel Primer Tool - -Build using: - -```bash -gcloud builds submit --tag gcr.io/andersen-lab/indel_primer --timeout=3h -gcloud run deploy --image gcr.io/andersen-lab/indel_primer --platform managed indel-primer -``` diff --git a/base/views/tools/indel_primer_run/indel_primer_invoke.go b/base/views/tools/indel_primer_run/indel_primer_invoke.go deleted file mode 100644 index 94126250..00000000 --- a/base/views/tools/indel_primer_run/indel_primer_invoke.go +++ /dev/null @@ -1,142 +0,0 @@ -package main - -// VERSION v1 - -import ( - "context" - "encoding/json" - "fmt" - "io" - "io/ioutil" - "log" - "net/http" - "os" - "os/exec" - "path" - - "cloud.google.com/go/storage" -) - -const resultName = "results.tsv" - -func check(e error) { - if e != nil { - log.Fatal(e) - } -} - -func downloadFile(filepath string, url string) error { - - // Get the data - resp, err := http.Get(url) - if err != nil { - return err - } - defer resp.Body.Close() - - // Create the file - out, err := os.Create(filepath) - if err != nil { - return err - } - defer out.Close() - - // Write the body to file - _, err = io.Copy(out, resp.Body) - return err -} - -func copyBlob(bucket string, source string, blob string) { - log.Printf("%s --> %s", source, blob) - ctx := context.Background() - client, err := storage.NewClient(ctx) - check(err) - - // source - f, err := os.Open(source) - if err != nil { - log.Fatal(err) - } - defer f.Close() - - wc := client.Bucket(bucket).Object(blob).NewWriter(ctx) - _, err = io.Copy(wc, f) - check(err) - err = wc.Close() - check(err) -} - -func fileExists(filename string) bool { - info, err := os.Stat(filename) - if os.IsNotExist(err) { - return false - } - return !info.IsDir() -} - -func runIndelPrimer(w http.ResponseWriter, r *http.Request) { - /* - Runs the herritability analysis - */ - - // Run the heritability analysis. This is used to run it in the background. 
-	// Execute the vcf-kit primer command
-
-	site := r.FormValue("site")
-	dataHash := r.FormValue("hash")
-	strain1 := r.FormValue("strain1")
-	strain2 := r.FormValue("strain2")
-	vcfURL := r.FormValue("vcf_url")
-
-	// Download Index
-	indexURL := fmt.Sprintf("%s.csi", vcfURL)
-	downloadFile(path.Base(indexURL), indexURL)
-
-	log.Printf("vk primer indel --region %v --nprimers 10 --ref=WS276 --samples=%v %v", site, fmt.Sprintf("%s,%s", strain1, strain2), vcfURL)
-	cmd := exec.Command("conda",
-		"run",
-		"-n",
-		"vcf-kit",
-		"vk",
-		"primer",
-		"indel",
-		"--region",
-		site,
-		"--nprimers",
-		"10",
-		"--polymorphic",
-		"--ref",
-		"WS276",
-		"--samples",
-		fmt.Sprintf("%s,%s", strain1, strain2),
-		vcfURL)
-
-	cmd.Stderr = os.Stderr
-	out, err := cmd.Output()
-	check(err)
-	fmt.Println(err)
-
-	// write file to output
-	ioutil.WriteFile(resultName, out, 0755)
-
-	resultBlob := fmt.Sprintf("reports/indel_primer/%s/%s", dataHash, resultName)
-	copyBlob("elegansvariation.org", resultName, resultBlob)
-
-	if err := json.NewEncoder(w).Encode("submitted indel primer"); err != nil {
-		log.Printf("Error sending response: %v", err)
-	}
-
-}
-
-func main() {
-
-	http.HandleFunc("/", runIndelPrimer)
-
-	port := os.Getenv("PORT")
-	if port == "" {
-		port = "8080"
-	}
-
-	log.Printf("listening on %s", port)
-	log.Fatal(http.ListenAndServe(fmt.Sprintf(":%s", port), nil))
-}
diff --git a/base/views/tools/indel_primer_run/primer.py b/base/views/tools/indel_primer_run/primer.py
deleted file mode 100644
index ea2001c3..00000000
--- a/base/views/tools/indel_primer_run/primer.py
+++ /dev/null
@@ -1,212 +0,0 @@
-#! /usr/bin/env python
-"""
-usage:
-  vk primer template [options]
-  vk primer sanger  [options]
-  vk primer snip [options]
-  vk primer indel [options]
-
-options:
-  -h --help                   Show this screen.
-  --version                   Show version.
-  --ref=                      Reference Genome
-  --region=                   Restrict to region.
-  --samples=                  Output genotypes for a sample or set of samples. [default: ALL]
-  --template=