13 changes: 13 additions & 0 deletions analysis_runner/cli_analysisrunner.py
@@ -25,6 +25,8 @@
    logger,
)

SUPPORTED_CLOUD_ENVIRONMENTS = {'gcp', 'azure'}
DEFAULT_CLOUD_ENVIRONMENT = 'gcp'

def add_analysis_runner_args(parser=None) -> argparse.ArgumentParser:
"""
@@ -35,6 +37,15 @@ def add_analysis_runner_args(parser=None) -> argparse.ArgumentParser:

    add_general_args(parser)

    parser.add_argument(
        '-c',
        '--cloud',
        required=False,
        default=DEFAULT_CLOUD_ENVIRONMENT,
Review comment (Contributor): Should we omit the default instead of providing one? That would let the analysis-runner decide the default.

        choices=SUPPORTED_CLOUD_ENVIRONMENTS,
        help=f'Backend cloud environment to use. Supported options are ({", ".join(SUPPORTED_CLOUD_ENVIRONMENTS)})',
    )
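A minimal sketch of the reviewer's suggestion above (hypothetical, not part of this PR): drop the client-side default and only send the key when the user sets it, so the server's own fallback applies otherwise.

```python
# Hypothetical variant of the flag with no client-side default; when --cloud
# is omitted, the key can be left out of the request body and the
# analysis-runner server falls back to its DEFAULT_CLOUD_ENVIRONMENT.
parser.add_argument(
    '-c',
    '--cloud',
    required=False,
    default=None,  # unset -> omit from the request, let the server decide
    choices=SUPPORTED_CLOUD_ENVIRONMENTS,
    help='Backend cloud environment to use; the server default applies if omitted.',
)
```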

    parser.add_argument(
        '--image',
        help=(
@@ -104,6 +115,7 @@ def run_analysis_runner(  # pylint: disable=too-many-arguments
    commit=None,
    repository=None,
    cwd=None,
    cloud=DEFAULT_CLOUD_ENVIRONMENT,
    image=None,
    cpu=None,
    memory=None,
@@ -216,6 +228,7 @@
        'script': _script,
        'description': description,
        'cwd': _cwd,
        'cloud': cloud,
        'image': image,
        'cpu': cpu,
        'memory': memory,
4 changes: 3 additions & 1 deletion server/Dockerfile
@@ -19,6 +19,8 @@ EXPOSE $PORT
COPY main.py cromwell.py util.py ./

# Prepare the Hail deploy config to point to the CPG domain.
COPY deploy-config.json /deploy-config/deploy-config.json
ENV HAIL_DEPLOY_CONFIG_FILE /deploy-config/deploy-config-gcp.json
COPY deploy-config-gcp.json /deploy-config/deploy-config-gcp.json
COPY deploy-config-azure.json /deploy-config/deploy-config-azure.json

CMD gunicorn --bind :$PORT --worker-class aiohttp.GunicornWebWorker main:init_func
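The mechanism here: the image ships one deploy config per cloud, HAIL_DEPLOY_CONFIG_FILE defaults to the GCP one, and server/main.py repoints it per request. A rough sketch of that selection step (the helper name is hypothetical; the server does this inline):

```python
# Hypothetical helper mirroring the per-request selection in server/main.py:
# pick the baked-in deploy config for the requested cloud and point Hail at it.
import os

DEPLOY_CONFIG_PATHS = {
    'gcp': '/deploy-config/deploy-config-gcp.json',
    'azure': '/deploy-config/deploy-config-azure.json',
}

def select_deploy_config(cloud_environment: str) -> str:
    path = DEPLOY_CONFIG_PATHS[cloud_environment]  # KeyError for unsupported clouds
    os.environ['HAIL_DEPLOY_CONFIG_FILE'] = path
    return path
```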
5 changes: 5 additions & 0 deletions server/deploy-config-azure.json
@@ -0,0 +1,5 @@
{
    "location": "external",
    "default_namespace": "default",
    "domain": "azhail.populationgenomics.org.au"
}
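For context, Hail's deploy config resolves service URLs from these fields; roughly as below (illustrative sketch only, not the actual hailtop.config.DeployConfig implementation):

```python
# Illustrative only: an 'external' deploy config maps a service to a URL under
# the configured domain, e.g. https://batch.azhail.populationgenomics.org.au.
import json

def external_service_url(config_path: str, service: str = 'batch') -> str:
    with open(config_path) as f:
        cfg = json.load(f)
    assert cfg['location'] == 'external'
    return f"https://{service}.{cfg['domain']}"
```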
5 changes: 5 additions & 0 deletions server/deploy-config-gcp.json
@@ -0,0 +1,5 @@
{
    "location": "external",
    "default_namespace": "default",
    "domain": "hail.populationgenomics.org.au"
}
1 change: 0 additions & 1 deletion server/deploy-config.json

This file was deleted.

21 changes: 12 additions & 9 deletions server/main.py
@@ -1,7 +1,8 @@
"""The analysis-runner server, running Hail Batch pipelines on users' behalf."""
# pylint: disable=wrong-import-order
import datetime
import os
import json
import datetime
import logging
import traceback
from shlex import quote
@@ -15,6 +16,9 @@
from util import (
    DRIVER_IMAGE,
    PUBSUB_TOPIC,
    SUPPORTED_CLOUD_ENVIRONMENTS,
    DEFAULT_CLOUD_ENVIRONMENT,
    DEPLOY_CONFIG_PATHS,
    _get_hail_version,
    check_allowed_repos,
    check_dataset_and_group,
@@ -41,9 +45,6 @@

routes = web.RouteTableDef()

SUPPORTED_CLOUD_ENVIRONMENTS = {'gcp'}


# pylint: disable=too-many-statements
@routes.post('/')
async def index(request):
@@ -56,11 +57,14 @@ async def index(request):

    output_prefix = validate_output_dir(params['output'])
    dataset = params['dataset']
    cloud_environment = params.get('cloud_environment', 'gcp')
    cloud_environment = params.get('cloud', DEFAULT_CLOUD_ENVIRONMENT)
    if cloud_environment not in SUPPORTED_CLOUD_ENVIRONMENTS:
        raise web.HTTPBadRequest(
            reason=f'analysis-runner does not yet support the {cloud_environment} environment'
        )

    # Set hail backend to the correct one based on the cloud environment
    os.environ['HAIL_DEPLOY_CONFIG_FILE'] = DEPLOY_CONFIG_PATHS.get(cloud_environment)

    dataset_config = check_dataset_and_group(
        server_config=get_server_config(),
@@ -217,7 +221,7 @@ async def config(request):

    output_prefix = validate_output_dir(params['output'])
    dataset = params['dataset']
    cloud_environment = params.get('cloud_environment', 'gcp')
    cloud_environment = params.get('cloud', DEFAULT_CLOUD_ENVIRONMENT)
    if cloud_environment not in SUPPORTED_CLOUD_ENVIRONMENTS:
        raise web.HTTPBadRequest(
            reason=f'analysis-runner config does not yet support the {cloud_environment} environment'
@@ -229,7 +233,6 @@
        dataset=dataset,
        email=email,
    )
    environment_config = dataset_config.get(cloud_environment)

    image = params.get('image') or DRIVER_IMAGE
    access_level = params['accessLevel']
@@ -241,8 +244,8 @@
    # Prepare the job's configuration to return

    run_config = get_baseline_run_config(
        environment=cloud_environment,
        gcp_project_id=environment_config.get('projectId'),
        environment='gcp',
        gcp_project_id=dataset_config.get('gcp', {}).get('projectId'),
        dataset=dataset,
        access_level=access_level,
        output_prefix=output_prefix,
45 changes: 34 additions & 11 deletions server/util.py
@@ -33,7 +33,18 @@
MEMBERS_CACHE_LOCATION = os.getenv('MEMBERS_CACHE_LOCATION')
assert MEMBERS_CACHE_LOCATION

CONFIG_PATH_PREFIXES = {'gcp': 'gs://cpg-config'}
SUPPORTED_CLOUD_ENVIRONMENTS = {'gcp', 'azure'}
DEFAULT_CLOUD_ENVIRONMENT = 'gcp'

DEPLOY_CONFIG_PATHS = {
    'gcp': '/deploy-config/deploy-config-gcp.json',
    'azure': '/deploy-config/deploy-config-azure.json',
}

CONFIG_PATH_PREFIXES = {
    'gcp': 'gs://cpg-config',
    'azure': 'az://cpgcommon/cpg-config',
}

secret_manager = secretmanager.SecretManagerServiceClient()
publisher = pubsub_v1.PublisherClient()
@@ -46,7 +57,7 @@ def get_server_config() -> dict:

async def _get_hail_version(environment: str) -> str:
    """ASYNC get hail version for the hail server in the local deploy_config"""
    if not environment == 'gcp':
    if environment not in SUPPORTED_CLOUD_ENVIRONMENTS:
        raise web.HTTPBadRequest(
            reason=f'Unsupported Hail Batch deploy config environment: {environment}'
        )
@@ -110,13 +121,22 @@ def check_dataset_and_group(server_config, environment: str, dataset, email) ->
            reason=f'Dataset {dataset} does not support the {environment} environment'
        )

    # do this to check access-members cache
    gcp_project = dataset_config.get('gcp', {}).get('projectId')
    if environment == 'gcp':
        # do this to check access-members cache
        gcp_project = dataset_config.get('gcp', {}).get('projectId')

        if not gcp_project:
            raise web.HTTPBadRequest(
                reason=f'The analysis-runner does not support checking group members for the {environment} environment'
            )
    elif environment == 'azure':
        azure_resource_group = dataset_config.get('azure', {}).get('resourceGroup')

        if not azure_resource_group:
            raise web.HTTPBadRequest(
                reason=f'The analysis-runner does not support checking group members for the {environment} environment'
            )
Review comment (Contributor): You can remove this; group member checks are not in secrets, so no gcp_project ID is needed anymore (I think):

Suggested change
    if environment == 'gcp':
        # do this to check access-members cache
        gcp_project = dataset_config.get('gcp', {}).get('projectId')
        if not gcp_project:
            raise web.HTTPBadRequest(
                reason=f'The analysis-runner does not support checking group members for the {environment} environment'
            )
    elif environment == 'azure':
        azure_resource_group = dataset_config.get('azure', {}).get('resourceGroup')
        if not azure_resource_group:
            raise web.HTTPBadRequest(
                reason=f'The analysis-runner does not support checking group members for the {environment} environment'
            )


    if not gcp_project:
        raise web.HTTPBadRequest(
            reason=f'The analysis-runner does not support checking group members for the {environment} environment'
        )
    if not is_member_in_cached_group(
        f'{dataset}-analysis', email, members_cache_location=MEMBERS_CACHE_LOCATION
    ):
@@ -148,7 +168,12 @@ def get_analysis_runner_metadata(
    Get well-formed analysis-runner metadata, requiring the core listed keys
    with some flexibility to provide your own keys (as **kwargs)
    """
    output_dir = f'gs://cpg-{dataset}-{cpg_namespace(access_level)}/{output_prefix}'
    if environment == 'gcp':
        output_dir = f'gs://cpg-{dataset}-{cpg_namespace(access_level)}/{output_prefix}'
    elif environment == 'azure':
        # TODO: need a way for analysis runner to know where to save metadata
Review comment (Contributor): It follows the same sort of convention, right, where the storage account is cpg{datasetWithoutDashes}:

azure://{storage-account}/{main,test}/{output_prefix}

prefix = CONFIG_PATH_PREFIXES.get(environment)
output_dir = AnyPath(prefix) /
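A possible completion of the reviewer's sketch above (hypothetical; the storage-account naming follows the reviewer's stated convention and the az:// prefix used in test/hail_batch_job.toml below):

```python
# Hypothetical Azure branch for output_dir: storage account is 'cpg' plus the
# dataset name with dashes removed (thousand-genomes -> cpgthousandgenomes).
storage_account = f"cpg{dataset.replace('-', '')}"
output_dir = f'az://{storage_account}/{cpg_namespace(access_level)}/{output_prefix}'
```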

    return {
        'timestamp': timestamp,
@@ -228,8 +253,6 @@ def get_baseline_run_config(
    baseline_config = {
        'hail': {
            'billing_project': dataset,
            # TODO: how would this work for Azure
            'bucket': f'cpg-{dataset}-hail',
        },
        'workflow': {
            'access_level': access_level,
42 changes: 42 additions & 0 deletions test/hail_batch_job.py
@@ -0,0 +1,42 @@
#!/usr/bin/env python3
"""
Simple script to test whether the CPG infrastructure and permissions are
configured appropriately to permit running AIP.
"""

import click

from cpg_utils.config import get_config
from cpg_utils.hail_batch import remote_tmpdir
import hailtop.batch as hb


@click.command()
Review comment (@illusional, Mar 28, 2023): There are a couple of test workflows in examples/batch; can you use them, or move this one there?

def main():
    """
    main
    """

    service_backend = hb.ServiceBackend(
        billing_project=get_config()['hail']['billing_project'],
        remote_tmpdir=remote_tmpdir(),
    )
    batch = hb.Batch(
        name='Test CPG Infra',
        backend=service_backend,
        cancel_after_n_failures=1,
        default_timeout=6000,
        default_memory='highmem',
    )

    j = batch.new_job(name='Write the file')
    j.command(f'echo "Hello World." > {j.ofile}')

    k = batch.new_job(name='Read the file')
    k.command(f'cat {j.ofile}')

    batch.run(wait=False)


if __name__ == '__main__':
    main()  # pylint: disable=E1120
19 changes: 19 additions & 0 deletions test/hail_batch_job.toml
@@ -0,0 +1,19 @@
[buckets]
web_suffix = "web"
tmp_suffix = "tmp"
analysis_suffix = "analysis"

[workflow]
dataset = "thousand-genomes"
access_level = "test"
dataset_path = "cpgthousandgenomes/test"
output_prefix = "output"
path_scheme = "az"
image_registry_prefix = "cpgcommonimages.azurecr.io"

[hail]
billing_project = "fewgenomes"
bucket = "az://cpgthousandgenomes/test"

[images]
hail = "hailgenetics/hail:0.2.93"
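To run the test job outside the analysis-runner, the TOML can be supplied directly; a sketch, assuming cpg-utils reads the config path from the CPG_CONFIG_PATH environment variable:

```python
# Sketch: point cpg-utils at the test config before reading it.
import os
os.environ['CPG_CONFIG_PATH'] = 'test/hail_batch_job.toml'

from cpg_utils.config import get_config
assert get_config()['hail']['billing_project'] == 'fewgenomes'
```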
12 changes: 12 additions & 0 deletions test/run_analysis.sh
@@ -0,0 +1,12 @@
#!/bin/bash

analysis-runner \
    --dataset thousand-genomes \
    --description 'Test script for batch on Azure' \
    --output-dir test \
    --cloud azure \
    --access-level test \
    --config test/hail_batch_job.toml \
    --image cpg_workflows:latest \
    test/hail_batch_job.py \
    test