diff --git a/examples/cromwell/README.md b/examples/cromwell/README.md index 6bcd7c9a..284ea94a 100644 --- a/examples/cromwell/README.md +++ b/examples/cromwell/README.md @@ -112,7 +112,7 @@ analysis-runner \ --dataset fewgenomes \ --access-level test \ --description "test hail-batch to cromwell support" \ - -o $OUTPUT_DIR \ + -o 'test_output' \ python examples/cromwell/cromwell_from_hail_batch.py ``` diff --git a/examples/cromwell/cromwell_from_hail_batch.py b/examples/cromwell/cromwell_from_hail_batch.py index e78336df..8d09255d 100644 --- a/examples/cromwell/cromwell_from_hail_batch.py +++ b/examples/cromwell/cromwell_from_hail_batch.py @@ -4,24 +4,19 @@ from within a batch environment, and operate on the result(s) """ -from cpg_utils.config import get_config, output_path +from cpg_utils.config import config_retrieve, output_path from cpg_utils.cromwell import ( + CromwellBackend, CromwellOutputType, run_cromwell_workflow_from_repo_and_get_outputs, ) from cpg_utils.hail_batch import get_batch -_config = get_config() -DATASET = _config['workflow']['dataset'] -OUTPUT_PATH = output_path('outputs') - -b = get_batch() - inputs = ['Hello, analysis-runner ;)', 'Hello, second output!'] submit_j, workflow_outputs = run_cromwell_workflow_from_repo_and_get_outputs( - b=b, + b=get_batch(default_python_image=config_retrieve(['workflow', 'driver_image'])), job_prefix='hello', workflow='hello_all_in_one_file.wdl', cwd='examples/cromwell', @@ -37,8 +32,9 @@ ), }, libs=[], # hello_all_in_one_file is self-contained, so no dependencies - output_prefix=OUTPUT_PATH, - dataset=DATASET, + output_prefix=config_retrieve(['workflow', 'output_prefix']), + dataset=config_retrieve(['workflow', 'dataset']), + backend=CromwellBackend.batch, ) print(workflow_outputs) # { @@ -51,7 +47,7 @@ # ] # } -process_j = b.new_job('do-something-with-string-output') +process_j = get_batch().new_bash_job('do-something-with-string-output') process_j.command(f"cat {workflow_outputs['joined_out']} | awk '{{print toupper($0)}}'") @@ -69,13 +65,14 @@ def process_paths_python(*files: str): assert isinstance(workflow_outputs['out_paths'], list) assert isinstance(workflow_outputs['texts'], list) -process_paths_job = b.new_python_job('process_paths') +process_paths_job = get_batch().new_python_job('process_paths') process_paths_job.call(process_paths_python, *workflow_outputs['out_paths']) # Here, we're showing that you can use the output of a # resource group that we defined earlier in different tasks. for idx, out in enumerate(workflow_outputs['texts']): - process_j = b.new_job(f'do-something-with-input-{idx + 1}') + process_j = get_batch().new_bash_job(f'do-something-with-input-{idx + 1}') + process_j.image(config_retrieve(['workflow', 'driver_image'])) # For example: # convert the .md5 file to uppercase and print it to the console @@ -85,6 +82,6 @@ def process_paths_python(*files: str): cat {out.md5} | awk '{{print toupper($0)}}' cat {out.txt} | awk '{{print toupper($0)}}' > {process_j.out}""", ) - b.write_output(process_j.out, OUTPUT_PATH + f'file-{idx + 1}.txt') + get_batch().write_output(process_j.out, output_path(f'outputs/file-{idx + 1}.txt')) -b.run(wait=False) +get_batch().run(wait=False) diff --git a/examples/cromwell/hello_all_in_one_file.wdl b/examples/cromwell/hello_all_in_one_file.wdl index 6ff02cd6..0ba7fb76 100644 --- a/examples/cromwell/hello_all_in_one_file.wdl +++ b/examples/cromwell/hello_all_in_one_file.wdl @@ -39,7 +39,7 @@ task echo { runtime { cpu: 1 disks: "local-disk 10 SSD" - docker: "ubuntu@sha256:1d7b639619bdca2d008eca2d5293e3c43ff84cbee597ff76de3b7a7de3e84956" + docker: "australia-southeast1-docker.pkg.dev/cpg-common/images/sv/ubuntu1804:latest" memory: "1G" } output { @@ -58,7 +58,7 @@ task GenerateFileWithSecondary { >>> runtime { - docker: "ubuntu@sha256:1d7b639619bdca2d008eca2d5293e3c43ff84cbee597ff76de3b7a7de3e84956" + docker: "australia-southeast1-docker.pkg.dev/cpg-common/images/sv/ubuntu1804:latest" disks: "local-disk 10 SSD" memory: "1G" cpu: 1