diff --git a/tools/one_off/gather_athena_telemetry.sh b/tools/one_off/gather_athena_telemetry.sh old mode 100644 new mode 100755 index af0e7d5d..0a875ce9 --- a/tools/one_off/gather_athena_telemetry.sh +++ b/tools/one_off/gather_athena_telemetry.sh @@ -1,10 +1,13 @@ #! /bin/bash +set -e + SCRIPT_PATH=$(cd $(dirname $0) && pwd -P) cd $SCRIPT_PATH if [ -z $4 ]; then echo "Usage: $0 queries_json out_dir config_file schema_name" + exit 1 fi queries_json=$1 @@ -15,9 +18,12 @@ schema_name=$4 mkdir -p $out_dir/sql mkdir -p $out_dir/raw -for epoch in "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22"; do +for epoch in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22; do echo "Processing $epoch" - jq -r '.["epoch_'"$epoch"'"] | .[]' $queries_json > $out_dir/sql/epoch_${epoch}.sql + + jq -r '.["epoch_'"$epoch"'"] | .[]' $queries_json > $out_dir/sql/epoch_${epoch}_orig.sql + # Need to fix the query: `movie_telemetry` should be `telemetry` + sed 's/movie_telemetry/telemetry/g' $out_dir/sql/epoch_${epoch}_orig.sql > $out_dir/sql/epoch_${epoch}.sql echo "Gathering data..." python ../../run_cost_model.py \ @@ -40,5 +46,4 @@ for epoch in "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22"; do --data-s3-path imdb_specialized_100g/telemetry/telemetry.csv \ --times 1 \ --schema-name $schema_name -fi - +done diff --git a/workloads/cross_db_benchmark/benchmark_tools/athena/run_workload.py b/workloads/cross_db_benchmark/benchmark_tools/athena/run_workload.py index 858cb551..4e70d3ab 100644 --- a/workloads/cross_db_benchmark/benchmark_tools/athena/run_workload.py +++ b/workloads/cross_db_benchmark/benchmark_tools/athena/run_workload.py @@ -36,6 +36,9 @@ def run_athena_workload( if not use_boto_client: db_conn = create_db_conn(database, db_name, None, None) boto_client = None + print( + "!!!! WARNING: This data collection will exclude data scanned statistics. !!!!" 
+ ) else: db_conn = None # NOTE: Using the boto client is preferred because it also collects data diff --git a/workloads/cross_db_benchmark/benchmark_tools/run_workload.py b/workloads/cross_db_benchmark/benchmark_tools/run_workload.py index c55ced88..95180178 100644 --- a/workloads/cross_db_benchmark/benchmark_tools/run_workload.py +++ b/workloads/cross_db_benchmark/benchmark_tools/run_workload.py @@ -98,7 +98,7 @@ def run_workload( cap_workload=cap_workload, rank=rank, world_size=world_size, - use_boto_client=False, + use_boto_client=True, s3_output_path=s3_output_path, ) else: