From 04e8112347f96704eed13bfe19bffcb81b4b920d Mon Sep 17 00:00:00 2001
From: Geoffrey Yu
Date: Sun, 12 May 2024 23:04:55 -0400
Subject: [PATCH] CH-BenCHmark: Add support for A+R baseline

---
Review notes (git am ends the commit message at the "---" line above, so this
paragraph is not committed):
- The reviewed copy had every record of this patch run together with its
  whitespace collapsed. One record per line is restored here; hunk sizes were
  checked against the @@ counts and the diffstat below. Indentation inside
  the hunks is assumed (2 spaces in common.sh, 4 in COND) - TODO confirm
  against the tree, or apply with `git apply --ignore-whitespace`.
- Added shell lines now quote their expansions and pass "$@" through intact,
  and `[[ ! -z x ]]` is written `[[ -n x ]]`. Line counts are unchanged; the
  post-image blob ids on the two affected "index" lines are therefore stale.

 experiments/17-chbenchmark/common.sh          | 52 +++++++++++++++++++
 .../17-chbenchmark/scale_down/.gitignore      |  1 +
 experiments/17-chbenchmark/scale_down/COND    | 20 ++++++-
 .../scale_down/run_full_ar_baseline.sh        | 24 +++++++++
 workloads/chbenchmark/queries.sql             |  2 +-
 5 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 experiments/17-chbenchmark/scale_down/.gitignore
 create mode 100755 experiments/17-chbenchmark/scale_down/run_full_ar_baseline.sh

diff --git a/experiments/17-chbenchmark/common.sh b/experiments/17-chbenchmark/common.sh
index 95ee520c..56b91ca2 100644
--- a/experiments/17-chbenchmark/common.sh
+++ b/experiments/17-chbenchmark/common.sh
@@ -32,6 +32,26 @@ function run_tpcc() {
   popd
 }
 
+function run_tpcc_aurora_serverless() {
+  local results_name=$1
+  pushd ../../../workloads/chbenchmark/py-tpcc/
+  local args=(
+    --no-load
+    --config "$abs_txn_config_file"
+    --warehouses "$txn_warehouses"
+    --duration "$run_for_s"
+    --clients "$t_clients"
+    --scalefactor "$txn_scale_factor"
+  )
+  if [[ -n "$txn_zipfian_alpha" ]]; then
+    args+=(--zipfian-alpha "$txn_zipfian_alpha")
+  fi
+  mkdir -p "$COND_OUT/$results_name"
+  RECORD_DETAILED_STATS=1 COND_OUT=$COND_OUT/$results_name python3 -m pytpcc.tpcc aurora "${args[@]}" &
+  tpcc_pid=$!
+  popd
+}
+
 function log_workload_point() {
   msg=$1
   now=$(date --utc "+%Y-%m-%d %H:%M:%S")
@@ -74,6 +94,38 @@ function start_repeating_olap_runner() {
   runner_pid=$!
 }
 
+function start_repeating_olap_runner_redshift_serverless() {
+  local ra_clients=$1
+  local ra_gap_s=$2
+  local ra_gap_std_s=$3
+  local query_indexes=$4
+  local results_name=$5
+
+  local args=(
+    --num-clients "$ra_clients"
+    --num-front-ends "$num_front_ends"
+    --query-indexes "$query_indexes"
+    --query-bank-file "$ra_query_bank_file"
+    --avg-gap-s "$ra_gap_s"
+    --avg-gap-std-s "$ra_gap_std_s"
+    --brad-direct
+    --engine redshift
+    --serverless-redshift
+    --schema-name "$schema_name"
+    --config-file "$abs_physical_config_file"
+  )
+
+  >&2 echo "[Serial Repeating Analytics] Running with $ra_clients..."
+  results_dir=$COND_OUT/$results_name
+  mkdir -p "$results_dir"
+
+  log_workload_point "$results_name"
+  COND_OUT=$results_dir python3.11 ../../../workloads/IMDB_extended/run_repeating_analytics_serial.py "${args[@]}" &
+
+  # This is a special return value variable that we use.
+  runner_pid=$!
+}
+
 function graceful_shutdown() {
   for pid_var in "$@"; do
     kill -INT $pid_var
diff --git a/experiments/17-chbenchmark/scale_down/.gitignore b/experiments/17-chbenchmark/scale_down/.gitignore
new file mode 100644
index 00000000..0949a3cb
--- /dev/null
+++ b/experiments/17-chbenchmark/scale_down/.gitignore
@@ -0,0 +1 @@
+aurora.config
diff --git a/experiments/17-chbenchmark/scale_down/COND b/experiments/17-chbenchmark/scale_down/COND
index a8a2a218..a78e79fb 100644
--- a/experiments/17-chbenchmark/scale_down/COND
+++ b/experiments/17-chbenchmark/scale_down/COND
@@ -20,7 +20,25 @@ run_experiment(
         "txn-scale-factor": 1,  # TBD
         "t-clients": 4,  # TBD
         "num-front-ends": 5,  # TBD
-        "run-for-s": 2 * 60 * 60,  # 1 hour
+        "run-for-s": 2 * 60 * 60,  # 2 hours
+        "txn-zipfian-alpha": ZIPFIAN_ALPHA,
+        "ra-query-indexes": QUERIES_STR,
+        "ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql",
+    },
+)
+
+
+run_experiment(
+    name="run_full_ar",
+    run="./run_full_ar_baseline.sh",
+    options={
+        "physical-config-file": "../../../config/physical_config_chbench.yml",
+        "schema-name": "chbenchmark",
+        "txn-config-file": "aurora.config",
+        "txn-warehouses": 1740,
+        "txn-scale-factor": 1,  # TBD
+        "t-clients": 4,  # TBD
+        "run-for-s": 2 * 60 * 60,  # 2 hours
         "txn-zipfian-alpha": ZIPFIAN_ALPHA,
         "ra-query-indexes": QUERIES_STR,
         "ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql",
diff --git a/experiments/17-chbenchmark/scale_down/run_full_ar_baseline.sh b/experiments/17-chbenchmark/scale_down/run_full_ar_baseline.sh
new file mode 100755
index 00000000..4e904032
--- /dev/null
+++ b/experiments/17-chbenchmark/scale_down/run_full_ar_baseline.sh
@@ -0,0 +1,24 @@
+#! /bin/bash
+
+script_loc=$(cd "$(dirname "$0")" && pwd -P)
+cd "$script_loc"
+source ../common.sh
+extract_named_arguments "$@"
+
+# Resolve paths into absolute paths
+abs_txn_config_file=$(realpath "$txn_config_file")
+abs_physical_config_file=$(realpath "$physical_config_file")
+
+sleep 30
+
+run_tpcc_aurora_serverless "t_4"
+start_repeating_olap_runner_redshift_serverless 1 10 5 "$ra_query_indexes" "ch_1"
+ra_pid=$runner_pid
+
+sleep $run_for_s
+
+# Shut down.
+kill $tpcc_pid
+kill $ra_pid
+wait $tpcc_pid
+wait $ra_pid
diff --git a/workloads/chbenchmark/queries.sql b/workloads/chbenchmark/queries.sql
index 6ced3e67..c21976be 100644
--- a/workloads/chbenchmark/queries.sql
+++ b/workloads/chbenchmark/queries.sql
@@ -4,7 +4,7 @@ select ol_o_id, ol_w_id, ol_d_id, sum(ol_amount) as revenue, o_entry_d from cust
 select o_ol_cnt, count(*) as order_count from orders where exists (select * from order_line where o_id = ol_o_id and o_w_id = ol_w_id and o_d_id = ol_d_id and ol_delivery_d >= o_entry_d) group by o_ol_cnt order by o_ol_cnt;
 select n_name, sum(ol_amount) as revenue from customer, orders, order_line, stock, supplier, nation, region where c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and ol_o_id = o_id and ol_w_id = o_w_id and ol_d_id=o_d_id and ol_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id),10000) = su_suppkey and ascii(cast(substring(c_state,1,1) as varchar(1))) = su_nationkey and su_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'Europe' group by n_name order by revenue desc;
 select sum(ol_amount) as revenue from order_line where ol_quantity between 1 and 100000;
-WITH inner_query AS (select su_nationkey as supp_nation, substring(c_state,1,1) as cust_nation, extract(year from o_entry_d) as l_year, ol_amount as revenue from supplier, stock, order_line, orders, customer, nation n1, nation n2 where ol_supply_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and su_nationkey = n1.n_nationkey and ascii(substring(c_state,1,1)) = n2.n_nationkey and ((n1.n_name = 'Germany' and n2.n_name = 'Cambodia') or (n1.n_name = 'Cambodia' and n2.n_name = 'Germany'))) SELECT supp_nation, cust_nation, l_year, sum(revenue) as revenue FROM inner_query group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year;
+WITH inner_query AS (select su_nationkey as supp_nation, substring(c_state,1,1) as cust_nation, extract(year from o_entry_d) as l_year, ol_amount as revenue from supplier, stock, order_line, orders, customer, nation n1, nation n2 where ol_supply_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and su_nationkey = n1.n_nationkey and ascii(cast(substring(c_state,1,1) as varchar(1))) = n2.n_nationkey and ((n1.n_name = 'Germany' and n2.n_name = 'Cambodia') or (n1.n_name = 'Cambodia' and n2.n_name = 'Germany'))) SELECT supp_nation, cust_nation, l_year, sum(revenue) as revenue FROM inner_query group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year;
 select extract(year from o_entry_d) as l_year, sum(case when n2.n_name = 'Germany' then ol_amount else 0 end) / sum(ol_amount) as mkt_share from item, supplier, stock, order_line, orders, customer, nation n1, nation n2, region where i_id = s_i_id and ol_i_id = s_i_id and ol_supply_w_id = s_w_id and mod((s_w_id * s_i_id),10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and n1.n_nationkey = ascii(cast(substring(c_state,1,1) as varchar(1))) and n1.n_regionkey = r_regionkey and ol_i_id < 1000 and r_name = 'Europe' and su_nationkey = n2.n_nationkey and i_data like '%b' and i_id = ol_i_id group by extract(year from o_entry_d) order by l_year;
 select n_name, extract(year from o_entry_d) as l_year, sum(ol_amount) as sum_profit from item, stock, supplier, order_line, orders, nation where ol_i_id = s_i_id and ol_supply_w_id = s_w_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and ol_i_id = i_id and su_nationkey = n_nationkey and i_data like '%BB' group by n_name, extract(year from o_entry_d) order by n_name, l_year desc;
 select c_id, c_last, sum(ol_amount) as revenue, c_city, c_phone, n_name from customer, orders, order_line, nation where c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and o_entry_d <= ol_delivery_d and n_nationkey = ascii(cast(substring(c_state,1,1) as varchar(1))) group by c_id, c_last, c_city, c_phone, n_name order by revenue desc;