From 48a491ff2f98f04774512ba308c9786fe0b52d38 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Fri, 26 Apr 2024 13:10:16 -0400 Subject: [PATCH 01/30] Check in table size stats for CH-BenCHmark --- src/brad/planner/constants.yml | 16 ++++++++++++++++ tools/calibration/table_sizes.py | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/brad/planner/constants.yml b/src/brad/planner/constants.yml index 51d9a428..4c393d51 100644 --- a/src/brad/planner/constants.yml +++ b/src/brad/planner/constants.yml @@ -181,6 +181,22 @@ table_extract_bytes_per_row: movie_info: 29.57191 person_info: 133.458044 + # TPC-C Warehouses: 1740 + # Around ~120 GB of uncompressed data. + chbenchmark: + warehouse: 92.40747126436781 + item: 75.62581 + stock: 308.868974 + district: 98.52431034482758 + customer: 570.148704 + history: 65.51127 + orders: 40.134002 + new_order: 9.937048 + order_line: 68.538322 + region: 216.8 + nation: 185.03225806451613 + supplier: 194.728 + ### ### Models used to account for hardware/system load. ### diff --git a/tools/calibration/table_sizes.py b/tools/calibration/table_sizes.py index ca1c3bc4..0fce773d 100644 --- a/tools/calibration/table_sizes.py +++ b/tools/calibration/table_sizes.py @@ -30,7 +30,7 @@ def delete_s3_object(client, bucket: str, key: str) -> None: async def main_impl(args) -> None: - config = ConfigFile.load(args.config_file) + config = ConfigFile.load_from_physical_config(args.physical_config_file) assets = AssetManager(config) mgr = BlueprintManager(config, assets, args.schema_name) await mgr.load() @@ -121,7 +121,7 @@ def main(): "Run this after bootstrapping a schema to measure table sizing " "constants used by the blueprint planner." ) - parser.add_argument("--config-file", type=str, required=True) + parser.add_argument("--physical-config-file", type=str, required=True) parser.add_argument("--schema-name", type=str, required=True) parser.add_argument("--debug", action="store_true") # Unloading is slow - we do not need to unload the entire table to get a From f5cb08488649f07ee3078aecec3dfdc3964fa8b9 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Fri, 26 Apr 2024 21:48:16 +0000 Subject: [PATCH 02/30] Adjust starting config for the SLO experiment --- .../15-e2e-scenarios-v2/slo_change/set_up_starting_blueprint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiments/15-e2e-scenarios-v2/slo_change/set_up_starting_blueprint.sh b/experiments/15-e2e-scenarios-v2/slo_change/set_up_starting_blueprint.sh index 7c17816d..8834bd79 100755 --- a/experiments/15-e2e-scenarios-v2/slo_change/set_up_starting_blueprint.sh +++ b/experiments/15-e2e-scenarios-v2/slo_change/set_up_starting_blueprint.sh @@ -15,6 +15,6 @@ python3 ../../../workloads/IMDB_extended/set_up_starting_blueprint.py \ --aurora-queries "99,56,32,92,91" \ --redshift-queries "49,30,83,94,38,87,86,76,37,31,46,58,61,62,64,69,73,74,51,57,60" \ --redshift-provisioning "dc2.large:2" \ - --aurora-provisioning "db.r6g.xlarge:2" \ + --aurora-provisioning "db.r6g.xlarge:1" \ --system-config-file slo_change_config.yml \ --physical-config-file $1 From e8fd93e8c5528917f933f7d651e033a359c655cb Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sat, 27 Apr 2024 20:15:51 -0400 Subject: [PATCH 03/30] Add data gathering scripts for CH-BenCHmark (load and instance types) (#501) Part of #487. 
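For reference, the Redshift reconfiguration flow that gather_redshift.sh implements with the AWS CLI (attempt an elastic resize, fall back to a classic resize, then poll until the cluster reports `available`) can be sketched with boto3. This sketch is an illustration only and is not part of the patch; the function name `resize_and_wait` is hypothetical, and the hard-coded `us-east-1` region and the 60-second settle delay are assumptions carried over from the shell script.

```python
import time

import boto3
from botocore.exceptions import ClientError

redshift = boto3.client("redshift", region_name="us-east-1")


def resize_and_wait(cluster_id: str, node_type: str, num_nodes: int) -> None:
    try:
        # Prefer an elastic resize, as the script does.
        redshift.resize_cluster(
            ClusterIdentifier=cluster_id,
            ClusterType="multi-node",
            NodeType=node_type,
            NumberOfNodes=num_nodes,
            Classic=False,
        )
    except ClientError:
        # Fall back to a classic resize when elastic resize is not possible.
        redshift.modify_cluster(
            ClusterIdentifier=cluster_id,
            NodeType=node_type,
            NumberOfNodes=num_nodes,
        )

    # Give the resize a moment to register before polling, mirroring the script.
    time.sleep(60)
    while True:
        status = redshift.describe_clusters(ClusterIdentifier=cluster_id)[
            "Clusters"
        ][0]["ClusterStatus"]
        if status == "available":
            break
        time.sleep(10)
```
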
--- .../calibration/load_chbench/cond_config.toml | 0 .../load_chbench/gather_redshift.sh | 86 +++++++++++++++++++ tools/calibration/load_chbench/redshift/COND | 49 +++++++++++ .../load_chbench/sample_full_queries.py | 29 +++++++ .../load_chbench/selected_queries.sql | 22 +++++ .../calibration/load_chbench/test_queries.py | 43 ++++++++++ 6 files changed, 229 insertions(+) create mode 100644 tools/calibration/load_chbench/cond_config.toml create mode 100755 tools/calibration/load_chbench/gather_redshift.sh create mode 100644 tools/calibration/load_chbench/redshift/COND create mode 100644 tools/calibration/load_chbench/sample_full_queries.py create mode 100644 tools/calibration/load_chbench/selected_queries.sql create mode 100644 tools/calibration/load_chbench/test_queries.py diff --git a/tools/calibration/load_chbench/cond_config.toml b/tools/calibration/load_chbench/cond_config.toml new file mode 100644 index 00000000..e69de29b diff --git a/tools/calibration/load_chbench/gather_redshift.sh b/tools/calibration/load_chbench/gather_redshift.sh new file mode 100755 index 00000000..37bd23aa --- /dev/null +++ b/tools/calibration/load_chbench/gather_redshift.sh @@ -0,0 +1,86 @@ +#! /bin/bash + +if [ -z $2 ]; then + >&2 echo "Usage: $0 " + >&2 echo "The config path should be relative to the redshift/ subdirectory." + exit 1 +fi + +export BRAD_CONFIG=$1 +cluster_identifier=$2 + +export BRAD_SCHEMA="chbenchmark" + +function run_warm_up() { + >&2 echo "Running warm up..." + pushd redshift + python3 -m brad.calibration.measure_load --run-warmup --engine redshift --query-file ../../../../tools/calibration/load_chbench/selected_queries.sql + popd +} + +function sync_redshift_resize() { + raw_instance=$1 + target_instance_type=${raw_instance//_/.} + target_node_count=$2 + + if [[ $target_node_count = "2" ]] && [[ $raw_instance = "dc2_large" ]]; then + >&2 echo "Skipping initial resize to $raw_instance $target_node_count (special case)" + return + fi + + # Try an elastic resize first. + >&2 echo "Resizing Redshift cluster to $target_instance_type with $target_node_count nodes (attempt elastic)" + aws redshift resize-cluster --cluster-identifier "$cluster_identifier" --cluster-type multi-node --node-type "$target_instance_type" --number-of-nodes "$target_node_count" --no-classic --region us-east-1 > /dev/null + result=$? + + # Resize Redshift cluster + if [ $result -ne 0 ]; then + >&2 echo "Classic resizing Redshift cluster to $target_instance_type with $target_node_count nodes" + aws redshift modify-cluster --cluster-identifier "$cluster_identifier" --node-type "$target_instance_type" --number-of-nodes "$target_node_count" > /dev/null + fi + + sleep 60 + + # Wait for resize to complete + while true; do + cluster_status=$(aws redshift describe-clusters --cluster-identifier "$cluster_identifier" --query 'Clusters[0].ClusterStatus' --output text) + if [[ $cluster_status == "available" ]]; then + break + fi + >&2 echo "Waiting for resize to complete..." + sleep 10 + done +} + +function run_cfg() { + instance_type=$1 + num_nodes=$2 + + >&2 echo "$instance_type $num_nodes" + sync_redshift_resize $instance_type $num_nodes + >&2 echo "Warming up..." + run_warm_up + >&2 echo "Running..." 
+ cond run "//redshift:${instance_type}-${num_nodes}" +} + +>&2 echo "Running $cluster_identifier" +>&2 echo "Config $BRAD_CONFIG" +>&2 echo "Cluster id $cluster_identifier" +sleep 10 + +run_cfg "dc2_large" 2 +run_cfg "dc2_large" 4 +run_cfg "dc2_large" 8 +run_cfg "dc2_large" 16 +run_cfg "ra3_xlplus" 2 +run_cfg "ra3_xlplus" 4 +run_cfg "ra3_xlplus" 8 +run_cfg "ra3_4xlarge" 8 +run_cfg "ra3_4xlarge" 4 +run_cfg "ra3_4xlarge" 2 + +sleep 60 + +>&2 echo "Done. Pausing $cluster_identifier..." +aws redshift pause-cluster --cluster-identifier "$cluster_identifier" diff --git a/tools/calibration/load_chbench/redshift/COND b/tools/calibration/load_chbench/redshift/COND new file mode 100644 index 00000000..7dfa96a0 --- /dev/null +++ b/tools/calibration/load_chbench/redshift/COND @@ -0,0 +1,49 @@ +from itertools import product + + +AVG_GAP_S = 3 +RUN_FOR_S = 5 * 60 # 5 minutes +NUM_CLIENTS = [1, 2, 4, 6] +WAIT_BEFORE_START = 10 +NUM_QUERIES = 22 + + +# Relative to experiment definition directories. +QUERY_BANK = "../selected_queries.sql" + + +CLUSTER_CONFIGS = [ + ("dc2_large", 2), + ("dc2_large", 4), + ("dc2_large", 8), + ("dc2_large", 16), + ("ra3_xlplus", 2), + ("ra3_xlplus", 4), + ("ra3_xlplus", 8), + ("ra3_4xlarge", 2), + ("ra3_4xlarge", 4), + ("ra3_4xlarge", 8), +] + + +for inst, nodes in CLUSTER_CONFIGS: + cfg_name = f"{inst}-{nodes}" + run_experiment_group( + name=cfg_name, + run="python3 -m brad.calibration.measure_load", + experiments=[ + ExperimentInstance( + name=f"{cfg_name}-{clients}-q{query_idx}", + options={ + "num-clients": clients, + "specific-query-idx": query_idx, + "run-for-s": RUN_FOR_S, + "avg-gap-s": AVG_GAP_S, + "wait-before-start": WAIT_BEFORE_START, + "query-file": QUERY_BANK, + "engine": "redshift", + }, + ) + for query_idx, clients in product(range(NUM_QUERIES), NUM_CLIENTS) + ], + ) diff --git a/tools/calibration/load_chbench/sample_full_queries.py b/tools/calibration/load_chbench/sample_full_queries.py new file mode 100644 index 00000000..02ee1385 --- /dev/null +++ b/tools/calibration/load_chbench/sample_full_queries.py @@ -0,0 +1,29 @@ +import argparse +import random + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--query-file", type=str, required=True) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--num-query-blocks", type=int, default=22) + parser.add_argument("--queries-per-block", type=int, default=200) + args = parser.parse_args() + + prng = random.Random(args.seed) + + with open(args.query_file, "r", encoding="UTF-8") as file: + queries = [line.strip() for line in file] + + selected = [] + for qidx in range(args.num_query_blocks): + offset = prng.randint(0, args.queries_per_block - 1) + selected.append(queries[qidx * args.queries_per_block + offset]) + + with open("selected_queries.sql", "w", encoding="UTF-8") as file: + for q in selected: + print(q, file=file) + + +if __name__ == "__main__": + main() diff --git a/tools/calibration/load_chbench/selected_queries.sql b/tools/calibration/load_chbench/selected_queries.sql new file mode 100644 index 00000000..8700fcf9 --- /dev/null +++ b/tools/calibration/load_chbench/selected_queries.sql @@ -0,0 +1,22 @@ +select ol_number, sum(ol_quantity) as sum_qty, sum(ol_amount) as sum_amount, avg(ol_quantity) as avg_qty, avg(ol_amount) as avg_amount, count(*) as count_order from order_line where ol_amount <= 33.06648003661816 group by ol_number order by ol_number; +select su_suppkey, su_name, n_name, i_id, i_name, su_address, su_phone, su_comment from item, 
supplier, stock, nation, region, (select s_i_id as m_i_id, min(s_quantity) as m_s_quantity from stock, supplier, nation, region where mod((s_w_id*s_i_id),10000)=su_suppkey and s_quantity >= 11.777062936461403 and su_nationkey=n_nationkey and n_regionkey=r_regionkey and r_name like 'Europ%' group by s_i_id) m where i_id = s_i_id and su_suppkey >= 2941.508980163913 and i_id <= 79857.67421953629 and mod((s_w_id * s_i_id), 10000) = su_suppkey and su_nationkey = n_nationkey and n_regionkey = r_regionkey and i_data like '%b' and r_name like 'Europ%' and i_id=m_i_id and s_quantity = m_s_quantity order by n_name, su_name, i_id; +select ol_o_id, ol_w_id, ol_d_id, sum(ol_amount) as revenue, o_entry_d from customer, new_order, orders, order_line where c_state like 'A%' and o_w_id >= 42.7486611465113 and ol_amount <= 99.70737680321619 and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and no_w_id = o_w_id and no_d_id = o_d_id and no_o_id = o_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id group by ol_o_id, ol_w_id, ol_d_id, o_entry_d order by revenue desc, o_entry_d; +select o_ol_cnt, count(*) as order_count from orders where o_carrier_id <= 6.749521428520183 and exists (select * from order_line where o_id = ol_o_id and o_w_id = ol_w_id and o_d_id = ol_d_id and ol_delivery_d >= o_entry_d) group by o_ol_cnt order by o_ol_cnt; +select n_name, sum(ol_amount) as revenue from customer, orders, order_line, stock, supplier, nation, region where c_id = o_c_id and n_nationkey <= 99.21862547006236 and s_order_cnt >= 42.71343586058127 and c_w_id = o_w_id and c_d_id = o_d_id and ol_o_id = o_id and ol_w_id = o_w_id and ol_d_id=o_d_id and ol_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id),10000) = su_suppkey and ascii(substring(c_state,1,1)) = su_nationkey and su_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'Europe' group by n_name order by revenue desc; +select sum(ol_amount) as revenue from order_line where ol_quantity <= 5.0 and ol_quantity between 1 and 100000; +WITH inner_query AS ( select su_nationkey as supp_nation, substring(c_state,1,1) as cust_nation, extract(year from o_entry_d) as l_year, ol_amount as revenue from supplier, stock, order_line, orders, customer, nation n1, nation n2 where ol_supply_w_id = s_w_id and c_id <= 2665.5792747107366 and s_quantity <= 72.4600053847094 and su_suppkey >= 528.9934672447876 and ol_i_id = s_i_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and su_nationkey = n1.n_nationkey and ascii(substring(c_state,1,1)) = n2.n_nationkey and ( (n1.n_name = 'Germany' and n2.n_name = 'Cambodia') or (n1.n_name = 'Cambodia' and n2.n_name = 'Germany') ) ) SELECT supp_nation, cust_nation, l_year, sum(revenue) as revenue FROM inner_query group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year; +select extract(year from o_entry_d) as l_year, sum(case when n2.n_name = 'Germany' then ol_amount else 0 end) / sum(ol_amount) as mkt_share from item, supplier, stock, order_line, orders, customer, nation n1, nation n2, region where i_id = s_i_id and o_w_id >= 583.5206747942913 and s_order_cnt <= 81.86012188607452 and su_suppkey >= 689.8054116558625 and ol_i_id = s_i_id and ol_supply_w_id = s_w_id and mod((s_w_id * s_i_id),10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and n1.n_nationkey = 
ascii(cast(substring(c_state,1,1) as varchar(1))) and n1.n_regionkey = r_regionkey and ol_i_id < 1000 and r_name = 'Europe' and su_nationkey = n2.n_nationkey and i_data like '%b' and i_id = ol_i_id group by extract(year from o_entry_d) order by l_year; +select n_name, extract(year from o_entry_d) as l_year, sum(ol_amount) as sum_profit from item, stock, supplier, order_line, orders, nation where ol_i_id = s_i_id and o_id <= 2939.378308830152 and s_quantity <= 86.59608959532211 and ol_supply_w_id = s_w_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and ol_i_id = i_id and su_nationkey = n_nationkey and i_data like '%BB' group by n_name, extract(year from o_entry_d) order by n_name, l_year desc; +select c_id, c_last, sum(ol_amount) as revenue, c_city, c_phone, n_name from customer, orders, order_line, nation where c_id = o_c_id and n_nationkey <= 116.40294250401558 and o_id <= 1560.082691309974 and c_w_id = o_w_id and c_d_id = o_d_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and o_entry_d <= ol_delivery_d and n_nationkey = ascii(cast(substring(c_state,1,1) as varchar(1))) group by c_id, c_last, c_city, c_phone, n_name order by revenue desc; +select s_i_id, sum(s_order_cnt) as ordercount from stock, supplier, nation where mod((s_w_id * s_i_id),10000) = su_suppkey and su_suppkey >= 2406.641955682944 and su_nationkey = n_nationkey and n_name = 'Germany' group by s_i_id having sum(s_order_cnt) > (select sum(s_order_cnt) * .005 from stock, supplier, nation where mod((s_w_id * s_i_id),10000) = su_suppkey and s_quantity >= 27.35152833573742 and su_nationkey = n_nationkey and n_name = 'Germany') order by ordercount desc; +select o_ol_cnt, sum(case when o_carrier_id = 1 or o_carrier_id = 2 then 1 else 0 end) as high_line_count, sum(case when o_carrier_id <> 1 and o_carrier_id <> 2 then 1 else 0 end) as low_line_count from orders, order_line where ol_w_id = o_w_id and ol_amount <= 36.57742392006392 and ol_d_id = o_d_id and ol_o_id = o_id and o_entry_d <= ol_delivery_d group by o_ol_cnt order by o_ol_cnt; +select c_count, count(*) as custdist from (select c_id, count(o_id) from customer left outer join orders on ( c_w_id = o_w_id and c_d_id = o_d_id and c_id = o_c_id and o_carrier_id >= 4.392919247526648 ) group by c_id) as c_orders (c_id, c_count) group by c_count order by custdist desc, c_count desc; +select 100.00 * sum(case when i_data like 'PR%' then ol_amount else 0 end) / (1+sum(ol_amount)) as promo_revenue from order_line, item where ol_i_id = i_id and i_id <= 82830.86056286634; +with revenue (supplier_no, total_revenue) as ( select mod((s_w_id * s_i_id),10000) as supplier_no, sum(ol_amount) as total_revenue from order_line, stock where ol_i_id = s_i_id and ol_supply_w_id = s_w_id and s_quantity >= 52.409029036617504 group by mod((s_w_id * s_i_id),10000)) select su_suppkey, su_name, su_address, su_phone, total_revenue from supplier, revenue where su_suppkey = supplier_no and total_revenue = (select max(total_revenue) from revenue) and su_suppkey >= 600.4811082997699 order by su_suppkey; +select i_name, substring(i_data, 1, 3) as brand, i_price, count(distinct (mod((s_w_id * s_i_id),10000))) as supplier_cnt from stock, item where i_id = s_i_id and i_data not like 'zz%' and i_price >= 17.67656701310919 and (mod((s_w_id * s_i_id),10000) not in (select su_suppkey from supplier where su_comment like '%bad%')) group by i_name, substring(i_data, 1, 3), i_price order by supplier_cnt desc; +select sum(ol_amount) / 2.0 as 
avg_yearly from order_line, (select i_id, avg(ol_quantity) as a from item, order_line where i_data like '%b' and ol_quantity <= 5.0 and ol_i_id = i_id group by i_id) t where ol_i_id = t.i_id and ol_quantity < t.a; +select c_last, c_id o_id, o_entry_d, o_ol_cnt, sum(ol_amount) from customer, orders, order_line where c_id = o_c_id and c_d_id <= 6.652803502875462 and c_w_id = o_w_id and c_d_id = o_d_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id group by o_id, o_w_id, o_d_id, c_id, c_last, o_entry_d, o_ol_cnt having sum(ol_amount) > 200 order by sum(ol_amount) desc, o_entry_d; +select sum(ol_amount) as revenue from order_line, item where ( ol_i_id = i_id and i_data like '%a' and ol_quantity >= 1 and ol_quantity <= 10 and i_price <= 97.10832030687996 and ol_w_id in (1,2,3) ) or ( ol_i_id = i_id and i_data like '%b' and ol_quantity >= 1 and ol_quantity <= 10 and i_price <= 79.78326318023088 and ol_w_id in (1,2,4) ) or ( ol_i_id = i_id and i_data like '%c' and ol_quantity >= 1 and ol_quantity <= 10 and i_price >= 5.450023586551352 and ol_w_id in (1,5,3) ); +select su_name, su_address from supplier, nation where su_suppkey in (select mod(s_i_id * s_w_id, 10000) from stock, order_line where s_i_id in (select i_id from item where i_data like 'co%') and ol_i_id=s_i_id group by s_i_id, s_w_id, s_quantity having 2*s_quantity > sum(ol_quantity)) and su_nationkey = n_nationkey and su_suppkey <= 8996.163667412242 and n_name = 'Germany' order by su_name; +select su_name, count(*) as numwait from supplier, order_line l1, orders, stock, nation where ol_o_id = o_id and su_suppkey <= 8526.675416612981 and o_w_id >= 369.02551642220345 and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id),10000) = su_suppkey and l1.ol_delivery_d > o_entry_d and not exists (select * from order_line l2 where l2.ol_o_id = l1.ol_o_id and l2.ol_w_id = l1.ol_w_id and l2.ol_d_id = l1.ol_d_id and l2.ol_delivery_d > l1.ol_delivery_d) and su_nationkey = n_nationkey and n_name = 'Germany' group by su_name order by numwait desc, su_name; +select substring(c_state,1,1) as country, count(*) as numcust, sum(c_balance) as totacctbal from customer where substring(c_phone,1,1) in ('1','2','3','4','5','6','7') and c_balance > (select avg(c_BALANCE) from customer where c_balance > 0.00 and substring(c_phone,1,1) in ('1','2','3','4','5','6','7')) and not exists (select * from orders where o_c_id = c_id and o_w_id = c_w_id and o_d_id = c_d_id and o_w_id <= 1264.1427731874844 ) group by substring(c_state,1,1) order by substring(c_state,1,1); diff --git a/tools/calibration/load_chbench/test_queries.py b/tools/calibration/load_chbench/test_queries.py new file mode 100644 index 00000000..72d807da --- /dev/null +++ b/tools/calibration/load_chbench/test_queries.py @@ -0,0 +1,43 @@ +import argparse +import asyncio +from brad.config.file import ConfigFile +from brad.connection.factory import ConnectionFactory +from brad.config.engine import Engine +from brad.provisioning.directory import Directory + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--schema-name", type=str, required=True) + parser.add_argument("--physical-config-file", type=str, required=True) + parser.add_argument("--query-file", type=str, required=True) + args = parser.parse_args() + + with open(args.query_file, "r", encoding="UTF-8") as file: + queries = [line.strip() for line in file] + + config = ConfigFile.load_from_physical_config(args.physical_config_file) + directory = 
Directory(config) + asyncio.run(directory.refresh()) + connection = ConnectionFactory.connect_to_sync( + Engine.Redshift, args.schema_name, config, directory, autocommit=True + ) + + cursor = connection.cursor_sync() + num_succeeded = 0 + for idx, q in enumerate(queries): + try: + print("Running query", idx, "of", len(queries) - 1) + cursor.execute_sync(q) + num_succeeded += 1 + except Exception as ex: + print("Query", idx, "failed with error", str(ex)) + + if num_succeeded == len(queries): + print("All succeeded.") + else: + print((len(queries) - num_succeeded), "failed.") + + +if __name__ == "__main__": + main() From 903a48fc04892ed300e732b2149677452279f626 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Mon, 29 Apr 2024 16:58:14 -0400 Subject: [PATCH 04/30] Check in TPC-C run time debug code (#503) Part of #487. --- experiments/17-chbenchmark/debug/COND | 12 + .../17-chbenchmark/debug/run_aurora_timing.sh | 18 + .../pytpcc/drivers/auroratimingdriver.py | 701 ++++++++++++++++++ workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py | 3 + 4 files changed, 734 insertions(+) create mode 100755 experiments/17-chbenchmark/debug/run_aurora_timing.sh create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroratimingdriver.py diff --git a/experiments/17-chbenchmark/debug/COND b/experiments/17-chbenchmark/debug/COND index f00864cf..4cfa490f 100644 --- a/experiments/17-chbenchmark/debug/COND +++ b/experiments/17-chbenchmark/debug/COND @@ -24,3 +24,15 @@ run_command( "run-for-s": 180, }, ) + +run_experiment( + name="aurora_timing", + run="./run_aurora_timing.sh", + options={ + "txn-config-file": "aurora.config", + "txn-warehouses": 1740, + "txn-scale-factor": 1, # TBD + "t-clients": 1, # TBD + "run-for-s": 30, + }, +) diff --git a/experiments/17-chbenchmark/debug/run_aurora_timing.sh b/experiments/17-chbenchmark/debug/run_aurora_timing.sh new file mode 100755 index 00000000..cb96028a --- /dev/null +++ b/experiments/17-chbenchmark/debug/run_aurora_timing.sh @@ -0,0 +1,18 @@ +#! /bin/bash + +script_loc=$(cd $(dirname $0) && pwd -P) +cd $script_loc +source ../common.sh +extract_named_arguments $@ + +# Resolve paths into absolute paths +abs_txn_config_file=$(realpath $txn_config_file) + +cd ../../../workloads/chbenchmark/py-tpcc/ +RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc auroratiming \ + --no-load \ + --config $abs_txn_config_file \ + --warehouses $txn_warehouses \ + --duration $run_for_s \ + --clients $t_clients \ + --scalefactor $txn_scale_factor diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroratimingdriver.py b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroratimingdriver.py new file mode 100644 index 00000000..d1d88cf4 --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroratimingdriver.py @@ -0,0 +1,701 @@ +import logging +import traceback +import decimal +import os +import time +from typing import Dict, Tuple, Any, Optional, List + +from .abstractdriver import * +from .. 
import constants + +from brad.connection.psycopg_connection import PsycopgConnection +from brad.connection.psycopg_cursor import PsycopgCursor +import conductor.lib as cond + +Config = Dict[str, Tuple[str, Any]] + +logger = logging.getLogger(__name__) + + +TXN_QUERIES = { + "DELIVERY": { + "getNewOrder": "SELECT no_o_id FROM new_order WHERE no_d_id = {} AND no_w_id = {} AND no_o_id > -1 LIMIT 1", # + "deleteNewOrder": "DELETE FROM new_order WHERE no_d_id = {} AND no_w_id = {} AND no_o_id = {}", # d_id, w_id, no_o_id + "getCId": "SELECT o_c_id FROM orders WHERE o_id = {} AND o_d_id = {} AND o_w_id = {}", # no_o_id, d_id, w_id + "updateOrders": "UPDATE orders SET o_carrier_id = {} WHERE o_id = {} AND o_d_id = {} AND o_w_id = {}", # o_carrier_id, no_o_id, d_id, w_id + "updateOrderLine": "UPDATE order_line SET ol_delivery_d = '{}' WHERE ol_o_id = {} AND ol_d_id = {} AND ol_w_id = {}", # o_entry_d, no_o_id, d_id, w_id + "sumOLAmount": "SELECT SUM(ol_amount) FROM order_line WHERE ol_o_id = {} AND ol_d_id = {} AND ol_w_id = {}", # no_o_id, d_id, w_id + "updateCustomer": "UPDATE customer SET c_balance = c_balance + {} WHERE c_id = {} AND c_d_id = {} AND c_w_id = {}", # ol_total, c_id, d_id, w_id + }, + "NEW_ORDER": { + "getWarehouseTaxRate": "SELECT w_tax FROM warehouse WHERE w_id = {}", # w_id + "getDistrict": "SELECT d_tax, d_next_o_id FROM district WHERE d_id = {} AND d_w_id = {}", # d_id, w_id + "incrementNextOrderId": "UPDATE district SET d_next_o_id = {} WHERE d_id = {} AND d_w_id = {}", # d_next_o_id, d_id, w_id + "getCustomer": "SELECT c_discount, c_last, c_credit FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # w_id, d_id, c_id + "createOrder": "INSERT INTO orders (o_id, o_d_id, o_w_id, o_c_id, o_entry_d, o_carrier_id, o_ol_cnt, o_all_local) VALUES ({}, {}, {}, {}, '{}', {}, {}, {})", # d_next_o_id, d_id, w_id, c_id, o_entry_d, o_carrier_id, o_ol_cnt, o_all_local + "createNewOrder": "INSERT INTO new_order (no_o_id, no_d_id, no_w_id) VALUES ({}, {}, {})", # o_id, d_id, w_id + "getItemInfo": "SELECT i_price, i_name, i_data FROM item WHERE i_id = {}", # ol_i_id + "getStockInfo": "SELECT s_quantity, s_data, s_ytd, s_order_cnt, s_remote_cnt, s_dist_{:02d} FROM stock WHERE s_i_id = {} AND s_w_id = {}", # d_id, ol_i_id, ol_supply_w_id + "updateStock": "UPDATE stock SET s_quantity = {}, s_ytd = {}, s_order_cnt = {}, s_remote_cnt = {} WHERE s_i_id = {} AND s_w_id = {}", # s_quantity, s_order_cnt, s_remote_cnt, ol_i_id, ol_supply_w_id + "createOrderLine": "INSERT INTO order_line (ol_o_id, ol_d_id, ol_w_id, ol_number, ol_i_id, ol_supply_w_id, ol_delivery_d, ol_quantity, ol_amount, ol_dist_info) VALUES ({}, {}, {}, {}, {}, {}, '{}', {}, {}, '{}')", # o_id, d_id, w_id, ol_number, ol_i_id, ol_supply_w_id, ol_quantity, ol_amount, ol_dist_info + }, + "ORDER_STATUS": { + "getCustomerByCustomerId": "SELECT c_id, c_first, c_middle, c_last, c_balance FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # w_id, d_id, c_id + "getCustomersByLastName": "SELECT c_id, c_first, c_middle, c_last, c_balance FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_last = '{}' ORDER BY c_first", # w_id, d_id, c_last + "getLastOrder": "SELECT o_id, o_carrier_id, o_entry_d FROM orders WHERE o_w_id = {} AND o_d_id = {} AND o_c_id = {} ORDER BY o_id DESC LIMIT 1", # w_id, d_id, c_id + "getOrderLines": "SELECT ol_supply_w_id, ol_i_id, ol_quantity, ol_amount, ol_delivery_d FROM order_line WHERE ol_w_id = {} AND ol_d_id = {} AND ol_o_id = {}", # w_id, d_id, o_id + }, + "PAYMENT": { + "getWarehouse": 
"SELECT w_name, w_street_1, w_street_2, w_city, w_state, w_zip FROM warehouse WHERE w_id = {}", # w_id + "updateWarehouseBalance": "UPDATE warehouse SET w_ytd = w_ytd + {} WHERE w_id = {}", # h_amount, w_id + "getDistrict": "SELECT d_name, d_street_1, d_street_2, d_city, d_state, d_zip FROM district WHERE d_w_id = {} AND d_id = {}", # w_id, d_id + "updateDistrictBalance": "UPDATE district SET d_ytd = d_ytd + {} WHERE d_w_id = {} AND d_id = {}", # h_amount, d_w_id, d_id + "getCustomerByCustomerId": "SELECT c_id, c_first, c_middle, c_last, c_street_1, c_street_2, c_city, c_state, c_zip, c_phone, c_since, c_credit, c_credit_lim, c_discount, c_balance, c_ytd_payment, c_payment_cnt, c_data FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # w_id, d_id, c_id + "getCustomersByLastName": "SELECT c_id, c_first, c_middle, c_last, c_street_1, c_street_2, c_city, c_state, c_zip, c_phone, c_since, c_credit, c_credit_lim, c_discount, c_balance, c_ytd_payment, c_payment_cnt, c_data FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_last = '{}' ORDER BY c_first", # w_id, d_id, c_last + "updateBCCustomer": "UPDATE customer SET c_balance = {}, c_ytd_payment = {}, c_payment_cnt = {}, c_data = '{}' WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # c_balance, c_ytd_payment, c_payment_cnt, c_data, c_w_id, c_d_id, c_id + "updateGCCustomer": "UPDATE customer SET c_balance = {}, c_ytd_payment = {}, c_payment_cnt = {} WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # c_balance, c_ytd_payment, c_payment_cnt, c_w_id, c_d_id, c_id + "insertHistory": "INSERT INTO history (h_c_id, h_c_d_id, h_c_w_id, h_d_id, h_w_id, h_date, h_amount, h_data) VALUES ({}, {}, {}, {}, {}, '{}', {}, '{}')", + }, + "STOCK_LEVEL": { + "getOId": "SELECT d_next_o_id FROM district WHERE d_w_id = {} AND d_id = {}", + "getStockCount": """ + SELECT COUNT(DISTINCT(ol_i_id)) FROM order_line, stock + WHERE ol_w_id = {} + AND ol_d_id = {} + AND ol_o_id < {} + AND ol_o_id >= {} + AND s_w_id = {} + AND s_i_id = ol_i_id + AND s_quantity < {} + """, + }, +} + + +class AuroraTimingDriver(AbstractDriver): + DEFAULT_CONFIG = { + "host": ("Host running the database.", "localhost"), + "port": ("Port on which the database is listening.", 5432), + "user": ("Username", "postgres"), + "password": ("Password", ""), + "database": ("Database", "chbenchmark"), + "isolation_level": ("The isolation level to use.", "REPEATABLE READ"), + } + + def __init__(self, ddl: str) -> None: + super().__init__("brad", ddl) + self._connection: Optional[PsycopgConnection] = None + self._cursor: Optional[PsycopgCursor] = None + self._config: Dict[str, Any] = {} + self._nonsilent_errs = constants.NONSILENT_ERRORS_VAR in os.environ + self._measure_file = None + self._wdc_stats_file = None + self._ol_stats_file = None + self._ins_ol_counter = 0 + + if "LOG_QUERIES" in os.environ: + query_log_file_path = cond.in_output_dir("queries.log") + self._query_log_file = open(query_log_file_path, "w", encoding="UTF-8") + else: + self._query_log_file = None + + def makeDefaultConfig(self) -> Config: + return AuroraTimingDriver.DEFAULT_CONFIG + + def loadConfig(self, config: Config) -> None: + self._config = config + address = self._config["host"] + port = int(self._config["port"]) + user = self._config["user"] + password = self._config["password"] + database = self._config["database"] + cstr = f"host={address} port={port} user={user} password={password} dbname={database}" + self._connection = PsycopgConnection.connect_sync(cstr, autocommit=True) + self._cursor = 
self._connection.cursor_sync() + + def loadTuples(self, tableName: str, tuples) -> None: + # We don't support data loading directly here. + pass + + def executeStart(self): + assert self._cursor is not None + # We use this callback to set the isolation level. + logger.info("Setting isolation level to %s", self._config["isolation_level"]) + self._cursor.execute_sync( + f"SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL {self._config['isolation_level']}" + ) + measure_file_path = cond.in_output_dir("aurora_timing.csv") + self._measure_file = open(measure_file_path, "w", encoding="UTF-8") + print( + "init,begin,getitems,getwdc,getorder,insertorder,commit,collect,total", + file=self._measure_file, + ) + + stats_file = cond.in_output_dir("wdc_stats.csv") + self._wdc_stats_file = open(stats_file, "w", encoding="UTF-8") + print("tax_rate,district,customer,total", file=self._wdc_stats_file) + + stats_file2 = cond.in_output_dir("item_stats.csv") + self._ol_stats_file = open(stats_file2, "w", encoding="UTF-8") + print( + "txn_counter,init,fetch_stock,stock_prep,update_stock,ol_prep,ol_insert,ol_append,total", + file=self._ol_stats_file, + ) + + def __del__(self): + if self._measure_file is not None: + self._measure_file.close() + self._measure_file = None + + if self._wdc_stats_file is not None: + self._wdc_stats_file.close() + self._wdc_stats_file = None + + if self._ol_stats_file is not None: + self._ol_stats_file.close() + self._ol_stats_file = None + + if self._query_log_file is not None: + self._query_log_file.close() + self._query_log_file = None + + def doDelivery(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: + try: + assert self._cursor is not None + + q = TXN_QUERIES["DELIVERY"] + w_id = params["w_id"] + o_carrier_id = params["o_carrier_id"] + ol_delivery_d = params["ol_delivery_d"] + + result: List[Tuple[Any, ...]] = [] + self._cursor.execute_sync("BEGIN") + for d_id in range(1, constants.DISTRICTS_PER_WAREHOUSE + 1): + self._cursor.execute_sync(q["getNewOrder"].format(d_id, w_id)) + r = self._cursor.fetchall_sync() + if len(r) == 0: + ## No orders for this district: skip it. Note: This must be reported if > 1% + continue + no_o_id = r[0][0] + + self._cursor.execute_sync(q["getCId"].format(no_o_id, d_id, w_id)) + r = self._cursor.fetchall_sync() + c_id = r[0][0] + + self._cursor.execute_sync(q["sumOLAmount"].format(no_o_id, d_id, w_id)) + r = self._cursor.fetchall_sync() + ol_total = decimal.Decimal(r[0][0]) + + self._cursor.execute_sync( + q["deleteNewOrder"].format(d_id, w_id, no_o_id) + ) + updateOrders = q["updateOrders"].format( + o_carrier_id, no_o_id, d_id, w_id + ) + self._cursor.execute_sync(updateOrders) + updateOrderLine = q["updateOrderLine"].format( + ol_delivery_d.strftime("%Y-%m-%d %H:%M:%S"), no_o_id, d_id, w_id + ) + self._cursor.execute_sync(updateOrderLine) + + # These must be logged in the "result file" according to TPC-C 2.7.2.2 (page 39) + # We remove the queued time, completed time, w_id, and o_carrier_id: the client can figure + # them out + # If there are no order lines, SUM returns null. There should always be order lines. + assert ( + ol_total != None + ), "ol_total is NULL: there are no order lines. 
This should not happen" + assert ol_total > 0.0 + + self._cursor.execute_sync( + q["updateCustomer"].format( + ol_total.quantize(decimal.Decimal("1.00")), c_id, d_id, w_id + ) + ) + + result.append((d_id, no_o_id)) + + self._cursor.execute_sync("COMMIT") + return result + + except Exception as ex: + if self._nonsilent_errs: + print("Error in DELIVERY", str(ex)) + print(traceback.format_exc()) + raise + + def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: + try: + assert self._cursor is not None + + no_start = time.time() + q = TXN_QUERIES["NEW_ORDER"] + w_id = params["w_id"] + d_id = params["d_id"] + c_id = params["c_id"] + o_entry_d = params["o_entry_d"] + i_ids = params["i_ids"] + i_w_ids = params["i_w_ids"] + i_qtys = params["i_qtys"] + + assert len(i_ids) > 0 + assert len(i_ids) == len(i_w_ids) + assert len(i_ids) == len(i_qtys) + + no_pbegin = time.time() + self._cursor.execute_sync("BEGIN") + no_abegin = time.time() + all_local = True + items = [] + for i in range(len(i_ids)): + ## Determine if this is an all local order or not + all_local = all_local and i_w_ids[i] == w_id + self._cursor.execute_sync(q["getItemInfo"].format(i_ids[i])) + r = self._cursor.fetchone_sync() + items.append(r) + assert len(items) == len(i_ids) + no_getitems = time.time() + + ## TPCC defines 1% of neworder gives a wrong itemid, causing rollback. + ## Note that this will happen with 1% of transactions on purpose. + for item in items: + if len(item) == 0: + self._cursor.execute_sync("ROLLBACK") + return + ## FOR + + ## ---------------- + ## Collect Information from WAREHOUSE, DISTRICT, and CUSTOMER + ## ---------------- + wdc_start = time.time() + get_warehouse = q["getWarehouseTaxRate"].format(w_id) + self._cursor.execute_sync(get_warehouse) + r = self._cursor.fetchone_sync() + w_tax = r[0] + wdc_warehouse_tax_rate = time.time() + + get_district = q["getDistrict"].format(d_id, w_id) + self._cursor.execute_sync(get_district) + r = self._cursor.fetchone_sync() + district_info = r + d_tax = district_info[0] + d_next_o_id = district_info[1] + wdc_district = time.time() + + get_customer = q["getCustomer"].format(w_id, d_id, c_id) + self._cursor.execute_sync(get_customer) + r = self._cursor.fetchone_sync() + customer_info = r + c_discount = customer_info[0] + no_get_wdc_info = time.time() + + if self._query_log_file is not None: + print(get_warehouse, file=self._query_log_file) + print(get_district, file=self._query_log_file) + print(get_customer, file=self._query_log_file) + + ## ---------------- + ## Insert Order Information + ## ---------------- + ol_cnt = len(i_ids) + o_carrier_id = constants.NULL_CARRIER_ID + + self._cursor.execute_sync( + q["incrementNextOrderId"].format(d_next_o_id + 1, d_id, w_id) + ) + createOrder = q["createOrder"].format( + d_next_o_id, + d_id, + w_id, + c_id, + o_entry_d.strftime("%Y-%m-%d %H:%M:%S"), + o_carrier_id, + ol_cnt, + 1 if all_local else 0, + ) + self._cursor.execute_sync(createOrder) + self._cursor.execute_sync( + q["createNewOrder"].format(d_next_o_id, d_id, w_id) + ) + no_ins_order_info = time.time() + + ## ---------------- + ## Insert Order Item Information + ## ---------------- + item_data = [] + total = 0 + insert_metadata = [] + for i in range(len(i_ids)): + io_start = time.time() + ol_number = i + 1 + ol_supply_w_id = i_w_ids[i] + ol_i_id = i_ids[i] + ol_quantity = i_qtys[i] + + itemInfo = items[i] + i_name = itemInfo[1] + i_data = itemInfo[2] + i_price = decimal.Decimal(itemInfo[0]) + io_init = time.time() + + get_stock_info = 
q["getStockInfo"].format(d_id, ol_i_id, ol_supply_w_id) + self._cursor.execute_sync(get_stock_info) + r = self._cursor.fetchone_sync() + io_fetch_stock = time.time() + if r is None: + logger.warning( + "No STOCK record for (ol_i_id=%d, ol_supply_w_id=%d)", + ol_i_id, + ol_supply_w_id, + ) + continue + stockInfo = r + s_quantity = stockInfo[0] + s_ytd = decimal.Decimal(stockInfo[2]) + s_order_cnt = int(stockInfo[3]) + s_remote_cnt = int(stockInfo[4]) + s_data = stockInfo[1] + s_dist_xx = stockInfo[5] # Fetches data from the s_dist_[d_id] column + + ## Update stock + s_ytd += ol_quantity + if s_quantity >= ol_quantity + 10: + s_quantity = s_quantity - ol_quantity + else: + s_quantity = s_quantity + 91 - ol_quantity + s_order_cnt += 1 + + if ol_supply_w_id != w_id: + s_remote_cnt += 1 + io_stock_prep = time.time() + + update_stock = q["updateStock"].format( + s_quantity, + s_ytd.quantize(decimal.Decimal("1.00")), + s_order_cnt, + s_remote_cnt, + ol_i_id, + ol_supply_w_id, + ) + self._cursor.execute_sync(update_stock) + io_update_stock = time.time() + + if ( + i_data.find(constants.ORIGINAL_STRING) != -1 + and s_data.find(constants.ORIGINAL_STRING) != -1 + ): + brand_generic = "B" + else: + brand_generic = "G" + + ## Transaction profile states to use "ol_quantity * i_price" + ol_amount = ol_quantity * i_price + total += ol_amount + io_ol_prep = time.time() + + createOrderLine = q["createOrderLine"].format( + d_next_o_id, + d_id, + w_id, + ol_number, + ol_i_id, + ol_supply_w_id, + o_entry_d.strftime("%Y-%m-%d %H:%M:%S"), + ol_quantity, + ol_amount, + s_dist_xx, + ) + self._cursor.execute_sync(createOrderLine) + io_ol_insert = time.time() + + ## Add the info to be returned + item_data.append( + (i_name, s_quantity, brand_generic, i_price, ol_amount) + ) + io_ol_append = time.time() + + insert_metadata.append( + ( + io_init - io_start, + io_fetch_stock - io_init, + io_stock_prep - io_fetch_stock, + io_update_stock - io_stock_prep, + io_ol_prep - io_update_stock, + io_ol_insert - io_ol_prep, + io_ol_append - io_ol_insert, + io_ol_append - io_start, + ) + ) + + if self._query_log_file is not None: + print(get_stock_info, file=self._query_log_file) + print(update_stock, file=self._query_log_file) + print(createOrderLine, file=self._query_log_file) + + ## FOR + no_insert_order_line = time.time() + + ## Commit! 
+ self._cursor.execute_sync("COMMIT") + no_commit = time.time() + + ## Adjust the total for the discount + # print "c_discount:", c_discount, type(c_discount) + # print "w_tax:", w_tax, type(w_tax) + # print "d_tax:", d_tax, type(d_tax) + total = int( + total + * (1 - decimal.Decimal(c_discount)) + * (1 + decimal.Decimal(w_tax) + decimal.Decimal(d_tax)) + ) + + ## Pack up values the client is missing (see TPC-C 2.4.3.5) + misc = [(w_tax, d_tax, d_next_o_id, total)] + no_collect = time.time() + + if self._measure_file is not None: + init_time = no_pbegin - no_start + begin_time = no_abegin - no_pbegin + getitems_time = no_getitems - no_abegin + getwdc_time = no_get_wdc_info - no_getitems + getorder_time = no_ins_order_info - no_get_wdc_info + insertorder_time = no_insert_order_line - no_ins_order_info + commit_time = no_commit - no_insert_order_line + collect_time = no_collect - no_commit + total_time = no_collect - no_start + print( + f"{init_time},{begin_time},{getitems_time},{getwdc_time},{getorder_time},{insertorder_time},{commit_time},{collect_time},{total_time}", + file=self._measure_file, + ) + + if self._wdc_stats_file is not None: + tax_rate_time = wdc_warehouse_tax_rate - wdc_start + district_time = wdc_district - wdc_warehouse_tax_rate + customer_time = no_get_wdc_info - wdc_district + total_time = no_get_wdc_info - wdc_start + print( + f"{tax_rate_time},{district_time},{customer_time},{total_time}", + file=self._wdc_stats_file, + ) + + if self._ol_stats_file is not None: + for im in insert_metadata: + print( + "{},{},{},{},{},{},{},{},{}".format(self._ins_ol_counter, *im), + file=self._ol_stats_file, + ) + self._ins_ol_counter += 1 + + return [customer_info, misc, item_data] + + except Exception as ex: + if self._nonsilent_errs: + print("Error in NEWORDER", str(ex)) + print(traceback.format_exc()) + raise + + def doOrderStatus(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: + try: + assert self._cursor is not None + + q = TXN_QUERIES["ORDER_STATUS"] + w_id = params["w_id"] + d_id = params["d_id"] + c_id = params["c_id"] + c_last = params["c_last"] + + self._cursor.execute_sync("BEGIN") + if c_id != None: + self._cursor.execute_sync( + q["getCustomerByCustomerId"].format(w_id, d_id, c_id) + ) + r = self._cursor.fetchall_sync() + customer = r[0] + else: + # Get the midpoint customer's id + self._cursor.execute_sync( + q["getCustomersByLastName"].format(w_id, d_id, c_last) + ) + r = self._cursor.fetchall_sync() + all_customers = r + assert len(all_customers) > 0 + namecnt = len(all_customers) + index = (namecnt - 1) // 2 + customer = all_customers[index] + c_id = customer[0] + assert len(customer) > 0 + assert c_id != None + + getLastOrder = q["getLastOrder"].format(w_id, d_id, c_id) + self._cursor.execute_sync(getLastOrder) + r = self._cursor.fetchall_sync() + order = r[0] + if order: + self._cursor.execute_sync( + q["getOrderLines"].format(w_id, d_id, order[0]) + ) + r = self._cursor.fetchall_sync() + orderLines = r + else: + orderLines = [] + + self._cursor.execute_sync("COMMIT") + return [customer, order, orderLines] + + except Exception as ex: + if self._nonsilent_errs: + print("Error in ORDER_STATUS", str(ex)) + print(traceback.format_exc()) + raise + + def doPayment(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: + try: + assert self._cursor is not None + + q = TXN_QUERIES["PAYMENT"] + w_id = params["w_id"] + d_id = params["d_id"] + h_amount = decimal.Decimal(params["h_amount"]) + c_w_id = params["c_w_id"] + c_d_id = params["c_d_id"] + c_id = params["c_id"] + 
c_last = params["c_last"] + h_date = params["h_date"] # Python datetime + + self._cursor.execute_sync("BEGIN") + if c_id != None: + self._cursor.execute_sync( + q["getCustomerByCustomerId"].format(w_id, d_id, c_id) + ) + r = self._cursor.fetchall_sync() + customer = r[0] + else: + # Get the midpoint customer's id + self._cursor.execute_sync( + q["getCustomersByLastName"].format(w_id, d_id, c_last) + ) + r = self._cursor.fetchall_sync() + all_customers = r + assert len(all_customers) > 0 + namecnt = len(all_customers) + index = (namecnt - 1) // 2 + customer = all_customers[index] + c_id = customer[0] + assert len(customer) > 0 + c_balance = decimal.Decimal(customer[14]) - h_amount + c_ytd_payment = decimal.Decimal(customer[15]) + h_amount + c_payment_cnt = int(customer[16]) + 1 + c_data = customer[17] + + self._cursor.execute_sync(q["getWarehouse"].format(w_id)) + r = self._cursor.fetchall_sync() + warehouse = r[0] + + self._cursor.execute_sync(q["getDistrict"].format(w_id, d_id)) + r = self._cursor.fetchall_sync() + district = r[0] + + self._cursor.execute_sync( + q["updateWarehouseBalance"].format(h_amount, w_id) + ) + self._cursor.execute_sync( + q["updateDistrictBalance"].format(h_amount, w_id, d_id) + ) + + # Customer Credit Information + if customer[11] == constants.BAD_CREDIT: + newData = " ".join( + map(str, [c_id, c_d_id, c_w_id, d_id, w_id, h_amount]) + ) + c_data = newData + "|" + c_data + if len(c_data) > constants.MAX_C_DATA: + c_data = c_data[: constants.MAX_C_DATA] + updateCustomer = q["updateBCCustomer"].format( + c_balance, + c_ytd_payment, + c_payment_cnt, + c_data, + c_w_id, + c_d_id, + c_id, + ) + self._cursor.execute_sync(updateCustomer) + else: + c_data = "" + self._cursor.execute_sync( + q["updateGCCustomer"].format( + c_balance, c_ytd_payment, c_payment_cnt, c_w_id, c_d_id, c_id + ), + ) + + # Concatenate w_name, four spaces, d_name + h_data = "%s %s" % (warehouse[0], district[0]) + # Create the history record + insertHistory = q["insertHistory"].format( + c_id, + c_d_id, + c_w_id, + d_id, + w_id, + h_date.strftime("%Y-%m-%d %H:%M:%S"), + h_amount.quantize(decimal.Decimal("1.00")), + h_data, + ) + self._cursor.execute_sync(insertHistory) + + self._cursor.execute_sync("COMMIT") + + # TPC-C 2.5.3.3: Must display the following fields: + # W_ID, D_ID, C_ID, C_D_ID, C_W_ID, W_STREET_1, W_STREET_2, W_CITY, W_STATE, W_ZIP, + # D_STREET_1, D_STREET_2, D_CITY, D_STATE, D_ZIP, C_FIRST, C_MIDDLE, C_LAST, C_STREET_1, + # C_STREET_2, C_CITY, C_STATE, C_ZIP, C_PHONE, C_SINCE, C_CREDIT, C_CREDIT_LIM, + # C_DISCOUNT, C_BALANCE, the first 200 characters of C_DATA (only if C_CREDIT = "BC"), + # H_AMOUNT, and H_DATE. 
+ + # Hand back all the warehouse, district, and customer data + return [warehouse, district, customer] + + except Exception as ex: + if self._nonsilent_errs: + print("Error in PAYMENT", str(ex)) + print(traceback.format_exc()) + raise + + def doStockLevel(self, params: Dict[str, Any]) -> int: + try: + assert self._cursor is not None + + q = TXN_QUERIES["STOCK_LEVEL"] + w_id = params["w_id"] + d_id = params["d_id"] + threshold = params["threshold"] + + self._cursor.execute_sync("BEGIN") + self._cursor.execute_sync(q["getOId"].format(w_id, d_id)) + r = self._cursor.fetchall_sync() + result = r[0] + assert result + o_id = result[0] + + self._cursor.execute_sync( + q["getStockCount"].format( + w_id, d_id, o_id, (o_id - 20), w_id, threshold + ) + ) + r = self._cursor.fetchall_sync() + result = r[0] + + self._cursor.execute_sync("COMMIT") + return int(result[0]) + + except Exception as ex: + if self._nonsilent_errs: + print("Error in STOCK_LEVEL", str(ex)) + print(traceback.format_exc()) + raise + + def ensureRollback(self) -> None: + """ + Makes sure the transaction has rolled back. + """ + self._cursor.execute_sync("ROLLBACK") diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py b/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py index 2273933f..027d4fb3 100755 --- a/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py +++ b/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py @@ -43,6 +43,7 @@ from .util import * from .runtime import * from .drivers.auroradriver import AuroraDriver +from .drivers.auroratimingdriver import AuroraTimingDriver from .drivers.braddriver import BradDriver logging.basicConfig( @@ -61,6 +62,8 @@ def createDriverClass(name): return BradDriver elif name == "aurora": return AuroraDriver + elif name == "auroratiming": + return AuroraTimingDriver else: raise NotImplementedError From b4a54a9c8e2acd1de415c39a421a5359f53ab63b Mon Sep 17 00:00:00 2001 From: Sophie Zhang <88999452+sopzha@users.noreply.github.com> Date: Tue, 30 Apr 2024 21:39:50 -0400 Subject: [PATCH 05/30] Create RecordBatch in BradStatement from query result and schema exposed from underlying connections (#502) Co-authored-by: Sophie Zhang --- cpp/server/brad_server_simple.cc | 124 ++++++++++++++++++++++++++----- cpp/server/brad_server_simple.h | 1 + cpp/server/brad_statement.cc | 89 +++------------------- cpp/server/brad_statement.h | 8 +- 4 files changed, 120 insertions(+), 102 deletions(-) diff --git a/cpp/server/brad_server_simple.cc b/cpp/server/brad_server_simple.cc index 6c4260bc..5cc7594d 100644 --- a/cpp/server/brad_server_simple.cc +++ b/cpp/server/brad_server_simple.cc @@ -7,6 +7,7 @@ #include #include +#include #include #include "brad_sql_info.h" #include "brad_statement.h" @@ -50,23 +51,108 @@ arrow::Result> DecodeTransactionQuery( return std::make_pair(std::move(autoincrement_id), std::move(transaction_id)); } -std::vector> TransformQueryResult( - std::vector query_result) { - std::vector> transformed_query_result; - for (const auto &row : query_result) { - std::vector transformed_row{}; - for (const auto &field : row) { - if (py::isinstance(field)) { - transformed_row.push_back(std::make_any(py::cast(field))); - } else if (py::isinstance(field)) { - transformed_row.push_back(std::make_any(py::cast(field))); - } else { - transformed_row.push_back(std::make_any(py::cast(field))); +arrow::Result> ResultToRecordBatch( + const std::vector &query_result, + const std::shared_ptr &schema) { + const size_t num_rows = query_result.size(); + + const size_t num_columns = schema->num_fields(); + std::vector> columns; + 
columns.reserve(num_columns); + + for (int field_ix = 0; field_ix < num_columns; ++field_ix) { + const auto &field_type = schema->field(field_ix)->type(); + if (field_type->Equals(arrow::int64())) { + arrow::Int64Builder int64builder; + for (int row_ix = 0; row_ix < num_rows; ++row_ix) { + const std::optional val = + py::cast>(query_result[row_ix][field_ix]); + if (val) { + ARROW_RETURN_NOT_OK(int64builder.Append(*val)); + } else { + ARROW_RETURN_NOT_OK(int64builder.AppendNull()); + } } + std::shared_ptr values; + ARROW_ASSIGN_OR_RAISE(values, int64builder.Finish()); + columns.push_back(values); + + } else if (field_type->Equals(arrow::float32())) { + arrow::FloatBuilder floatbuilder; + for (int row_ix = 0; row_ix < num_rows; ++row_ix) { + const std::optional val = + py::cast>(query_result[row_ix][field_ix]); + if (val) { + ARROW_RETURN_NOT_OK(floatbuilder.Append(*val)); + } else { + ARROW_RETURN_NOT_OK(floatbuilder.AppendNull()); + } + } + std::shared_ptr values; + ARROW_ASSIGN_OR_RAISE(values, floatbuilder.Finish()); + columns.push_back(values); + + } else if (field_type->Equals(arrow::decimal(/*precision=*/10, /*scale=*/2))) { + arrow::Decimal128Builder decimalbuilder(arrow::decimal(/*precision=*/10, /*scale=*/2)); + for (int row_ix = 0; row_ix < num_rows; ++row_ix) { + const std::optional val = + py::cast>(query_result[row_ix][field_ix]); + if (val) { + ARROW_RETURN_NOT_OK( + decimalbuilder.Append(arrow::Decimal128::FromString(*val).ValueOrDie())); + } else { + ARROW_RETURN_NOT_OK(decimalbuilder.AppendNull()); + } + } + std::shared_ptr values; + ARROW_ASSIGN_OR_RAISE(values, decimalbuilder.Finish()); + columns.push_back(values); + + } else if (field_type->Equals(arrow::utf8())) { + arrow::StringBuilder stringbuilder; + for (int row_ix = 0; row_ix < num_rows; ++row_ix) { + const std::optional str = + py::cast>(query_result[row_ix][field_ix]); + if (str) { + ARROW_RETURN_NOT_OK(stringbuilder.Append(str->data(), str->size())); + } else { + ARROW_RETURN_NOT_OK(stringbuilder.AppendNull()); + } + } + std::shared_ptr values; + ARROW_ASSIGN_OR_RAISE(values, stringbuilder.Finish()); + columns.push_back(values); + + } else if (field_type->Equals(arrow::date64())) { + arrow::Date64Builder datebuilder; + for (int row_ix = 0; row_ix < num_rows; ++row_ix) { + const std::optional val = + py::cast>(query_result[row_ix][field_ix]); + if (val) { + ARROW_RETURN_NOT_OK(datebuilder.Append(*val)); + } else { + ARROW_RETURN_NOT_OK(datebuilder.AppendNull()); + } + } + std::shared_ptr values; + ARROW_ASSIGN_OR_RAISE(values, datebuilder.Finish()); + columns.push_back(values); + + } else if (field_type->Equals(arrow::null())) { + arrow::NullBuilder nullbuilder; + for (int row_ix = 0; row_ix < num_rows; ++row_ix) { + ARROW_RETURN_NOT_OK(nullbuilder.AppendNull()); + } + std::shared_ptr values; + ARROW_ASSIGN_OR_RAISE(values, nullbuilder.Finish()); + columns.push_back(values); } - transformed_query_result.push_back(transformed_row); } - return transformed_query_result; + + std::shared_ptr result_record_batch = + arrow::RecordBatch::Make(schema, num_rows, columns); + + return result_record_batch; } BradFlightSqlServer::BradFlightSqlServer() : autoincrement_id_(0ULL) {} @@ -125,25 +211,23 @@ arrow::Result> EncodeTransactionQuery(query_ticket)); std::shared_ptr result_schema; - std::vector> transformed_query_result; + std::shared_ptr result_record_batch; { py::gil_scoped_acquire guard; auto result = handle_query_(query); result_schema = ArrowSchemaFromBradSchema(result.second); - transformed_query_result = 
TransformQueryResult(result.first); + result_record_batch = ResultToRecordBatch(result.first, result_schema).ValueOrDie(); } - ARROW_ASSIGN_OR_RAISE(auto statement, BradStatement::Create(transformed_query_result)); + ARROW_ASSIGN_OR_RAISE(auto statement, BradStatement::Create(std::move(result_record_batch), result_schema)); query_data_.insert(query_ticket, statement); - ARROW_ASSIGN_OR_RAISE(auto schema, statement->GetSchema()); - std::vector endpoints{ FlightEndpoint{std::move(ticket), {}, std::nullopt, ""}}; const bool ordered = false; - ARROW_ASSIGN_OR_RAISE(auto result, FlightInfo::Make(*schema, + ARROW_ASSIGN_OR_RAISE(auto result, FlightInfo::Make(*result_schema, descriptor, endpoints, -1, diff --git a/cpp/server/brad_server_simple.h b/cpp/server/brad_server_simple.h index 484ea216..ee6eaf21 100644 --- a/cpp/server/brad_server_simple.h +++ b/cpp/server/brad_server_simple.h @@ -15,6 +15,7 @@ #include "libcuckoo/cuckoohash_map.hh" #include +#include namespace brad { diff --git a/cpp/server/brad_statement.cc b/cpp/server/brad_statement.cc index e9ce1588..0db4a786 100644 --- a/cpp/server/brad_statement.cc +++ b/cpp/server/brad_statement.cc @@ -25,96 +25,27 @@ arrow::Result> BradStatement::Create( } arrow::Result> BradStatement::Create( - std::vector> query_result) { - std::shared_ptr result( - std::make_shared(query_result)); - return result; + std::shared_ptr result_record_batch, + std::shared_ptr schema) { + std::shared_ptr result( + std::make_shared(result_record_batch, schema)); + return result; } -BradStatement::BradStatement(std::vector> query_result) : - query_result_(std::move(query_result)) {} +BradStatement::BradStatement(std::shared_ptr result_record_batch, + std::shared_ptr schema) : + result_record_batch_(std::move(result_record_batch)), + schema_(std::move(schema)) {} BradStatement::~BradStatement() { } arrow::Result> BradStatement::GetSchema() const { - if (schema_) { - return schema_; - } - - std::vector> fields; - - if (query_result_.size() > 0) { - const std::vector &row = query_result_[0]; - - int counter = 0; - for (const auto &field : row) { - std::string field_type = field.type().name(); - if (field_type == "i") { - fields.push_back(arrow::field("INT FIELD " + std::to_string(++counter), arrow::int8())); - } else if (field_type == "f") { - fields.push_back(arrow::field("FLOAT FIELD " + std::to_string(++counter), arrow::float32())); - } else { - fields.push_back(arrow::field("STRING FIELD " + std::to_string(++counter), arrow::utf8())); - } - } - } - - schema_ = arrow::schema(fields); return schema_; } arrow::Result> BradStatement::FetchResult() { - std::shared_ptr schema = GetSchema().ValueOrDie(); - - const int num_rows = query_result_.size(); - - std::vector> columns; - columns.reserve(schema->num_fields()); - - for (int field_ix = 0; field_ix < schema->num_fields(); ++field_ix) { - const auto &field = schema->fields()[field_ix]; - if (field->type() == arrow::int8()) { - arrow::Int8Builder int8builder; - int8_t values_raw[num_rows]; - for (int row_ix = 0; row_ix < num_rows; ++row_ix) { - values_raw[row_ix] = std::any_cast(query_result_[row_ix][field_ix]); - } - ARROW_RETURN_NOT_OK(int8builder.AppendValues(values_raw, num_rows)); - - std::shared_ptr values; - ARROW_ASSIGN_OR_RAISE(values, int8builder.Finish()); - - columns.push_back(values); - } else if (field->type() == arrow::float32()) { - arrow::FloatBuilder floatbuilder; - float values_raw[num_rows]; - for (int row_ix = 0; row_ix < num_rows; ++row_ix) { - values_raw[row_ix] = 
std::any_cast(query_result_[row_ix][field_ix]); - } - ARROW_RETURN_NOT_OK(floatbuilder.AppendValues(values_raw, num_rows)); - - std::shared_ptr values; - ARROW_ASSIGN_OR_RAISE(values, floatbuilder.Finish()); - - columns.push_back(values); - } else if (field->type() == arrow::utf8()) { - arrow::StringBuilder stringbuilder; - for (int row_ix = 0; row_ix < num_rows; ++row_ix) { - const std::string* str = std::any_cast(&(query_result_[row_ix][field_ix])); - ARROW_RETURN_NOT_OK(stringbuilder.Append(str->data(), str->size())); - } - - std::shared_ptr values; - ARROW_ASSIGN_OR_RAISE(values, stringbuilder.Finish()); - } - } - - std::shared_ptr record_batch = - arrow::RecordBatch::Make(schema, - num_rows, - columns); - return record_batch; + return result_record_batch_; } std::string* BradStatement::GetBradStmt() const { return stmt_; } diff --git a/cpp/server/brad_statement.h b/cpp/server/brad_statement.h index b3dba2cc..6d296c16 100644 --- a/cpp/server/brad_statement.h +++ b/cpp/server/brad_statement.h @@ -26,9 +26,11 @@ class BradStatement { const std::string& sql); static arrow::Result> Create( - const std::vector>); + std::shared_ptr result_record_batch, + std::shared_ptr schema); - BradStatement(std::vector>); + BradStatement(std::shared_ptr, + std::shared_ptr); ~BradStatement(); @@ -41,7 +43,7 @@ class BradStatement { std::string* GetBradStmt() const; private: - std::vector> query_result_; + std::shared_ptr result_record_batch_; mutable std::shared_ptr schema_; From b9ba0f398723f56f7e85c3562570a35ff8730372 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Wed, 1 May 2024 18:55:30 -0400 Subject: [PATCH 06/30] Various improvements to the TPC-C runner (#504) - Batch inserts - Add Zipfian skew to how keys are selected - Remove use of fetchall() where not needed (does not really impact BRAD though) These changes reduce new order latency from ~95 ms down to ~45 ms. Part of #487. --- experiments/17-chbenchmark/common.sh | 23 +- experiments/17-chbenchmark/debug/COND | 45 +++ .../17-chbenchmark/debug/run_aurora_direct.sh | 20 +- .../17-chbenchmark/debug/run_aurora_timing.sh | 20 +- .../calibration/transactions/chbenchmark/COND | 3 + .../transactions/chbenchmark/run_instance.sh | 7 +- .../py-tpcc/pytpcc/drivers/auroradriver.py | 81 ++--- .../pytpcc/drivers/auroratimingdriver.py | 287 +++++++++++++++++- .../py-tpcc/pytpcc/drivers/braddriver.py | 61 ++-- .../py-tpcc/pytpcc/runtime/executor.py | 39 ++- workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py | 12 +- 11 files changed, 510 insertions(+), 88 deletions(-) diff --git a/experiments/17-chbenchmark/common.sh b/experiments/17-chbenchmark/common.sh index 04ad6a0b..2db49e0e 100644 --- a/experiments/17-chbenchmark/common.sh +++ b/experiments/17-chbenchmark/common.sh @@ -14,13 +14,18 @@ function start_brad() { function run_tpcc() { pushd ../../../workloads/chbenchmark/py-tpcc/ - RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc brad \ - --no-load \ - --config $abs_txn_config_file \ - --warehouses $txn_warehouses \ - --duration $run_for_s \ - --clients $t_clients \ - --scalefactor $txn_scale_factor & + local args=( + --no-load + --config $abs_txn_config_file + --warehouses $txn_warehouses + --duration $run_for_s + --clients $t_clients + --scalefactor $txn_scale_factor + ) + if [[ ! -z $txn_zipfian_alpha ]]; then + args+=(--zipfian-alpha $txn_zipfian_alpha) + fi + RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc brad "${args[@]}" & tpcc_pid=$! 
popd } @@ -91,6 +96,10 @@ function extract_named_arguments() { if [[ $phys_arg =~ --txn-config-file=.+ ]]; then txn_config_file=${phys_arg:18} fi + + if [[ $phys_arg =~ --txn-zipfian-alpha=.+ ]]; then + txn_zipfian_alpha=${phys_arg:20} + fi done } diff --git a/experiments/17-chbenchmark/debug/COND b/experiments/17-chbenchmark/debug/COND index 4cfa490f..7feaa352 100644 --- a/experiments/17-chbenchmark/debug/COND +++ b/experiments/17-chbenchmark/debug/COND @@ -1,3 +1,6 @@ +ZIPFIAN_ALPHA = 5.0 + + run_command( name="txn_lat", run="./run_tpcc.sh", @@ -13,6 +16,22 @@ run_command( }, ) +run_command( + name="txn_lat_zipf", + run="./run_tpcc.sh", + options={ + "physical-config-file": "../../../config/physical_config_chbench.yml", + "system-config-file": "debug_config.yml", # Relative to one level up. + "txn-config-file": "brad.config", + "schema-name": "chbenchmark", + "txn-warehouses": 1740, + "txn-scale-factor": 1, # TBD + "t-clients": 1, # TBD + "run-for-s": 180, + "txn-zipfian-alpha": ZIPFIAN_ALPHA, + }, +) + run_command( name="aurora_direct", run="./run_aurora_direct.sh", @@ -25,6 +44,19 @@ run_command( }, ) +run_command( + name="aurora_direct_zipf", + run="./run_aurora_direct.sh", + options={ + "txn-config-file": "aurora.config", + "txn-warehouses": 1740, + "txn-scale-factor": 1, # TBD + "t-clients": 1, # TBD + "run-for-s": 180, + "txn-zipfian-alpha": ZIPFIAN_ALPHA, + }, +) + run_experiment( name="aurora_timing", run="./run_aurora_timing.sh", @@ -36,3 +68,16 @@ run_experiment( "run-for-s": 30, }, ) + +run_experiment( + name="aurora_timing_zipf", + run="./run_aurora_timing.sh", + options={ + "txn-config-file": "aurora.config", + "txn-warehouses": 1740, + "txn-scale-factor": 1, # TBD + "t-clients": 1, # TBD + "run-for-s": 30, + "txn-zipfian-alpha": ZIPFIAN_ALPHA, + }, +) diff --git a/experiments/17-chbenchmark/debug/run_aurora_direct.sh b/experiments/17-chbenchmark/debug/run_aurora_direct.sh index df6b232a..36d85f2b 100755 --- a/experiments/17-chbenchmark/debug/run_aurora_direct.sh +++ b/experiments/17-chbenchmark/debug/run_aurora_direct.sh @@ -9,10 +9,18 @@ extract_named_arguments $@ abs_txn_config_file=$(realpath $txn_config_file) cd ../../../workloads/chbenchmark/py-tpcc/ -RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc aurora \ - --no-load \ - --config $abs_txn_config_file \ - --warehouses $txn_warehouses \ - --duration $run_for_s \ - --clients $t_clients \ + +args=( + --no-load + --config $abs_txn_config_file + --warehouses $txn_warehouses + --duration $run_for_s + --clients $t_clients --scalefactor $txn_scale_factor +) + +if [[ ! -z $txn_zipfian_alpha ]]; then + args+=(--zipfian-alpha $txn_zipfian_alpha) +fi + +RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc aurora "${args[@]}" diff --git a/experiments/17-chbenchmark/debug/run_aurora_timing.sh b/experiments/17-chbenchmark/debug/run_aurora_timing.sh index cb96028a..d28f1633 100755 --- a/experiments/17-chbenchmark/debug/run_aurora_timing.sh +++ b/experiments/17-chbenchmark/debug/run_aurora_timing.sh @@ -9,10 +9,18 @@ extract_named_arguments $@ abs_txn_config_file=$(realpath $txn_config_file) cd ../../../workloads/chbenchmark/py-tpcc/ -RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc auroratiming \ - --no-load \ - --config $abs_txn_config_file \ - --warehouses $txn_warehouses \ - --duration $run_for_s \ - --clients $t_clients \ + +args=( + --no-load + --config $abs_txn_config_file + --warehouses $txn_warehouses + --duration $run_for_s + --clients $t_clients --scalefactor $txn_scale_factor +) + +if [[ ! 
-z $txn_zipfian_alpha ]]; then + args+=(--zipfian-alpha $txn_zipfian_alpha) +fi + +RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc auroratiming "${args[@]}" diff --git a/tools/calibration/transactions/chbenchmark/COND b/tools/calibration/transactions/chbenchmark/COND index f59e559a..f8901f59 100644 --- a/tools/calibration/transactions/chbenchmark/COND +++ b/tools/calibration/transactions/chbenchmark/COND @@ -13,6 +13,8 @@ COND_INSTANCES = { instance: instance.replace(".", "_").replace("db.", "") for instance in INSTANCES } +ZIPFIAN_ALPHA = 5.0 + combine( name="all", deps=[ @@ -36,6 +38,7 @@ for instance in INSTANCES: "txn-warehouses": 1740, "txn-config-file": "aurora.config", "schema-name": "chbenchmark", + "txn-zipfian-alpha": ZIPFIAN_ALPHA, "instance": instance, }, ) diff --git a/tools/calibration/transactions/chbenchmark/run_instance.sh b/tools/calibration/transactions/chbenchmark/run_instance.sh index dfe0c6b5..3890358c 100755 --- a/tools/calibration/transactions/chbenchmark/run_instance.sh +++ b/tools/calibration/transactions/chbenchmark/run_instance.sh @@ -38,6 +38,10 @@ function extract_named_arguments() { if [[ $phys_arg =~ --instance=.+ ]]; then instance=${phys_arg:11} fi + + if [[ $phys_arg =~ --txn-zipfian-alpha=.+ ]]; then + txn_zipfian_alpha=${phys_arg:20} + fi done } @@ -74,7 +78,8 @@ RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc aurora \ --duration $run_for_s \ --clients $t_clients \ --scalefactor 1 \ - --lat-sample-prob 0.25 + --lat-sample-prob 0.25 \ + --txn-zipfian-alpha $txn_zipfian_alpha popd >&2 echo "Waiting 10 seconds before retrieving metrics..." diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroradriver.py b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroradriver.py index 79e65ebc..26d10812 100644 --- a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroradriver.py +++ b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroradriver.py @@ -36,6 +36,8 @@ "getStockInfo": "SELECT s_quantity, s_data, s_ytd, s_order_cnt, s_remote_cnt, s_dist_{:02d} FROM stock WHERE s_i_id = {} AND s_w_id = {}", # d_id, ol_i_id, ol_supply_w_id "updateStock": "UPDATE stock SET s_quantity = {}, s_ytd = {}, s_order_cnt = {}, s_remote_cnt = {} WHERE s_i_id = {} AND s_w_id = {}", # s_quantity, s_order_cnt, s_remote_cnt, ol_i_id, ol_supply_w_id "createOrderLine": "INSERT INTO order_line (ol_o_id, ol_d_id, ol_w_id, ol_number, ol_i_id, ol_supply_w_id, ol_delivery_d, ol_quantity, ol_amount, ol_dist_info) VALUES ({}, {}, {}, {}, {}, {}, '{}', {}, {}, '{}')", # o_id, d_id, w_id, ol_number, ol_i_id, ol_supply_w_id, ol_quantity, ol_amount, ol_dist_info + "createOrderLineMultivalue": "INSERT INTO order_line (ol_o_id, ol_d_id, ol_w_id, ol_number, ol_i_id, ol_supply_w_id, ol_delivery_d, ol_quantity, ol_amount, ol_dist_info) VALUES ", + "createOrderLineValues": "({}, {}, {}, {}, {}, {}, '{}', {}, {}, '{}')", }, "ORDER_STATUS": { "getCustomerByCustomerId": "SELECT c_id, c_first, c_middle, c_last, c_balance FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # w_id, d_id, c_id @@ -81,7 +83,7 @@ class AuroraDriver(AbstractDriver): } def __init__(self, ddl: str) -> None: - super().__init__("brad", ddl) + super().__init__("aurora", ddl) self._connection: Optional[PsycopgConnection] = None self._cursor: Optional[PsycopgCursor] = None self._config: Dict[str, Any] = {} @@ -127,19 +129,19 @@ def doDelivery(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: self._cursor.execute_sync("BEGIN") for d_id in range(1, constants.DISTRICTS_PER_WAREHOUSE + 1): 
self._cursor.execute_sync(q["getNewOrder"].format(d_id, w_id)) - r = self._cursor.fetchall_sync() - if len(r) == 0: + r = self._cursor.fetchone_sync() + if r is None: ## No orders for this district: skip it. Note: This must be reported if > 1% continue - no_o_id = r[0][0] + no_o_id = r[0] self._cursor.execute_sync(q["getCId"].format(no_o_id, d_id, w_id)) - r = self._cursor.fetchall_sync() - c_id = r[0][0] + r = self._cursor.fetchone_sync() + c_id = r[0] self._cursor.execute_sync(q["sumOLAmount"].format(no_o_id, d_id, w_id)) - r = self._cursor.fetchall_sync() - ol_total = decimal.Decimal(r[0][0]) + r = self._cursor.fetchone_sync() + ol_total = decimal.Decimal(r[0]) self._cursor.execute_sync( q["deleteNewOrder"].format(d_id, w_id, no_o_id) @@ -203,8 +205,8 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ## Determine if this is an all local order or not all_local = all_local and i_w_ids[i] == w_id self._cursor.execute_sync(q["getItemInfo"].format(i_ids[i])) - r = self._cursor.fetchall_sync() - items.append(r[0]) + r = self._cursor.fetchone_sync() + items.append(r) assert len(items) == len(i_ids) ## TPCC defines 1% of neworder gives a wrong itemid, causing rollback. @@ -219,18 +221,18 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ## Collect Information from WAREHOUSE, DISTRICT, and CUSTOMER ## ---------------- self._cursor.execute_sync(q["getWarehouseTaxRate"].format(w_id)) - r = self._cursor.fetchall_sync() - w_tax = r[0][0] + r = self._cursor.fetchone_sync() + w_tax = r[0] self._cursor.execute_sync(q["getDistrict"].format(d_id, w_id)) - r = self._cursor.fetchall_sync() - district_info = r[0] + r = self._cursor.fetchone_sync() + district_info = r d_tax = district_info[0] d_next_o_id = district_info[1] self._cursor.execute_sync(q["getCustomer"].format(w_id, d_id, c_id)) - r = self._cursor.fetchall_sync() - customer_info = r[0] + r = self._cursor.fetchone_sync() + customer_info = r c_discount = customer_info[0] ## ---------------- @@ -261,6 +263,7 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ## Insert Order Item Information ## ---------------- item_data = [] + insert_value_strings = [] total = 0 for i in range(len(i_ids)): ol_number = i + 1 @@ -276,15 +279,15 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: self._cursor.execute_sync( q["getStockInfo"].format(d_id, ol_i_id, ol_supply_w_id) ) - r = self._cursor.fetchall_sync() - if len(r) == 0: + r = self._cursor.fetchone_sync() + if r is None: logger.warning( "No STOCK record for (ol_i_id=%d, ol_supply_w_id=%d)", ol_i_id, ol_supply_w_id, ) continue - stockInfo = r[0] + stockInfo = r s_quantity = stockInfo[0] s_ytd = decimal.Decimal(stockInfo[2]) s_order_cnt = int(stockInfo[3]) @@ -326,7 +329,7 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ol_amount = ol_quantity * i_price total += ol_amount - createOrderLine = q["createOrderLine"].format( + createOrderLineValues = q["createOrderLineValues"].format( d_next_o_id, d_id, w_id, @@ -338,7 +341,7 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ol_amount, s_dist_xx, ) - self._cursor.execute_sync(createOrderLine) + insert_value_strings.append(createOrderLineValues) ## Add the info to be returned item_data.append( @@ -346,6 +349,12 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ) ## FOR + # Do one multivalue insert. 
+ insertOrderLines = q["createOrderLineMultivalue"] + ", ".join( + insert_value_strings + ) + self._cursor.execute_sync(insertOrderLines) + ## Commit! self._cursor.execute_sync("COMMIT") @@ -385,8 +394,8 @@ def doOrderStatus(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: self._cursor.execute_sync( q["getCustomerByCustomerId"].format(w_id, d_id, c_id) ) - r = self._cursor.fetchall_sync() - customer = r[0] + r = self._cursor.fetchone_sync() + customer = r else: # Get the midpoint customer's id self._cursor.execute_sync( @@ -404,13 +413,13 @@ def doOrderStatus(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: getLastOrder = q["getLastOrder"].format(w_id, d_id, c_id) self._cursor.execute_sync(getLastOrder) - r = self._cursor.fetchall_sync() - order = r[0] + r = self._cursor.fetchone_sync() + order = r if order: self._cursor.execute_sync( q["getOrderLines"].format(w_id, d_id, order[0]) ) - r = self._cursor.fetchall_sync() + r = self._cursor.fetchone_sync() orderLines = r else: orderLines = [] @@ -443,8 +452,8 @@ def doPayment(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: self._cursor.execute_sync( q["getCustomerByCustomerId"].format(w_id, d_id, c_id) ) - r = self._cursor.fetchall_sync() - customer = r[0] + r = self._cursor.fetchone_sync() + customer = r else: # Get the midpoint customer's id self._cursor.execute_sync( @@ -464,12 +473,12 @@ def doPayment(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: c_data = customer[17] self._cursor.execute_sync(q["getWarehouse"].format(w_id)) - r = self._cursor.fetchall_sync() - warehouse = r[0] + r = self._cursor.fetchone_sync() + warehouse = r self._cursor.execute_sync(q["getDistrict"].format(w_id, d_id)) - r = self._cursor.fetchall_sync() - district = r[0] + r = self._cursor.fetchone_sync() + district = r self._cursor.execute_sync( q["updateWarehouseBalance"].format(h_amount, w_id) @@ -548,8 +557,8 @@ def doStockLevel(self, params: Dict[str, Any]) -> int: self._cursor.execute_sync("BEGIN") self._cursor.execute_sync(q["getOId"].format(w_id, d_id)) - r = self._cursor.fetchall_sync() - result = r[0] + r = self._cursor.fetchone_sync() + result = r assert result o_id = result[0] @@ -558,8 +567,8 @@ def doStockLevel(self, params: Dict[str, Any]) -> int: w_id, d_id, o_id, (o_id - 20), w_id, threshold ) ) - r = self._cursor.fetchall_sync() - result = r[0] + r = self._cursor.fetchone_sync() + result = r self._cursor.execute_sync("COMMIT") return int(result[0]) diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroratimingdriver.py b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroratimingdriver.py index d1d88cf4..8443fd53 100644 --- a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroratimingdriver.py +++ b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/auroratimingdriver.py @@ -38,6 +38,8 @@ "getStockInfo": "SELECT s_quantity, s_data, s_ytd, s_order_cnt, s_remote_cnt, s_dist_{:02d} FROM stock WHERE s_i_id = {} AND s_w_id = {}", # d_id, ol_i_id, ol_supply_w_id "updateStock": "UPDATE stock SET s_quantity = {}, s_ytd = {}, s_order_cnt = {}, s_remote_cnt = {} WHERE s_i_id = {} AND s_w_id = {}", # s_quantity, s_order_cnt, s_remote_cnt, ol_i_id, ol_supply_w_id "createOrderLine": "INSERT INTO order_line (ol_o_id, ol_d_id, ol_w_id, ol_number, ol_i_id, ol_supply_w_id, ol_delivery_d, ol_quantity, ol_amount, ol_dist_info) VALUES ({}, {}, {}, {}, {}, {}, '{}', {}, {}, '{}')", # o_id, d_id, w_id, ol_number, ol_i_id, ol_supply_w_id, ol_quantity, ol_amount, ol_dist_info + "createOrderLineMultivalue": "INSERT INTO order_line (ol_o_id, 
ol_d_id, ol_w_id, ol_number, ol_i_id, ol_supply_w_id, ol_delivery_d, ol_quantity, ol_amount, ol_dist_info) VALUES ", + "createOrderLineValues": "({}, {}, {}, {}, {}, {}, '{}', {}, {}, '{}')", }, "ORDER_STATUS": { "getCustomerByCustomerId": "SELECT c_id, c_first, c_middle, c_last, c_balance FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # w_id, d_id, c_id @@ -83,7 +85,7 @@ class AuroraTimingDriver(AbstractDriver): } def __init__(self, ddl: str) -> None: - super().__init__("brad", ddl) + super().__init__("aurora timing", ddl) self._connection: Optional[PsycopgConnection] = None self._cursor: Optional[PsycopgCursor] = None self._config: Dict[str, Any] = {} @@ -127,7 +129,7 @@ def executeStart(self): measure_file_path = cond.in_output_dir("aurora_timing.csv") self._measure_file = open(measure_file_path, "w", encoding="UTF-8") print( - "init,begin,getitems,getwdc,getorder,insertorder,commit,collect,total", + "init,begin,getitems,getwdc,getorder,insertorder,commit,collect,multi_insert_time,total", file=self._measure_file, ) @@ -224,7 +226,7 @@ def doDelivery(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: print(traceback.format_exc()) raise - def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: + def doNewOrderOriginal(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: try: assert self._cursor is not None @@ -494,6 +496,285 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: print(traceback.format_exc()) raise + def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: + try: + assert self._cursor is not None + + no_start = time.time() + q = TXN_QUERIES["NEW_ORDER"] + w_id = params["w_id"] + d_id = params["d_id"] + c_id = params["c_id"] + o_entry_d = params["o_entry_d"] + i_ids = params["i_ids"] + i_w_ids = params["i_w_ids"] + i_qtys = params["i_qtys"] + + assert len(i_ids) > 0 + assert len(i_ids) == len(i_w_ids) + assert len(i_ids) == len(i_qtys) + + no_pbegin = time.time() + self._cursor.execute_sync("BEGIN") + no_abegin = time.time() + all_local = True + items = [] + for i in range(len(i_ids)): + ## Determine if this is an all local order or not + all_local = all_local and i_w_ids[i] == w_id + self._cursor.execute_sync(q["getItemInfo"].format(i_ids[i])) + r = self._cursor.fetchone_sync() + items.append(r) + assert len(items) == len(i_ids) + no_getitems = time.time() + + ## TPCC defines 1% of neworder gives a wrong itemid, causing rollback. + ## Note that this will happen with 1% of transactions on purpose. 
+ for item in items: + if item is None or len(item) == 0: + self._cursor.execute_sync("ROLLBACK") + return + ## FOR + + ## ---------------- + ## Collect Information from WAREHOUSE, DISTRICT, and CUSTOMER + ## ---------------- + wdc_start = time.time() + get_warehouse = q["getWarehouseTaxRate"].format(w_id) + self._cursor.execute_sync(get_warehouse) + r = self._cursor.fetchone_sync() + w_tax = r[0] + wdc_warehouse_tax_rate = time.time() + + get_district = q["getDistrict"].format(d_id, w_id) + self._cursor.execute_sync(get_district) + r = self._cursor.fetchone_sync() + district_info = r + d_tax = district_info[0] + d_next_o_id = district_info[1] + wdc_district = time.time() + + get_customer = q["getCustomer"].format(w_id, d_id, c_id) + self._cursor.execute_sync(get_customer) + r = self._cursor.fetchone_sync() + customer_info = r + c_discount = customer_info[0] + no_get_wdc_info = time.time() + + if self._query_log_file is not None: + print(get_warehouse, file=self._query_log_file) + print(get_district, file=self._query_log_file) + print(get_customer, file=self._query_log_file) + + ## ---------------- + ## Insert Order Information + ## ---------------- + ol_cnt = len(i_ids) + o_carrier_id = constants.NULL_CARRIER_ID + + self._cursor.execute_sync( + q["incrementNextOrderId"].format(d_next_o_id + 1, d_id, w_id) + ) + createOrder = q["createOrder"].format( + d_next_o_id, + d_id, + w_id, + c_id, + o_entry_d.strftime("%Y-%m-%d %H:%M:%S"), + o_carrier_id, + ol_cnt, + 1 if all_local else 0, + ) + self._cursor.execute_sync(createOrder) + self._cursor.execute_sync( + q["createNewOrder"].format(d_next_o_id, d_id, w_id) + ) + no_ins_order_info = time.time() + + ## ---------------- + ## Insert Order Item Information + ## ---------------- + item_data = [] + total = 0 + insert_metadata = [] + insert_value_strs = [] + for i in range(len(i_ids)): + io_start = time.time() + ol_number = i + 1 + ol_supply_w_id = i_w_ids[i] + ol_i_id = i_ids[i] + ol_quantity = i_qtys[i] + + itemInfo = items[i] + i_name = itemInfo[1] + i_data = itemInfo[2] + i_price = decimal.Decimal(itemInfo[0]) + io_init = time.time() + + get_stock_info = q["getStockInfo"].format(d_id, ol_i_id, ol_supply_w_id) + self._cursor.execute_sync(get_stock_info) + r = self._cursor.fetchone_sync() + io_fetch_stock = time.time() + if r is None: + logger.warning( + "No STOCK record for (ol_i_id=%d, ol_supply_w_id=%d)", + ol_i_id, + ol_supply_w_id, + ) + continue + stockInfo = r + s_quantity = stockInfo[0] + s_ytd = decimal.Decimal(stockInfo[2]) + s_order_cnt = int(stockInfo[3]) + s_remote_cnt = int(stockInfo[4]) + s_data = stockInfo[1] + s_dist_xx = stockInfo[5] # Fetches data from the s_dist_[d_id] column + + ## Update stock + s_ytd += ol_quantity + if s_quantity >= ol_quantity + 10: + s_quantity = s_quantity - ol_quantity + else: + s_quantity = s_quantity + 91 - ol_quantity + s_order_cnt += 1 + + if ol_supply_w_id != w_id: + s_remote_cnt += 1 + io_stock_prep = time.time() + + update_stock = q["updateStock"].format( + s_quantity, + s_ytd.quantize(decimal.Decimal("1.00")), + s_order_cnt, + s_remote_cnt, + ol_i_id, + ol_supply_w_id, + ) + self._cursor.execute_sync(update_stock) + io_update_stock = time.time() + + if ( + i_data.find(constants.ORIGINAL_STRING) != -1 + and s_data.find(constants.ORIGINAL_STRING) != -1 + ): + brand_generic = "B" + else: + brand_generic = "G" + + ## Transaction profile states to use "ol_quantity * i_price" + ol_amount = ol_quantity * i_price + total += ol_amount + io_ol_prep = time.time() + + createOrderLineValues = 
q["createOrderLineValues"].format( + d_next_o_id, + d_id, + w_id, + ol_number, + ol_i_id, + ol_supply_w_id, + o_entry_d.strftime("%Y-%m-%d %H:%M:%S"), + ol_quantity, + ol_amount, + s_dist_xx, + ) + insert_value_strs.append(createOrderLineValues) + io_ol_insert = time.time() + + ## Add the info to be returned + item_data.append( + (i_name, s_quantity, brand_generic, i_price, ol_amount) + ) + io_ol_append = time.time() + + insert_metadata.append( + ( + io_init - io_start, + io_fetch_stock - io_init, + io_stock_prep - io_fetch_stock, + io_update_stock - io_stock_prep, + io_ol_prep - io_update_stock, + io_ol_insert - io_ol_prep, + io_ol_append - io_ol_insert, + io_ol_append - io_start, + ) + ) + + if self._query_log_file is not None: + print(get_stock_info, file=self._query_log_file) + print(update_stock, file=self._query_log_file) + + no_mv_insert_pre = time.time() + ## FOR + insert_order_line_query = q["createOrderLineMultivalue"] + ", ".join( + insert_value_strs + ) + self._cursor.execute_sync(insert_order_line_query) + no_mv_insert_after = time.time() + if self._query_log_file is not None: + print(insert_order_line_query, file=self._query_log_file) + no_insert_order_line = time.time() + + ## Commit! + self._cursor.execute_sync("COMMIT") + no_commit = time.time() + + ## Adjust the total for the discount + # print "c_discount:", c_discount, type(c_discount) + # print "w_tax:", w_tax, type(w_tax) + # print "d_tax:", d_tax, type(d_tax) + total = int( + total + * (1 - decimal.Decimal(c_discount)) + * (1 + decimal.Decimal(w_tax) + decimal.Decimal(d_tax)) + ) + + ## Pack up values the client is missing (see TPC-C 2.4.3.5) + misc = [(w_tax, d_tax, d_next_o_id, total)] + no_collect = time.time() + + if self._measure_file is not None: + init_time = no_pbegin - no_start + begin_time = no_abegin - no_pbegin + getitems_time = no_getitems - no_abegin + getwdc_time = no_get_wdc_info - no_getitems + getorder_time = no_ins_order_info - no_get_wdc_info + insertorder_time = no_insert_order_line - no_ins_order_info + commit_time = no_commit - no_insert_order_line + collect_time = no_collect - no_commit + total_time = no_collect - no_start + multi_insert_time = no_mv_insert_after - no_mv_insert_pre + print( + f"{init_time},{begin_time},{getitems_time},{getwdc_time},{getorder_time},{insertorder_time},{commit_time},{collect_time},{multi_insert_time},{total_time}", + file=self._measure_file, + ) + + if self._wdc_stats_file is not None: + tax_rate_time = wdc_warehouse_tax_rate - wdc_start + district_time = wdc_district - wdc_warehouse_tax_rate + customer_time = no_get_wdc_info - wdc_district + total_time = no_get_wdc_info - wdc_start + print( + f"{tax_rate_time},{district_time},{customer_time},{total_time}", + file=self._wdc_stats_file, + ) + + if self._ol_stats_file is not None: + for im in insert_metadata: + print( + "{},{},{},{},{},{},{},{},{}".format(self._ins_ol_counter, *im), + file=self._ol_stats_file, + ) + self._ins_ol_counter += 1 + + return [customer_info, misc, item_data] + + except Exception as ex: + if self._nonsilent_errs: + print("Error in NEWORDER", str(ex)) + print(traceback.format_exc()) + raise + def doOrderStatus(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: try: assert self._cursor is not None diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/braddriver.py b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/braddriver.py index 9458a0c1..fa6e678f 100644 --- a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/braddriver.py +++ b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/braddriver.py 
@@ -35,6 +35,8 @@ "getStockInfo": "SELECT s_quantity, s_data, s_ytd, s_order_cnt, s_remote_cnt, s_dist_{:02d} FROM stock WHERE s_i_id = {} AND s_w_id = {}", # d_id, ol_i_id, ol_supply_w_id "updateStock": "UPDATE stock SET s_quantity = {}, s_ytd = {}, s_order_cnt = {}, s_remote_cnt = {} WHERE s_i_id = {} AND s_w_id = {}", # s_quantity, s_order_cnt, s_remote_cnt, ol_i_id, ol_supply_w_id "createOrderLine": "INSERT INTO order_line (ol_o_id, ol_d_id, ol_w_id, ol_number, ol_i_id, ol_supply_w_id, ol_delivery_d, ol_quantity, ol_amount, ol_dist_info) VALUES ({}, {}, {}, {}, {}, {}, '{}', {}, {}, '{}')", # o_id, d_id, w_id, ol_number, ol_i_id, ol_supply_w_id, ol_quantity, ol_amount, ol_dist_info + "createOrderLineMultivalue": "INSERT INTO order_line (ol_o_id, ol_d_id, ol_w_id, ol_number, ol_i_id, ol_supply_w_id, ol_delivery_d, ol_quantity, ol_amount, ol_dist_info) VALUES ", + "createOrderLineValues": "({}, {}, {}, {}, {}, {}, '{}', {}, {}, '{}')", }, "ORDER_STATUS": { "getCustomerByCustomerId": "SELECT c_id, c_first, c_middle, c_last, c_balance FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # w_id, d_id, c_id @@ -119,7 +121,7 @@ def doDelivery(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ol_delivery_d = params["ol_delivery_d"] result: List[Tuple[Any, ...]] = [] - self._client.run_query_json("BEGIN") + self._client.run_query_ignore_results("BEGIN") for d_id in range(1, constants.DISTRICTS_PER_WAREHOUSE + 1): r, _ = self._client.run_query_json(q["getNewOrder"].format(d_id, w_id)) if len(r) == 0: @@ -137,17 +139,17 @@ def doDelivery(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ) ol_total = decimal.Decimal(r[0][0]) - self._client.run_query_json( + self._client.run_query_ignore_results( q["deleteNewOrder"].format(d_id, w_id, no_o_id) ) updateOrders = q["updateOrders"].format( o_carrier_id, no_o_id, d_id, w_id ) - self._client.run_query_json(updateOrders) + self._client.run_query_ignore_results(updateOrders) updateOrderLine = q["updateOrderLine"].format( ol_delivery_d.strftime("%Y-%m-%d %H:%M:%S"), no_o_id, d_id, w_id ) - self._client.run_query_json(updateOrderLine) + self._client.run_query_ignore_results(updateOrderLine) # These must be logged in the "result file" according to TPC-C 2.7.2.2 (page 39) # We remove the queued time, completed time, w_id, and o_carrier_id: the client can figure @@ -158,7 +160,7 @@ def doDelivery(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ), "ol_total is NULL: there are no order lines. This should not happen" assert ol_total > 0.0 - self._client.run_query_json( + self._client.run_query_ignore_results( q["updateCustomer"].format( ol_total.quantize(decimal.Decimal("1.00")), c_id, d_id, w_id ) @@ -166,7 +168,7 @@ def doDelivery(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: result.append((d_id, no_o_id)) - self._client.run_query_json("COMMIT") + self._client.run_query_ignore_results("COMMIT") return result except Exception as ex: @@ -192,7 +194,7 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: assert len(i_ids) == len(i_w_ids) assert len(i_ids) == len(i_qtys) - self._client.run_query_json("BEGIN") + self._client.run_query_ignore_results("BEGIN") all_local = True items = [] for i in range(len(i_ids)): @@ -206,7 +208,7 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ## Note that this will happen with 1% of transactions on purpose. 
for item in items: if len(item) == 0: - self._client.run_query_json("ROLLBACK") + self._client.run_query_ignore_results("ROLLBACK") return ## FOR @@ -233,7 +235,7 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ol_cnt = len(i_ids) o_carrier_id = constants.NULL_CARRIER_ID - self._client.run_query_json( + self._client.run_query_ignore_results( q["incrementNextOrderId"].format(d_next_o_id + 1, d_id, w_id) ) createOrder = q["createOrder"].format( @@ -246,8 +248,8 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ol_cnt, 1 if all_local else 0, ) - self._client.run_query_json(createOrder) - self._client.run_query_json( + self._client.run_query_ignore_results(createOrder) + self._client.run_query_ignore_results( q["createNewOrder"].format(d_next_o_id, d_id, w_id) ) @@ -256,6 +258,7 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ## ---------------- item_data = [] total = 0 + insert_value_strings = [] for i in range(len(i_ids)): ol_number = i + 1 ol_supply_w_id = i_w_ids[i] @@ -296,7 +299,7 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: if ol_supply_w_id != w_id: s_remote_cnt += 1 - self._client.run_query_json( + self._client.run_query_ignore_results( q["updateStock"].format( s_quantity, s_ytd.quantize(decimal.Decimal("1.00")), @@ -319,7 +322,7 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ol_amount = ol_quantity * i_price total += ol_amount - createOrderLine = q["createOrderLine"].format( + createOrderLineValues = q["createOrderLineValues"].format( d_next_o_id, d_id, w_id, @@ -331,7 +334,7 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ol_amount, s_dist_xx, ) - self._client.run_query_json(createOrderLine) + insert_value_strings.append(createOrderLineValues) ## Add the info to be returned item_data.append( @@ -339,8 +342,14 @@ def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: ) ## FOR + # Do one multivalue insert. + insertOrderLines = q["createOrderLineMultivalue"] + ", ".join( + insert_value_strings + ) + self._client.run_query_ignore_results(insertOrderLines) + ## Commit! 
- self._client.run_query_json("COMMIT") + self._client.run_query_ignore_results("COMMIT") ## Adjust the total for the discount # print "c_discount:", c_discount, type(c_discount) @@ -373,7 +382,7 @@ def doOrderStatus(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: c_id = params["c_id"] c_last = params["c_last"] - self._client.run_query_json("BEGIN") + self._client.run_query_ignore_results("BEGIN") if c_id != None: r, _ = self._client.run_query_json( q["getCustomerByCustomerId"].format(w_id, d_id, c_id) @@ -404,7 +413,7 @@ def doOrderStatus(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: else: orderLines = [] - self._client.run_query_json("COMMIT") + self._client.run_query_ignore_results("COMMIT") return [customer, order, orderLines] except Exception as ex: @@ -427,7 +436,7 @@ def doPayment(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: c_last = params["c_last"] h_date = params["h_date"] # Python datetime - self._client.run_query_json("BEGIN") + self._client.run_query_ignore_results("BEGIN") if c_id != None: r, _ = self._client.run_query_json( q["getCustomerByCustomerId"].format(w_id, d_id, c_id) @@ -456,10 +465,10 @@ def doPayment(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: r, _ = self._client.run_query_json(q["getDistrict"].format(w_id, d_id)) district = r[0] - self._client.run_query_json( + self._client.run_query_ignore_results( q["updateWarehouseBalance"].format(h_amount, w_id) ) - self._client.run_query_json( + self._client.run_query_ignore_results( q["updateDistrictBalance"].format(h_amount, w_id, d_id) ) @@ -480,10 +489,10 @@ def doPayment(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: c_d_id, c_id, ) - self._client.run_query_json(updateCustomer) + self._client.run_query_ignore_results(updateCustomer) else: c_data = "" - self._client.run_query_json( + self._client.run_query_ignore_results( q["updateGCCustomer"].format( c_balance, c_ytd_payment, c_payment_cnt, c_w_id, c_d_id, c_id ), @@ -502,9 +511,9 @@ def doPayment(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: h_amount.quantize(decimal.Decimal("1.00")), h_data, ) - self._client.run_query_json(insertHistory) + self._client.run_query_ignore_results(insertHistory) - self._client.run_query_json("COMMIT") + self._client.run_query_ignore_results("COMMIT") # TPC-C 2.5.3.3: Must display the following fields: # W_ID, D_ID, C_ID, C_D_ID, C_W_ID, W_STREET_1, W_STREET_2, W_CITY, W_STATE, W_ZIP, @@ -531,7 +540,7 @@ def doStockLevel(self, params: Dict[str, Any]) -> int: d_id = params["d_id"] threshold = params["threshold"] - self._client.run_query_json("BEGIN") + self._client.run_query_ignore_results("BEGIN") r, _ = self._client.run_query_json(q["getOId"].format(w_id, d_id)) result = r[0] assert result @@ -544,7 +553,7 @@ def doStockLevel(self, params: Dict[str, Any]) -> int: ) result = r[0] - self._client.run_query_json("COMMIT") + self._client.run_query_ignore_results("COMMIT") return int(result[0]) except Exception as ex: diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py index c25bce1c..f10f111f 100644 --- a/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py +++ b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py @@ -37,6 +37,7 @@ import logging import os import pathlib +import numpy as np from datetime import datetime from pprint import pprint, pformat from brad.utils.rand_exponential_backoff import RandomizedExponentialBackoff @@ -62,6 +63,9 @@ def __init__(self, driver, scaleParameters, 
stop_on_error=False, pct_remote=0.1) self.total_workers = 1 self.worker_index = 0 + self.skew_alpha = None + self.skew_prng = None + ## DEF def execute( @@ -70,6 +74,7 @@ def execute( worker_index: int, total_workers: int, lat_sample_prob: float, + zipfian_alpha: Optional[float], ) -> results.Results: if RECORD_DETAILED_STATS_VAR in os.environ: import conductor.lib as cond @@ -115,6 +120,17 @@ def execute( *self.local_warehouse_range ) + if zipfian_alpha is not None: + self.skew_alpha = zipfian_alpha + self.skew_prng = np.random.default_rng(seed=42 ^ worker_index) + logging.info( + "Worker index %d - Selecting warehouse and items using a Zipfian distribution; a = %.2f", + worker_index, + self.skew_alpha, + ) + else: + logging.info("Worker index %d - Not using a Zipfian distribution") + r = results.Results(options) assert r logging.info("Executing benchmark for %d seconds" % duration) @@ -370,7 +386,19 @@ def makeWarehouseId(self): ): break else: - w_id = rand.number(*self.local_warehouse_range) + if self.skew_prng is not None: + # Skewed warehouse choice + min_warehouse, max_warehouse = self.local_warehouse_range + warehouse_span = max_warehouse - min_warehouse + 1 + while True: + # Chosen in range [1, inf) + candidate = self.skew_prng.zipf(a=self.skew_alpha) + if candidate <= warehouse_span: + break + return min_warehouse + (candidate - 1) + else: + # Uniformly randomly chosen warehouse + w_id = rand.number(*self.local_warehouse_range) assert w_id >= self.scaleParameters.starting_warehouse, ( "Invalid W_ID: %d" % w_id @@ -391,7 +419,14 @@ def makeCustomerId(self): ## DEF def makeItemId(self): - return rand.NURand(8191, 1, self.scaleParameters.items) + if self.skew_alpha is None: + return rand.NURand(8191, 1, self.scaleParameters.items) + else: + # Select item ID using a zipfian distribution. 
+ while True: + candidate = self.skew_prng.zipf(a=self.skew_alpha) + if candidate <= self.scaleParameters.items: + return candidate ## DEF diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py b/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py index 027d4fb3..57ff0910 100755 --- a/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py +++ b/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py @@ -218,7 +218,11 @@ def executorFunc( ) driver.executeStart() results = e.execute( - args["duration"], worker_index, total_workers, args["lat_sample_prob"] + args["duration"], + worker_index, + total_workers, + args["lat_sample_prob"], + args["zipfian_alpha"], ) driver.executeFinish() @@ -304,6 +308,11 @@ def executorFunc( default=0.1, help="The fraction of the transaction latencies to record.", ) + aparser.add_argument( + "--zipfian-alpha", + type=float, + help="The alpha parameter to use in a Zipfian distribution when selecting warehouse and item IDs.", + ) args = vars(aparser.parse_args()) if args["debug"]: @@ -386,6 +395,7 @@ def executorFunc( worker_index=0, total_workers=1, lat_sample_prob=args["lat_sample_prob"], + zipfian_alpha=args["zipfian_alpha"], ) driver.executeFinish() else: From 30119b8f29e55338dc7d5544c29398fda4ed83fc Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Wed, 1 May 2024 23:12:25 +0000 Subject: [PATCH 07/30] Fix command line argument --- tools/calibration/transactions/chbenchmark/run_instance.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/calibration/transactions/chbenchmark/run_instance.sh b/tools/calibration/transactions/chbenchmark/run_instance.sh index 3890358c..74a0cb8b 100755 --- a/tools/calibration/transactions/chbenchmark/run_instance.sh +++ b/tools/calibration/transactions/chbenchmark/run_instance.sh @@ -79,7 +79,7 @@ RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc aurora \ --clients $t_clients \ --scalefactor 1 \ --lat-sample-prob 0.25 \ - --txn-zipfian-alpha $txn_zipfian_alpha + --zipfian-alpha $txn_zipfian_alpha popd >&2 echo "Waiting 10 seconds before retrieving metrics..." From 025dff6cef87d07c4024582a5326575616abf2a0 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Thu, 2 May 2024 19:17:20 -0400 Subject: [PATCH 08/30] Various changes to support the table movement experiments (#505) This is to make running them slightly less painful: - We avoid deleting tables from Athena - Add experiment configs for table movement (specialized scenario first) - Add tool to make physical alterations to the blueprint and placement Part of #487. 
--- .../15-e2e-scenarios-v2/specialized/COND | 10 ++ .../specialized/run_vector_workload_tm.sh | 64 +++++++ .../specialized/specialized_config_tm.yml | 166 ++++++++++++++++++ src/brad/admin/table_adjustments.py | 112 ++++++++++++ src/brad/blueprint/sql_gen/table.py | 17 ++ src/brad/config/file.py | 16 ++ src/brad/daemon/transition_orchestrator.py | 25 ++- src/brad/exec/admin.py | 2 + 8 files changed, 403 insertions(+), 9 deletions(-) create mode 100755 experiments/15-e2e-scenarios-v2/specialized/run_vector_workload_tm.sh create mode 100644 experiments/15-e2e-scenarios-v2/specialized/specialized_config_tm.yml create mode 100644 src/brad/admin/table_adjustments.py diff --git a/experiments/15-e2e-scenarios-v2/specialized/COND b/experiments/15-e2e-scenarios-v2/specialized/COND index 439d850b..c7d38ced 100644 --- a/experiments/15-e2e-scenarios-v2/specialized/COND +++ b/experiments/15-e2e-scenarios-v2/specialized/COND @@ -19,6 +19,16 @@ run_experiment( }, ) +run_experiment( + name="brad_100g_vector_tm", + run="./run_vector_workload_tm.sh", + options={ + # NOTE: This has table movement enabled. + "system-config-file": "specialized_config_tm.yml", + **COMMON_CONFIGS, + }, +) + run_experiment( name="hand_designed_100g_vector", run="./run_vector_workload.sh", diff --git a/experiments/15-e2e-scenarios-v2/specialized/run_vector_workload_tm.sh b/experiments/15-e2e-scenarios-v2/specialized/run_vector_workload_tm.sh new file mode 100755 index 00000000..a6209ec4 --- /dev/null +++ b/experiments/15-e2e-scenarios-v2/specialized/run_vector_workload_tm.sh @@ -0,0 +1,64 @@ +#! /bin/bash + +script_loc=$(cd $(dirname $0) && pwd -P) +cd $script_loc +source ../common.sh + +# Arguments: +# --config-file +# --planner-config-file +# --query-indexes +extract_named_arguments $@ + +# Repeating query indexes: +# 51, 53, 58, 61, 62, 64, 65, 66, 69, 72, 73, 74, 77, 86, 91 +# +# Touch `title`: +# 65, 69, 73 +# +# Heavy repeating query indexes: +# 14, 54, 59, 60, 71, 75 +# +# Touch `title`: +# 14, 54, 59, 75 + +# General scenario: +# Aurora is being used for queries involving `title` because of the vector +# similarity queries that also touch `title`. After deploying BRAD, it realizes +# that it's better to replicate `title` and route the rest of the queries onto +# Redshift. + +query_indices="62,64,65,66,69,72,73,74,91,59" +heavier_queries="14,54,60,71,75" +all_queries="${query_indices},${heavier_queries}" + +start_brad $system_config_file $physical_config_file +log_workload_point "brad_start_initiated" +sleep 30 + +log_workload_point "clients_starting" +start_repeating_olap_runner 8 5 5 $all_queries "ra_8" +rana_pid=$runner_pid + +start_other_repeating_runner 2 8 5 "ra_vector" 8 +other_pid=$runner_pid + +start_txn_runner_serial 4 # Implicit: --dataset-type +txn_pid=$runner_pid +log_workload_point "clients_started" + +function inner_cancel_experiment() { + cancel_experiment $rana_pid $txn_pid $other_pid +} + +trap "inner_cancel_experiment" INT +trap "inner_cancel_experiment" TERM + +# Note that this line is different from the TM-disabled version (3 hours instead of 2). +sleep $((3 * 60 * 60)) # Wait for 3 hours. +log_workload_point "experiment_done" + +# Shut down everything now. +>&2 echo "Experiment done. Shutting down runners..." 
+graceful_shutdown $rana_pid $txn_pid $other_pid +log_workload_point "shutdown_complete" diff --git a/experiments/15-e2e-scenarios-v2/specialized/specialized_config_tm.yml b/experiments/15-e2e-scenarios-v2/specialized/specialized_config_tm.yml new file mode 100644 index 00000000..41d3ca39 --- /dev/null +++ b/experiments/15-e2e-scenarios-v2/specialized/specialized_config_tm.yml @@ -0,0 +1,166 @@ +# This file contains configurations that are used by BRAD. These are default +# values and should be customized for specific situations. + +# BRAD's front end servers will listen for client connections on this interface +# and port. If `num_front_ends` is greater than one, subsequent front ends will +# listen on successive ports (e.g., 6584, 6585, etc.). +front_end_interface: "0.0.0.0" +front_end_port: 6583 +num_front_ends: 16 + +# Logging paths. If the value is in ALL_CAPS (with underscores), it is +# interpreted as an environment variable (BRAD will log to the path stored in +# the environment variable). + +# Where BRAD's daemon process will write its logs. +daemon_log_file: COND_OUT + +# Where BRAD's front end processes will write their logs. +front_end_log_path: COND_OUT + +# Where BRAD's blueprint planner will write debug logs. +planner_log_path: COND_OUT + +# Where BRAD's metrics loggers will write their logs. +metrics_log_path: COND_OUT + +# Probability that each transactional query will be logged. +txn_log_prob: 0.01 + +# Set to a non-zero value enable automatic data syncing. When this is set to 0, +# automatic syncing is disabled. +data_sync_period_seconds: 0 + +# BRAD's front end servers will report their metrics at regular intervals. +front_end_metrics_reporting_period_seconds: 30 +front_end_query_latency_buffer_size: 100 + +# `default` means to use the policy encoded in the blueprint. Other values will +# override the blueprint. +routing_policy: default + +# Whether to disable table movement for benchmark purposes (i.e., keep all +# tables on all engines.) +disable_table_movement: false +skip_sync_before_table_movement: true + +# Epoch length for metrics and forecasting. This is the granularity at which +# metrics/forecasting will be performed. +epoch_length: + weeks: 0 + days: 0 + hours: 0 + minutes: 1 + +# Blueprint planning strategy. +strategy: fp_query_based_beam + +# Used to specify the period of time over which to use data for planning. +# Currrently, this is a "look behind" window for the workload. +planning_window: + weeks: 0 + days: 0 + hours: 1 + minutes: 0 + +# Used to aggregate metrics collected in the planning window. +metrics_agg: + method: ewm # 'mean' is another option + alpha: 0.86466472 # 1 - 1 / e^2 + +# Used during planning. +reinterpret_second_as: 1 + +# The query distribution must change by at least this much for a new blueprint +# to be accepted. +query_dist_change_frac: 0.1 + +# The search bound for the provisioning. +max_provisioning_multiplier: 2.5 + +# Flag options for blueprint planning. +use_io_optimized_aurora: true +use_recorded_routing_if_available: true +ensure_tables_together_on_one_engine: true + +# Loads used to prime the system when no information is available. +aurora_initialize_load_fraction: 0.25 +redshift_initialize_load_fraction: 0.25 + +# BRAD will not reduce predicted load lower than these values. Raise these +# values to be more conservative against mispredictions. 
+aurora_min_load_removal_fraction: 0.8 +redshift_min_load_removal_fraction: 0.9 + +aurora_max_query_factor: 4.0 +aurora_max_query_factor_replace: 10000.0 +redshift_peak_load_threshold: 99.0 +redshift_peak_load_multiplier: 1.5 + +# Blueprint planning performance ceilings. +query_latency_p90_ceiling_s: 30.0 +txn_latency_p90_ceiling_s: 0.030 + +# Used for ordering blueprints during planning. +comparator: + type: benefit_perf_ceiling # or `perf_ceiling` + + benefit_horizon: # Only used by the `benefit_perf_ceiling` comparator + weeks: 0 + days: 0 + hours: 24 + minutes: 0 + + penalty_threshold: 0.8 # Only used by the `benefit_perf_ceiling` comparator + penalty_power: 2 # Only used by the `benefit_perf_ceiling` comparator + +# Used for precomputed predictions. +std_datasets: + - name: regular + path: workloads/IMDB_100GB/regular_test/ + - name: adhoc + path: workloads/IMDB_100GB/adhoc_test/ + +use_preset_redshift_clusters: false + +aurora_provisioning_search_distance: 1500.0 +redshift_provisioning_search_distance: 400.0 + +planner_max_workers: 16 + +# Blueprint planning trigger configs. + +triggers: + enabled: true + check_period_s: 90 # Triggers are checked every X seconds. + check_period_offset_s: 360 # Wait 6 mins before starting. + observe_new_blueprint_mins: 3 + + elapsed_time: + disabled: true + multiplier: 60 # Multiplier over `planning_window`. + + redshift_cpu: + lo: 15 + hi: 85 + sustained_epochs: 3 + + aurora_cpu: + lo: 10 + hi: 85 + sustained_epochs: 3 + + variable_costs: + disabled: true + threshold: 1.0 + + query_latency_ceiling: + ceiling_s: 30.0 + sustained_epochs: 3 + + txn_latency_ceiling: + ceiling_s: 0.030 + sustained_epochs: 3 + + recent_change: + delay_epochs: 5 diff --git a/src/brad/admin/table_adjustments.py b/src/brad/admin/table_adjustments.py new file mode 100644 index 00000000..75c35b4f --- /dev/null +++ b/src/brad/admin/table_adjustments.py @@ -0,0 +1,112 @@ +import asyncio +import logging + +from brad.asset_manager import AssetManager +from brad.blueprint.manager import BlueprintManager +from brad.config.engine import Engine +from brad.config.file import ConfigFile +from brad.blueprint.blueprint import Blueprint +from brad.blueprint.sql_gen.table import TableSqlGenerator +from brad.front_end.engine_connections import EngineConnections + +logger = logging.getLogger(__name__) + + +def register_admin_action(subparser) -> None: + parser = subparser.add_parser( + "table_adjustments", + help="Used to manually modify the physical tables in BRAD's underlying infrastructure.", + ) + parser.add_argument( + "--physical-config-file", + type=str, + required=True, + help="Path to BRAD's physical configuration file.", + ) + parser.add_argument( + "--schema-name", + type=str, + required=True, + help="The schema name to use.", + ) + parser.add_argument( + "action", + type=str, + help="The action to run {remove_blueprint_table, rename_table}.", + ) + parser.add_argument( + "--table-name", type=str, help="The name of the table.", required=True + ) + parser.add_argument("--engines", type=str, nargs="+", help="The engines involved.") + parser.add_argument( + "--new-table-name", type=str, help="The new table name, when applicable." + ) + parser.set_defaults(admin_action=table_adjustments) + + +async def table_adjustments_impl(args) -> None: + # 1. Load the config, blueprint, and provisioning. 
+ config = ConfigFile.load_from_physical_config(phys_config=args.physical_config_file) + assets = AssetManager(config) + + blueprint_mgr = BlueprintManager(config, assets, args.schema_name) + await blueprint_mgr.load() + blueprint = blueprint_mgr.get_blueprint() + directory = blueprint_mgr.get_directory() + + if args.action == "remove_blueprint_table": + # NOTE: This only removes the table from the blueprint. You need to + # manually remove it from the physical engines (if appropriate). + table_to_remove = args.table_name + new_blueprint = Blueprint( + schema_name=blueprint.schema_name(), + table_schemas=[ + table for table in blueprint.tables() if table.name != table_to_remove + ], + table_locations={ + table_name: locations + for table_name, locations in blueprint.table_locations().items() + if table_name != table_to_remove + }, + aurora_provisioning=blueprint.aurora_provisioning(), + redshift_provisioning=blueprint.redshift_provisioning(), + full_routing_policy=blueprint.get_routing_policy(), + ) + blueprint_mgr.force_new_blueprint_sync(new_blueprint, score=None) + + elif args.action == "rename_table": + engines = {Engine.from_str(engine_str) for engine_str in args.engines} + connections = EngineConnections.connect_sync( + config, + directory, + schema_name=args.schema_name, + autocommit=False, + specific_engines=engines, + ) + sqlgen = TableSqlGenerator(config, blueprint) + for engine in engines: + table = blueprint.get_table(args.table_name) + logger.info( + "On %s: Renaming table %s to %s", + str(engine), + table.name, + args.new_table_name, + ) + statements, run_on = sqlgen.generate_rename_table_sql( + table, engine, args.new_table_name + ) + conn = connections.get_connection(run_on) + cursor = conn.cursor_sync() + for stmt in statements: + cursor.execute_sync(stmt) + cursor.commit_sync() + + else: + logger.error("Unknown action %s", args.action) + + logger.info("Done.") + + +# This method is called by `brad.exec.admin.main`. +def table_adjustments(args): + asyncio.run(table_adjustments_impl(args)) diff --git a/src/brad/blueprint/sql_gen/table.py b/src/brad/blueprint/sql_gen/table.py index efc8e2c4..5ae0ab6d 100644 --- a/src/brad/blueprint/sql_gen/table.py +++ b/src/brad/blueprint/sql_gen/table.py @@ -232,6 +232,23 @@ def generate_extraction_progress_init( queries.append(initialize_template.format(table_name=table_name)) return (queries, Engine.Aurora) + def generate_rename_table_sql( + self, table: Table, location: Engine, new_name: str + ) -> Tuple[List[str], Engine]: + """ + Generates the SQL statements needed to rename a table on the given engine. + """ + if location == Engine.Aurora: + # Aurora is more complicated because we use a view with other + # metadata too. This is not currently needed. + raise RuntimeError("Aurora renames are currently unimplemented.") + + elif location == Engine.Redshift or location == Engine.Athena: + return ([f"ALTER TABLE {table.name} RENAME TO {new_name}"], location) + + else: + raise RuntimeError(f"Unsupported location {str(location)}") + def generate_create_index_sql( table: Table, indexes: List[Tuple[Column, ...]] diff --git a/src/brad/config/file.py b/src/brad/config/file.py index e7eda3d3..b8ef4054 100644 --- a/src/brad/config/file.py +++ b/src/brad/config/file.py @@ -190,6 +190,22 @@ def disable_table_movement(self) -> bool: # Table movement disabled by default. return True + @property + def skip_sync_before_movement(self) -> bool: + try: + return self._raw["skip_sync_before_table_movement"] + except KeyError: + # Skip by default. 
+ return True + + @property + def skip_athena_table_deletion(self) -> bool: + try: + return self._raw["skip_athena_table_deletion"] + except KeyError: + # Skip by default. + return True + @property def use_preset_redshift_clusters(self) -> bool: try: diff --git a/src/brad/daemon/transition_orchestrator.py b/src/brad/daemon/transition_orchestrator.py index 06bda3d9..4b2d02cc 100644 --- a/src/brad/daemon/transition_orchestrator.py +++ b/src/brad/daemon/transition_orchestrator.py @@ -131,14 +131,17 @@ async def run_prepare_then_transition( # 2. Sync tables (TODO: discuss more efficient alternatives - # possibly add a filter of tables to run_sync) - await self._data_sync_executor.establish_connections() - ran_sync = await self._data_sync_executor.run_sync( - self._blueprint_mgr.get_blueprint() - ) - logger.debug( - """Completed data sync step during transition. """ - f"""There were {'some' if ran_sync else 'no'} new writes to sync""" - ) + if not self._config.skip_sync_before_movement: + await self._data_sync_executor.establish_connections() + ran_sync = await self._data_sync_executor.run_sync( + self._blueprint_mgr.get_blueprint() + ) + logger.debug( + """Completed data sync step during transition. """ + f"""There were {'some' if ran_sync else 'no'} new writes to sync""" + ) + else: + logger.info("Not running table sync before movement.") # 3. Create tables in new locations as needed directory = self._blueprint_mgr.get_directory() @@ -628,7 +631,11 @@ async def _run_athena_post_transition( ) -> None: # Drop removed tables to_drop = [] - if table_diffs is not None and self._config.disable_table_movement is False: + if ( + table_diffs is not None + and self._config.disable_table_movement is False + and self._config.skip_athena_table_deletion is False + ): for table_diff in table_diffs: if Engine.Athena in table_diff.removed_locations(): to_drop.append(table_diff.table_name()) diff --git a/src/brad/exec/admin.py b/src/brad/exec/admin.py index 13a970ed..d70e7d7d 100644 --- a/src/brad/exec/admin.py +++ b/src/brad/exec/admin.py @@ -15,6 +15,7 @@ import brad.admin.replay_planner as replay_planner import brad.admin.clean_dataset as clean_dataset import brad.admin.alter_schema as alter_schema +import brad.admin.table_adjustments as table_adjustments logger = logging.getLogger(__name__) @@ -43,6 +44,7 @@ def register_command(subparsers) -> None: replay_planner.register_admin_action(admin_subparsers) clean_dataset.register_admin_action(admin_subparsers) alter_schema.register_admin_action(admin_subparsers) + table_adjustments.register_admin_action(admin_subparsers) parser.set_defaults(func=main) From e2971153f62edba80d6d87bea16b9140dc6821a8 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Fri, 3 May 2024 11:00:35 -0400 Subject: [PATCH 09/30] Additional table movement experiment improvements (#506) This is primarily for the specialized scenario. - Remove the embeddings table from Athena - Exclude it from the all tables constraint (in the VDBE abstraction, this table will have different constraints anyways, so this is natural) - Add a defensive data type conversion for the vector data type (unsure if unloading will even work) Part of #487. 
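One operational note, hedged: with the defaults added in the previous patch (skip_athena_table_deletion), the transition will not drop the Athena copy of embeddings even after it is removed from the blueprint placement. If that stale copy should also go away, it has to be dropped by hand. A minimal sketch using the AWS CLI, with the database name and query-results bucket as placeholders:

    # Placeholders: fill in the Athena database and query-results bucket.
    ATHENA_DB=brad_imdb
    ATHENA_OUT=s3://my-athena-results/brad/
    aws athena start-query-execution \
        --query-string "DROP TABLE IF EXISTS embeddings" \
        --query-execution-context Database=$ATHENA_DB \
        --result-configuration OutputLocation=$ATHENA_OUT

(Equivalently, the DROP can be issued from the Athena console; the point is only that this patch does not perform it automatically under the default skip flags.)
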
--- .../specialized/set_up_vector_blueprint.py | 4 +++- src/brad/blueprint/sql_gen/table.py | 7 +++++++ src/brad/planner/beam/fpqb.py | 6 +++++- src/brad/planner/beam/query_based.py | 6 +++++- src/brad/planner/beam/table_based.py | 6 +++++- 5 files changed, 25 insertions(+), 4 deletions(-) diff --git a/experiments/15-e2e-scenarios-v2/specialized/set_up_vector_blueprint.py b/experiments/15-e2e-scenarios-v2/specialized/set_up_vector_blueprint.py index cd79ecb3..53a834a5 100644 --- a/experiments/15-e2e-scenarios-v2/specialized/set_up_vector_blueprint.py +++ b/experiments/15-e2e-scenarios-v2/specialized/set_up_vector_blueprint.py @@ -133,8 +133,10 @@ def main(): new_placement[table.name] = [Engine.Aurora, Engine.Athena, Engine.Redshift] if table.name == "telemetry": new_placement[table.name] = [Engine.Athena] - if table.name == "embeddings" or table.name == "title": + if table.name == "title": new_placement[table.name] = [Engine.Aurora, Engine.Athena] + if table.name == "embeddings": + new_placement[table.name] = [Engine.Aurora] enum_blueprint.set_table_locations(new_placement) # 6. Transition to the new blueprint. diff --git a/src/brad/blueprint/sql_gen/table.py b/src/brad/blueprint/sql_gen/table.py index 5ae0ab6d..59b2d082 100644 --- a/src/brad/blueprint/sql_gen/table.py +++ b/src/brad/blueprint/sql_gen/table.py @@ -318,5 +318,12 @@ def _type_for(data_type: str, for_db: Engine) -> str: return "BIGINT" elif data_type_upper.startswith("VARCHAR") and for_db == Engine.Athena: return "STRING" + elif data_type_upper.startswith("VECTOR"): + if for_db == Engine.Athena: + return "BINARY" + elif for_db == Engine.Redshift: + return "VARBYTE" + else: + return data_type else: return data_type diff --git a/src/brad/planner/beam/fpqb.py b/src/brad/planner/beam/fpqb.py index d61c4718..8ac887c3 100644 --- a/src/brad/planner/beam/fpqb.py +++ b/src/brad/planner/beam/fpqb.py @@ -74,7 +74,11 @@ async def _run_replan_impl( # on at least one engine. This ensures that arbitrary unseen join # templates can always be immediately handled. all_tables = ", ".join( - [table.name for table in self._current_blueprint.tables()] + [ + table.name + for table in self._current_blueprint.tables() + if table.name != "embeddings" + ] ) next_workload.add_priming_analytical_query( f"SELECT 1 FROM {all_tables} LIMIT 1" diff --git a/src/brad/planner/beam/query_based.py b/src/brad/planner/beam/query_based.py index 7f6c8e95..eebfa834 100644 --- a/src/brad/planner/beam/query_based.py +++ b/src/brad/planner/beam/query_based.py @@ -75,7 +75,11 @@ async def _run_replan_impl( # on at least one engine. This ensures that arbitrary unseen join # templates can always be immediately handled. all_tables = ", ".join( - [table.name for table in self._current_blueprint.tables()] + [ + table.name + for table in self._current_blueprint.tables() + if table.name != "embeddings" + ] ) next_workload.add_priming_analytical_query( f"SELECT 1 FROM {all_tables} LIMIT 1" diff --git a/src/brad/planner/beam/table_based.py b/src/brad/planner/beam/table_based.py index 36cc6138..2040598f 100644 --- a/src/brad/planner/beam/table_based.py +++ b/src/brad/planner/beam/table_based.py @@ -75,7 +75,11 @@ async def _run_replan_impl( # on at least one engine. This ensures that arbitrary unseen join # templates can always be immediately handled. 
all_tables = ", ".join( - [table.name for table in self._current_blueprint.tables()] + [ + table.name + for table in self._current_blueprint.tables() + if table.name != "embeddings" + ] ) next_workload.add_priming_analytical_query( f"SELECT 1 FROM {all_tables} LIMIT 1" From dc45218cb6a7fb7fb01860288eb083c5ab6d48f3 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Fri, 3 May 2024 11:46:41 -0400 Subject: [PATCH 10/30] Additional table movement support fixes - Ensure consistent serialized schema names - Make drop table operator tolerant of missing tables - Print serialized schema name --- src/brad/admin/modify_blueprint.py | 11 ++++++++++- src/brad/blueprint/blueprint.py | 2 ++ src/brad/data_sync/operators/drop_tables.py | 2 +- src/brad/planner/enumeration/blueprint.py | 4 ++-- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/brad/admin/modify_blueprint.py b/src/brad/admin/modify_blueprint.py index 7b0a530c..64ca6c75 100644 --- a/src/brad/admin/modify_blueprint.py +++ b/src/brad/admin/modify_blueprint.py @@ -149,6 +149,13 @@ def register_admin_action(subparser) -> None: help="Set to abort an in-progress transition. " "Only do this if you know what you are doing!", ) + parser.add_argument( + "--reset-schema-name", + action="store_true", + help="Set to ensure the serialized schema name is the same as the " + "passed-in schema name. Sometimes there may be a mismatch, which can " + "cause problems.", + ) parser.set_defaults(admin_action=modify_blueprint) @@ -350,7 +357,9 @@ def modify_blueprint(args) -> None: enum_blueprint.set_routing_policy(full_policy) # 6. Write the changes back. - modified_blueprint = enum_blueprint.to_blueprint() + modified_blueprint = enum_blueprint.to_blueprint( + forced_schema_name=args.schema_name if args.reset_schema_name else None + ) if blueprint == modified_blueprint: logger.info("No changes made to the blueprint.") return diff --git a/src/brad/blueprint/blueprint.py b/src/brad/blueprint/blueprint.py index ba169a97..2610278a 100644 --- a/src/brad/blueprint/blueprint.py +++ b/src/brad/blueprint/blueprint.py @@ -140,6 +140,8 @@ def __repr__(self) -> str: "---", indefinite_policies, definite_policy, + "---", + f"Schema name: {self.schema_name()}", ] ) diff --git a/src/brad/data_sync/operators/drop_tables.py b/src/brad/data_sync/operators/drop_tables.py index c8d41ed6..35f7b5df 100644 --- a/src/brad/data_sync/operators/drop_tables.py +++ b/src/brad/data_sync/operators/drop_tables.py @@ -30,7 +30,7 @@ def __repr__(self) -> str: ) async def execute(self, ctx: ExecutionContext) -> "Operator": - query_template = "DROP TABLE {}" + query_template = "DROP TABLE IF EXISTS {}" if self._engine == Engine.Aurora: for table in self._table_names: diff --git a/src/brad/planner/enumeration/blueprint.py b/src/brad/planner/enumeration/blueprint.py index 8fff27dd..f166f29d 100644 --- a/src/brad/planner/enumeration/blueprint.py +++ b/src/brad/planner/enumeration/blueprint.py @@ -51,13 +51,13 @@ def set_routing_policy( self._current_routing_policy = routing_policy return self - def to_blueprint(self) -> Blueprint: + def to_blueprint(self, forced_schema_name: Optional[str] = None) -> Blueprint: """ Makes a copy of this object as a `Blueprint`. 
""" return Blueprint( - self.schema_name(), + self.schema_name() if forced_schema_name is None else forced_schema_name, self.tables(), table_locations={ name: locations.copy() From 938796d7bd12cf0cc313faa11fdb91afc9cf6410 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Fri, 3 May 2024 13:24:07 -0400 Subject: [PATCH 11/30] Make S3 loads to Redshift more permissive, add more logs to table movement --- src/brad/daemon/transition_orchestrator.py | 42 ++++++++++++++------ src/brad/data_sync/operators/load_from_s3.py | 2 + 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/brad/daemon/transition_orchestrator.py b/src/brad/daemon/transition_orchestrator.py index 4b2d02cc..9c065721 100644 --- a/src/brad/daemon/transition_orchestrator.py +++ b/src/brad/daemon/transition_orchestrator.py @@ -191,7 +191,7 @@ async def run_prepare_then_transition( table_awaitables.append(self._enforce_table_diff_additions(diff)) await asyncio.gather(*table_awaitables) - logger.debug("Table movement complete.") + logger.info("Table movement complete.") # Close connections await self._cxns.close() @@ -490,6 +490,12 @@ async def _run_aurora_post_transition( tables_to_drop.append(source_table_name(table_diff.table_name())) tables_to_drop.append(shadow_table_name(table_diff.table_name())) + logger.info("In transition: Dropping Aurora views %s", str(views_to_drop)) + logger.info( + "In transition: Dropping Aurora triggers %s", str(triggers_to_drop) + ) + logger.info("In transition: Dropping Aurora tables %s", str(tables_to_drop)) + ctx = self._new_execution_context() dv = DropViews(views_to_drop, Engine.Aurora) @@ -609,7 +615,7 @@ async def _run_redshift_post_transition( for table_diff in table_diffs: if Engine.Redshift in table_diff.removed_locations(): to_drop.append(table_diff.table_name()) - logger.debug(f"Tables to drop: {to_drop}") + logger.info("In transition: Dropping Redshift tables %s", str(to_drop)) d = DropTables(to_drop, Engine.Redshift) ctx = self._new_execution_context() await d.execute(ctx) @@ -639,6 +645,7 @@ async def _run_athena_post_transition( for table_diff in table_diffs: if Engine.Athena in table_diff.removed_locations(): to_drop.append(table_diff.table_name()) + logger.info("In transition: Dropping Athena tables %s", str(to_drop)) d = DropTables(to_drop, Engine.Athena) ctx = self._new_execution_context() await d.execute(ctx) @@ -677,28 +684,28 @@ async def _unload_table(self, table_name: str, s3_path: str) -> None: if Engine.Redshift in curr_locations: # Faster to write out from Redshift u = UnloadToS3(table_name, s3_path, engine=Engine.Redshift, delimiter=",") ctx = self._new_execution_context() - await u.execute(ctx) - logger.debug( - f"In transition: table {table_name} written to S3 from Redshift." + logger.info( + "In transition: table %s being written to S3 from Redshift.", table_name ) + await u.execute(ctx) elif Engine.Aurora in curr_locations: u = UnloadToS3(table_name, s3_path, engine=Engine.Aurora, delimiter=",") ctx = self._new_execution_context() - await u.execute(ctx) - logger.debug( - f"In transition: table {table_name} written to S3 from Aurora." + logger.info( + "In transition: table %s being written to S3 from Aurora.", table_name ) + await u.execute(ctx) elif Engine.Athena in curr_locations: u = UnloadToS3(table_name, s3_path, engine=Engine.Athena) ctx = self._new_execution_context() - await u.execute(ctx) - logger.debug( - f"In transition: table {table_name} written to S3 from Athena." 
+ logger.info( + "In transition: table %s being written to S3 from Athena.", table_name ) + await u.execute(ctx) else: logger.error( - f"""In transition: table {table_name} does not exist - on any engine in current blueprint.""" + "In transition: table %s does not exist on any engine in current blueprint.", + table_name, ) async def _load_table_to_engine(self, table_name: str, e: Engine) -> None: @@ -715,6 +722,9 @@ async def _load_table_to_engine(self, table_name: str, e: Engine) -> None: if e == Engine.Aurora: # Load table to aurora from S3 + logger.info( + "In transition: loading table %s into Aurora from S3", table_name + ) response = ctx.s3_client().list_objects_v2( Bucket=ctx.s3_bucket(), Prefix=ctx.s3_path() + s3_path_prefix ) @@ -758,12 +768,18 @@ async def _load_table_to_engine(self, table_name: str, e: Engine) -> None: await cursor.commit() elif e == Engine.Redshift: + logger.info( + "In transition: loading table %s into Redshift from S3", table_name + ) l = LoadFromS3(table_name, s3_path_prefix, e, delimiter=",", header_rows=1) await l.execute(ctx) nonsilent_assert(self._cxns is not None) assert self._cxns is not None self._cxns.get_connection(Engine.Redshift).cursor_sync().commit_sync() elif e == Engine.Athena: + logger.info( + "In transition: loading table %s into Athena from S3", table_name + ) l = LoadFromS3(table_name, s3_path_prefix, e, delimiter=",", header_rows=1) await l.execute(ctx) diff --git a/src/brad/data_sync/operators/load_from_s3.py b/src/brad/data_sync/operators/load_from_s3.py index ff984efd..248209f6 100644 --- a/src/brad/data_sync/operators/load_from_s3.py +++ b/src/brad/data_sync/operators/load_from_s3.py @@ -29,6 +29,8 @@ DELIMITER '{delimiter}' IGNOREHEADER {header_rows} REMOVEQUOTES + BLANKASNULL + IGNOREALLERRORS """ _ATHENA_CREATE_LOAD_TABLE = """ From 0fac52164e3903d6abf14483848c16430db42cc9 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Fri, 3 May 2024 14:39:30 -0400 Subject: [PATCH 12/30] Additional load argument fix --- src/brad/data_sync/operators/load_from_s3.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/brad/data_sync/operators/load_from_s3.py b/src/brad/data_sync/operators/load_from_s3.py index 248209f6..98b25397 100644 --- a/src/brad/data_sync/operators/load_from_s3.py +++ b/src/brad/data_sync/operators/load_from_s3.py @@ -29,7 +29,6 @@ DELIMITER '{delimiter}' IGNOREHEADER {header_rows} REMOVEQUOTES - BLANKASNULL IGNOREALLERRORS """ From 68954e773116de8a90bc3bb5608f4e532b980abe Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sat, 4 May 2024 10:14:53 -0400 Subject: [PATCH 13/30] Add table movement enabled scale down experiment and log movement progress (#507) Part of #487. 
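The movement-progress logging added below brackets each phase with a pair of system events. A minimal sketch of the pattern, assuming the `SystemEvent` members and the `SystemEventLogger.log(event, extra)` call shape shown in the diff that follows; the wrapper function itself is hypothetical.

    from brad.config.system_event import SystemEvent  # Module extended in this patch.

    def with_movement_events(event_logger, engine_name, do_movement) -> None:
        # Hypothetical wrapper: emit "started", run the movement work, emit "completed".
        if event_logger is not None:
            event_logger.log(SystemEvent.PostTableMovementStarted, engine_name)
        do_movement()
        if event_logger is not None:
            event_logger.log(SystemEvent.PostTableMovementCompleted, engine_name)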
--- .../15-e2e-scenarios-v2/scale_down/COND | 9 + .../scale_down/scale_down_config_tm.yml | 164 ++++++++++++++++++ .../scale_down/set_up_starting_blueprint.py | 3 +- src/brad/config/system_event.py | 6 + src/brad/daemon/transition_orchestrator.py | 35 ++++ 5 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 experiments/15-e2e-scenarios-v2/scale_down/scale_down_config_tm.yml diff --git a/experiments/15-e2e-scenarios-v2/scale_down/COND b/experiments/15-e2e-scenarios-v2/scale_down/COND index 22bdb68a..87694f9e 100644 --- a/experiments/15-e2e-scenarios-v2/scale_down/COND +++ b/experiments/15-e2e-scenarios-v2/scale_down/COND @@ -58,6 +58,15 @@ run_experiment( }, ) +run_experiment( + name="brad_100g_tm", + run="./run_workload.sh", + options={ + "system-config-file": "scale_down_config_tm.yml", + **COMMON_100G_CONFIGS, + }, +) + run_command( name="brad_100g_debug", run="./run_workload_debug.sh", diff --git a/experiments/15-e2e-scenarios-v2/scale_down/scale_down_config_tm.yml b/experiments/15-e2e-scenarios-v2/scale_down/scale_down_config_tm.yml new file mode 100644 index 00000000..2bdf9bfa --- /dev/null +++ b/experiments/15-e2e-scenarios-v2/scale_down/scale_down_config_tm.yml @@ -0,0 +1,164 @@ +# This file contains configurations that are used by BRAD. These are default +# values and should be customized for specific situations. + +# BRAD's front end servers will listen for client connections on this interface +# and port. If `num_front_ends` is greater than one, subsequent front ends will +# listen on successive ports (e.g., 6584, 6585, etc.). +front_end_interface: "0.0.0.0" +front_end_port: 6583 +num_front_ends: 8 + +# Logging paths. If the value is in ALL_CAPS (with underscores), it is +# interpreted as an environment variable (BRAD will log to the path stored in +# the environment variable). + +# Where BRAD's daemon process will write its logs. +daemon_log_file: COND_OUT + +# Where BRAD's front end processes will write their logs. +front_end_log_path: COND_OUT + +# Where BRAD's blueprint planner will write debug logs. +planner_log_path: COND_OUT + +# Where BRAD's metrics loggers will write their logs. +metrics_log_path: COND_OUT + +# Probability that each transactional query will be logged. +txn_log_prob: 0.01 + +# Set to a non-zero value enable automatic data syncing. When this is set to 0, +# automatic syncing is disabled. +data_sync_period_seconds: 0 + +# BRAD's front end servers will report their metrics at regular intervals. +front_end_metrics_reporting_period_seconds: 30 +front_end_query_latency_buffer_size: 100 + +# `default` means to use the policy encoded in the blueprint. Other values will +# override the blueprint. +routing_policy: default + +# Whether to disable table movement for benchmark purposes (i.e., keep all +# tables on all engines.) +disable_table_movement: false +skip_sync_before_table_movement: true + +# Epoch length for metrics and forecasting. This is the granularity at which +# metrics/forecasting will be performed. +epoch_length: + weeks: 0 + days: 0 + hours: 0 + minutes: 1 + +# Blueprint planning strategy. +strategy: fp_query_based_beam + +# Used to specify the period of time over which to use data for planning. +# Currrently, this is a "look behind" window for the workload. +planning_window: + weeks: 0 + days: 0 + hours: 1 + minutes: 0 + +# Used to aggregate metrics collected in the planning window. +metrics_agg: + method: ewm # 'mean' is another option + alpha: 0.86466472 # 1 - 1 / e^2 + +# Used during planning. 
+reinterpret_second_as: 1 + +# The query distribution must change by at least this much for a new blueprint +# to be accepted. +query_dist_change_frac: 0.1 + +# The search bound for the provisioning. +max_provisioning_multiplier: 2.5 + +# Flag options for blueprint planning. +use_io_optimized_aurora: true +use_recorded_routing_if_available: true +ensure_tables_together_on_one_engine: true + +# Loads used to prime the system when no information is available. +aurora_initialize_load_fraction: 0.25 +redshift_initialize_load_fraction: 0.25 + +# BRAD will not reduce predicted load lower than these values. Raise these +# values to be more conservative against mispredictions. +aurora_min_load_removal_fraction: 0.8 +redshift_min_load_removal_fraction: 0.8 + +# Blueprint planning performance ceilings. +query_latency_p90_ceiling_s: 30.0 +txn_latency_p90_ceiling_s: 0.030 + +aurora_provisioning_search_distance: 900.0 +redshift_provisioning_search_distance: 900.0 + +# Used for ordering blueprints during planning. +comparator: + type: benefit_perf_ceiling # or `perf_ceiling` + + benefit_horizon: # Only used by the `benefit_perf_ceiling` comparator + weeks: 0 + days: 0 + hours: 24 + minutes: 0 + + penalty_threshold: 0.8 # Only used by the `benefit_perf_ceiling` comparator + penalty_power: 2 # Only used by the `benefit_perf_ceiling` comparator + +aurora_max_query_factor: 4.0 +aurora_max_query_factor_replace: 10000.0 +redshift_peak_load_threshold: 99.0 +redshift_peak_load_multiplier: 1.5 + +planner_max_workers: 16 + +# Used for precomputed predictions. +std_datasets: + - name: regular + path: workloads/IMDB_100GB/regular_test/ + - name: adhoc + path: workloads/IMDB_100GB/adhoc_test/ + +# Blueprint planning trigger configs. + +triggers: + enabled: true + check_period_s: 90 # Triggers are checked every X seconds. + check_period_offset_s: 360 # Wait 6 mins before starting. + observe_new_blueprint_mins: 3 + + elapsed_time: + disabled: true + multiplier: 60 # Multiplier over `planning_window`. 
+ + redshift_cpu: + lo: 15 + hi: 85 + sustained_epochs: 3 + + aurora_cpu: + lo: 15 + hi: 85 + sustained_epochs: 3 + + variable_costs: + disabled: true + threshold: 1.0 + + query_latency_ceiling: + ceiling_s: 30.0 + sustained_epochs: 3 + + txn_latency_ceiling: + ceiling_s: 0.030 + sustained_epochs: 3 + + recent_change: + delay_epochs: 5 diff --git a/experiments/15-e2e-scenarios-v2/scale_down/set_up_starting_blueprint.py b/experiments/15-e2e-scenarios-v2/scale_down/set_up_starting_blueprint.py index c96eec27..542c6afa 100644 --- a/experiments/15-e2e-scenarios-v2/scale_down/set_up_starting_blueprint.py +++ b/experiments/15-e2e-scenarios-v2/scale_down/set_up_starting_blueprint.py @@ -71,6 +71,7 @@ def main(): help="Comma separated list of indices.", default="99,56,32,92,91,49,30,83,94,38,87,86,76,37,31,46", ) + parser.add_argument("--place-tables-both", action="store_true") args = parser.parse_args() set_up_logging(debug_mode=True) @@ -130,7 +131,7 @@ def main(): new_placement = {} aurora_txn = ["theatres", "showings", "ticket_orders", "movie_info", "aka_title"] for table in blueprint.tables(): - if table.name in aurora_txn: + if args.place_tables_both or table.name in aurora_txn: new_placement[table.name] = [Engine.Aurora, Engine.Redshift] else: new_placement[table.name] = [Engine.Redshift] diff --git a/src/brad/config/system_event.py b/src/brad/config/system_event.py index 68c125f5..71574149 100644 --- a/src/brad/config/system_event.py +++ b/src/brad/config/system_event.py @@ -37,3 +37,9 @@ class SystemEvent(enum.Enum): # Used when a service level objective is changed while BRAD is running (used # for experiments). ChangedSlos = "changed_slos" + + # Used to mark table movement progress. + PreTableMovementStarted = "pre_table_movement_started" + PreTableMovementCompleted = "pre_table_movement_completed" + PostTableMovementStarted = "post_table_movement_started" + PostTableMovementCompleted = "post_table_movement_completed" diff --git a/src/brad/daemon/transition_orchestrator.py b/src/brad/daemon/transition_orchestrator.py index 9c065721..8a94165e 100644 --- a/src/brad/daemon/transition_orchestrator.py +++ b/src/brad/daemon/transition_orchestrator.py @@ -143,6 +143,9 @@ async def run_prepare_then_transition( else: logger.info("Not running table sync before movement.") + if self._system_event_logger is not None: + self._system_event_logger.log(SystemEvent.PreTableMovementStarted) + # 3. Create tables in new locations as needed directory = self._blueprint_mgr.get_directory() @@ -192,6 +195,8 @@ async def run_prepare_then_transition( await asyncio.gather(*table_awaitables) logger.info("Table movement complete.") + if self._system_event_logger is not None: + self._system_event_logger.log(SystemEvent.PreTableMovementCompleted) # Close connections await self._cxns.close() @@ -480,6 +485,11 @@ async def _run_aurora_post_transition( and len(table_diffs) > 0 and self._config.disable_table_movement is False ): + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementStarted, "aurora" + ) + views_to_drop = [] triggers_to_drop = [] tables_to_drop = [] @@ -511,6 +521,11 @@ async def _run_aurora_post_transition( assert self._cxns is not None self._cxns.get_connection(Engine.Aurora).cursor_sync().commit_sync() + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementCompleted, "aurora" + ) + # Change the provisioning. 
if diff is not None: if new.num_nodes() == 0: @@ -611,6 +626,11 @@ async def _run_redshift_post_transition( ) -> None: # Drop removed tables if table_diffs is not None and self._config.disable_table_movement is False: + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementStarted, "redshift" + ) + to_drop = [] for table_diff in table_diffs: if Engine.Redshift in table_diff.removed_locations(): @@ -623,6 +643,11 @@ async def _run_redshift_post_transition( assert self._cxns is not None self._cxns.get_connection(Engine.Redshift).cursor_sync().commit_sync() + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementCompleted, "redshift" + ) + # Pause the cluster if we are transitioning to 0 nodes. if diff is not None: if diff.new_num_nodes() == 0: @@ -642,6 +667,11 @@ async def _run_athena_post_transition( and self._config.disable_table_movement is False and self._config.skip_athena_table_deletion is False ): + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementStarted, "athena" + ) + for table_diff in table_diffs: if Engine.Athena in table_diff.removed_locations(): to_drop.append(table_diff.table_name()) @@ -650,6 +680,11 @@ async def _run_athena_post_transition( ctx = self._new_execution_context() await d.execute(ctx) + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementCompleted, "athena" + ) + async def _enforce_table_diff_additions(self, diff: TableDiff) -> None: # Unload table to S3 table_name = diff.table_name() From 7fb971f2519b9a58115875e51647eb8c17bf93bb Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sat, 4 May 2024 10:33:51 -0400 Subject: [PATCH 14/30] Skip Aurora table deletion to speed up restarts --- src/brad/config/file.py | 8 ++++++++ src/brad/daemon/transition_orchestrator.py | 1 + 2 files changed, 9 insertions(+) diff --git a/src/brad/config/file.py b/src/brad/config/file.py index b8ef4054..c14facc2 100644 --- a/src/brad/config/file.py +++ b/src/brad/config/file.py @@ -206,6 +206,14 @@ def skip_athena_table_deletion(self) -> bool: # Skip by default. return True + @property + def skip_aurora_table_deletion(self) -> bool: + try: + return self._raw["skip_aurora_table_deletion"] + except KeyError: + # Skip by default. 
+ return True + @property def use_preset_redshift_clusters(self) -> bool: try: diff --git a/src/brad/daemon/transition_orchestrator.py b/src/brad/daemon/transition_orchestrator.py index 8a94165e..a08f5e26 100644 --- a/src/brad/daemon/transition_orchestrator.py +++ b/src/brad/daemon/transition_orchestrator.py @@ -484,6 +484,7 @@ async def _run_aurora_post_transition( table_diffs is not None and len(table_diffs) > 0 and self._config.disable_table_movement is False + and self._config.skip_aurora_table_deletion is False ): if self._system_event_logger is not None: self._system_event_logger.log( From cdc391af83f43737d6f08f12cac408980b6bda4c Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sat, 4 May 2024 17:21:14 -0400 Subject: [PATCH 15/30] Fix table movement connection context --- src/brad/daemon/transition_orchestrator.py | 19 ++++++++++++++----- src/brad/front_end/engine_connections.py | 6 ++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/brad/daemon/transition_orchestrator.py b/src/brad/daemon/transition_orchestrator.py index a08f5e26..75b6c208 100644 --- a/src/brad/daemon/transition_orchestrator.py +++ b/src/brad/daemon/transition_orchestrator.py @@ -480,8 +480,11 @@ async def _run_aurora_post_transition( table_diffs: Optional[list[TableDiff]], ) -> None: # Drop removed tables. + assert self._curr_blueprint is not None + aurora_on = self._curr_blueprint.aurora_provisioning().num_nodes() > 0 if ( - table_diffs is not None + aurora_on + and table_diffs is not None and len(table_diffs) > 0 and self._config.disable_table_movement is False and self._config.skip_aurora_table_deletion is False @@ -626,7 +629,13 @@ async def _run_redshift_post_transition( self, diff: Optional[ProvisioningDiff], table_diffs: Optional[list[TableDiff]] ) -> None: # Drop removed tables - if table_diffs is not None and self._config.disable_table_movement is False: + assert self._curr_blueprint is not None + redshift_on = self._curr_blueprint.redshift_provisioning().num_nodes() > 0 + if ( + redshift_on + and table_diffs is not None + and self._config.disable_table_movement is False + ): if self._system_event_logger is not None: self._system_event_logger.log( SystemEvent.PostTableMovementStarted, "redshift" @@ -825,9 +834,9 @@ def _new_execution_context(self) -> ExecutionContext: nonsilent_assert(self._cxns is not None) assert self._cxns is not None return ExecutionContext( - aurora=self._cxns.get_connection(Engine.Aurora), - athena=self._cxns.get_connection(Engine.Athena), - redshift=self._cxns.get_connection(Engine.Redshift), + aurora=self._cxns.get_connection_if_exists(Engine.Aurora), + athena=self._cxns.get_connection_if_exists(Engine.Athena), + redshift=self._cxns.get_connection_if_exists(Engine.Redshift), blueprint=self._blueprint_mgr.get_blueprint(), config=self._config, ) diff --git a/src/brad/front_end/engine_connections.py b/src/brad/front_end/engine_connections.py index 5c63dd3c..b1aa041c 100644 --- a/src/brad/front_end/engine_connections.py +++ b/src/brad/front_end/engine_connections.py @@ -274,6 +274,12 @@ def get_connection(self, engine: Engine) -> Connection: except KeyError as ex: raise RuntimeError("Not connected to {}".format(engine)) from ex + def get_connection_if_exists(self, engine: Engine) -> Optional[Connection]: + try: + return self._connection_map[engine] + except KeyError: + return None + def get_reader_connection( self, engine: Engine, specific_index: Optional[int] = None ) -> Connection: From 531599b8e34e3fe229988aeb5b70bfdb2efb2014 Mon Sep 17 00:00:00 2001 From: 
Geoffrey Yu Date: Sun, 5 May 2024 17:12:49 -0400 Subject: [PATCH 16/30] Scoring fixes for scale down and type fixes in scoring methods (#508) Part of #487. --- .../scoring/performance/unified_aurora.py | 16 +++++++++------- .../scoring/performance/unified_redshift.py | 14 +++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/brad/planner/scoring/performance/unified_aurora.py b/src/brad/planner/scoring/performance/unified_aurora.py index f015126c..eedb59ba 100644 --- a/src/brad/planner/scoring/performance/unified_aurora.py +++ b/src/brad/planner/scoring/performance/unified_aurora.py @@ -114,8 +114,10 @@ def predict_loads( current_has_replicas = curr_prov.num_nodes() > 1 next_has_replicas = next_prov.num_nodes() > 1 - curr_writer_cpu_util = ctx.metrics.aurora_writer_cpu_avg / 100 - curr_writer_cpu_util_denorm = curr_writer_cpu_util * aurora_num_cpus(curr_prov) + curr_writer_cpu_util = float(ctx.metrics.aurora_writer_cpu_avg / 100) + curr_writer_cpu_util_denorm = float( + curr_writer_cpu_util * aurora_num_cpus(curr_prov) + ) # We take a very conservative approach to query movement. If new queries # are added onto Aurora, we increase the load. But if queries are @@ -209,7 +211,7 @@ def predict_loads( # We currently have read replicas. curr_num_read_replicas = curr_prov.num_nodes() - 1 total_reader_cpu_denorm = ( - (ctx.metrics.aurora_reader_cpu_avg / 100) + float(ctx.metrics.aurora_reader_cpu_avg / 100) * aurora_num_cpus(curr_prov) * curr_num_read_replicas ) @@ -277,11 +279,11 @@ def compute_direct_cpu_denorm( per_query_cpu_denorm = np.clip( query_run_times * alpha, a_min=0.0, a_max=load_max ) - total_denorm = np.dot(per_query_cpu_denorm, arrival_weights) - max_query_cpu_denorm = per_query_cpu_denorm.max() + total_denorm = np.dot(per_query_cpu_denorm, arrival_weights).item() + max_query_cpu_denorm = (per_query_cpu_denorm * arrival_weights).max().item() else: # Edge case: Query with 0 arrival count (used as a constraint). - total_denorm = np.zeros_like(query_run_times) + total_denorm = 0.0 max_query_cpu_denorm = 0.0 if debug_dict is not None: debug_dict["aurora_total_cpu_denorm"] = total_denorm @@ -309,7 +311,7 @@ def query_movement_factor( total_next_latency = np.dot( curr_query_run_times, workload.get_arrival_counts_batch(query_indices) ) - return total_next_latency / norm_factor + return total_next_latency.item() / norm_factor @classmethod def predict_query_latency_load_resources( diff --git a/src/brad/planner/scoring/performance/unified_redshift.py b/src/brad/planner/scoring/performance/unified_redshift.py index 4f51b85b..e509cc9d 100644 --- a/src/brad/planner/scoring/performance/unified_redshift.py +++ b/src/brad/planner/scoring/performance/unified_redshift.py @@ -53,10 +53,10 @@ def compute( ctx.metrics.redshift_cpu_list is not None and ctx.metrics.redshift_cpu_list.shape[0] > 0 ): - avg_cpu = ctx.metrics.redshift_cpu_list.mean() + avg_cpu: float = ctx.metrics.redshift_cpu_list.mean().item() else: # This won't be used. This is actually max. 
- avg_cpu = ctx.metrics.redshift_cpu_avg + avg_cpu = float(ctx.metrics.redshift_cpu_avg) gamma_norm_factor = HotConfig.instance().get_value( "query_lat_p90", default=30.0 @@ -180,7 +180,7 @@ def predict_max_node_cpu_util( curr_cpu_util *= gamma curr_cpu_denorm = curr_cpu_util * redshift_num_cpus(curr_prov) - curr_max_cpu_denorm = curr_cpu_denorm.max() + curr_max_cpu_denorm = curr_cpu_denorm.max().item() ( peak_load, @@ -262,11 +262,11 @@ def compute_direct_cpu_denorm( per_query_cpu_denorm = np.clip( query_run_times * alpha, a_min=0.0, a_max=load_max ) - total_denorm = np.dot(per_query_cpu_denorm, arrival_weights) - max_query_cpu_denorm = per_query_cpu_denorm.max() + total_denorm = np.dot(per_query_cpu_denorm, arrival_weights).item() + max_query_cpu_denorm = (per_query_cpu_denorm * arrival_weights).max().item() else: # Edge case: Query with 0 arrival count (used as a constraint). - total_denorm = np.zeros_like(query_run_times) + total_denorm = 0.0 max_query_cpu_denorm = 0.0 if debug_dict is not None: debug_dict["redshift_total_cpu_denorm"] = total_denorm @@ -294,7 +294,7 @@ def query_movement_factor( total_next_latency = np.dot( curr_query_run_times, workload.get_arrival_counts_batch(query_indices) ) - return total_next_latency / norm_factor + return total_next_latency.item() / norm_factor @staticmethod def predict_query_latency_load_resources( From 3ea86e3b7a59e9387473012902902817dc4e976b Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Mon, 6 May 2024 10:00:21 -0400 Subject: [PATCH 17/30] Add A+R baseline to the scale down scenario --- .../15-e2e-scenarios-v2/scale_down/COND | 9 +++++ .../scale_down/run_ar_baseline.sh | 40 +++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100755 experiments/15-e2e-scenarios-v2/scale_down/run_ar_baseline.sh diff --git a/experiments/15-e2e-scenarios-v2/scale_down/COND b/experiments/15-e2e-scenarios-v2/scale_down/COND index 87694f9e..978d8cbf 100644 --- a/experiments/15-e2e-scenarios-v2/scale_down/COND +++ b/experiments/15-e2e-scenarios-v2/scale_down/COND @@ -85,3 +85,12 @@ run_experiment( **COMMON_100G_CONFIGS, }, ) + +run_experiment( + name="ar_100g", + run="./run_ar_baseline.sh", + options={ + # System config file not needed. + **COMMON_100G_CONFIGS, + }, +) diff --git a/experiments/15-e2e-scenarios-v2/scale_down/run_ar_baseline.sh b/experiments/15-e2e-scenarios-v2/scale_down/run_ar_baseline.sh new file mode 100755 index 00000000..be81b7a0 --- /dev/null +++ b/experiments/15-e2e-scenarios-v2/scale_down/run_ar_baseline.sh @@ -0,0 +1,40 @@ +#! /bin/bash + +script_loc=$(cd $(dirname $0) && pwd -P) +cd $script_loc +source ../common.sh + +# Arguments: +# --system-config-file +# --physical-config-file +# --query-indexes +extract_named_arguments $@ + +schema_name="imdb_extended_100g" + +log_workload_point "clients_starting" +start_redshift_serverless_olap_runner 8 15 5 $ra_query_indexes "ra_8" $schema_name +rana_pid=$runner_pid + +start_aurora_serverless_txn_runner_serial 4 $schema_name # Implicit: --dataset-type +txn_pid=$runner_pid + +log_workload_point "clients_started" + +function inner_cancel_experiment() { + cancel_experiment $rana_pid $txn_pid +} + +trap "inner_cancel_experiment" INT +trap "inner_cancel_experiment" TERM + +# The workload should run for 90 minutes. +# We will run for ~100 mins to add some buffer. +sleep $(( 100 * 60 )) + +# Shut down everything now. +log_workload_point "experiment_workload_done" +>&2 echo "Experiment done. Shutting down runners..." 
+graceful_shutdown $rana_pid $txn_pid +log_workload_point "shutdown_complete" + From 3578922f3a9e793757cf1fdd49097ddf57317142 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Mon, 6 May 2024 13:45:42 -0400 Subject: [PATCH 18/30] Add transaction model constants for CH-BenCHmark --- src/brad/planner/constants.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/brad/planner/constants.yml b/src/brad/planner/constants.yml index 4c393d51..87eb9e24 100644 --- a/src/brad/planner/constants.yml +++ b/src/brad/planner/constants.yml @@ -366,6 +366,16 @@ aurora_txns: b_p50: 0.0008631267119199038 b_p90: 0.002251814818009734 + # These constants are for the W = 1740 version of the dataset. + chbenchmark: + # Note that C_1, C_2 are meant to be the same for this dataset. + C_1: 0.008586008776871991 + C_2: 0.008586008776871991 + + K: 1.0293710231781006 + b_p50: 0.011220300570130348 + b_p90: 0.022309081628918648 + aurora_scaling: # [Deprecated] From a242352cdff3c5cc82db3a5583be9e3501e1de7e Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Mon, 6 May 2024 18:26:03 -0400 Subject: [PATCH 19/30] Check in CH-BenCHmark precomputed predictions Co-authored-by: Ziniu Wu --- workloads/chbenchmark/data_accessed-athena.npy | Bin 0 -> 216 bytes .../pred-data_accessed-athena-aurora.npy | Bin 0 -> 304 bytes .../chbenchmark/pred-data_accessed-athena.npy | Bin 0 -> 216 bytes .../pred-run_time_s-athena-aurora-redshift.npy | Bin 0 -> 656 bytes .../run_time_s-athena-aurora-redshift.npy | Bin 0 -> 656 bytes 5 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 workloads/chbenchmark/data_accessed-athena.npy create mode 100644 workloads/chbenchmark/pred-data_accessed-athena-aurora.npy create mode 100644 workloads/chbenchmark/pred-data_accessed-athena.npy create mode 100644 workloads/chbenchmark/pred-run_time_s-athena-aurora-redshift.npy create mode 100644 workloads/chbenchmark/run_time_s-athena-aurora-redshift.npy diff --git a/workloads/chbenchmark/data_accessed-athena.npy b/workloads/chbenchmark/data_accessed-athena.npy new file mode 100644 index 0000000000000000000000000000000000000000..591d89d8d60c2088a461e7df9bacc1db37af8ef1 GIT binary patch literal 216 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$7Mmm~03bhL411@8m82=m7`257TG5RmhdE}>m;JV*30~6m=>y>_`5g+_& z?OJ?e0xtMI(46O+^0wRW&B|6^mQ}ufVb`bnaBN`ozx%_<*Xv}Qe^Q}@f9kFuzCyVV Kd``PF_yYi1H9^Aw literal 0 HcmV?d00001 diff --git a/workloads/chbenchmark/pred-data_accessed-athena-aurora.npy b/workloads/chbenchmark/pred-data_accessed-athena-aurora.npy new file mode 100644 index 0000000000000000000000000000000000000000..157ba57ed9480f9129714e191b0595ff7b8f3634 GIT binary patch literal 304 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(4=E~51qv5u zBo?Fsxf(`BItoUbItsN4WCN~(EsTC!<(T|duVeO~!S&nEaHp`}jeIxXy`{_i&h2FL zHwDUv05OQZQ!)cgYa56AStL#KaXS3cPdLNPm)+mZKU&tr|1!6~-^HfuK1W~u_hVpa I*iQ=#0LJ@S-~a#s literal 0 HcmV?d00001 diff --git a/workloads/chbenchmark/pred-data_accessed-athena.npy b/workloads/chbenchmark/pred-data_accessed-athena.npy new file mode 100644 index 0000000000000000000000000000000000000000..86e2bda3632ee5853d5dca2c9a78e6e0c05ba02b GIT binary patch literal 216 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+l>qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$7Mmm~03bhL41FnKCjDB0?nEY0+WA>lH_1n*Ir?B6Rd^g{{rOW)z?PT*e z1iI{ea4IK$1C-QUeWTGqt>GPl3q#ir{%M_>K-0|2(g BK92wZ literal 0 HcmV?d00001 diff --git a/workloads/chbenchmark/pred-run_time_s-athena-aurora-redshift.npy b/workloads/chbenchmark/pred-run_time_s-athena-aurora-redshift.npy new file 
mode 100644 index 0000000000000000000000000000000000000000..be06e870c346f46313950f8df84b30449bcbc680 GIT binary patch literal 656 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$7Mmh?{nmP)#3giMV1_p)$*Goklfb;`K2?r3(z~C?+L@Ts?b_UT2XU=7U zXqY%dX_^Lze;_sjtPdvsV7;g&NPL0lt~3x06F+b?0Bp{J2MbZmS@2xQ9;EJp;c*o8 z3!VzegTxn`J3=)4(tAye&T)Jz5kb X-Vm|?>`J3=)4(tAye&T)Jz5kb X-Vm|?> Date: Mon, 6 May 2024 20:11:57 -0400 Subject: [PATCH 20/30] Check in updated predictions Co-authored-by: Ziniu Wu --- .../pred-run_time_s-athena-aurora-redshift.npy | Bin 656 -> 656 bytes .../run_time_s-athena-aurora-redshift.npy | Bin 656 -> 656 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/workloads/chbenchmark/pred-run_time_s-athena-aurora-redshift.npy b/workloads/chbenchmark/pred-run_time_s-athena-aurora-redshift.npy index be06e870c346f46313950f8df84b30449bcbc680..c13e41f82a9451888a907a59066e5d28ad55e383 100644 GIT binary patch literal 656 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$7Mmh?{nmP)#3giMV1_p)$*GoklfHVVx!+a3^fKkE$L@Ts?b_UTf@q{zy zGQs@PGz~BfRevBh0<8bR22o99@dnXdX(0ZAqX8)9EO@XGY|et`LiWh&9~d47t7mvB zB#$i4P{AMo=1*cru{YtY2iTm3Z(oH_)H5)Aux9{*ZC`|u#UIovf&GO`{Q>Ed3drgi z>W#%g`~;(N6mt}Il^cM>7bN6MfM|I5eA&wi;y*|{gW?_sDPALxc*2V~6!$AUmd*i* zGfdY<2?qx~NH{1wO;QER!{Q~W8SEa0%mcE>;tCvT;P5|C@l*v_{6S+CI6N0@j}}E1 WZwOfc_RoUV{}hqMA8dIl>;M1?hKH#D literal 656 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zl$1ZlV+i=qoAIaUsO_*m=~X4l#&V(cT3DEP6dh= zXCxM+0{I$7Mmh?{nmP)#3giMV1_p)$*Goklfb;`K2?r3(z~C?+L@Ts?b_UT2XU=7U zXqY%dX_^Lze;_sjtPdvsV7;g&NPL0lt~3x06F+b?0Bp{J2MbZmS@2xQ9;EJp;c*o8 z3!VzegTxn`J3=)4(tAye&T)Jz5kb X-Vm|?>yRK3Ht$pK)#ri~%8c*AMobP#{R$E*ls@dxb_3P5~?O|mM;;tq*796|gC zRaa2l^FVvYYkQ#hfwF%n;s?ra+k@0A=*J3xXqY<_w2yrUsb^So2SvTZ#eZObHN3uv zqJF^x7cmE*@&iR0svsI>jslD2Kd?Cr3=n@q{R>n8<^#= y^$I7F!Qu`J3=)4(tAye&T)Jz5kb X-Vm|?> Date: Mon, 6 May 2024 23:34:20 -0400 Subject: [PATCH 21/30] Check in hardware/load model constants for CH-BenCHmark --- src/brad/config/planner.py | 14 ++---- src/brad/planner/constants.yml | 43 ++++++++++++++----- .../scoring/performance/unified_aurora.py | 4 +- .../scoring/performance/unified_redshift.py | 4 +- 4 files changed, 41 insertions(+), 24 deletions(-) diff --git a/src/brad/config/planner.py b/src/brad/config/planner.py index 0017b95a..7771f2c8 100644 --- a/src/brad/config/planner.py +++ b/src/brad/config/planner.py @@ -255,15 +255,12 @@ def aurora_scaling_coefs(self) -> npt.NDArray: def aurora_txn_coefs(self, schema_name: str) -> Dict[str, float]: return self._raw["aurora_txns"][schema_name] - def aurora_new_scaling_coefs(self) -> npt.NDArray: + def aurora_new_scaling_coefs(self, schema_name: str) -> npt.NDArray: if self._aurora_new_scaling_coefs is None: - coefs = self._raw["aurora_scaling_new"] + coefs = self._raw["aurora_scaling_new"][schema_name] self._aurora_new_scaling_coefs = np.array([coefs["coef1"], coefs["coef2"]]) return self._aurora_new_scaling_coefs - def aurora_new_scaling_alpha(self) -> float: - return self._raw["aurora_scaling_new"]["alpha"] - ### ### Unified Redshift scaling ### @@ -275,17 +272,14 @@ def redshift_scaling_coefs(self) -> npt.NDArray: ) return self._redshift_scaling_coefs - def redshift_new_scaling_coefs(self) -> npt.NDArray: + def redshift_new_scaling_coefs(self, schema_name: str) -> npt.NDArray: if self._redshift_new_scaling_coefs is None: - coefs = self._raw["redshift_scaling_new"] + coefs = self._raw["redshift_scaling_new"][schema_name] self._redshift_new_scaling_coefs = np.array( [coefs["coef1"], coefs["coef2"]] ) 
return self._redshift_new_scaling_coefs - def redshift_new_scaling_alpha(self) -> float: - return self._raw["redshift_scaling_new"]["alpha"] - def use_io_optimized_aurora(self) -> bool: if "use_io_optimized_aurora" not in self._raw: # By default. diff --git a/src/brad/planner/constants.yml b/src/brad/planner/constants.yml index 87eb9e24..7b65852b 100644 --- a/src/brad/planner/constants.yml +++ b/src/brad/planner/constants.yml @@ -202,20 +202,43 @@ table_extract_bytes_per_row: ### aurora_scaling_new: - # Wait time (from queuing theory) - # alpha * avg_query_time * (u / (1 - u)) + base - alpha: 0.0464553 + imdb_extended_100g: + # Wait time (from queuing theory) + # alpha * avg_query_time * (u / (1 - u)) + base + alpha: 0.0464553 + + # Resources + # [coef1 (s/d) + coef2] * base + coef1: 0.75851053 + coef2: 0.5486482 - # Resources - # [coef1 (s/d) + coef2] * base - coef1: 0.75851053 - coef2: 0.5486482 + imdb_specialized_100g: + alpha: 0.0464553 + coef1: 0.75851053 + coef2: 0.5486482 + + chbenchmark: + # Queries cannot complete in time on Aurora. + alpha: 1.0 + coef1: 0.0 + coef2: 1.0 redshift_scaling_new: # Same model as above. - alpha: 0.730064 - coef1: 0.89125617 - coef2: 0.1139099 + imdb_extended_100g: + alpha: 0.730064 + coef1: 0.89125617 + coef2: 0.1139099 + + imdb_specialized_100g: + alpha: 0.730064 + coef1: 0.89125617 + coef2: 0.1139099 + + chbenchmark: + alpha: 1.0 # Now unused + coef1: 0.16853629 + coef2: 0.61977525 run_time_to_denorm_cpu: aurora: diff --git a/src/brad/planner/scoring/performance/unified_aurora.py b/src/brad/planner/scoring/performance/unified_aurora.py index eedb59ba..474aa033 100644 --- a/src/brad/planner/scoring/performance/unified_aurora.py +++ b/src/brad/planner/scoring/performance/unified_aurora.py @@ -376,7 +376,7 @@ def predict_query_latency_resources_batch( rf = np.array(resource_factors) basis = np.stack([rf, np.ones_like(rf)]) basis = np.transpose(basis) - coefs = ctx.planner_config.aurora_new_scaling_coefs() + coefs = ctx.planner_config.aurora_new_scaling_coefs(ctx.schema_name) coefs = np.multiply(coefs, basis) num_coefs = coefs.shape[1] @@ -467,7 +467,7 @@ def predict_base_latency( return np.ones_like(latency) * np.inf # Ideally we should adjust for load as well. 
resource_factor = _AURORA_BASE_RESOURCE_VALUE / aurora_num_cpus(prov) - coefs = ctx.planner_config.aurora_new_scaling_coefs() + coefs = ctx.planner_config.aurora_new_scaling_coefs(ctx.schema_name) coefs[0] *= resource_factor return latency / coefs.sum() diff --git a/src/brad/planner/scoring/performance/unified_redshift.py b/src/brad/planner/scoring/performance/unified_redshift.py index e509cc9d..2965b172 100644 --- a/src/brad/planner/scoring/performance/unified_redshift.py +++ b/src/brad/planner/scoring/performance/unified_redshift.py @@ -365,7 +365,7 @@ def predict_query_latency_resources_batch( rf = np.array(resource_factors) basis = np.stack([rf, np.ones_like(rf)]) basis = np.transpose(basis) - coefs = ctx.planner_config.redshift_new_scaling_coefs() + coefs = ctx.planner_config.redshift_new_scaling_coefs(ctx.schema_name) coefs = np.multiply(coefs, basis) num_coefs = coefs.shape[1] @@ -415,7 +415,7 @@ def predict_base_latency( resource_factor = _REDSHIFT_BASE_RESOURCE_VALUE / ( redshift_num_cpus(prov) * prov.num_nodes() ) - coefs = ctx.planner_config.redshift_new_scaling_coefs() + coefs = ctx.planner_config.redshift_new_scaling_coefs(ctx.schema_name) coefs[0] *= resource_factor return latency / coefs.sum() From ccd45c438ae61c15d7bbe5e5c641d07300ef2a13 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Thu, 9 May 2024 23:43:14 -0400 Subject: [PATCH 22/30] Add experiment configs for CH-BenCHmark scenario (#509) Part of #487. --- experiments/17-chbenchmark/common.sh | 58 +++++- experiments/17-chbenchmark/debug/COND | 25 +++ .../17-chbenchmark/debug/debug_config.yml | 12 +- experiments/17-chbenchmark/debug/run_full.sh | 25 +++ .../debug/set_up_starting_blueprint.sh | 20 +++ experiments/17-chbenchmark/scale_down/COND | 24 +++ .../17-chbenchmark/scale_down/brad.config | 6 + .../scale_down/ch_scale_down_config.yml | 167 ++++++++++++++++++ .../17-chbenchmark/scale_down/run_full.sh | 25 +++ .../scale_down/set_up_starting_blueprint.sh | 21 +++ src/brad/config/file.py | 6 + src/brad/daemon/daemon.py | 6 +- src/brad/front_end/front_end.py | 34 +++- src/brad/front_end/session.py | 5 +- .../set_up_starting_blueprint.py | 18 +- workloads/chbenchmark/queries.sql | 22 +++ 16 files changed, 457 insertions(+), 17 deletions(-) create mode 100755 experiments/17-chbenchmark/debug/run_full.sh create mode 100755 experiments/17-chbenchmark/debug/set_up_starting_blueprint.sh create mode 100644 experiments/17-chbenchmark/scale_down/COND create mode 100644 experiments/17-chbenchmark/scale_down/brad.config create mode 100644 experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml create mode 100755 experiments/17-chbenchmark/scale_down/run_full.sh create mode 100755 experiments/17-chbenchmark/scale_down/set_up_starting_blueprint.sh create mode 100644 workloads/chbenchmark/queries.sql diff --git a/experiments/17-chbenchmark/common.sh b/experiments/17-chbenchmark/common.sh index 2db49e0e..95ee520c 100644 --- a/experiments/17-chbenchmark/common.sh +++ b/experiments/17-chbenchmark/common.sh @@ -13,6 +13,7 @@ function start_brad() { } function run_tpcc() { + local results_name=$1 pushd ../../../workloads/chbenchmark/py-tpcc/ local args=( --no-load @@ -25,11 +26,66 @@ function run_tpcc() { if [[ ! -z $txn_zipfian_alpha ]]; then args+=(--zipfian-alpha $txn_zipfian_alpha) fi - RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc brad "${args[@]}" & + mkdir -p $COND_OUT/$results_name + RECORD_DETAILED_STATS=1 COND_OUT=$COND_OUT/$results_name python3 -m pytpcc.tpcc brad "${args[@]}" & tpcc_pid=$! 
popd } +function log_workload_point() { + msg=$1 + now=$(date --utc "+%Y-%m-%d %H:%M:%S") + echo "$now,$msg" >> $COND_OUT/points.log +} + +function start_repeating_olap_runner() { + local ra_clients=$1 + local ra_gap_s=$2 + local ra_gap_std_s=$3 + local query_indexes=$4 + local results_name=$5 + local client_offset=$6 + + local args=( + --num-clients $ra_clients + --num-front-ends $num_front_ends + --query-indexes $query_indexes + --query-bank-file $ra_query_bank_file + --avg-gap-s $ra_gap_s + --avg-gap-std-s $ra_gap_std_s + ) + + if [[ ! -z $ra_query_frequency_path ]]; then + args+=(--query-frequency-path $ra_query_frequency_path) + fi + + if [[ ! -z $client_offset ]]; then + args+=(--client-offset $client_offset) + fi + + >&2 echo "[Serial Repeating Analytics] Running with $ra_clients..." + results_dir=$COND_OUT/$results_name + mkdir -p $results_dir + + log_workload_point $results_name + COND_OUT=$results_dir python3.11 ../../../workloads/IMDB_extended/run_repeating_analytics_serial.py "${args[@]}" & + + # This is a special return value variable that we use. + runner_pid=$! +} + +function graceful_shutdown() { + for pid_var in "$@"; do + kill -INT $pid_var + done + for pid_var in "$@"; do + wait $pid_var + done + + kill -INT $brad_pid + wait $brad_pid +} + function extract_named_arguments() { # Evaluates any environment variables in this script's arguments. This script # should only be run on trusted input. diff --git a/experiments/17-chbenchmark/debug/COND b/experiments/17-chbenchmark/debug/COND index 7feaa352..7f403f67 100644 --- a/experiments/17-chbenchmark/debug/COND +++ b/experiments/17-chbenchmark/debug/COND @@ -81,3 +81,28 @@ run_experiment( "txn-zipfian-alpha": ZIPFIAN_ALPHA, }, ) + +# Query indices. +QUERIES = list(range(22)) +QUERIES.remove(4) +QUERIES.remove(13) +QUERIES_STR = ",".join([str(v) for v in QUERIES]) + +run_experiment( + name="run_full", + run="./run_full.sh", + options={ + "physical-config-file": "../../../config/physical_config_chbench.yml", + "system-config-file": "debug_config.yml", # Relative to one level up. + "schema-name": "chbenchmark", + "txn-config-file": "brad.config", + "txn-warehouses": 1740, + "txn-scale-factor": 1, # TBD + "t-clients": 1, # TBD + "num-front-ends": 2, # TBD + "run-for-s": 60 * 60, # One hour + "txn-zipfian-alpha": ZIPFIAN_ALPHA, + "ra-query-indexes": QUERIES_STR, + "ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql", + }, +) diff --git a/experiments/17-chbenchmark/debug/debug_config.yml b/experiments/17-chbenchmark/debug/debug_config.yml index c279878d..3b8a6015 100644 --- a/experiments/17-chbenchmark/debug/debug_config.yml +++ b/experiments/17-chbenchmark/debug/debug_config.yml @@ -6,7 +6,7 @@ # listen on successive ports (e.g., 6584, 6585, etc.). front_end_interface: "0.0.0.0" front_end_port: 6583 -num_front_ends: 1 +num_front_ends: 2 # If installed and enabled, BRAD will serve its UI from a webserver that listens # for connections on this network interface and port. @@ -42,7 +42,7 @@ front_end_query_latency_buffer_size: 100 # `default` means to use the policy encoded in the blueprint. Other values will # override the blueprint. -routing_policy: always_aurora +routing_policy: default # Whether to disable table movement for benchmark purposes (i.e., keep all # tables on all engines.) @@ -104,6 +104,8 @@ txn_latency_p90_ceiling_s: 0.030 # clusters instead of resizing the main Redshift cluster. use_preset_redshift_clusters: false +result_row_limit: 10 + # Used for ordering blueprints during planning. 
comparator: type: benefit_perf_ceiling # or `perf_ceiling` @@ -119,10 +121,8 @@ comparator: # Used for precomputed predictions. std_datasets: - - name: regular - path: workloads/IMDB_100GB/regular_test/ - - name: adhoc - path: workloads/IMDB_100GB/adhoc_test/ + - name: chbenchmark + path: workloads/chbenchmark/ # Blueprint planning trigger configs. diff --git a/experiments/17-chbenchmark/debug/run_full.sh b/experiments/17-chbenchmark/debug/run_full.sh new file mode 100755 index 00000000..f06a0504 --- /dev/null +++ b/experiments/17-chbenchmark/debug/run_full.sh @@ -0,0 +1,25 @@ +#! /bin/bash + +script_loc=$(cd $(dirname $0) && pwd -P) +cd $script_loc +source ../common.sh +extract_named_arguments $@ + +# Resolve paths into absolute paths +abs_txn_config_file=$(realpath $txn_config_file) +abs_system_config_file=$(realpath $system_config_file) +abs_physical_config_file=$(realpath $physical_config_file) + +export BRAD_IGNORE_BLUEPRINT=1 +start_brad $abs_system_config_file $abs_physical_config_file + +sleep 30 + +run_tpcc "t_1" +start_repeating_olap_runner 1 10 5 $ra_query_indexes "ch_1" $t_clients +ra_pid=$runner_pid + +sleep $run_for_s + +# Shut down. +graceful_shutdown $tpcc_pid $ra_pid diff --git a/experiments/17-chbenchmark/debug/set_up_starting_blueprint.sh b/experiments/17-chbenchmark/debug/set_up_starting_blueprint.sh new file mode 100755 index 00000000..2e7c9986 --- /dev/null +++ b/experiments/17-chbenchmark/debug/set_up_starting_blueprint.sh @@ -0,0 +1,20 @@ +#! /bin/bash + +if [ -z $1 ]; then + >&2 echo "Usage: $0 path/to/physical/config.yml" + exit 1 +fi + +script_loc=$(cd $(dirname $0) && pwd -P) +cd $script_loc +source ../common.sh + +python3 ../../../workloads/IMDB_extended/set_up_starting_blueprint.py \ + --schema-name chbenchmark \ + --query-bank-file ../../../workloads/chbenchmark/queries.sql \ + --redshift-queries "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21" \ + --redshift-provisioning "dc2.large:16" \ + --aurora-provisioning "db.r6g.xlarge:1" \ + --system-config-file debug_config.yml \ + --physical-config-file $1 \ + --override-definite-routing redshift diff --git a/experiments/17-chbenchmark/scale_down/COND b/experiments/17-chbenchmark/scale_down/COND new file mode 100644 index 00000000..f62230bc --- /dev/null +++ b/experiments/17-chbenchmark/scale_down/COND @@ -0,0 +1,24 @@ +ZIPFIAN_ALPHA = 5.0 + +# Query indices. +QUERIES = list(range(22)) +QUERIES_STR = ",".join([str(v) for v in QUERIES]) + +run_experiment( + name="run_full", + run="./run_full.sh", + options={ + "physical-config-file": "../../../config/physical_config_chbench.yml", + "system-config-file": "ch_scale_down_config.yml", # Relative to one level up. 
+ "schema-name": "chbenchmark", + "txn-config-file": "brad.config", + "txn-warehouses": 1740, + "txn-scale-factor": 1, # TBD + "t-clients": 1, # TBD + "num-front-ends": 2, # TBD + "run-for-s": 2 * 60 * 60, # 2 hours + "txn-zipfian-alpha": ZIPFIAN_ALPHA, + "ra-query-indexes": QUERIES_STR, + "ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql", + }, +) diff --git a/experiments/17-chbenchmark/scale_down/brad.config b/experiments/17-chbenchmark/scale_down/brad.config new file mode 100644 index 00000000..c71fe1e5 --- /dev/null +++ b/experiments/17-chbenchmark/scale_down/brad.config @@ -0,0 +1,6 @@ +# BradDriver Configuration File +[brad] +host = localhost +port = 6583 +isolation_level = REPEATABLE READ +use_worker_offset = true diff --git a/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml b/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml new file mode 100644 index 00000000..3e40530d --- /dev/null +++ b/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml @@ -0,0 +1,167 @@ +# This file contains configurations that are used by BRAD. These are default +# values and should be customized for specific situations. + +# BRAD's front end servers will listen for client connections on this interface +# and port. If `num_front_ends` is greater than one, subsequent front ends will +# listen on successive ports (e.g., 6584, 6585, etc.). +front_end_interface: "0.0.0.0" +front_end_port: 6583 +num_front_ends: 2 + +# If installed and enabled, BRAD will serve its UI from a webserver that listens +# for connections on this network interface and port. +ui_interface: "0.0.0.0" +ui_port: 7583 + +# Logging paths. If the value is in ALL_CAPS (with underscores), it is +# interpreted as an environment variable (BRAD will log to the path stored in +# the environment variable). + +# Where BRAD's daemon process will write its logs. +daemon_log_file: COND_OUT + +# Where BRAD's front end processes will write their logs. +front_end_log_path: COND_OUT + +# Where BRAD's blueprint planner will write debug logs. +planner_log_path: COND_OUT + +# Where BRAD's metrics loggers will write their logs. +metrics_log_path: COND_OUT + +# Probability that each transactional query will be logged. +txn_log_prob: 0.10 + +# Set to a non-zero value enable automatic data syncing. When this is set to 0, +# automatic syncing is disabled. +data_sync_period_seconds: 0 + +# BRAD's front end servers will report their metrics at regular intervals. +front_end_metrics_reporting_period_seconds: 30 +front_end_query_latency_buffer_size: 100 + +# `default` means to use the policy encoded in the blueprint. Other values will +# override the blueprint. +routing_policy: default + +# Whether to disable table movement for benchmark purposes (i.e., keep all +# tables on all engines.) +disable_table_movement: true + +# Epoch length for metrics and forecasting. This is the granularity at which +# metrics/forecasting will be performed. +epoch_length: + weeks: 0 + days: 0 + hours: 0 + minutes: 1 + +# Blueprint planning strategy. +strategy: fp_query_based_beam + +# Used to specify the period of time over which to use data for planning. +# Currrently, this is a "look behind" window for the workload. +planning_window: + weeks: 0 + days: 0 + hours: 1 + minutes: 0 + +# Used to aggregate metrics collected in the planning window. +metrics_agg: + method: ewm # 'mean' is another option + alpha: 0.86466472 # 1 - 1 / e^2 + +# Used during planning. 
+reinterpret_second_as: 1 + +# The query distribution must change by at least this much for a new blueprint +# to be accepted. +query_dist_change_frac: 0.1 + +# The search bound for the provisioning. +max_provisioning_multiplier: 2.5 + +# Flag options for blueprint planning. +use_io_optimized_aurora: true +use_recorded_routing_if_available: true +ensure_tables_together_on_one_engine: true + +# Loads used to prime the system when no information is available. +aurora_initialize_load_fraction: 0.25 +redshift_initialize_load_fraction: 0.25 + +# BRAD will not reduce predicted load lower than these values. Raise these +# values to be more conservative against mispredictions. +aurora_min_load_removal_fraction: 0.8 +redshift_min_load_removal_fraction: 0.8 + +# Blueprint planning performance ceilings. +query_latency_p90_ceiling_s: 360.0 +txn_latency_p90_ceiling_s: 0.080 + +# If set to true, BRAD will attempt to use the specified preset Redshift +# clusters instead of resizing the main Redshift cluster. +use_preset_redshift_clusters: false + +result_row_limit: 10 + +# Used for ordering blueprints during planning. +comparator: + type: benefit_perf_ceiling # or `perf_ceiling` + + benefit_horizon: # Only used by the `benefit_perf_ceiling` comparator + weeks: 0 + days: 0 + hours: 24 + minutes: 0 + + penalty_threshold: 0.8 # Only used by the `benefit_perf_ceiling` comparator + penalty_power: 8 # Only used by the `benefit_perf_ceiling` comparator + +# Used for precomputed predictions. +std_datasets: + - name: chbenchmark + path: workloads/chbenchmark/ + +# Blueprint planning trigger configs. + +triggers: + enabled: false + check_period_s: 90 # Triggers are checked every X seconds. + check_period_offset_s: 360 # Wait 6 mins before starting. + + # Triggers will not fire for at least this many minutes after a new blueprint + # takes effect. Usually this should be greater than zero to give BRAD + # sufficient time to observe the effect of the blueprint on the workload. BRAD + # may wait longer to ensure metrics are also available for this many minutes. + observe_new_blueprint_mins: 5 + + elapsed_time: + disabled: true + multiplier: 60 # Multiplier over `planning_window`. + + redshift_cpu: + lo: 15 + hi: 85 + sustained_epochs: 3 + + aurora_cpu: + lo: 15 + hi: 85 + sustained_epochs: 3 + + variable_costs: + disabled: true + threshold: 1.0 + + query_latency_ceiling: + ceiling_s: 360.0 + sustained_epochs: 3 + + txn_latency_ceiling: + ceiling_s: 0.080 + sustained_epochs: 3 + + recent_change: + delay_epochs: 5 diff --git a/experiments/17-chbenchmark/scale_down/run_full.sh b/experiments/17-chbenchmark/scale_down/run_full.sh new file mode 100755 index 00000000..f06a0504 --- /dev/null +++ b/experiments/17-chbenchmark/scale_down/run_full.sh @@ -0,0 +1,25 @@ +#! /bin/bash + +script_loc=$(cd $(dirname $0) && pwd -P) +cd $script_loc +source ../common.sh +extract_named_arguments $@ + +# Resolve paths into absolute paths +abs_txn_config_file=$(realpath $txn_config_file) +abs_system_config_file=$(realpath $system_config_file) +abs_physical_config_file=$(realpath $physical_config_file) + +export BRAD_IGNORE_BLUEPRINT=1 +start_brad $abs_system_config_file $abs_physical_config_file + +sleep 30 + +run_tpcc "t_1" +start_repeating_olap_runner 1 10 5 $ra_query_indexes "ch_1" $t_clients +ra_pid=$runner_pid + +sleep $run_for_s + +# Shut down. 
+graceful_shutdown $tpcc_pid $ra_pid diff --git a/experiments/17-chbenchmark/scale_down/set_up_starting_blueprint.sh b/experiments/17-chbenchmark/scale_down/set_up_starting_blueprint.sh new file mode 100755 index 00000000..1735545e --- /dev/null +++ b/experiments/17-chbenchmark/scale_down/set_up_starting_blueprint.sh @@ -0,0 +1,21 @@ +#! /bin/bash + +if [ -z $1 ]; then + >&2 echo "Usage: $0 path/to/physical/config.yml" + exit 1 +fi + +script_loc=$(cd $(dirname $0) && pwd -P) +cd $script_loc +source ../common.sh + +python3 ../../../workloads/IMDB_extended/set_up_starting_blueprint.py \ + --schema-name chbenchmark \ + --query-bank-file ../../../workloads/chbenchmark/queries.sql \ + --redshift-queries "0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21" \ + --athena-queries "4" \ + --redshift-provisioning "dc2.large:16" \ + --aurora-provisioning "db.r6g.2xlarge:1" \ + --system-config-file ch_scale_down_config.yml \ + --physical-config-file $1 \ + --override-definite-routing redshift diff --git a/src/brad/config/file.py b/src/brad/config/file.py index c14facc2..fe781c23 100644 --- a/src/brad/config/file.py +++ b/src/brad/config/file.py @@ -288,6 +288,12 @@ def ui_port(self) -> int: else: return 7583 + def result_row_limit(self) -> Optional[int]: + try: + return self._raw["result_row_limit"] + except KeyError: + return None + def _extract_log_path(self, config_key: str) -> Optional[pathlib.Path]: if config_key not in self._raw: return None diff --git a/src/brad/daemon/daemon.py b/src/brad/daemon/daemon.py index f634eb05..56045c7b 100644 --- a/src/brad/daemon/daemon.py +++ b/src/brad/daemon/daemon.py @@ -66,6 +66,7 @@ from brad.planner.workload.builder import WorkloadBuilder from brad.planner.workload.provider import LoggedWorkloadProvider from brad.routing.policy import RoutingPolicy +from brad.routing.tree_based.forest_policy import ForestPolicy from brad.row_list import RowList from brad.utils.time_periods import period_start, universal_now from brad.ui.manager import UiManager @@ -328,7 +329,10 @@ async def _run_setup(self) -> None: or self._config.routing_policy == RoutingPolicy.Default ): logger.info("Setting up the cardinality estimator...") - if is_stub_mode: + blueprint = self._blueprint_mgr.get_blueprint() + policy = blueprint.get_routing_policy() + requires_estimator = isinstance(policy.definite_policy, ForestPolicy) + if is_stub_mode or not requires_estimator: estimator: Estimator = StubEstimator() else: estimator = await PostgresEstimator.connect( diff --git a/src/brad/front_end/front_end.py b/src/brad/front_end/front_end.py index f7f871fe..560ba211 100644 --- a/src/brad/front_end/front_end.py +++ b/src/brad/front_end/front_end.py @@ -453,8 +453,20 @@ async def _run_query_impl( else: connection = session.engines.get_reader_connection(engine_to_use) cursor = connection.cursor_sync() + # HACK: To work around dialect differences between + # Athena/Aurora/Redshift for now. This should be replaced by + # a more robust translation layer. + if ( + engine_to_use == Engine.Athena + and "ascii" in query_rep.raw_query + ): + translated_query = query_rep.raw_query.replace( + "ascii", "codepoint" + ) + else: + translated_query = query_rep.raw_query start = universal_now() - await cursor.execute(query_rep.raw_query) + await cursor.execute(translated_query) end = universal_now() except ( pyodbc.ProgrammingError, @@ -513,9 +525,23 @@ async def _run_query_impl( # Extract and return the results, if any. try: - # Using `fetchall_sync()` is lower overhead than the async interface. 
-            results = [tuple(row) for row in cursor.fetchall_sync()]
-            log_verbose(logger, "Responded with %d rows.", len(results))
+            result_row_limit = self._config.result_row_limit()
+            if result_row_limit is not None:
+                results = []
+                for _ in range(result_row_limit):
+                    row = cursor.fetchone_sync()
+                    if row is None:
+                        break
+                    results.append(tuple(row))
+                log_verbose(
+                    logger,
+                    "Responded with %d rows (limited to %d rows).",
+                    len(results), result_row_limit,
+                )
+            else:
+                # Using `fetchall_sync()` is lower overhead than the async interface.
+                results = [tuple(row) for row in cursor.fetchall_sync()]
+                log_verbose(logger, "Responded with %d rows.", len(results))
             return (
                 results,
                 (cursor.result_schema(results) if retrieve_schema else None),
diff --git a/src/brad/front_end/session.py b/src/brad/front_end/session.py
index 09ae5311..416e2515 100644
--- a/src/brad/front_end/session.py
+++ b/src/brad/front_end/session.py
@@ -11,6 +11,7 @@ from brad.front_end.engine_connections import EngineConnections
 from brad.planner.estimator import Estimator
 from brad.routing.policy import RoutingPolicy
+from brad.routing.tree_based.forest_policy import ForestPolicy
 from brad.data_stats.postgres_estimator import PostgresEstimator
 from brad.data_stats.stub_estimator import StubEstimator
 from brad.utils.time_periods import universal_now
@@ -117,7 +118,9 @@ async def create_new_session(self) -> Tuple[SessionId, Session]:
             routing_policy_override == RoutingPolicy.ForestTableSelectivity
             or routing_policy_override == RoutingPolicy.Default
         ):
-            if self._config.stub_mode_path() is None:
+            policy = blueprint.get_routing_policy()
+            requires_estimator = isinstance(policy.definite_policy, ForestPolicy)
+            if self._config.stub_mode_path() is None and requires_estimator:
                 estimator: Optional[Estimator] = await PostgresEstimator.connect(
                     self._schema_name, self._config
                 )
diff --git a/workloads/IMDB_extended/set_up_starting_blueprint.py b/workloads/IMDB_extended/set_up_starting_blueprint.py
index be5bf2c1..62589379 100644
--- a/workloads/IMDB_extended/set_up_starting_blueprint.py
+++ b/workloads/IMDB_extended/set_up_starting_blueprint.py
@@ -15,6 +15,7 @@
 from brad.routing.cached import CachedLocationPolicy
 from brad.routing.policy import RoutingPolicy
 from brad.routing.tree_based.forest_policy import ForestPolicy
+from brad.routing.always_one import AlwaysOneRouter
 from brad.utils import set_up_logging
 logger = logging.getLogger(__name__)
@@ -83,6 +84,11 @@ def main():
     parser.add_argument(
         "--aurora-provisioning", type=str, help="Format: <instance type>:<count>"
     )
+    parser.add_argument(
+        "--override-definite-routing",
+        type=str,
+        help="An engine to always route queries to when the indefinite policy does not capture them.",
+    )
     args = parser.parse_args()
     set_up_logging(debug_mode=True)
@@ -124,11 +130,15 @@ def main():
     # 5. Replace the policy.
enum_blueprint = EnumeratedBlueprint(blueprint) - definite_policy = asyncio.run( - ForestPolicy.from_assets( - args.schema_name, RoutingPolicy.ForestTableCardinality, assets + if args.override_definite_routing is not None: + routing_engine = Engine.from_str(args.override_definite_routing) + definite_policy = AlwaysOneRouter(routing_engine) + else: + definite_policy = asyncio.run( + ForestPolicy.from_assets( + args.schema_name, RoutingPolicy.ForestTableCardinality, assets + ) ) - ) replaced_policy = FullRoutingPolicy( indefinite_policies=[clp], definite_policy=definite_policy ) diff --git a/workloads/chbenchmark/queries.sql b/workloads/chbenchmark/queries.sql new file mode 100644 index 00000000..6ced3e67 --- /dev/null +++ b/workloads/chbenchmark/queries.sql @@ -0,0 +1,22 @@ +select ol_number, sum(ol_quantity) as sum_qty, sum(ol_amount) as sum_amount, avg(ol_quantity) as avg_qty, avg(ol_amount) as avg_amount, count(*) as count_order from order_line group by ol_number order by ol_number; +select su_suppkey, su_name, n_name, i_id, i_name, su_address, su_phone, su_comment from item, supplier, stock, nation, region, (select s_i_id as m_i_id, min(s_quantity) as m_s_quantity from stock, supplier, nation, region where mod((s_w_id*s_i_id),10000)=su_suppkey and su_nationkey=n_nationkey and n_regionkey=r_regionkey and r_name like 'Europ%' group by s_i_id) m where i_id = s_i_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and su_nationkey = n_nationkey and n_regionkey = r_regionkey and i_data like '%b' and r_name like 'Europ%' and i_id=m_i_id and s_quantity = m_s_quantity order by n_name, su_name, i_id; +select ol_o_id, ol_w_id, ol_d_id, sum(ol_amount) as revenue, o_entry_d from customer, new_order, orders, order_line where c_state like 'A%' and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and no_w_id = o_w_id and no_d_id = o_d_id and no_o_id = o_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id group by ol_o_id, ol_w_id, ol_d_id, o_entry_d order by revenue desc, o_entry_d; +select o_ol_cnt, count(*) as order_count from orders where exists (select * from order_line where o_id = ol_o_id and o_w_id = ol_w_id and o_d_id = ol_d_id and ol_delivery_d >= o_entry_d) group by o_ol_cnt order by o_ol_cnt; +select n_name, sum(ol_amount) as revenue from customer, orders, order_line, stock, supplier, nation, region where c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and ol_o_id = o_id and ol_w_id = o_w_id and ol_d_id=o_d_id and ol_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id),10000) = su_suppkey and ascii(cast(substring(c_state,1,1) as varchar(1))) = su_nationkey and su_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'Europe' group by n_name order by revenue desc; +select sum(ol_amount) as revenue from order_line where ol_quantity between 1 and 100000; +WITH inner_query AS (select su_nationkey as supp_nation, substring(c_state,1,1) as cust_nation, extract(year from o_entry_d) as l_year, ol_amount as revenue from supplier, stock, order_line, orders, customer, nation n1, nation n2 where ol_supply_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and su_nationkey = n1.n_nationkey and ascii(substring(c_state,1,1)) = n2.n_nationkey and ((n1.n_name = 'Germany' and n2.n_name = 'Cambodia') or (n1.n_name = 'Cambodia' and n2.n_name = 'Germany'))) SELECT supp_nation, cust_nation, l_year, sum(revenue) as revenue 
FROM inner_query group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year; +select extract(year from o_entry_d) as l_year, sum(case when n2.n_name = 'Germany' then ol_amount else 0 end) / sum(ol_amount) as mkt_share from item, supplier, stock, order_line, orders, customer, nation n1, nation n2, region where i_id = s_i_id and ol_i_id = s_i_id and ol_supply_w_id = s_w_id and mod((s_w_id * s_i_id),10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and n1.n_nationkey = ascii(cast(substring(c_state,1,1) as varchar(1))) and n1.n_regionkey = r_regionkey and ol_i_id < 1000 and r_name = 'Europe' and su_nationkey = n2.n_nationkey and i_data like '%b' and i_id = ol_i_id group by extract(year from o_entry_d) order by l_year; +select n_name, extract(year from o_entry_d) as l_year, sum(ol_amount) as sum_profit from item, stock, supplier, order_line, orders, nation where ol_i_id = s_i_id and ol_supply_w_id = s_w_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and ol_i_id = i_id and su_nationkey = n_nationkey and i_data like '%BB' group by n_name, extract(year from o_entry_d) order by n_name, l_year desc; +select c_id, c_last, sum(ol_amount) as revenue, c_city, c_phone, n_name from customer, orders, order_line, nation where c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and o_entry_d <= ol_delivery_d and n_nationkey = ascii(cast(substring(c_state,1,1) as varchar(1))) group by c_id, c_last, c_city, c_phone, n_name order by revenue desc; +select s_i_id, sum(s_order_cnt) as ordercount from stock, supplier, nation where mod((s_w_id * s_i_id),10000) = su_suppkey and su_nationkey = n_nationkey and n_name = 'Germany' group by s_i_id having sum(s_order_cnt) > (select sum(s_order_cnt) * .005 from stock, supplier, nation where mod((s_w_id * s_i_id),10000) = su_suppkey and su_nationkey = n_nationkey and n_name = 'Germany') order by ordercount desc; +select o_ol_cnt, sum(case when o_carrier_id = 1 or o_carrier_id = 2 then 1 else 0 end) as high_line_count, sum(case when o_carrier_id <> 1 and o_carrier_id <> 2 then 1 else 0 end) as low_line_count from orders, order_line where ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and o_entry_d <= ol_delivery_d group by o_ol_cnt order by o_ol_cnt; +select c_count, count(*) as custdist from (select c_id, count(o_id) from customer left outer join orders on ( c_w_id = o_w_id and c_d_id = o_d_id and c_id = o_c_id and o_carrier_id > 8) group by c_id) as c_orders (c_id, c_count) group by c_count order by custdist desc, c_count desc; +select 100.00 * sum(case when i_data like 'PR%' then ol_amount else 0 end) / (1+sum(ol_amount)) as promo_revenue from order_line, item where ol_i_id = i_id; +with revenue (supplier_no, total_revenue) as (select mod((s_w_id * s_i_id),10000) as supplier_no, sum(ol_amount) as total_revenue from order_line, stock where ol_i_id = s_i_id and ol_supply_w_id = s_w_id group by mod((s_w_id * s_i_id),10000)) select su_suppkey, su_name, su_address, su_phone, total_revenue from supplier, revenue where su_suppkey = supplier_no and total_revenue = (select max(total_revenue) from revenue) order by su_suppkey; +select i_name, substring(i_data, 1, 3) as brand, i_price, count(distinct (mod((s_w_id * s_i_id),10000))) as supplier_cnt from stock, item where i_id = s_i_id and i_data not like 'zz%' and (mod((s_w_id * 
s_i_id),10000) not in (select su_suppkey from supplier where su_comment like '%bad%')) group by i_name, substring(i_data, 1, 3), i_price order by supplier_cnt desc; +select sum(ol_amount) / 2.0 as avg_yearly from order_line, (select i_id, avg(ol_quantity) as a from item, order_line where i_data like '%b' and ol_i_id = i_id group by i_id) t where ol_i_id = t.i_id and ol_quantity < t.a; +select c_last, c_id o_id, o_entry_d, o_ol_cnt, sum(ol_amount) from customer, orders, order_line where c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id group by o_id, o_w_id, o_d_id, c_id, c_last, o_entry_d, o_ol_cnt having sum(ol_amount) > 200 order by sum(ol_amount) desc, o_entry_d; +select sum(ol_amount) as revenue from order_line, item where (ol_i_id = i_id and i_data like '%a' and ol_quantity >= 1 and ol_quantity <= 10 and i_price between 1 and 400000 and ol_w_id in (1,2,3)) or ( ol_i_id = i_id and i_data like '%b' and ol_quantity >= 1 and ol_quantity <= 10 and i_price between 1 and 400000 and ol_w_id in (1,2,4)) or ( ol_i_id = i_id and i_data like '%c' and ol_quantity >= 1 and ol_quantity <= 10 and i_price between 1 and 400000 and ol_w_id in (1,5,3)); +select su_name, su_address from supplier, nation where su_suppkey in (select mod(s_i_id * s_w_id, 10000) from stock, order_line where s_i_id in (select i_id from item where i_data like 'co%') and ol_i_id=s_i_id group by s_i_id, s_w_id, s_quantity having 2*s_quantity > sum(ol_quantity)) and su_nationkey = n_nationkey and n_name = 'Germany' order by su_name; +select su_name, count(*) as numwait from supplier, order_line l1, orders, stock, nation where ol_o_id = o_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id),10000) = su_suppkey and l1.ol_delivery_d > o_entry_d and not exists (select * from order_line l2 where l2.ol_o_id = l1.ol_o_id and l2.ol_w_id = l1.ol_w_id and l2.ol_d_id = l1.ol_d_id and l2.ol_delivery_d > l1.ol_delivery_d) and su_nationkey = n_nationkey and n_name = 'Germany' group by su_name order by numwait desc, su_name; +select substring(c_state,1,1) as country, count(*) as numcust, sum(c_balance) as totacctbal from customer where substring(c_phone,1,1) in ('1','2','3','4','5','6','7') and c_balance > (select avg(c_BALANCE) from customer where c_balance > 0.00 and substring(c_phone,1,1) in ('1','2','3','4','5','6','7')) and not exists (select * from orders where o_c_id = c_id and o_w_id = c_w_id and o_d_id = c_d_id) group by substring(c_state,1,1) order by substring(c_state,1,1); From 4f760a2fdc8e97d3e53b10c5ed2057576512a526 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Fri, 10 May 2024 12:38:37 -0400 Subject: [PATCH 23/30] Adjust initial CH-BenCHmark scenario --- experiments/17-chbenchmark/scale_down/COND | 6 +++--- .../17-chbenchmark/scale_down/ch_scale_down_config.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/experiments/17-chbenchmark/scale_down/COND b/experiments/17-chbenchmark/scale_down/COND index f62230bc..ee3a78a8 100644 --- a/experiments/17-chbenchmark/scale_down/COND +++ b/experiments/17-chbenchmark/scale_down/COND @@ -14,9 +14,9 @@ run_experiment( "txn-config-file": "brad.config", "txn-warehouses": 1740, "txn-scale-factor": 1, # TBD - "t-clients": 1, # TBD - "num-front-ends": 2, # TBD - "run-for-s": 2 * 60 * 60, # 2 hours + "t-clients": 4, # TBD + "num-front-ends": 5, # TBD + "run-for-s": 1 * 60 * 60, # 1 hour "txn-zipfian-alpha": ZIPFIAN_ALPHA, "ra-query-indexes": 
QUERIES_STR, "ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql", diff --git a/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml b/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml index 3e40530d..ca39cceb 100644 --- a/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml +++ b/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml @@ -6,7 +6,7 @@ # listen on successive ports (e.g., 6584, 6585, etc.). front_end_interface: "0.0.0.0" front_end_port: 6583 -num_front_ends: 2 +num_front_ends: 5 # If installed and enabled, BRAD will serve its UI from a webserver that listens # for connections on this network interface and port. @@ -127,7 +127,7 @@ std_datasets: # Blueprint planning trigger configs. triggers: - enabled: false + enabled: true check_period_s: 90 # Triggers are checked every X seconds. check_period_offset_s: 360 # Wait 6 mins before starting. From ddd44d24cef5c402309f757fdb96176c71eca8aa Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Fri, 10 May 2024 14:57:14 -0400 Subject: [PATCH 24/30] CH-BenCHmark: Adjust starting config again --- experiments/17-chbenchmark/scale_down/COND | 2 +- experiments/17-chbenchmark/scale_down/run_full.sh | 3 +-- .../17-chbenchmark/scale_down/set_up_starting_blueprint.sh | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/experiments/17-chbenchmark/scale_down/COND b/experiments/17-chbenchmark/scale_down/COND index ee3a78a8..fc283ad9 100644 --- a/experiments/17-chbenchmark/scale_down/COND +++ b/experiments/17-chbenchmark/scale_down/COND @@ -16,7 +16,7 @@ run_experiment( "txn-scale-factor": 1, # TBD "t-clients": 4, # TBD "num-front-ends": 5, # TBD - "run-for-s": 1 * 60 * 60, # 1 hour + "run-for-s": 2 * 60 * 60, # 1 hour "txn-zipfian-alpha": ZIPFIAN_ALPHA, "ra-query-indexes": QUERIES_STR, "ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql", diff --git a/experiments/17-chbenchmark/scale_down/run_full.sh b/experiments/17-chbenchmark/scale_down/run_full.sh index f06a0504..4d9f43ee 100755 --- a/experiments/17-chbenchmark/scale_down/run_full.sh +++ b/experiments/17-chbenchmark/scale_down/run_full.sh @@ -10,12 +10,11 @@ abs_txn_config_file=$(realpath $txn_config_file) abs_system_config_file=$(realpath $system_config_file) abs_physical_config_file=$(realpath $physical_config_file) -export BRAD_IGNORE_BLUEPRINT=1 start_brad $abs_system_config_file $abs_physical_config_file sleep 30 -run_tpcc "t_1" +run_tpcc "t_4" start_repeating_olap_runner 1 10 5 $ra_query_indexes "ch_1" $t_clients ra_pid=$runner_pid diff --git a/experiments/17-chbenchmark/scale_down/set_up_starting_blueprint.sh b/experiments/17-chbenchmark/scale_down/set_up_starting_blueprint.sh index 1735545e..77ac577b 100755 --- a/experiments/17-chbenchmark/scale_down/set_up_starting_blueprint.sh +++ b/experiments/17-chbenchmark/scale_down/set_up_starting_blueprint.sh @@ -12,8 +12,8 @@ source ../common.sh python3 ../../../workloads/IMDB_extended/set_up_starting_blueprint.py \ --schema-name chbenchmark \ --query-bank-file ../../../workloads/chbenchmark/queries.sql \ - --redshift-queries "0,1,2,3,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21" \ - --athena-queries "4" \ + --redshift-queries "0,1,2,3,5,6,7,8,10,11,12,13,14,15,16,18,19,20,21" \ + --athena-queries "4,9,17" \ --redshift-provisioning "dc2.large:16" \ --aurora-provisioning "db.r6g.2xlarge:1" \ --system-config-file ch_scale_down_config.yml \ From d72f117574bf84496446491b5bafc70eb61de97c Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sat, 11 May 2024 19:43:13 
-0400 Subject: [PATCH 25/30] Additional workload adjustments --- experiments/17-chbenchmark/scale_down/COND | 3 +++ 1 file changed, 3 insertions(+) diff --git a/experiments/17-chbenchmark/scale_down/COND b/experiments/17-chbenchmark/scale_down/COND index fc283ad9..eff507cc 100644 --- a/experiments/17-chbenchmark/scale_down/COND +++ b/experiments/17-chbenchmark/scale_down/COND @@ -2,6 +2,9 @@ ZIPFIAN_ALPHA = 5.0 # Query indices. QUERIES = list(range(22)) +QUERIES.remove(4) +QUERIES.remove(9) +QUERIES.remove(17) QUERIES_STR = ",".join([str(v) for v in QUERIES]) run_experiment( From 5aa6bd30db65487d2bd00ab71343abcf17b888ef Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sat, 11 May 2024 23:51:55 +0000 Subject: [PATCH 26/30] Decrease query latency ceiling --- .../17-chbenchmark/scale_down/ch_scale_down_config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml b/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml index ca39cceb..bdc3986c 100644 --- a/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml +++ b/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml @@ -97,7 +97,7 @@ aurora_min_load_removal_fraction: 0.8 redshift_min_load_removal_fraction: 0.8 # Blueprint planning performance ceilings. -query_latency_p90_ceiling_s: 360.0 +query_latency_p90_ceiling_s: 50.0 txn_latency_p90_ceiling_s: 0.080 # If set to true, BRAD will attempt to use the specified preset Redshift @@ -156,7 +156,7 @@ triggers: threshold: 1.0 query_latency_ceiling: - ceiling_s: 360.0 + ceiling_s: 50.0 sustained_epochs: 3 txn_latency_ceiling: From a081664249cc08931447dac706a983d953f907af Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sun, 12 May 2024 16:22:03 -0400 Subject: [PATCH 27/30] Add verbose logging to the transaction runner --- .../py-tpcc/pytpcc/runtime/executor.py | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py index f10f111f..3ea3eec9 100644 --- a/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py +++ b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py @@ -41,6 +41,7 @@ from datetime import datetime from pprint import pprint, pformat from brad.utils.rand_exponential_backoff import RandomizedExponentialBackoff +from brad.utils import create_custom_logger from typing import Optional from .. import constants @@ -103,6 +104,13 @@ def execute( logging.info("Not recording detailed stats.") options = {} + verbose_log_dir = out_path / "verbose_logs" + verbose_log_dir.mkdir(exist_ok=True) + verbose_logger = create_custom_logger( + "txn_runner_verbose", str(verbose_log_dir / f"runner_{worker_index}.log") + ) + verbose_logger.info("[T %d] Workload starting...", worker_index) + # Compute warehouse ranges. 
self.worker_index = worker_index self.total_workers = total_workers @@ -117,7 +125,7 @@ def execute( logging.info( "Worker index %d - Warehouse range: %d to %d (inclusive)", self.worker_index, - *self.local_warehouse_range + *self.local_warehouse_range, ) if zipfian_alpha is not None: @@ -145,9 +153,19 @@ def execute( if debug: logging.debug("Executing '%s' transaction" % txn) try: + verbose_logger.info("[T %d] Issuing transaction %s", worker_index, txn) val = self.driver.executeTransaction(txn, params) backoff = None + # if debug: logging.debug("%s\nParameters:\n%s\nResult:\n%s" % (txn, pformat(params), pformat(val))) + r.stopTransaction(txn_id) + verbose_logger.info( + "[T %d] Finished transaction %s, %d", worker_index, txn, txn_id + ) + except KeyboardInterrupt: + verbose_logger.info( + "[T %d] Aborting early due to KeyboardInterrupt", worker_index + ) return -1 except (Exception, AssertionError) as ex: if debug: @@ -156,6 +174,7 @@ def execute( elif random.random() < 0.01: logging.warning("Aborted transaction: %s: %s", txn, ex) traceback.print_exc(file=sys.stdout) + verbose_logger.exception("[T %d] Ran into error", worker_index) if self.stop_on_error: raise r.abortTransaction(txn_id) @@ -168,16 +187,16 @@ def execute( ) wait_s = backoff.wait_time_s() if wait_s is not None: + verbose_logger.info( + "[T %d] Backing off for %.4f seconds", worker_index, wait_s + ) time.sleep(wait_s) - continue - - # if debug: logging.debug("%s\nParameters:\n%s\nResult:\n%s" % (txn, pformat(params), pformat(val))) - - r.stopTransaction(txn_id) ## WHILE + verbose_logger.info("[T %d] Benchmark stopping...", worker_index) r.stopBenchmark() + verbose_logger.info("[T %d] Benchmark done.", worker_index) return r ## DEF From 24fb2c1dce1f2893188f8b8fec02f0180a3be0a7 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sun, 12 May 2024 16:28:10 -0400 Subject: [PATCH 28/30] Handle exceptions when attempting rollback --- workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py index 3ea3eec9..010ae97a 100644 --- a/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py +++ b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py @@ -178,7 +178,14 @@ def execute( if self.stop_on_error: raise r.abortTransaction(txn_id) - self.driver.ensureRollback() + + try: + self.driver.ensureRollback() + except: # pylint: disable=bare-except + # This may happen if we try to issue a rollback when the connection has dropped. + verbose_logger.exception( + "[T %d] Ran into error when running rollback.", worker_index + ) # Back off slightly. 
if backoff is None: From 4a793e57cbed7f6fe93ac3e5f9681ca1d13bcbf6 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sun, 12 May 2024 16:45:48 -0400 Subject: [PATCH 29/30] Additional workload adjustments --- experiments/17-chbenchmark/scale_down/COND | 1 + 1 file changed, 1 insertion(+) diff --git a/experiments/17-chbenchmark/scale_down/COND b/experiments/17-chbenchmark/scale_down/COND index eff507cc..a8a2a218 100644 --- a/experiments/17-chbenchmark/scale_down/COND +++ b/experiments/17-chbenchmark/scale_down/COND @@ -5,6 +5,7 @@ QUERIES = list(range(22)) QUERIES.remove(4) QUERIES.remove(9) QUERIES.remove(17) +QUERIES.remove(20) QUERIES_STR = ",".join([str(v) for v in QUERIES]) run_experiment( From 04e8112347f96704eed13bfe19bffcb81b4b920d Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sun, 12 May 2024 23:04:55 -0400 Subject: [PATCH 30/30] CH-BenCHmark: Add support for A+R baseline --- experiments/17-chbenchmark/common.sh | 52 +++++++++++++++++++ .../17-chbenchmark/scale_down/.gitignore | 1 + experiments/17-chbenchmark/scale_down/COND | 20 ++++++- .../scale_down/run_full_ar_baseline.sh | 24 +++++++++ workloads/chbenchmark/queries.sql | 2 +- 5 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 experiments/17-chbenchmark/scale_down/.gitignore create mode 100755 experiments/17-chbenchmark/scale_down/run_full_ar_baseline.sh diff --git a/experiments/17-chbenchmark/common.sh b/experiments/17-chbenchmark/common.sh index 95ee520c..56b91ca2 100644 --- a/experiments/17-chbenchmark/common.sh +++ b/experiments/17-chbenchmark/common.sh @@ -32,6 +32,26 @@ function run_tpcc() { popd } +function run_tpcc_aurora_serverless() { + local results_name=$1 + pushd ../../../workloads/chbenchmark/py-tpcc/ + local args=( + --no-load + --config $abs_txn_config_file + --warehouses $txn_warehouses + --duration $run_for_s + --clients $t_clients + --scalefactor $txn_scale_factor + ) + if [[ ! -z $txn_zipfian_alpha ]]; then + args+=(--zipfian-alpha $txn_zipfian_alpha) + fi + mkdir -p $COND_OUT/$results_name + RECORD_DETAILED_STATS=1 COND_OUT=$COND_OUT/$results_name python3 -m pytpcc.tpcc aurora "${args[@]}" & + tpcc_pid=$! + popd +} + function log_workload_point() { msg=$1 now=$(date --utc "+%Y-%m-%d %H:%M:%S") @@ -74,6 +94,38 @@ function start_repeating_olap_runner() { runner_pid=$! } +function start_repeating_olap_runner_redshift_serverless() { + local ra_clients=$1 + local ra_gap_s=$2 + local ra_gap_std_s=$3 + local query_indexes=$4 + local results_name=$5 + + local args=( + --num-clients $ra_clients + --num-front-ends $num_front_ends + --query-indexes $query_indexes + --query-bank-file $ra_query_bank_file + --avg-gap-s $ra_gap_s + --avg-gap-std-s $ra_gap_std_s + --brad-direct + --engine redshift + --serverless-redshift + --schema-name $schema_name + --config-file $abs_physical_config_file + ) + + >&2 echo "[Serial Repeating Analytics] Running with $ra_clients..." + results_dir=$COND_OUT/$results_name + mkdir -p $results_dir + + log_workload_point $results_name + COND_OUT=$results_dir python3.11 ../../../workloads/IMDB_extended/run_repeating_analytics_serial.py "${args[@]}" & + + # This is a special return value variable that we use. + runner_pid=$! 
+} + function graceful_shutdown() { for pid_var in "$@"; do kill -INT $pid_var diff --git a/experiments/17-chbenchmark/scale_down/.gitignore b/experiments/17-chbenchmark/scale_down/.gitignore new file mode 100644 index 00000000..0949a3cb --- /dev/null +++ b/experiments/17-chbenchmark/scale_down/.gitignore @@ -0,0 +1 @@ +aurora.config diff --git a/experiments/17-chbenchmark/scale_down/COND b/experiments/17-chbenchmark/scale_down/COND index a8a2a218..a78e79fb 100644 --- a/experiments/17-chbenchmark/scale_down/COND +++ b/experiments/17-chbenchmark/scale_down/COND @@ -20,7 +20,25 @@ run_experiment( "txn-scale-factor": 1, # TBD "t-clients": 4, # TBD "num-front-ends": 5, # TBD - "run-for-s": 2 * 60 * 60, # 1 hour + "run-for-s": 2 * 60 * 60, # 2 hours + "txn-zipfian-alpha": ZIPFIAN_ALPHA, + "ra-query-indexes": QUERIES_STR, + "ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql", + }, +) + + +run_experiment( + name="run_full_ar", + run="./run_full_ar_baseline.sh", + options={ + "physical-config-file": "../../../config/physical_config_chbench.yml", + "schema-name": "chbenchmark", + "txn-config-file": "aurora.config", + "txn-warehouses": 1740, + "txn-scale-factor": 1, # TBD + "t-clients": 4, # TBD + "run-for-s": 2 * 60 * 60, # 2 hours "txn-zipfian-alpha": ZIPFIAN_ALPHA, "ra-query-indexes": QUERIES_STR, "ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql", diff --git a/experiments/17-chbenchmark/scale_down/run_full_ar_baseline.sh b/experiments/17-chbenchmark/scale_down/run_full_ar_baseline.sh new file mode 100755 index 00000000..4e904032 --- /dev/null +++ b/experiments/17-chbenchmark/scale_down/run_full_ar_baseline.sh @@ -0,0 +1,24 @@ +#! /bin/bash + +script_loc=$(cd $(dirname $0) && pwd -P) +cd $script_loc +source ../common.sh +extract_named_arguments $@ + +# Resolve paths into absolute paths +abs_txn_config_file=$(realpath $txn_config_file) +abs_physical_config_file=$(realpath $physical_config_file) + +sleep 30 + +run_tpcc_aurora_serverless "t_4" +start_repeating_olap_runner_redshift_serverless 1 10 5 $ra_query_indexes "ch_1" +ra_pid=$runner_pid + +sleep $run_for_s + +# Shut down. 
+kill $tpcc_pid +kill $ra_pid +wait $tpcc_pid +wait $ra_pid diff --git a/workloads/chbenchmark/queries.sql b/workloads/chbenchmark/queries.sql index 6ced3e67..c21976be 100644 --- a/workloads/chbenchmark/queries.sql +++ b/workloads/chbenchmark/queries.sql @@ -4,7 +4,7 @@ select ol_o_id, ol_w_id, ol_d_id, sum(ol_amount) as revenue, o_entry_d from cust select o_ol_cnt, count(*) as order_count from orders where exists (select * from order_line where o_id = ol_o_id and o_w_id = ol_w_id and o_d_id = ol_d_id and ol_delivery_d >= o_entry_d) group by o_ol_cnt order by o_ol_cnt; select n_name, sum(ol_amount) as revenue from customer, orders, order_line, stock, supplier, nation, region where c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and ol_o_id = o_id and ol_w_id = o_w_id and ol_d_id=o_d_id and ol_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id),10000) = su_suppkey and ascii(cast(substring(c_state,1,1) as varchar(1))) = su_nationkey and su_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'Europe' group by n_name order by revenue desc; select sum(ol_amount) as revenue from order_line where ol_quantity between 1 and 100000; -WITH inner_query AS (select su_nationkey as supp_nation, substring(c_state,1,1) as cust_nation, extract(year from o_entry_d) as l_year, ol_amount as revenue from supplier, stock, order_line, orders, customer, nation n1, nation n2 where ol_supply_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and su_nationkey = n1.n_nationkey and ascii(substring(c_state,1,1)) = n2.n_nationkey and ((n1.n_name = 'Germany' and n2.n_name = 'Cambodia') or (n1.n_name = 'Cambodia' and n2.n_name = 'Germany'))) SELECT supp_nation, cust_nation, l_year, sum(revenue) as revenue FROM inner_query group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year; +WITH inner_query AS (select su_nationkey as supp_nation, substring(c_state,1,1) as cust_nation, extract(year from o_entry_d) as l_year, ol_amount as revenue from supplier, stock, order_line, orders, customer, nation n1, nation n2 where ol_supply_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and su_nationkey = n1.n_nationkey and ascii(cast(substring(c_state,1,1) as varchar(1))) = n2.n_nationkey and ((n1.n_name = 'Germany' and n2.n_name = 'Cambodia') or (n1.n_name = 'Cambodia' and n2.n_name = 'Germany'))) SELECT supp_nation, cust_nation, l_year, sum(revenue) as revenue FROM inner_query group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year; select extract(year from o_entry_d) as l_year, sum(case when n2.n_name = 'Germany' then ol_amount else 0 end) / sum(ol_amount) as mkt_share from item, supplier, stock, order_line, orders, customer, nation n1, nation n2, region where i_id = s_i_id and ol_i_id = s_i_id and ol_supply_w_id = s_w_id and mod((s_w_id * s_i_id),10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and n1.n_nationkey = ascii(cast(substring(c_state,1,1) as varchar(1))) and n1.n_regionkey = r_regionkey and ol_i_id < 1000 and r_name = 'Europe' and su_nationkey = n2.n_nationkey and i_data like '%b' and i_id = ol_i_id group by extract(year from o_entry_d) order by l_year; 
select n_name, extract(year from o_entry_d) as l_year, sum(ol_amount) as sum_profit from item, stock, supplier, order_line, orders, nation where ol_i_id = s_i_id and ol_supply_w_id = s_w_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and ol_i_id = i_id and su_nationkey = n_nationkey and i_data like '%BB' group by n_name, extract(year from o_entry_d) order by n_name, l_year desc; select c_id, c_last, sum(ol_amount) as revenue, c_city, c_phone, n_name from customer, orders, order_line, nation where c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and o_entry_d <= ol_delivery_d and n_nationkey = ascii(cast(substring(c_state,1,1) as varchar(1))) group by c_id, c_last, c_city, c_phone, n_name order by revenue desc;
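The final queries.sql hunk above, like the `ascii`-to-`codepoint` rewrite added to `front_end.py` earlier in this series, works around SQL dialect differences: Aurora and Redshift accept `ascii(...)`, while Athena expects `codepoint(...)`. Below is a minimal, illustrative sketch of a slightly more targeted rewrite; it is not part of any commit, and the `translate_ascii_for_athena` helper name is hypothetical.

import re

# Hypothetical sketch: rewrite ascii(...) calls as codepoint(...) for Athena.
# Matching a word boundary plus the opening parenthesis avoids rewriting
# identifiers that merely contain the letters "ascii".
def translate_ascii_for_athena(sql: str) -> str:
    return re.sub(r"\bascii\s*\(", "codepoint(", sql, flags=re.IGNORECASE)

if __name__ == "__main__":
    q = "select ascii(cast(substring(c_state,1,1) as varchar(1))) from customer"
    print(translate_ascii_for_athena(q))
    # -> select codepoint(cast(substring(c_state,1,1) as varchar(1))) from customer

A small set of per-engine rewrite rules along these lines could eventually replace the substring-based HACK in the front end, as its comment already suggests.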