-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add data gathering scripts for CH-BenCHmark (load and instance types) (…
- Loading branch information
Showing
6 changed files
with
229 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
#! /bin/bash | ||
|
||
if [ -z $2 ]; then | ||
>&2 echo "Usage: $0 <config_path> <cluster id>" | ||
>&2 echo "The config path should be relative to the redshift/ subdirectory." | ||
exit 1 | ||
fi | ||
|
||
export BRAD_CONFIG=$1 | ||
cluster_identifier=$2 | ||
|
||
export BRAD_SCHEMA="chbenchmark" | ||
|
||
function run_warm_up() { | ||
>&2 echo "Running warm up..." | ||
pushd redshift | ||
python3 -m brad.calibration.measure_load --run-warmup --engine redshift --query-file ../../../../tools/calibration/load_chbench/selected_queries.sql | ||
popd | ||
} | ||
|
||
function sync_redshift_resize() { | ||
raw_instance=$1 | ||
target_instance_type=${raw_instance//_/.} | ||
target_node_count=$2 | ||
|
||
if [[ $target_node_count = "2" ]] && [[ $raw_instance = "dc2_large" ]]; then | ||
>&2 echo "Skipping initial resize to $raw_instance $target_node_count (special case)" | ||
return | ||
fi | ||
|
||
# Try an elastic resize first. | ||
>&2 echo "Resizing Redshift cluster to $target_instance_type with $target_node_count nodes (attempt elastic)" | ||
aws redshift resize-cluster --cluster-identifier "$cluster_identifier" --cluster-type multi-node --node-type "$target_instance_type" --number-of-nodes "$target_node_count" --no-classic --region us-east-1 > /dev/null | ||
result=$? | ||
|
||
# Resize Redshift cluster | ||
if [ $result -ne 0 ]; then | ||
>&2 echo "Classic resizing Redshift cluster to $target_instance_type with $target_node_count nodes" | ||
aws redshift modify-cluster --cluster-identifier "$cluster_identifier" --node-type "$target_instance_type" --number-of-nodes "$target_node_count" > /dev/null | ||
fi | ||
|
||
sleep 60 | ||
|
||
# Wait for resize to complete | ||
while true; do | ||
cluster_status=$(aws redshift describe-clusters --cluster-identifier "$cluster_identifier" --query 'Clusters[0].ClusterStatus' --output text) | ||
if [[ $cluster_status == "available" ]]; then | ||
break | ||
fi | ||
>&2 echo "Waiting for resize to complete..." | ||
sleep 10 | ||
done | ||
} | ||
|
||
function run_cfg() { | ||
instance_type=$1 | ||
num_nodes=$2 | ||
|
||
>&2 echo "$instance_type $num_nodes" | ||
sync_redshift_resize $instance_type $num_nodes | ||
>&2 echo "Warming up..." | ||
run_warm_up | ||
>&2 echo "Running..." | ||
cond run "//redshift:${instance_type}-${num_nodes}" | ||
} | ||
|
||
>&2 echo "Running $cluster_identifier" | ||
>&2 echo "Config $BRAD_CONFIG" | ||
>&2 echo "Cluster id $cluster_identifier" | ||
sleep 10 | ||
|
||
run_cfg "dc2_large" 2 | ||
run_cfg "dc2_large" 4 | ||
run_cfg "dc2_large" 8 | ||
run_cfg "dc2_large" 16 | ||
run_cfg "ra3_xlplus" 2 | ||
run_cfg "ra3_xlplus" 4 | ||
run_cfg "ra3_xlplus" 8 | ||
run_cfg "ra3_4xlarge" 8 | ||
run_cfg "ra3_4xlarge" 4 | ||
run_cfg "ra3_4xlarge" 2 | ||
|
||
sleep 60 | ||
|
||
>&2 echo "Done. Pausing $cluster_identifier..." | ||
aws redshift pause-cluster --cluster-identifier "$cluster_identifier" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from itertools import product | ||
|
||
|
||
AVG_GAP_S = 3 | ||
RUN_FOR_S = 5 * 60 # 5 minutes | ||
NUM_CLIENTS = [1, 2, 4, 6] | ||
WAIT_BEFORE_START = 10 | ||
NUM_QUERIES = 22 | ||
|
||
|
||
# Relative to experiment definition directories. | ||
QUERY_BANK = "../selected_queries.sql" | ||
|
||
|
||
CLUSTER_CONFIGS = [ | ||
("dc2_large", 2), | ||
("dc2_large", 4), | ||
("dc2_large", 8), | ||
("dc2_large", 16), | ||
("ra3_xlplus", 2), | ||
("ra3_xlplus", 4), | ||
("ra3_xlplus", 8), | ||
("ra3_4xlarge", 2), | ||
("ra3_4xlarge", 4), | ||
("ra3_4xlarge", 8), | ||
] | ||
|
||
|
||
for inst, nodes in CLUSTER_CONFIGS: | ||
cfg_name = f"{inst}-{nodes}" | ||
run_experiment_group( | ||
name=cfg_name, | ||
run="python3 -m brad.calibration.measure_load", | ||
experiments=[ | ||
ExperimentInstance( | ||
name=f"{cfg_name}-{clients}-q{query_idx}", | ||
options={ | ||
"num-clients": clients, | ||
"specific-query-idx": query_idx, | ||
"run-for-s": RUN_FOR_S, | ||
"avg-gap-s": AVG_GAP_S, | ||
"wait-before-start": WAIT_BEFORE_START, | ||
"query-file": QUERY_BANK, | ||
"engine": "redshift", | ||
}, | ||
) | ||
for query_idx, clients in product(range(NUM_QUERIES), NUM_CLIENTS) | ||
], | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import argparse | ||
import random | ||
|
||
|
||
def main() -> None: | ||
parser = argparse.ArgumentParser() | ||
parser.add_argument("--query-file", type=str, required=True) | ||
parser.add_argument("--seed", type=int, default=42) | ||
parser.add_argument("--num-query-blocks", type=int, default=22) | ||
parser.add_argument("--queries-per-block", type=int, default=200) | ||
args = parser.parse_args() | ||
|
||
prng = random.Random(args.seed) | ||
|
||
with open(args.query_file, "r", encoding="UTF-8") as file: | ||
queries = [line.strip() for line in file] | ||
|
||
selected = [] | ||
for qidx in range(args.num_query_blocks): | ||
offset = prng.randint(0, args.queries_per_block - 1) | ||
selected.append(queries[qidx * args.queries_per_block + offset]) | ||
|
||
with open("selected_queries.sql", "w", encoding="UTF-8") as file: | ||
for q in selected: | ||
print(q, file=file) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
select ol_number, sum(ol_quantity) as sum_qty, sum(ol_amount) as sum_amount, avg(ol_quantity) as avg_qty, avg(ol_amount) as avg_amount, count(*) as count_order from order_line where ol_amount <= 33.06648003661816 group by ol_number order by ol_number; | ||
select su_suppkey, su_name, n_name, i_id, i_name, su_address, su_phone, su_comment from item, supplier, stock, nation, region, (select s_i_id as m_i_id, min(s_quantity) as m_s_quantity from stock, supplier, nation, region where mod((s_w_id*s_i_id),10000)=su_suppkey and s_quantity >= 11.777062936461403 and su_nationkey=n_nationkey and n_regionkey=r_regionkey and r_name like 'Europ%' group by s_i_id) m where i_id = s_i_id and su_suppkey >= 2941.508980163913 and i_id <= 79857.67421953629 and mod((s_w_id * s_i_id), 10000) = su_suppkey and su_nationkey = n_nationkey and n_regionkey = r_regionkey and i_data like '%b' and r_name like 'Europ%' and i_id=m_i_id and s_quantity = m_s_quantity order by n_name, su_name, i_id; | ||
select ol_o_id, ol_w_id, ol_d_id, sum(ol_amount) as revenue, o_entry_d from customer, new_order, orders, order_line where c_state like 'A%' and o_w_id >= 42.7486611465113 and ol_amount <= 99.70737680321619 and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and no_w_id = o_w_id and no_d_id = o_d_id and no_o_id = o_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id group by ol_o_id, ol_w_id, ol_d_id, o_entry_d order by revenue desc, o_entry_d; | ||
select o_ol_cnt, count(*) as order_count from orders where o_carrier_id <= 6.749521428520183 and exists (select * from order_line where o_id = ol_o_id and o_w_id = ol_w_id and o_d_id = ol_d_id and ol_delivery_d >= o_entry_d) group by o_ol_cnt order by o_ol_cnt; | ||
select n_name, sum(ol_amount) as revenue from customer, orders, order_line, stock, supplier, nation, region where c_id = o_c_id and n_nationkey <= 99.21862547006236 and s_order_cnt >= 42.71343586058127 and c_w_id = o_w_id and c_d_id = o_d_id and ol_o_id = o_id and ol_w_id = o_w_id and ol_d_id=o_d_id and ol_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id),10000) = su_suppkey and ascii(substring(c_state,1,1)) = su_nationkey and su_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'Europe' group by n_name order by revenue desc; | ||
select sum(ol_amount) as revenue from order_line where ol_quantity <= 5.0 and ol_quantity between 1 and 100000; | ||
WITH inner_query AS ( select su_nationkey as supp_nation, substring(c_state,1,1) as cust_nation, extract(year from o_entry_d) as l_year, ol_amount as revenue from supplier, stock, order_line, orders, customer, nation n1, nation n2 where ol_supply_w_id = s_w_id and c_id <= 2665.5792747107366 and s_quantity <= 72.4600053847094 and su_suppkey >= 528.9934672447876 and ol_i_id = s_i_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and su_nationkey = n1.n_nationkey and ascii(substring(c_state,1,1)) = n2.n_nationkey and ( (n1.n_name = 'Germany' and n2.n_name = 'Cambodia') or (n1.n_name = 'Cambodia' and n2.n_name = 'Germany') ) ) SELECT supp_nation, cust_nation, l_year, sum(revenue) as revenue FROM inner_query group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year; | ||
select extract(year from o_entry_d) as l_year, sum(case when n2.n_name = 'Germany' then ol_amount else 0 end) / sum(ol_amount) as mkt_share from item, supplier, stock, order_line, orders, customer, nation n1, nation n2, region where i_id = s_i_id and o_w_id >= 583.5206747942913 and s_order_cnt <= 81.86012188607452 and su_suppkey >= 689.8054116558625 and ol_i_id = s_i_id and ol_supply_w_id = s_w_id and mod((s_w_id * s_i_id),10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and c_id = o_c_id and c_w_id = o_w_id and c_d_id = o_d_id and n1.n_nationkey = ascii(cast(substring(c_state,1,1) as varchar(1))) and n1.n_regionkey = r_regionkey and ol_i_id < 1000 and r_name = 'Europe' and su_nationkey = n2.n_nationkey and i_data like '%b' and i_id = ol_i_id group by extract(year from o_entry_d) order by l_year; | ||
select n_name, extract(year from o_entry_d) as l_year, sum(ol_amount) as sum_profit from item, stock, supplier, order_line, orders, nation where ol_i_id = s_i_id and o_id <= 2939.378308830152 and s_quantity <= 86.59608959532211 and ol_supply_w_id = s_w_id and mod((s_w_id * s_i_id), 10000) = su_suppkey and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and ol_i_id = i_id and su_nationkey = n_nationkey and i_data like '%BB' group by n_name, extract(year from o_entry_d) order by n_name, l_year desc; | ||
select c_id, c_last, sum(ol_amount) as revenue, c_city, c_phone, n_name from customer, orders, order_line, nation where c_id = o_c_id and n_nationkey <= 116.40294250401558 and o_id <= 1560.082691309974 and c_w_id = o_w_id and c_d_id = o_d_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id and o_entry_d <= ol_delivery_d and n_nationkey = ascii(cast(substring(c_state,1,1) as varchar(1))) group by c_id, c_last, c_city, c_phone, n_name order by revenue desc; | ||
select s_i_id, sum(s_order_cnt) as ordercount from stock, supplier, nation where mod((s_w_id * s_i_id),10000) = su_suppkey and su_suppkey >= 2406.641955682944 and su_nationkey = n_nationkey and n_name = 'Germany' group by s_i_id having sum(s_order_cnt) > (select sum(s_order_cnt) * .005 from stock, supplier, nation where mod((s_w_id * s_i_id),10000) = su_suppkey and s_quantity >= 27.35152833573742 and su_nationkey = n_nationkey and n_name = 'Germany') order by ordercount desc; | ||
select o_ol_cnt, sum(case when o_carrier_id = 1 or o_carrier_id = 2 then 1 else 0 end) as high_line_count, sum(case when o_carrier_id <> 1 and o_carrier_id <> 2 then 1 else 0 end) as low_line_count from orders, order_line where ol_w_id = o_w_id and ol_amount <= 36.57742392006392 and ol_d_id = o_d_id and ol_o_id = o_id and o_entry_d <= ol_delivery_d group by o_ol_cnt order by o_ol_cnt; | ||
select c_count, count(*) as custdist from (select c_id, count(o_id) from customer left outer join orders on ( c_w_id = o_w_id and c_d_id = o_d_id and c_id = o_c_id and o_carrier_id >= 4.392919247526648 ) group by c_id) as c_orders (c_id, c_count) group by c_count order by custdist desc, c_count desc; | ||
select 100.00 * sum(case when i_data like 'PR%' then ol_amount else 0 end) / (1+sum(ol_amount)) as promo_revenue from order_line, item where ol_i_id = i_id and i_id <= 82830.86056286634; | ||
with revenue (supplier_no, total_revenue) as ( select mod((s_w_id * s_i_id),10000) as supplier_no, sum(ol_amount) as total_revenue from order_line, stock where ol_i_id = s_i_id and ol_supply_w_id = s_w_id and s_quantity >= 52.409029036617504 group by mod((s_w_id * s_i_id),10000)) select su_suppkey, su_name, su_address, su_phone, total_revenue from supplier, revenue where su_suppkey = supplier_no and total_revenue = (select max(total_revenue) from revenue) and su_suppkey >= 600.4811082997699 order by su_suppkey; | ||
select i_name, substring(i_data, 1, 3) as brand, i_price, count(distinct (mod((s_w_id * s_i_id),10000))) as supplier_cnt from stock, item where i_id = s_i_id and i_data not like 'zz%' and i_price >= 17.67656701310919 and (mod((s_w_id * s_i_id),10000) not in (select su_suppkey from supplier where su_comment like '%bad%')) group by i_name, substring(i_data, 1, 3), i_price order by supplier_cnt desc; | ||
select sum(ol_amount) / 2.0 as avg_yearly from order_line, (select i_id, avg(ol_quantity) as a from item, order_line where i_data like '%b' and ol_quantity <= 5.0 and ol_i_id = i_id group by i_id) t where ol_i_id = t.i_id and ol_quantity < t.a; | ||
select c_last, c_id o_id, o_entry_d, o_ol_cnt, sum(ol_amount) from customer, orders, order_line where c_id = o_c_id and c_d_id <= 6.652803502875462 and c_w_id = o_w_id and c_d_id = o_d_id and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_o_id = o_id group by o_id, o_w_id, o_d_id, c_id, c_last, o_entry_d, o_ol_cnt having sum(ol_amount) > 200 order by sum(ol_amount) desc, o_entry_d; | ||
select sum(ol_amount) as revenue from order_line, item where ( ol_i_id = i_id and i_data like '%a' and ol_quantity >= 1 and ol_quantity <= 10 and i_price <= 97.10832030687996 and ol_w_id in (1,2,3) ) or ( ol_i_id = i_id and i_data like '%b' and ol_quantity >= 1 and ol_quantity <= 10 and i_price <= 79.78326318023088 and ol_w_id in (1,2,4) ) or ( ol_i_id = i_id and i_data like '%c' and ol_quantity >= 1 and ol_quantity <= 10 and i_price >= 5.450023586551352 and ol_w_id in (1,5,3) ); | ||
select su_name, su_address from supplier, nation where su_suppkey in (select mod(s_i_id * s_w_id, 10000) from stock, order_line where s_i_id in (select i_id from item where i_data like 'co%') and ol_i_id=s_i_id group by s_i_id, s_w_id, s_quantity having 2*s_quantity > sum(ol_quantity)) and su_nationkey = n_nationkey and su_suppkey <= 8996.163667412242 and n_name = 'Germany' order by su_name; | ||
select su_name, count(*) as numwait from supplier, order_line l1, orders, stock, nation where ol_o_id = o_id and su_suppkey <= 8526.675416612981 and o_w_id >= 369.02551642220345 and ol_w_id = o_w_id and ol_d_id = o_d_id and ol_w_id = s_w_id and ol_i_id = s_i_id and mod((s_w_id * s_i_id),10000) = su_suppkey and l1.ol_delivery_d > o_entry_d and not exists (select * from order_line l2 where l2.ol_o_id = l1.ol_o_id and l2.ol_w_id = l1.ol_w_id and l2.ol_d_id = l1.ol_d_id and l2.ol_delivery_d > l1.ol_delivery_d) and su_nationkey = n_nationkey and n_name = 'Germany' group by su_name order by numwait desc, su_name; | ||
select substring(c_state,1,1) as country, count(*) as numcust, sum(c_balance) as totacctbal from customer where substring(c_phone,1,1) in ('1','2','3','4','5','6','7') and c_balance > (select avg(c_BALANCE) from customer where c_balance > 0.00 and substring(c_phone,1,1) in ('1','2','3','4','5','6','7')) and not exists (select * from orders where o_c_id = c_id and o_w_id = c_w_id and o_d_id = c_d_id and o_w_id <= 1264.1427731874844 ) group by substring(c_state,1,1) order by substring(c_state,1,1); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import argparse | ||
import asyncio | ||
from brad.config.file import ConfigFile | ||
from brad.connection.factory import ConnectionFactory | ||
from brad.config.engine import Engine | ||
from brad.provisioning.directory import Directory | ||
|
||
|
||
def main(): | ||
parser = argparse.ArgumentParser() | ||
parser.add_argument("--schema-name", type=str, required=True) | ||
parser.add_argument("--physical-config-file", type=str, required=True) | ||
parser.add_argument("--query-file", type=str, required=True) | ||
args = parser.parse_args() | ||
|
||
with open(args.query_file, "r", encoding="UTF-8") as file: | ||
queries = [line.strip() for line in file] | ||
|
||
config = ConfigFile.load_from_physical_config(args.physical_config_file) | ||
directory = Directory(config) | ||
asyncio.run(directory.refresh()) | ||
connection = ConnectionFactory.connect_to_sync( | ||
Engine.Redshift, args.schema_name, config, directory, autocommit=True | ||
) | ||
|
||
cursor = connection.cursor_sync() | ||
num_succeeded = 0 | ||
for idx, q in enumerate(queries): | ||
try: | ||
print("Running query", idx, "of", len(queries) - 1) | ||
cursor.execute_sync(q) | ||
num_succeeded += 1 | ||
except Exception as ex: | ||
print("Query", idx, "failed with error", str(ex)) | ||
|
||
if num_succeeded == len(queries): | ||
print("All succeeded.") | ||
else: | ||
print((len(queries) - num_succeeded), "failed.") | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |