diff --git a/experiments/15-e2e-scenarios-v2/common.sh b/experiments/15-e2e-scenarios-v2/common.sh
index 40640957..53e4c9c1 100644
--- a/experiments/15-e2e-scenarios-v2/common.sh
+++ b/experiments/15-e2e-scenarios-v2/common.sh
@@ -151,6 +151,43 @@ function start_repeating_olap_runner() {
   runner_pid=$!
 }
 
+function start_redshift_serverless_olap_runner() {
+  local ra_clients=$1
+  local ra_gap_s=$2
+  local ra_gap_std_s=$3
+  local query_indexes=$4
+  local results_name=$5
+  local schema_name=$6
+
+  local args=(
+    --num-clients $ra_clients
+    --num-front-ends $num_front_ends
+    --query-indexes $query_indexes
+    --query-bank-file $ra_query_bank_file
+    --avg-gap-s $ra_gap_s
+    --avg-gap-std-s $ra_gap_std_s
+    --brad-direct
+    --engine redshift
+    --serverless-redshift
+    --schema-name $schema_name
+    --config-file ../../../$physical_config_file
+  )
+
+  if [[ ! -z $ra_query_frequency_path ]]; then
+    args+=(--query-frequency-path $ra_query_frequency_path)
+  fi
+
+  >&2 echo "[Serial Repeating Analytics] Running with $ra_clients clients on serverless Redshift..."
+  results_dir=$COND_OUT/$results_name
+  mkdir -p $results_dir
+
+  log_workload_point $results_name
+  COND_OUT=$results_dir python3.11 ../../../workloads/IMDB_extended/run_repeating_analytics_serial.py "${args[@]}" &
+
+  # This is a special return value variable that we use.
+  runner_pid=$!
+}
+
 function start_snowset_repeating_olap_runner() {
   local ra_clients=$1
   local time_scale_factor=$2
@@ -268,6 +305,33 @@ function start_txn_runner_serial() {
   runner_pid=$!
 }
 
+function start_aurora_serverless_txn_runner_serial() {
+  local t_clients=$1
+  local schema_name=$2
+
+  >&2 echo "[Serial Transactions] Running with $t_clients clients on serverless Aurora..."
+  results_dir=$COND_OUT/t_${t_clients}
+  mkdir -p $results_dir
+
+  local args=(
+    --num-clients $t_clients
+    --num-front-ends $num_front_ends
+    # --scale-factor $txn_scale_factor
+    # --dataset-type $dataset_type
+    --brad-direct
+    --serverless-aurora
+    --schema-name $schema_name
+    --config-file ../../../$physical_config_file
+  )
+
+  log_workload_point "txn_${t_clients}"
+  COND_OUT=$results_dir python3 ../../../workloads/IMDB_extended/run_transactions_serial.py \
+    "${args[@]}" &
+
+  # This is a special return value variable that we use.
+  runner_pid=$!
+}
+
 function start_snowset_txn_runner() {
   local t_clients=$1
   local time_scale_factor=$2
diff --git a/experiments/15-e2e-scenarios-v2/slo_change/COND b/experiments/15-e2e-scenarios-v2/slo_change/COND
index 64c440ea..e422768b 100644
--- a/experiments/15-e2e-scenarios-v2/slo_change/COND
+++ b/experiments/15-e2e-scenarios-v2/slo_change/COND
@@ -7,7 +7,7 @@ COMMON_CONFIGS = {
     "schema-name": "imdb_extended_100g",
     "ra-query-bank-file": IMDB_100GB_REGULAR_QUERY_BANK,
     "txn-scale-factor": IMDB_100GB_SF,
-    "num-front-ends": 24,
+    "num-front-ends": 32,
     "dataset-type": "100gb",
     "ra-query-indexes": ",".join(map(str, QUERIES))
 }
@@ -29,3 +29,12 @@ run_command(
         **COMMON_CONFIGS,
     },
 )
+
+run_experiment(
+    name="ar_100g",
+    run="./run_ar_baseline.sh",
+    options={
+        # System config file not needed.
+        **COMMON_CONFIGS,
+    },
+)
diff --git a/experiments/15-e2e-scenarios-v2/slo_change/run_ar_baseline.sh b/experiments/15-e2e-scenarios-v2/slo_change/run_ar_baseline.sh
new file mode 100755
index 00000000..60dd8011
--- /dev/null
+++ b/experiments/15-e2e-scenarios-v2/slo_change/run_ar_baseline.sh
@@ -0,0 +1,45 @@
+#! /bin/bash
+
+script_loc=$(cd $(dirname $0) && pwd -P)
+cd $script_loc
+source ../common.sh
+
+# Arguments:
+# --system-config-file
+# --physical-config-file
+# --query-indexes
+extract_named_arguments $@
+
+schema_name="imdb_extended_100g"
+
+log_workload_point "clients_starting"
+# 12 analytical clients (the serverless runner does not take a client offset).
+start_redshift_serverless_olap_runner 12 5 2 $ra_query_indexes "ra_8" $schema_name
+rana_pid=$runner_pid
+
+start_aurora_serverless_txn_runner_serial 20 $schema_name  # 20 transactional clients.
+txn_pid=$runner_pid
+
+log_workload_point "clients_started"
+
+function inner_cancel_experiment() {
+  cancel_experiment $rana_pid $txn_pid
+}
+
+trap "inner_cancel_experiment" INT
+trap "inner_cancel_experiment" TERM
+
+# Sleep for 10 minutes, the point at which the BRAD experiment changes the SLOs.
+sleep $(( 10 * 60 ))
+
+# No-op here: this baseline does not run BRAD, so there are no SLOs to change.
+
+# Wait another hour before stopping.
+sleep $(( 60 * 60 ))
+
+# Shut down everything now.
+log_workload_point "experiment_workload_done"
+>&2 echo "Experiment done. Shutting down runners..."
+graceful_shutdown $rana_pid $txn_pid
+log_workload_point "shutdown_complete"
+
diff --git a/experiments/15-e2e-scenarios-v2/slo_change/run_workload.sh b/experiments/15-e2e-scenarios-v2/slo_change/run_workload.sh
index 626148f9..36dfdb7c 100755
--- a/experiments/15-e2e-scenarios-v2/slo_change/run_workload.sh
+++ b/experiments/15-e2e-scenarios-v2/slo_change/run_workload.sh
@@ -15,11 +15,11 @@ log_workload_point "brad_start_initiated"
 sleep 30
 
 log_workload_point "clients_starting"
-# 6 clients, offset 12 (for the transactional clients)
-start_repeating_olap_runner 6 15 5 $ra_query_indexes "ra_8" 12
+# 12 clients, offset 20 (for the transactional clients)
+start_repeating_olap_runner 12 5 2 $ra_query_indexes "ra_8" 20
 rana_pid=$runner_pid
 
-start_txn_runner_serial 12  # Implicit: --dataset-type
+start_txn_runner_serial 20  # Implicit: --dataset-type
 txn_pid=$runner_pid
 
 log_workload_point "clients_started"
@@ -35,7 +35,7 @@ trap "inner_cancel_experiment" TERM
 sleep $(( 10 * 60 ))
 
 log_workload_point "changing_slo"
-brad cli --command "BRAD_CHANGE_SLO 30.0 0.030"
+brad cli --command "BRAD_CHANGE_SLO 30.0 0.015"
 log_workload_point "changed_slo"
 
 # Wait another hour before stopping.
diff --git a/experiments/15-e2e-scenarios-v2/slo_change/run_workload_debug.sh b/experiments/15-e2e-scenarios-v2/slo_change/run_workload_debug.sh
index 39b7b40c..3c661793 100755
--- a/experiments/15-e2e-scenarios-v2/slo_change/run_workload_debug.sh
+++ b/experiments/15-e2e-scenarios-v2/slo_change/run_workload_debug.sh
@@ -16,11 +16,11 @@ log_workload_point "brad_start_initiated"
 sleep 30
 
 log_workload_point "clients_starting"
-# 6 clients, offset 12 (for the transactional clients)
-start_repeating_olap_runner 6 15 5 $ra_query_indexes "ra_8" 12
+# 12 clients, offset 20 (for the transactional clients)
+start_repeating_olap_runner 12 5 2 $ra_query_indexes "ra_8" 20
 rana_pid=$runner_pid
 
-start_txn_runner_serial 12  # Implicit: --dataset-type
+start_txn_runner_serial 20  # Implicit: --dataset-type
 txn_pid=$runner_pid
 
 log_workload_point "clients_started"
@@ -36,7 +36,7 @@ trap "inner_cancel_experiment" TERM
 sleep $(( 2 * 60 ))
 
 log_workload_point "changing_slo"
-brad cli --command "BRAD_CHANGE_SLO 30.0 0.030"
+brad cli --command "BRAD_CHANGE_SLO 30.0 0.015"
 log_workload_point "changed_slo"
 
 # Wait another 10 mins before stopping.
diff --git a/experiments/15-e2e-scenarios-v2/slo_change/set_up_starting_blueprint.sh b/experiments/15-e2e-scenarios-v2/slo_change/set_up_starting_blueprint.sh
index 6a21a231..7c17816d 100755
--- a/experiments/15-e2e-scenarios-v2/slo_change/set_up_starting_blueprint.sh
+++ b/experiments/15-e2e-scenarios-v2/slo_change/set_up_starting_blueprint.sh
@@ -15,6 +15,6 @@ python3 ../../../workloads/IMDB_extended/set_up_starting_blueprint.py \
   --aurora-queries "99,56,32,92,91" \
   --redshift-queries "49,30,83,94,38,87,86,76,37,31,46,58,61,62,64,69,73,74,51,57,60" \
   --redshift-provisioning "dc2.large:2" \
-  --aurora-provisioning "db.t4g.medium:2" \
+  --aurora-provisioning "db.r6g.xlarge:2" \
   --system-config-file slo_change_config.yml \
   --physical-config-file $1
diff --git a/experiments/15-e2e-scenarios-v2/slo_change/slo_change_config.yml b/experiments/15-e2e-scenarios-v2/slo_change/slo_change_config.yml
index 0dbee531..c290f94c 100644
--- a/experiments/15-e2e-scenarios-v2/slo_change/slo_change_config.yml
+++ b/experiments/15-e2e-scenarios-v2/slo_change/slo_change_config.yml
@@ -6,7 +6,7 @@
 # listen on successive ports (e.g., 6584, 6585, etc.).
 front_end_interface: "0.0.0.0"
 front_end_port: 6583
-num_front_ends: 24
+num_front_ends: 32
 
 # Logging paths. If the value is in ALL_CAPS (with underscores), it is
 # interpreted as an environment variable (BRAD will log to the path stored in
@@ -93,8 +93,8 @@ redshift_min_load_removal_fraction: 0.9
 
 # Blueprint planning performance ceilings.
-# These will change to 30 s and 30 ms during the experiment.
-query_latency_p90_ceiling_s: 60.0
-txn_latency_p90_ceiling_s: 0.060
+# These will change to 30 s and 15 ms during the experiment.
+query_latency_p90_ceiling_s: 30.0
+txn_latency_p90_ceiling_s: 0.030
 
 # If set to true, BRAD will attempt to use the specified preset Redshift
 # clusters instead of resizing the main Redshift cluster.
@@ -157,11 +157,11 @@ triggers:
     threshold: 1.0
 
   query_latency_ceiling:
-    ceiling_s: 60.0
+    ceiling_s: 30.0
     sustained_epochs: 3
 
   txn_latency_ceiling:
-    ceiling_s: 0.060
+    ceiling_s: 0.030
     sustained_epochs: 3
 
   recent_change:
diff --git a/workloads/IMDB_extended/run_repeating_analytics_serial.py b/workloads/IMDB_extended/run_repeating_analytics_serial.py
index c95ba952..783faa7a 100644
--- a/workloads/IMDB_extended/run_repeating_analytics_serial.py
+++ b/workloads/IMDB_extended/run_repeating_analytics_serial.py
@@ -210,7 +210,7 @@ def noop(_signal, _frame):
                         time_unsimulated_str,
                         qidx,
                         end - start,
-                        engine.value,
+                        engine.value if engine is not None else "serverless_redshift",
                     ),
                     file=file,
                     flush=True,
@@ -559,6 +559,11 @@ def main():
         type=int,
         help="Start the client trace at the given number of clients. Used for debugging only.",
     )
+    parser.add_argument(
+        "--serverless-redshift",
+        action="store_true",
+        help="Set if running on serverless Redshift.",
+    )
 
     args = parser.parse_args()
     set_up_logging()
diff --git a/workloads/IMDB_extended/run_transactions_serial.py b/workloads/IMDB_extended/run_transactions_serial.py
index 319f314a..fdfe16ef 100644
--- a/workloads/IMDB_extended/run_transactions_serial.py
+++ b/workloads/IMDB_extended/run_transactions_serial.py
@@ -329,6 +329,7 @@ def main():
     parser.add_argument("--brad-host", type=str, default="localhost")
     parser.add_argument("--brad-port", type=int, default=6583)
     parser.add_argument("--num-front-ends", type=int, default=1)
+    parser.add_argument("--serverless-aurora", action="store_true", help="Set if running on serverless Aurora.")
 
     args = parser.parse_args()
     set_up_logging()
diff --git a/workloads/IMDB_extended/set_up_starting_blueprint.py b/workloads/IMDB_extended/set_up_starting_blueprint.py
index dc36cf2a..be5bf2c1 100644
--- a/workloads/IMDB_extended/set_up_starting_blueprint.py
+++ b/workloads/IMDB_extended/set_up_starting_blueprint.py
@@ -135,15 +135,21 @@ def main():
     enum_blueprint.set_routing_policy(replaced_policy)
 
     # Ensure the provisioning is as expected.
-    enum_blueprint.set_aurora_provisioning(parse_provisioning(args.aurora_provisioning))
-    enum_blueprint.set_redshift_provisioning(
-        parse_provisioning(args.redshift_provisioning)
-    )
+    aurora_prov = parse_provisioning(args.aurora_provisioning)
+    redshift_prov = parse_provisioning(args.redshift_provisioning)
+    enum_blueprint.set_aurora_provisioning(aurora_prov)
+    enum_blueprint.set_redshift_provisioning(redshift_prov)
 
     # 6. Adjust the placement.
     new_placement = {}
+    # Only place tables on engines that are actually provisioned.
+    engines = [Engine.Aurora, Engine.Redshift, Engine.Athena]
+    if aurora_prov.num_nodes() == 0:
+        engines.remove(Engine.Aurora)
+    if redshift_prov.num_nodes() == 0:
+        engines.remove(Engine.Redshift)
     for table in blueprint.tables():
-        new_placement[table.name] = [Engine.Aurora, Engine.Athena]
+        new_placement[table.name] = engines
     enum_blueprint.set_table_locations(new_placement)
 
     # 6. Transition to the new blueprint.
diff --git a/workloads/IMDB_extended/workload_utils/connect.py b/workloads/IMDB_extended/workload_utils/connect.py
index e354a450..32991699 100644
--- a/workloads/IMDB_extended/workload_utils/connect.py
+++ b/workloads/IMDB_extended/workload_utils/connect.py
@@ -6,6 +6,7 @@
 from brad.config.engine import Engine
 from brad.connection.factory import ConnectionFactory, Connection, RedshiftConnection
+from brad.connection.odbc_connection import OdbcConnection
 from brad.config.file import ConfigFile
 from brad.grpc_client import BradGrpcClient
 from brad.provisioning.directory import Directory
 
@@ -50,6 +51,23 @@ def do_connect() -> Connection:
                 timeout_s=10,
             )
 
+    elif (
+        direct_engine == Engine.Aurora
+        and hasattr(args, "serverless_aurora")
+        and args.serverless_aurora
+    ):
+        print("Connecting to serverless Aurora")
+
+        def do_connect() -> Connection:
+            # pylint: disable-next=protected-access
+            cstr = ConnectionFactory._pg_aurora_odbc_connection_string(
+                connection_details["serverless_endpoint"],
+                5432,
+                connection_details,
+                args.schema_name,
+            )
+            return OdbcConnection.connect_sync(cstr, autocommit=False, timeout_s=10)
+
     else:
         if directory is None:
             directory_to_use = Directory(config)
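
Usage note: the run_experiment(name="ar_100g", ...) entry in the COND file above suggests
the new baseline is launched through Conductor. A sketch of the invocation, assuming
Conductor's `cond run` CLI and this repository's directory layout; the physical config
path below is a hypothetical placeholder, and the exact argument syntax is whatever
extract_named_arguments in common.sh accepts:

    # Launch the serverless Aurora/Redshift baseline target defined in the COND file.
    cond run //experiments/15-e2e-scenarios-v2/slo_change:ar_100g

    # Or run the script directly. Its runners connect with --brad-direct, so a running
    # BRAD front end is not required; only the physical config (cluster endpoints and
    # credentials) must be supplied.
    ./run_ar_baseline.sh --physical-config-file=<path-to-physical-config.yml>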