Additional fixes for the changing SLO experiment, add a baseline (#495)
* More fixes and adjustments

* Additional adjustments

* Add Aurora/Redshift baseline

* Fixes

* More fixes

* Fix engine recording
geoffxy authored Apr 18, 2024
1 parent 2d01708 · commit 08ae1f3
Showing 11 changed files with 168 additions and 21 deletions.
64 changes: 64 additions & 0 deletions experiments/15-e2e-scenarios-v2/common.sh
@@ -151,6 +151,43 @@ function start_repeating_olap_runner() {
runner_pid=$!
}

function start_redshift_serverless_olap_runner() {
local ra_clients=$1
local ra_gap_s=$2
local ra_gap_std_s=$3
local query_indexes=$4
local results_name=$5
local schema_name=$6

local args=(
--num-clients $ra_clients
--num-front-ends $num_front_ends
--query-indexes $query_indexes
--query-bank-file $ra_query_bank_file
--avg-gap-s $ra_gap_s
--avg-gap-std-s $ra_gap_std_s
--brad-direct
--engine redshift
--serverless-redshift
--schema-name $schema_name
--config-file ../../../$physical_config_file
)

if [[ ! -z $ra_query_frequency_path ]]; then
args+=(--query-frequency-path $ra_query_frequency_path)
fi

>&2 echo "[Serial Repeating Analytics] Running with $ra_clients on Redshift serverless..."
results_dir=$COND_OUT/$results_name
mkdir -p $results_dir

log_workload_point $results_name
COND_OUT=$results_dir python3.11 ../../../workloads/IMDB_extended/run_repeating_analytics_serial.py "${args[@]}" &

# This is a special return value variable that we use.
runner_pid=$!
}

function start_snowset_repeating_olap_runner() {
local ra_clients=$1
local time_scale_factor=$2
@@ -268,6 +305,33 @@ function start_txn_runner_serial() {
runner_pid=$!
}

function start_aurora_serverless_txn_runner_serial() {
local t_clients=$1
local schema_name=$2

>&2 echo "[Serial Transactions] Running with $t_clients on Aurora Serverless..."
results_dir=$COND_OUT/t_${t_clients}
mkdir -p $results_dir

local args=(
--num-clients $t_clients
--num-front-ends $num_front_ends
# --scale-factor $txn_scale_factor
# --dataset-type $dataset_type
--brad-direct
--serverless-aurora
--schema-name $schema_name
--config-file ../../../$physical_config_file
)

log_workload_point "txn_${t_clients}"
COND_OUT=$results_dir python3 ../../../workloads/IMDB_extended/run_transactions_serial.py \
"${args[@]}" &

# This is a special return value variable that we use.
runner_pid=$!
}

function start_snowset_txn_runner() {
local t_clients=$1
local time_scale_factor=$2
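For orientation, a minimal sketch of how the two new serverless helpers are meant to be combined (client counts, gap settings, and names below are hypothetical; both helpers hand back their child's PID through the shared runner_pid variable, so each caller must copy it before starting the next runner):

# Assumes num_front_ends, ra_query_bank_file, and physical_config_file are
# already set, e.g. via extract_named_arguments in the calling script.
start_redshift_serverless_olap_runner 8 10 3 "1,2,3" "ra_demo" "imdb_extended_100g"
rana_pid=$runner_pid   # copy now; the next helper overwrites runner_pid
start_aurora_serverless_txn_runner_serial 4 "imdb_extended_100g"
txn_pid=$runner_pid
graceful_shutdown $rana_pid $txn_pid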
11 changes: 10 additions & 1 deletion experiments/15-e2e-scenarios-v2/slo_change/COND
@@ -7,7 +7,7 @@ COMMON_CONFIGS = {
"schema-name": "imdb_extended_100g",
"ra-query-bank-file": IMDB_100GB_REGULAR_QUERY_BANK,
"txn-scale-factor": IMDB_100GB_SF,
- "num-front-ends": 24,
+ "num-front-ends": 32,
"dataset-type": "100gb",
"ra-query-indexes": ",".join(map(str, QUERIES))
}
@@ -29,3 +29,12 @@ run_command(
**COMMON_CONFIGS,
},
)

run_experiment(
name="ar_100g",
run="./run_ar_baseline.sh",
options={
# System config file not needed.
**COMMON_CONFIGS,
}
)
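Assuming the repository's usual Conductor workflow (the exact target address below is inferred from this COND file's location and the experiment name, not confirmed by the diff), the new baseline would be launched with:

cond run //experiments/15-e2e-scenarios-v2/slo_change:ar_100g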
45 changes: 45 additions & 0 deletions experiments/15-e2e-scenarios-v2/slo_change/run_ar_baseline.sh
@@ -0,0 +1,45 @@
#! /bin/bash

script_loc=$(cd $(dirname $0) && pwd -P)
cd $script_loc
source ../common.sh

# Arguments:
# --system-config-file
# --physical-config-file
# --query-indexes
extract_named_arguments $@

schema_name="imdb_extended_100g"

log_workload_point "clients_starting"
# 12 analytics clients (no client offset is needed here; the runner connects directly to Redshift)
start_redshift_serverless_olap_runner 12 5 2 $ra_query_indexes "ra_8" $schema_name
rana_pid=$runner_pid

start_aurora_serverless_txn_runner_serial 20 $schema_name # Implicit: --dataset-type
txn_pid=$runner_pid

log_workload_point "clients_started"

function inner_cancel_experiment() {
cancel_experiment $rana_pid $txn_pid
}

trap "inner_cancel_experiment" INT
trap "inner_cancel_experiment" TERM

# Sleep for 10 minutes and then change the SLOs.
sleep $(( 10 * 60 ))

# No-op (changing SLOs on BRAD).

# Wait another hour before stopping.
sleep $(( 60 * 60 ))

# Shut down everything now.
log_workload_point "experiment_workload_done"
>&2 echo "Experiment done. Shutting down runners..."
graceful_shutdown $rana_pid $txn_pid
log_workload_point "shutdown_complete"

8 changes: 4 additions & 4 deletions experiments/15-e2e-scenarios-v2/slo_change/run_workload.sh
@@ -15,11 +15,11 @@ log_workload_point "brad_start_initiated"
sleep 30

log_workload_point "clients_starting"
- # 6 clients, offset 12 (for the transactional clients)
- start_repeating_olap_runner 6 15 5 $ra_query_indexes "ra_8" 12
+ # 12 clients, offset 20 (for the transactional clients)
+ start_repeating_olap_runner 12 5 2 $ra_query_indexes "ra_8" 20
rana_pid=$runner_pid

- start_txn_runner_serial 12 # Implicit: --dataset-type
+ start_txn_runner_serial 20 # Implicit: --dataset-type
txn_pid=$runner_pid

log_workload_point "clients_started"
@@ -35,7 +35,7 @@ trap "inner_cancel_experiment" TERM
sleep $(( 10 * 60 ))

log_workload_point "changing_slo"
- brad cli --command "BRAD_CHANGE_SLO 30.0 0.030"
+ brad cli --command "BRAD_CHANGE_SLO 30.0 0.015"
log_workload_point "changed_slo"

# Wait another hour before stopping.
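For readers tracking the numbers: the two positional values appear to map onto the planner ceilings in slo_change_config.yml below (the argument order is an assumption), so the mid-experiment command now tightens only the transaction SLO from the starting 30 ms to 15 ms, while the 30 s analytics ceiling stays put:

# BRAD_CHANGE_SLO <query_latency_p90_ceiling_s> <txn_latency_p90_ceiling_s>  (assumed mapping)
brad cli --command "BRAD_CHANGE_SLO 30.0 0.015"  # 30 s analytics p90, 15 ms transaction p90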
@@ -16,11 +16,11 @@ log_workload_point "brad_start_initiated"
sleep 30

log_workload_point "clients_starting"
- # 6 clients, offset 12 (for the transactional clients)
- start_repeating_olap_runner 6 15 5 $ra_query_indexes "ra_8" 12
+ # 12 clients, offset 20 (for the transactional clients)
+ start_repeating_olap_runner 12 5 2 $ra_query_indexes "ra_8" 20
rana_pid=$runner_pid

- start_txn_runner_serial 12 # Implicit: --dataset-type
+ start_txn_runner_serial 20 # Implicit: --dataset-type
txn_pid=$runner_pid

log_workload_point "clients_started"
@@ -36,7 +36,7 @@ trap "inner_cancel_experiment" TERM
sleep $(( 2 * 60 ))

log_workload_point "changing_slo"
- brad cli --command "BRAD_CHANGE_SLO 30.0 0.030"
+ brad cli --command "BRAD_CHANGE_SLO 30.0 0.015"
log_workload_point "changed_slo"

# Wait another 10 mins before stopping.
@@ -15,6 +15,6 @@ python3 ../../../workloads/IMDB_extended/set_up_starting_blueprint.py \
--aurora-queries "99,56,32,92,91" \
--redshift-queries "49,30,83,94,38,87,86,76,37,31,46,58,61,62,64,69,73,74,51,57,60" \
--redshift-provisioning "dc2.large:2" \
- --aurora-provisioning "db.t4g.medium:2" \
+ --aurora-provisioning "db.r6g.xlarge:2" \
--system-config-file slo_change_config.yml \
--physical-config-file $1
10 changes: 5 additions & 5 deletions experiments/15-e2e-scenarios-v2/slo_change/slo_change_config.yml
@@ -6,7 +6,7 @@
# listen on successive ports (e.g., 6584, 6585, etc.).
front_end_interface: "0.0.0.0"
front_end_port: 6583
- num_front_ends: 24
+ num_front_ends: 32

# Logging paths. If the value is in ALL_CAPS (with underscores), it is
# interpreted as an environment variable (BRAD will log to the path stored in
@@ -93,8 +93,8 @@ redshift_min_load_removal_fraction: 0.9

# Blueprint planning performance ceilings.
# These will change to 30 s and 30 ms during the experiment.
- query_latency_p90_ceiling_s: 60.0
- txn_latency_p90_ceiling_s: 0.060
+ query_latency_p90_ceiling_s: 30.0
+ txn_latency_p90_ceiling_s: 0.030

# If set to true, BRAD will attempt to use the specified preset Redshift
# clusters instead of resizing the main Redshift cluster.
@@ -157,11 +157,11 @@ triggers:
threshold: 1.0

query_latency_ceiling:
- ceiling_s: 60.0
+ ceiling_s: 30.0
sustained_epochs: 3

txn_latency_ceiling:
- ceiling_s: 0.060
+ ceiling_s: 0.030
sustained_epochs: 3

recent_change:
7 changes: 6 additions & 1 deletion workloads/IMDB_extended/run_repeating_analytics_serial.py
@@ -210,7 +210,7 @@ def noop(_signal, _frame):
time_unsimulated_str,
qidx,
end - start,
- engine.value,
+ engine.value if engine is not None else "serverless_redshift",
),
file=file,
flush=True,
@@ -559,6 +559,11 @@ def main():
type=int,
help="Start the client trace at the given number of clients. Used for debugging only.",
)
parser.add_argument(
"--serverless-redshift",
action="store_true",
help="Set if running on serverless Redshift.",
)
args = parser.parse_args()

set_up_logging()
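A hypothetical direct invocation exercising the new flag (all paths and values are placeholders; the flag set mirrors start_redshift_serverless_olap_runner in common.sh above). In this mode the runner presumably has no routed engine attached to each query, which is what the recorded-engine fallback to "serverless_redshift" covers:

python3.11 run_repeating_analytics_serial.py \
  --num-clients 1 --num-front-ends 1 \
  --query-indexes "1,2" --query-bank-file <bank.sql> \
  --avg-gap-s 5 --avg-gap-std-s 2 \
  --brad-direct --engine redshift --serverless-redshift \
  --schema-name imdb_extended_100g --config-file <physical_config.yml>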
1 change: 1 addition & 0 deletions workloads/IMDB_extended/run_transactions_serial.py
@@ -329,6 +329,7 @@ def main():
parser.add_argument("--brad-host", type=str, default="localhost")
parser.add_argument("--brad-port", type=int, default=6583)
parser.add_argument("--num-front-ends", type=int, default=1)
parser.add_argument("--serverless-aurora", action="store_true")
args = parser.parse_args()

set_up_logging()
15 changes: 10 additions & 5 deletions workloads/IMDB_extended/set_up_starting_blueprint.py
@@ -135,15 +135,20 @@ def main():
enum_blueprint.set_routing_policy(replaced_policy)

# Ensure the provisioning is as expected.
- enum_blueprint.set_aurora_provisioning(parse_provisioning(args.aurora_provisioning))
- enum_blueprint.set_redshift_provisioning(
-     parse_provisioning(args.redshift_provisioning)
- )
+ aurora_prov = parse_provisioning(args.aurora_provisioning)
+ redshift_prov = parse_provisioning(args.redshift_provisioning)
+ enum_blueprint.set_aurora_provisioning(aurora_prov)
+ enum_blueprint.set_redshift_provisioning(redshift_prov)

# 6. Adjust the placement.
new_placement = {}
+ engines = [Engine.Aurora, Engine.Redshift, Engine.Athena]
+ if aurora_prov.num_nodes() == 0:
+     engines.remove(Engine.Aurora)
+ if redshift_prov.num_nodes() == 0:
+     engines.remove(Engine.Redshift)
for table in blueprint.tables():
- new_placement[table.name] = [Engine.Aurora, Engine.Athena]
+ new_placement[table.name] = engines
enum_blueprint.set_table_locations(new_placement)

# 6. Transition to the new blueprint.
18 changes: 18 additions & 0 deletions workloads/IMDB_extended/workload_utils/connect.py
@@ -6,6 +6,7 @@

from brad.config.engine import Engine
from brad.connection.factory import ConnectionFactory, Connection, RedshiftConnection
from brad.connection.odbc_connection import OdbcConnection
from brad.config.file import ConfigFile
from brad.grpc_client import BradGrpcClient
from brad.provisioning.directory import Directory
@@ -50,6 +51,23 @@ def do_connect() -> Connection:
timeout_s=10,
)

elif (
direct_engine == Engine.Aurora
and hasattr(args, "serverless_aurora")
and args.serverless_aurora
):
print("Connecting to serverless Aurora")

def do_connect() -> Connection:
# pylint: disable-next=protected-access
cstr = ConnectionFactory._pg_aurora_odbc_connection_string(
connection_details["serverless_endpoint"],
5432,
connection_details,
args.schema_name,
)
return OdbcConnection.connect_sync(cstr, autocommit=False, timeout_s=10)

else:
if directory is None:
directory_to_use = Directory(config)
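A matching sketch for the transaction side (placeholders again; the flag combination mirrors start_aurora_serverless_txn_runner_serial above). This path should exercise the new OdbcConnection branch, and note that the physical config presumably has to supply a serverless_endpoint entry for Aurora:

python3 run_transactions_serial.py \
  --num-clients 1 --num-front-ends 1 \
  --brad-direct --serverless-aurora \
  --schema-name imdb_extended_100g --config-file <physical_config.yml>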
