Various TPC-C runner and experiment improvements (#499)

Part of #487.
- Start BRAD clients on different front ends
- Add Conductor experiment definitions to measure transaction latency (to sanity check the values)
- Add an Aurora-direct driver (to check for connection overheads)
mitdbg · Apr 24, 2024 · 361f074 · 361f074
1 parent b0acff8
commit 361f074
Show file tree

Hide file tree

Showing 10 changed files with 921 additions and 3 deletions.
diff --git a/experiments/17-chbenchmark/common.sh b/experiments/17-chbenchmark/common.sh
@@ -0,0 +1,96 @@
+# Starts the BRAD daemon in the background.
+#
+# Globals:
+#   schema_name (read)          - passed to `--schema-name`.
+#   system_config_file (write)  - set from $1.
+#   physical_config_file (write) - set from $2.
+#   brad_pid (write)            - PID of the background `brad daemon` job.
+# Arguments:
+#   $1 - system config file path
+#   $2 - physical config file path
+# Returns:
+#   1 if the directory three levels up cannot be entered; 0 otherwise.
+function start_brad() {
+  system_config_file=$1
+  physical_config_file=$2
+
+  # `brad daemon` is launched from three directories up. Bail out instead of
+  # launching it from the wrong directory if that directory is unreachable.
+  pushd ../../../ || return 1
+  # Expansions are quoted so that paths containing spaces or glob characters
+  # reach `brad` as single arguments.
+  brad daemon \
+    --physical-config-file "$physical_config_file" \
+    --system-config-file "$system_config_file" \
+    --schema-name "$schema_name" \
+    &
+  brad_pid=$!
+  popd
+}
+
+# Starts the pytpcc TPC-C runner (using its `brad` driver) in the background.
+#
+# Globals:
+#   abs_txn_config_file (read) - passed to `--config`.
+#   txn_warehouses (read)      - passed to `--warehouses`.
+#   run_for_s (read)           - passed to `--duration`.
+#   t_clients (read)           - passed to `--clients`.
+#   txn_scale_factor (read)    - passed to `--scalefactor`.
+#   tpcc_pid (write)           - PID of the background runner job.
+# Returns:
+#   1 if the py-tpcc directory cannot be entered; 0 otherwise.
+function run_tpcc() {
+  # The runner is invoked as a module, so it must be started from the py-tpcc
+  # directory. Do not start it at all if that directory is unreachable.
+  pushd ../../../workloads/chbenchmark/py-tpcc/ || return 1
+  # `--no-load` is passed, so the runner is told to skip its load step.
+  # Expansions are quoted so a config path containing spaces stays one argument.
+  RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc brad \
+    --no-load \
+    --config "$abs_txn_config_file" \
+    --warehouses "$txn_warehouses" \
+    --duration "$run_for_s" \
+    --clients "$t_clients" \
+    --scalefactor "$txn_scale_factor" &
+  tpcc_pid=$!
+  popd
+}
+
+# Parses `--name=value` arguments into global shell variables.
+#
+# Each recognized flag sets the variable with the same name, with dashes
+# replaced by underscores (e.g. `--t-clients=4` sets `t_clients=4`). A flag is
+# only recognized at the start of an argument and only when its value is
+# non-empty. Unrecognized arguments are ignored.
+#
+# Evaluates any environment variables in this script's arguments. This script
+# should only be run on trusted input.
+#
+# Globals:
+#   orig_args (write) - array holding the arguments exactly as received.
+#   One variable per recognized flag (write); see the `case` arms below.
+# Arguments:
+#   "$@" - the arguments to parse.
+function extract_named_arguments() {
+  # Quoted so that arguments are not re-split or glob-expanded.
+  orig_args=("$@")
+  local val phys_arg
+  for val in "${orig_args[@]}"; do
+    # NOTE: `eval` is deliberate here: it expands environment variable
+    # references embedded in the argument. Never pass untrusted input.
+    phys_arg=$(eval "echo $val")
+
+    # `${phys_arg#*=}` strips everything up to and including the first `=`,
+    # which is the flag's own `=` because every pattern is anchored at the
+    # start of the argument. Values may therefore contain `=` themselves.
+    case "$phys_arg" in
+      --ra-clients=?*) ra_clients=${phys_arg#*=} ;;
+      --t-clients=?*) t_clients=${phys_arg#*=} ;;
+      --ra-query-indexes=?*) ra_query_indexes=${phys_arg#*=} ;;
+      --ra-query-bank-file=?*) ra_query_bank_file=${phys_arg#*=} ;;
+      --ra-gap-s=?*) ra_gap_s=${phys_arg#*=} ;;
+      --ra-gap-std-s=?*) ra_gap_std_s=${phys_arg#*=} ;;
+      --num-front-ends=?*) num_front_ends=${phys_arg#*=} ;;
+      --run-for-s=?*) run_for_s=${phys_arg#*=} ;;
+      --physical-config-file=?*) physical_config_file=${phys_arg#*=} ;;
+      --system-config-file=?*) system_config_file=${phys_arg#*=} ;;
+      --schema-name=?*) schema_name=${phys_arg#*=} ;;
+      --query-sequence-file=?*) query_sequence_file=${phys_arg#*=} ;;
+      --txn-scale-factor=?*) txn_scale_factor=${phys_arg#*=} ;;
+      --txn-warehouses=?*) txn_warehouses=${phys_arg#*=} ;;
+      --txn-config-file=?*) txn_config_file=${phys_arg#*=} ;;
+    esac
+  done
+}
+
+
diff --git a/experiments/17-chbenchmark/debug/.gitignore b/experiments/17-chbenchmark/debug/.gitignore
@@ -0,0 +1 @@
+aurora.config
diff --git a/experiments/17-chbenchmark/debug/COND b/experiments/17-chbenchmark/debug/COND
@@ -0,0 +1,26 @@
+# Conductor task definitions for sanity-checking TPC-C transaction latency.
+
+# Runs `./run_tpcc.sh`, which starts BRAD and then the TPC-C runner against it.
+# NOTE(review): presumably each option reaches the script as `--key=value`
+# (that is the form the script's argument parser accepts) - confirm against
+# Conductor's documentation.
+run_command(
+  name="txn_lat",
+  run="./run_tpcc.sh",
+  options={
+    "physical-config-file": "../../../config/physical_config_chbench.yml",
+    "system-config-file": "debug_config.yml",  # Relative to one level up.
+    "txn-config-file": "brad.config",
+    "schema-name": "chbenchmark",
+    "txn-warehouses": 1740,
+    "txn-scale-factor": 1,  # TBD
+    "t-clients": 1,  # TBD
+    "run-for-s": 180,
+  },
+)
+
+# Runs `./run_aurora_direct.sh`, which runs the TPC-C runner with its `aurora`
+# driver instead of going through BRAD. `aurora.config` is listed in this
+# directory's .gitignore, so it has to be created locally before running.
+run_command(
+  name="aurora_direct",
+  run="./run_aurora_direct.sh",
+  options={
+    "txn-config-file": "aurora.config",
+    "txn-warehouses": 1740,
+    "txn-scale-factor": 1,  # TBD
+    "t-clients": 1,  # TBD
+    "run-for-s": 180,
+  },
+)
diff --git a/experiments/17-chbenchmark/debug/brad.config b/experiments/17-chbenchmark/debug/brad.config
@@ -0,0 +1,6 @@
+# BradDriver Configuration File
+[brad]
+host                 = localhost
+port                 = 6583
+isolation_level      = REPEATABLE READ
+use_worker_offset    = true
diff --git a/experiments/17-chbenchmark/debug/debug_config.yml b/experiments/17-chbenchmark/debug/debug_config.yml
@@ -0,0 +1,167 @@
+# This file contains configurations that are used by BRAD. These are default
+# values and should be customized for specific situations.
+
+# BRAD's front end servers will listen for client connections on this interface
+# and port. If `num_front_ends` is greater than one, subsequent front ends will
+# listen on successive ports (e.g., 6584, 6585, etc.).
+front_end_interface: "0.0.0.0"
+front_end_port: 6583
+num_front_ends: 1
+
+# If installed and enabled, BRAD will serve its UI from a webserver that listens
+# for connections on this network interface and port.
+ui_interface: "0.0.0.0"
+ui_port: 7583
+
+# Logging paths. If the value is in ALL_CAPS (with underscores), it is
+# interpreted as an environment variable (BRAD will log to the path stored in
+# the environment variable).
+
+# Where BRAD's daemon process will write its logs.
+daemon_log_file: COND_OUT
+
+# Where BRAD's front end processes will write their logs.
+front_end_log_path: COND_OUT
+
+# Where BRAD's blueprint planner will write debug logs.
+planner_log_path: COND_OUT
+
+# Where BRAD's metrics loggers will write their logs.
+metrics_log_path: COND_OUT
+
+# Probability that each transactional query will be logged.
+txn_log_prob: 0.10
+
+# Set to a non-zero value to enable automatic data syncing. When this is set to 0,
+# automatic syncing is disabled.
+data_sync_period_seconds: 0
+
+# BRAD's front end servers will report their metrics at regular intervals.
+front_end_metrics_reporting_period_seconds: 30
+front_end_query_latency_buffer_size: 100
+
+# `default` means to use the policy encoded in the blueprint. Other values will
+# override the blueprint.
+routing_policy: always_aurora
+
+# Whether to disable table movement for benchmark purposes (i.e., keep all
+# tables on all engines.)
+disable_table_movement: true
+
+# Epoch length for metrics and forecasting. This is the granularity at which
+# metrics/forecasting will be performed.
+epoch_length:
+  weeks: 0
+  days: 0
+  hours: 0
+  minutes: 1
+
+# Blueprint planning strategy.
+strategy: fp_query_based_beam
+
+# Used to specify the period of time over which to use data for planning.
+# Currently, this is a "look behind" window for the workload.
+planning_window:
+  weeks: 0
+  days: 0
+  hours: 1
+  minutes: 0
+
+# Used to aggregate metrics collected in the planning window.
+metrics_agg:
+  method: ewm         # 'mean' is another option
+  alpha: 0.86466472   # 1 - 1 / e^2
+
+# Used during planning.
+reinterpret_second_as: 1
+
+# The query distribution must change by at least this much for a new blueprint
+# to be accepted.
+query_dist_change_frac: 0.1
+
+# The search bound for the provisioning.
+max_provisioning_multiplier: 2.5
+
+# Flag options for blueprint planning.
+use_io_optimized_aurora: true
+use_recorded_routing_if_available: true
+ensure_tables_together_on_one_engine: true
+
+# Loads used to prime the system when no information is available.
+aurora_initialize_load_fraction: 0.25
+redshift_initialize_load_fraction: 0.25
+
+# BRAD will not reduce predicted load lower than these values. Raise these
+# values to be more conservative against mispredictions.
+aurora_min_load_removal_fraction: 0.8
+redshift_min_load_removal_fraction: 0.8
+
+# Blueprint planning performance ceilings.
+query_latency_p90_ceiling_s: 30.0
+txn_latency_p90_ceiling_s: 0.030
+
+# If set to true, BRAD will attempt to use the specified preset Redshift
+# clusters instead of resizing the main Redshift cluster.
+use_preset_redshift_clusters: false
+
+# Used for ordering blueprints during planning.
+comparator:
+  type: benefit_perf_ceiling  # or `perf_ceiling`
+
+  benefit_horizon:  # Only used by the `benefit_perf_ceiling` comparator
+    weeks: 0
+    days: 0
+    hours: 1
+    minutes: 0
+
+  penalty_threshold: 0.8  # Only used by the `benefit_perf_ceiling` comparator
+  penalty_power: 8  # Only used by the `benefit_perf_ceiling` comparator
+
+# Used for precomputed predictions.
+std_datasets:
+  - name: regular
+    path: workloads/IMDB_100GB/regular_test/
+  - name: adhoc
+    path: workloads/IMDB_100GB/adhoc_test/
+
+# Blueprint planning trigger configs.
+
+triggers:
+  enabled: false
+  check_period_s: 90  # Triggers are checked every X seconds.
+  check_period_offset_s: 360  # Wait 6 mins before starting.
+
+  # Triggers will not fire for at least this many minutes after a new blueprint
+  # takes effect. Usually this should be greater than zero to give BRAD
+  # sufficient time to observe the effect of the blueprint on the workload. BRAD
+  # may wait longer to ensure metrics are also available for this many minutes.
+  observe_new_blueprint_mins: 3
+
+  elapsed_time:
+    disabled: true
+    multiplier: 60  # Multiplier over `planning_window`.
+
+  redshift_cpu:
+    lo: 15
+    hi: 85
+    sustained_epochs: 3
+
+  aurora_cpu:
+    lo: 15
+    hi: 85
+    sustained_epochs: 3
+
+  variable_costs:
+    disabled: true
+    threshold: 1.0
+
+  query_latency_ceiling:
+    ceiling_s: 30.0
+    sustained_epochs: 3
+
+  txn_latency_ceiling:
+    ceiling_s: 0.030
+    sustained_epochs: 3
+
+  recent_change:
+    delay_epochs: 5
diff --git a/experiments/17-chbenchmark/debug/run_aurora_direct.sh b/experiments/17-chbenchmark/debug/run_aurora_direct.sh
@@ -0,0 +1,18 @@
+#! /bin/bash
+#
+# Runs the TPC-C runner with its `aurora` driver (no BRAD in between).
+#
+# Arguments are `--name=value` pairs parsed by `extract_named_arguments` from
+# ../common.sh. This script reads: --txn-config-file, --txn-warehouses,
+# --run-for-s, --t-clients and --txn-scale-factor.
+
+# Work relative to this script's own directory, regardless of the caller's
+# working directory. Quoted so a checkout path containing spaces still works.
+script_loc=$(cd "$(dirname "$0")" && pwd -P)
+cd "$script_loc" || exit 1
+source ../common.sh
+extract_named_arguments "$@"
+
+# Resolve paths into absolute paths (the working directory changes below).
+abs_txn_config_file=$(realpath "$txn_config_file")
+
+# The runner is invoked as a module, so it must be started from the py-tpcc
+# directory. Abort rather than run it from the wrong place.
+cd ../../../workloads/chbenchmark/py-tpcc/ || exit 1
+RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc aurora \
+  --no-load \
+  --config "$abs_txn_config_file" \
+  --warehouses "$txn_warehouses" \
+  --duration "$run_for_s" \
+  --clients "$t_clients" \
+  --scalefactor "$txn_scale_factor"
diff --git a/experiments/17-chbenchmark/debug/run_tpcc.sh b/experiments/17-chbenchmark/debug/run_tpcc.sh
@@ -0,0 +1,27 @@
+#! /bin/bash
+#
+# Starts BRAD, runs the TPC-C workload against it for a fixed duration, then
+# shuts both down.
+#
+# Arguments are `--name=value` pairs parsed by `extract_named_arguments` from
+# ../common.sh. This script reads --txn-config-file, --system-config-file,
+# --physical-config-file and --run-for-s directly; `start_brad` and `run_tpcc`
+# (also from ../common.sh) read further parsed values.
+
+# Work relative to this script's own directory, regardless of the caller's
+# working directory. Quoted so a checkout path containing spaces still works.
+script_loc=$(cd "$(dirname "$0")" && pwd -P)
+cd "$script_loc" || exit 1
+source ../common.sh
+extract_named_arguments "$@"
+
+# Resolve paths into absolute paths (the helpers change directory).
+abs_txn_config_file=$(realpath "$txn_config_file")
+abs_system_config_file=$(realpath "$system_config_file")
+abs_physical_config_file=$(realpath "$physical_config_file")
+
+export BRAD_IGNORE_BLUEPRINT=1
+start_brad "$abs_system_config_file" "$abs_physical_config_file"
+
+# Wait for BRAD to start up.
+sleep 30
+
+# Start the TPC-C workload.
+run_tpcc
+
+# Let the workload run for its full duration, plus a 10 second grace period.
+sleep "$run_for_s"
+sleep 10
+
+# Stop the workload first, then BRAD, and wait for BRAD to exit.
+kill "$tpcc_pid"
+kill "$brad_pid"
+wait "$brad_pid"