mitdbg · geoffxy · May 10, 2024 · May 7, 2024 · May 7, 2024 · May 8, 2024
diff --git a/experiments/17-chbenchmark/common.sh b/experiments/17-chbenchmark/common.sh
@@ -13,6 +13,7 @@ function start_brad() {
 }
 
 function run_tpcc() {
+  local results_name=$1
   pushd ../../../workloads/chbenchmark/py-tpcc/
   local args=(
     --no-load
@@ -25,11 +26,66 @@ function run_tpcc() {
   if [[ ! -z $txn_zipfian_alpha ]]; then
     args+=(--zipfian-alpha $txn_zipfian_alpha)
   fi
-  RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc brad "${args[@]}" &
+  mkdir -p $COND_OUT/$results_name
+  RECORD_DETAILED_STATS=1 COND_OUT=$COND_OUT/$results_name python3 -m pytpcc.tpcc brad "${args[@]}" &
   tpcc_pid=$!
   popd
 }
 
+# Appends "<UTC timestamp>,<$1>" as one line to $COND_OUT/points.log.
+function log_workload_point() {
+  local msg=$1
+  printf '%s,%s\n' "$(date --utc "+%Y-%m-%d %H:%M:%S")" "$msg" >> "$COND_OUT/points.log"
+}
+
+# Starts run_repeating_analytics_serial.py in the background.
+# Arguments: $1 --num-clients, $2 --avg-gap-s, $3 --avg-gap-std-s,
+#   $4 --query-indexes, $5 results subdirectory name under $COND_OUT,
+#   $6 --client-offset (optional; the flag is omitted when empty).
+# Reads globals: num_front_ends, ra_query_bank_file, COND_OUT, and the
+#   optional ra_query_frequency_path. Sets global: runner_pid.
+function start_repeating_olap_runner() {
+  local ra_clients=$1 ra_gap_s=$2 ra_gap_std_s=$3
+  local query_indexes=$4 results_name=$5 client_offset=${6:-}
+  local results_dir=$COND_OUT/$results_name
+
+  local args=(
+    --num-clients "$ra_clients"
+    --num-front-ends "$num_front_ends"
+    --query-indexes "$query_indexes"
+    --query-bank-file "$ra_query_bank_file"
+    --avg-gap-s "$ra_gap_s"
+    --avg-gap-std-s "$ra_gap_std_s"
+  )
+  if [[ -n $ra_query_frequency_path ]]; then
+    args+=(--query-frequency-path "$ra_query_frequency_path")
+  fi
+  if [[ -n $client_offset ]]; then
+    args+=(--client-offset "$client_offset")
+  fi
+
+  >&2 echo "[Serial Repeating Analytics] Running with $ra_clients..."
+  # The runner gets its own output directory, handed to it as COND_OUT.
+  mkdir -p "$results_dir"
+  log_workload_point "$results_name"
+  COND_OUT=$results_dir python3.11 ../../../workloads/IMDB_extended/run_repeating_analytics_serial.py "${args[@]}" &
+
+  # Return value: callers read the background runner's PID from this global.
+  runner_pid=$!
+}
+
+# Sends SIGINT to every PID in "$@", waits for each, then stops $brad_pid.
+function graceful_shutdown() {
+  local pid
+  for pid in "$@"; do kill -INT "$pid"; done
+  for pid in "$@"; do wait "$pid"; done
+  # Skip if brad_pid is unset: a bare `wait` would block on every child.
+  if [[ -n ${brad_pid:-} ]]; then
+    kill -INT "$brad_pid"
+    wait "$brad_pid"
+  fi
+}
+
 function extract_named_arguments() {
   # Evaluates any environment variables in this script's arguments. This script
   # should only be run on trusted input.

diff --git a/experiments/17-chbenchmark/debug/COND b/experiments/17-chbenchmark/debug/COND
@@ -81,3 +81,28 @@ run_experiment(
     "txn-zipfian-alpha": ZIPFIAN_ALPHA,
   },
 )
+
+# Query indices: 0 through 21, excluding 4 and 13.
+QUERIES = [idx for idx in range(22) if idx not in (4, 13)]
+QUERIES_STR = ",".join(str(idx) for idx in QUERIES)
+
+# Registers the "run_full" experiment, which runs ./run_full.sh with the
+# options listed below.
+run_experiment(
+  name="run_full",
+  run="./run_full.sh",
+  options={
+    "physical-config-file": "../../../config/physical_config_chbench.yml",
+    "system-config-file": "debug_config.yml",  # Relative to one level up.
+    "schema-name": "chbenchmark",
+    "txn-config-file": "brad.config",
+    "txn-warehouses": 1740,
+    "txn-scale-factor": 1,  # TBD
+    "t-clients": 1,  # TBD
+    "num-front-ends": 2, # TBD
+    "run-for-s": 60 * 60,  # One hour
+    "txn-zipfian-alpha": ZIPFIAN_ALPHA,
+    "ra-query-indexes": QUERIES_STR,
+    "ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql",
+  },
+)
diff --git a/experiments/17-chbenchmark/debug/debug_config.yml b/experiments/17-chbenchmark/debug/debug_config.yml
@@ -6,7 +6,7 @@
 # listen on successive ports (e.g., 6584, 6585, etc.).
 front_end_interface: "0.0.0.0"
 front_end_port: 6583
-num_front_ends: 1
+num_front_ends: 2
 
 # If installed and enabled, BRAD will serve its UI from a webserver that listens
 # for connections on this network interface and port.
@@ -42,7 +42,7 @@ front_end_query_latency_buffer_size: 100
 
 # `default` means to use the policy encoded in the blueprint. Other values will
 # override the blueprint.
-routing_policy: always_aurora
+routing_policy: default
 
 # Whether to disable table movement for benchmark purposes (i.e., keep all
 # tables on all engines.)
@@ -104,6 +104,8 @@ txn_latency_p90_ceiling_s: 0.030
 # clusters instead of resizing the main Redshift cluster.
 use_preset_redshift_clusters: false
 
+result_row_limit: 10
+
 # Used for ordering blueprints during planning.
 comparator:
   type: benefit_perf_ceiling  # or `perf_ceiling`
@@ -119,10 +121,8 @@ comparator:
 
 # Used for precomputed predictions.
 std_datasets:
-  - name: regular
-    path: workloads/IMDB_100GB/regular_test/
-  - name: adhoc
-    path: workloads/IMDB_100GB/adhoc_test/
+  - name: chbenchmark
+    path: workloads/chbenchmark/
 
 # Blueprint planning trigger configs.
 

diff --git a/experiments/17-chbenchmark/debug/run_full.sh b/experiments/17-chbenchmark/debug/run_full.sh
@@ -0,0 +1,25 @@
+#! /bin/bash
+# Starts BRAD, then a TPC-C client ("t_1") and a repeating analytics runner
+# ("ch_1"); lets them run for $run_for_s seconds, then shuts everything down.
+script_loc=$(cd "$(dirname "$0")" && pwd -P)
+cd "$script_loc" || exit 1
+source ../common.sh
+extract_named_arguments "$@"
+
+# Resolve paths into absolute paths
+abs_txn_config_file=$(realpath "$txn_config_file")
+abs_system_config_file=$(realpath "$system_config_file")
+abs_physical_config_file=$(realpath "$physical_config_file")
+
+export BRAD_IGNORE_BLUEPRINT=1
+start_brad "$abs_system_config_file" "$abs_physical_config_file"
+sleep 30  # presumably gives BRAD time to come up - TODO confirm
+
+run_tpcc "t_1"
+# Quoted so that an empty value cannot shift the later positional arguments.
+start_repeating_olap_runner 1 10 5 "$ra_query_indexes" "ch_1" "$t_clients"
+ra_pid=$runner_pid
+
+sleep "$run_for_s"
+# Shut down.
+graceful_shutdown "$tpcc_pid" "$ra_pid"
diff --git a/experiments/17-chbenchmark/debug/set_up_starting_blueprint.sh b/experiments/17-chbenchmark/debug/set_up_starting_blueprint.sh
@@ -0,0 +1,20 @@
+#! /bin/bash
+# Runs set_up_starting_blueprint.py for the chbenchmark schema. $1: physical config file.
+if [[ -z ${1:-} ]]; then
+  >&2 echo "Usage: $0 path/to/physical/config.yml"
+  exit 1
+fi
+
+script_loc=$(cd "$(dirname "$0")" && pwd -P)
+cd "$script_loc" || exit 1
+source ../common.sh
+# NOTE: $1 is used after the cd above, so a relative path resolves from this script's directory.
+python3 ../../../workloads/IMDB_extended/set_up_starting_blueprint.py \
+  --schema-name chbenchmark \
+  --query-bank-file ../../../workloads/chbenchmark/queries.sql \
+  --redshift-queries "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21" \
+  --redshift-provisioning "dc2.large:16" \
+  --aurora-provisioning "db.r6g.xlarge:1" \
+  --system-config-file debug_config.yml \
+  --physical-config-file "$1" \
+  --override-definite-routing redshift
diff --git a/experiments/17-chbenchmark/scale_down/COND b/experiments/17-chbenchmark/scale_down/COND
@@ -0,0 +1,24 @@
+ZIPFIAN_ALPHA = 5.0  # Forwarded below as `txn-zipfian-alpha`.
+
+# Query indices: every index from 0 through 21.
+QUERIES = list(range(22))
+QUERIES_STR = ",".join(str(idx) for idx in QUERIES)
+# Registers the "run_full" experiment, which runs ./run_full.sh with these options.
+run_experiment(
+  name="run_full",
+  run="./run_full.sh",
+  options={
+    "physical-config-file": "../../../config/physical_config_chbench.yml",
+    "system-config-file": "ch_scale_down_config.yml",  # Relative to one level up.
+    "schema-name": "chbenchmark",
+    "txn-config-file": "brad.config",
+    "txn-warehouses": 1740,
+    "txn-scale-factor": 1,  # TBD
+    "t-clients": 1,  # TBD
+    "num-front-ends": 2, # TBD
+    "run-for-s": 2 * 60 * 60,  # 2 hours
+    "txn-zipfian-alpha": ZIPFIAN_ALPHA,
+    "ra-query-indexes": QUERIES_STR,
+    "ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql",
+  },
+)
diff --git a/experiments/17-chbenchmark/scale_down/brad.config b/experiments/17-chbenchmark/scale_down/brad.config
@@ -0,0 +1,6 @@
+# BradDriver Configuration File
+[brad]
+host                 = localhost
+port                 = 6583
+isolation_level      = REPEATABLE READ
+use_worker_offset    = true
diff --git a/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml b/experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml
@@ -0,0 +1,167 @@
+# This file contains configurations that are used by BRAD. These are default
+# values and should be customized for specific situations.
+
+# BRAD's front end servers will listen for client connections on this interface
+# and port. If `num_front_ends` is greater than one, subsequent front ends will
+# listen on successive ports (e.g., 6584, 6585, etc.).
+front_end_interface: "0.0.0.0"
+front_end_port: 6583
+num_front_ends: 2
+
+# If installed and enabled, BRAD will serve its UI from a webserver that listens
+# for connections on this network interface and port.
+ui_interface: "0.0.0.0"
+ui_port: 7583
+
+# Logging paths. If the value is in ALL_CAPS (with underscores), it is
+# interpreted as an environment variable (BRAD will log to the path stored in
+# the environment variable).
+
+# Where BRAD's daemon process will write its logs.
+daemon_log_file: COND_OUT
+
+# Where BRAD's front end processes will write their logs.
+front_end_log_path: COND_OUT
+
+# Where BRAD's blueprint planner will write debug logs.
+planner_log_path: COND_OUT
+
+# Where BRAD's metrics loggers will write their logs.
+metrics_log_path: COND_OUT
+
+# Probability that each transactional query will be logged.
+txn_log_prob: 0.10
+
+# Set to a non-zero value to enable automatic data syncing. When this is set to 0,
+# automatic syncing is disabled.
+data_sync_period_seconds: 0
+
+# BRAD's front end servers will report their metrics at regular intervals.
+front_end_metrics_reporting_period_seconds: 30
+front_end_query_latency_buffer_size: 100
+
+# `default` means to use the policy encoded in the blueprint. Other values will
+# override the blueprint.
+routing_policy: default
+
+# Whether to disable table movement for benchmark purposes (i.e., keep all
+# tables on all engines.)
+disable_table_movement: true
+
+# Epoch length for metrics and forecasting. This is the granularity at which
+# metrics/forecasting will be performed.
+epoch_length:
+  weeks: 0
+  days: 0
+  hours: 0
+  minutes: 1
+
+# Blueprint planning strategy.
+strategy: fp_query_based_beam
+
+# Used to specify the period of time over which to use data for planning.
+# Currently, this is a "look behind" window for the workload.
+planning_window:
+  weeks: 0
+  days: 0
+  hours: 1
+  minutes: 0
+
+# Used to aggregate metrics collected in the planning window.
+metrics_agg:
+  method: ewm         # 'mean' is another option
+  alpha: 0.86466472   # 1 - 1 / e^2
+
+# Used during planning.
+reinterpret_second_as: 1
+
+# The query distribution must change by at least this much for a new blueprint
+# to be accepted.
+query_dist_change_frac: 0.1
+
+# The search bound for the provisioning.
+max_provisioning_multiplier: 2.5
+
+# Flag options for blueprint planning.
+use_io_optimized_aurora: true
+use_recorded_routing_if_available: true
+ensure_tables_together_on_one_engine: true
+
+# Loads used to prime the system when no information is available.
+aurora_initialize_load_fraction: 0.25
+redshift_initialize_load_fraction: 0.25
+
+# BRAD will not reduce predicted load lower than these values. Raise these
+# values to be more conservative against mispredictions.
+aurora_min_load_removal_fraction: 0.8
+redshift_min_load_removal_fraction: 0.8
+
+# Blueprint planning performance ceilings.
+query_latency_p90_ceiling_s: 360.0
+txn_latency_p90_ceiling_s: 0.080
+
+# If set to true, BRAD will attempt to use the specified preset Redshift
+# clusters instead of resizing the main Redshift cluster.
+use_preset_redshift_clusters: false
+
+result_row_limit: 10
+
+# Used for ordering blueprints during planning.
+comparator:
+  type: benefit_perf_ceiling  # or `perf_ceiling`
+
+  benefit_horizon:  # Only used by the `benefit_perf_ceiling` comparator
+    weeks: 0
+    days: 0
+    hours: 24
+    minutes: 0
+
+  penalty_threshold: 0.8  # Only used by the `benefit_perf_ceiling` comparator
+  penalty_power: 8  # Only used by the `benefit_perf_ceiling` comparator
+
+# Used for precomputed predictions.
+std_datasets:
+  - name: chbenchmark
+    path: workloads/chbenchmark/
+
+# Blueprint planning trigger configs.
+
+triggers:
+  enabled: false
+  check_period_s: 90  # Triggers are checked every X seconds.
+  check_period_offset_s: 360  # Wait 6 mins before starting.
+
+  # Triggers will not fire for at least this many minutes after a new blueprint
+  # takes effect. Usually this should be greater than zero to give BRAD
+  # sufficient time to observe the effect of the blueprint on the workload. BRAD
+  # may wait longer to ensure metrics are also available for this many minutes.
+  observe_new_blueprint_mins: 5
+
+  elapsed_time:
+    disabled: true
+    multiplier: 60  # Multiplier over `planning_window`.
+
+  redshift_cpu:
+    lo: 15
+    hi: 85
+    sustained_epochs: 3
+
+  aurora_cpu:
+    lo: 15
+    hi: 85
+    sustained_epochs: 3
+
+  variable_costs:
+    disabled: true
+    threshold: 1.0
+
+  query_latency_ceiling:
+    ceiling_s: 360.0
+    sustained_epochs: 3
+
+  txn_latency_ceiling:
+    ceiling_s: 0.080
+    sustained_epochs: 3
+
+  recent_change:
+    delay_epochs: 5