Skip to content

Commit

Permalink
Add experiment configs for CH-BenCHmark scenario (#509)
Browse files Browse the repository at this point in the history
Part of #487.
  • Loading branch information
geoffxy authored May 10, 2024
1 parent cf35b38 commit ccd45c4
Show file tree
Hide file tree
Showing 16 changed files with 457 additions and 17 deletions.
58 changes: 57 additions & 1 deletion experiments/17-chbenchmark/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ function start_brad() {
}

function run_tpcc() {
local results_name=$1
pushd ../../../workloads/chbenchmark/py-tpcc/
local args=(
--no-load
Expand All @@ -25,11 +26,66 @@ function run_tpcc() {
if [[ ! -z $txn_zipfian_alpha ]]; then
args+=(--zipfian-alpha $txn_zipfian_alpha)
fi
RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc brad "${args[@]}" &
mkdir -p $COND_OUT/$results_name
RECORD_DETAILED_STATS=1 COND_OUT=$COND_OUT/$results_name python3 -m pytpcc.tpcc brad "${args[@]}" &
tpcc_pid=$!
popd
}

#######################################
# Appends a timestamped marker line to the workload points log.
# Globals:   COND_OUT (read) - directory that contains points.log
# Arguments: $1 - label/message to record
# Outputs:   appends "<UTC timestamp>,<message>" to $COND_OUT/points.log
#######################################
function log_workload_point() {
  local msg=$1
  local now
  # `-u` is the portable spelling of GNU date's `--utc`.
  now=$(date -u "+%Y-%m-%d %H:%M:%S")
  # Quote the path so an output directory containing spaces still works.
  printf '%s,%s\n' "$now" "$msg" >> "$COND_OUT/points.log"
}

#######################################
# Launches run_repeating_analytics_serial.py in the background and records a
# workload point for it.
# Globals:
#   num_front_ends, ra_query_bank_file (read) - forwarded to the runner
#   ra_query_frequency_path (read)  - optional; forwarded only when non-empty
#   COND_OUT (read)                 - base output directory
#   results_dir (written)           - $COND_OUT/<results_name>; intentionally
#                                     left global, as in the original
#   runner_pid (written)            - PID of the background runner; this is
#                                     the function's "return value"
# Arguments:
#   $1 - number of analytics clients
#   $2 - average gap between queries, in seconds
#   $3 - standard deviation of the gap, in seconds
#   $4 - query indexes (passed through as a single argument)
#   $5 - results sub-directory name (also used as the workload point label)
#   $6 - client offset (optional; forwarded only when non-empty)
# Outputs: a status line on stderr
#######################################
function start_repeating_olap_runner() {
  local ra_clients=$1
  local ra_gap_s=$2
  local ra_gap_std_s=$3
  local query_indexes=$4
  local results_name=$5
  local client_offset=$6

  # Every expansion is quoted so values containing spaces or glob characters
  # are passed to the runner as exactly one argument each.
  local args=(
    --num-clients "$ra_clients"
    --num-front-ends "$num_front_ends"
    --query-indexes "$query_indexes"
    --query-bank-file "$ra_query_bank_file"
    --avg-gap-s "$ra_gap_s"
    --avg-gap-std-s "$ra_gap_std_s"
  )

  if [[ -n "$ra_query_frequency_path" ]]; then
    args+=(--query-frequency-path "$ra_query_frequency_path")
  fi

  if [[ -n "$client_offset" ]]; then
    args+=(--client-offset "$client_offset")
  fi

  >&2 echo "[Serial Repeating Analytics] Running with $ra_clients..."
  results_dir=$COND_OUT/$results_name
  mkdir -p "$results_dir"

  log_workload_point "$results_name"
  # The runner gets its own COND_OUT so its output lands in the per-run
  # results directory.
  COND_OUT=$results_dir python3.11 ../../../workloads/IMDB_extended/run_repeating_analytics_serial.py "${args[@]}" &

  # This is a special return value variable that we use.
  runner_pid=$!
}

#######################################
# Interrupts the given workload processes, waits for them to exit, and then
# interrupts and waits for BRAD.
# Globals:   brad_pid (read) - PID that is signalled last
# Arguments: zero or more PIDs of workload runner processes
# Returns:   the status of the final `wait` on $brad_pid
#######################################
function graceful_shutdown() {
  local pid

  # Signal every runner first so they wind down concurrently...
  for pid in "$@"; do
    kill -INT "$pid"
  done
  # ...and only then block on each of them.
  for pid in "$@"; do
    wait "$pid"
  done

  # BRAD is stopped last, after all runners have exited.
  kill -INT "$brad_pid"
  wait "$brad_pid"
}

function extract_named_arguments() {
# Evaluates any environment variables in this script's arguments. This script
# should only be run on trusted input.
Expand Down
25 changes: 25 additions & 0 deletions experiments/17-chbenchmark/debug/COND
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,28 @@ run_experiment(
"txn-zipfian-alpha": ZIPFIAN_ALPHA,
},
)

# Query indices passed to the analytics runner: 0 through 21, leaving out
# indexes 4 and 13.
QUERIES = [idx for idx in range(22) if idx not in (4, 13)]
# Comma-separated form, as expected by the `ra-query-indexes` option.
QUERIES_STR = ",".join(map(str, QUERIES))

# Experiment `run_full`: runs ./run_full.sh with the options below.
# NOTE(review): the option names appear to mirror the shell variables that
# run_full.sh reads after calling extract_named_arguments (e.g. `run-for-s`
# -> $run_for_s) -- confirm the exact mapping against that helper.
run_experiment(
name="run_full",
run="./run_full.sh",
options={
"physical-config-file": "../../../config/physical_config_chbench.yml",
"system-config-file": "debug_config.yml", # Relative to one level up.
"schema-name": "chbenchmark",
"txn-config-file": "brad.config",
"txn-warehouses": 1740,
"txn-scale-factor": 1, # TBD
"t-clients": 1, # TBD
"num-front-ends": 2, # TBD
"run-for-s": 60 * 60, # One hour
"txn-zipfian-alpha": ZIPFIAN_ALPHA,
# Analytical queries to run (comma-separated indexes) and the file they
# are read from.
"ra-query-indexes": QUERIES_STR,
"ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql",
},
)
12 changes: 6 additions & 6 deletions experiments/17-chbenchmark/debug/debug_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# listen on successive ports (e.g., 6584, 6585, etc.).
front_end_interface: "0.0.0.0"
front_end_port: 6583
num_front_ends: 1
num_front_ends: 2

# If installed and enabled, BRAD will serve its UI from a webserver that listens
# for connections on this network interface and port.
Expand Down Expand Up @@ -42,7 +42,7 @@ front_end_query_latency_buffer_size: 100

# `default` means to use the policy encoded in the blueprint. Other values will
# override the blueprint.
routing_policy: always_aurora
routing_policy: default

# Whether to disable table movement for benchmark purposes (i.e., keep all
# tables on all engines.)
Expand Down Expand Up @@ -104,6 +104,8 @@ txn_latency_p90_ceiling_s: 0.030
# clusters instead of resizing the main Redshift cluster.
use_preset_redshift_clusters: false

result_row_limit: 10

# Used for ordering blueprints during planning.
comparator:
type: benefit_perf_ceiling # or `perf_ceiling`
Expand All @@ -119,10 +121,8 @@ comparator:

# Used for precomputed predictions.
std_datasets:
- name: regular
path: workloads/IMDB_100GB/regular_test/
- name: adhoc
path: workloads/IMDB_100GB/adhoc_test/
- name: chbenchmark
path: workloads/chbenchmark/

# Blueprint planning trigger configs.

Expand Down
25 changes: 25 additions & 0 deletions experiments/17-chbenchmark/debug/run_full.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#! /bin/bash
#
# Runs the "full" CH-BenCHmark debug experiment: calls start_brad, launches
# the TPC-C runner and one repeating analytics runner, sleeps for $run_for_s
# seconds, and then shuts everything down.
#
# Named arguments are parsed by extract_named_arguments (from ../common.sh).
# This script reads the following variables afterwards: txn_config_file,
# system_config_file, physical_config_file, ra_query_indexes, t_clients and
# run_for_s.

script_loc=$(cd "$(dirname "$0")" && pwd -P)
# All relative paths below assume we are in this script's directory.
cd "$script_loc" || exit 1
source ../common.sh
extract_named_arguments "$@"

# Resolve paths into absolute paths
abs_txn_config_file=$(realpath "$txn_config_file")
abs_system_config_file=$(realpath "$system_config_file")
abs_physical_config_file=$(realpath "$physical_config_file")

export BRAD_IGNORE_BLUEPRINT=1
start_brad "$abs_system_config_file" "$abs_physical_config_file"

sleep 30

run_tpcc "t_1"
# Quoted so that an empty value cannot shift the later positional arguments.
start_repeating_olap_runner 1 10 5 "$ra_query_indexes" "ch_1" "$t_clients"
ra_pid=$runner_pid

sleep "$run_for_s"

# Shut down.
graceful_shutdown "$tpcc_pid" "$ra_pid"
20 changes: 20 additions & 0 deletions experiments/17-chbenchmark/debug/set_up_starting_blueprint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#! /bin/bash
#
# Sets up the starting blueprint for the CH-BenCHmark debug experiment by
# invoking workloads/IMDB_extended/set_up_starting_blueprint.py.
#
# Usage: set_up_starting_blueprint.sh path/to/physical/config.yml
#
# Note: the script changes into its own directory before running python3, so
# a relative config path is resolved relative to this script's directory.

# Quoted so that a path containing spaces does not break the test.
if [[ -z "$1" ]]; then
  >&2 echo "Usage: $0 path/to/physical/config.yml"
  exit 1
fi

script_loc=$(cd "$(dirname "$0")" && pwd -P)
cd "$script_loc" || exit 1
source ../common.sh

python3 ../../../workloads/IMDB_extended/set_up_starting_blueprint.py \
  --schema-name chbenchmark \
  --query-bank-file ../../../workloads/chbenchmark/queries.sql \
  --redshift-queries "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21" \
  --redshift-provisioning "dc2.large:16" \
  --aurora-provisioning "db.r6g.xlarge:1" \
  --system-config-file debug_config.yml \
  --physical-config-file "$1" \
  --override-definite-routing redshift
24 changes: 24 additions & 0 deletions experiments/17-chbenchmark/scale_down/COND
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Passed to the experiment as the `txn-zipfian-alpha` option.
ZIPFIAN_ALPHA = 5.0

# Query indices: every index from 0 through 21.
QUERIES = list(range(22))
# Comma-separated form, as expected by the `ra-query-indexes` option.
QUERIES_STR = ",".join(map(str, QUERIES))

# Experiment `run_full`: runs ./run_full.sh with the options below.
# NOTE(review): the option names presumably map onto the shell variables that
# the script reads (e.g. `run-for-s` -> $run_for_s) -- confirm against the
# script's argument parsing.
run_experiment(
name="run_full",
run="./run_full.sh",
options={
"physical-config-file": "../../../config/physical_config_chbench.yml",
"system-config-file": "ch_scale_down_config.yml", # Relative to one level up.
"schema-name": "chbenchmark",
"txn-config-file": "brad.config",
"txn-warehouses": 1740,
"txn-scale-factor": 1, # TBD
"t-clients": 1, # TBD
"num-front-ends": 2, # TBD
"run-for-s": 2 * 60 * 60, # 2 hours
"txn-zipfian-alpha": ZIPFIAN_ALPHA,
# Analytical queries to run (comma-separated indexes) and the file they
# are read from.
"ra-query-indexes": QUERIES_STR,
"ra-query-bank-file": "../../../workloads/chbenchmark/queries.sql",
},
)
6 changes: 6 additions & 0 deletions experiments/17-chbenchmark/scale_down/brad.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# BradDriver Configuration File
[brad]
host = localhost
port = 6583
isolation_level = REPEATABLE READ
use_worker_offset = true
167 changes: 167 additions & 0 deletions experiments/17-chbenchmark/scale_down/ch_scale_down_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# This file contains configurations that are used by BRAD. These are default
# values and should be customized for specific situations.

# BRAD's front end servers will listen for client connections on this interface
# and port. If `num_front_ends` is greater than one, subsequent front ends will
# listen on successive ports (e.g., 6584, 6585, etc.).
front_end_interface: "0.0.0.0"
front_end_port: 6583
num_front_ends: 2

# If installed and enabled, BRAD will serve its UI from a webserver that listens
# for connections on this network interface and port.
ui_interface: "0.0.0.0"
ui_port: 7583

# Logging paths. If the value is in ALL_CAPS (with underscores), it is
# interpreted as an environment variable (BRAD will log to the path stored in
# the environment variable).

# Where BRAD's daemon process will write its logs.
daemon_log_file: COND_OUT

# Where BRAD's front end processes will write their logs.
front_end_log_path: COND_OUT

# Where BRAD's blueprint planner will write debug logs.
planner_log_path: COND_OUT

# Where BRAD's metrics loggers will write their logs.
metrics_log_path: COND_OUT

# Probability that each transactional query will be logged.
txn_log_prob: 0.10

# Set to a non-zero value to enable automatic data syncing. When this is set to 0,
# automatic syncing is disabled.
data_sync_period_seconds: 0

# BRAD's front end servers will report their metrics at regular intervals.
front_end_metrics_reporting_period_seconds: 30
front_end_query_latency_buffer_size: 100

# `default` means to use the policy encoded in the blueprint. Other values will
# override the blueprint.
routing_policy: default

# Whether to disable table movement for benchmark purposes (i.e., keep all
# tables on all engines.)
disable_table_movement: true

# Epoch length for metrics and forecasting. This is the granularity at which
# metrics/forecasting will be performed.
epoch_length:
weeks: 0
days: 0
hours: 0
minutes: 1

# Blueprint planning strategy.
strategy: fp_query_based_beam

# Used to specify the period of time over which to use data for planning.
# Currently, this is a "look behind" window for the workload.
planning_window:
weeks: 0
days: 0
hours: 1
minutes: 0

# Used to aggregate metrics collected in the planning window.
metrics_agg:
method: ewm # 'mean' is another option
alpha: 0.86466472 # 1 - 1 / e^2

# Used during planning.
reinterpret_second_as: 1

# The query distribution must change by at least this much for a new blueprint
# to be accepted.
query_dist_change_frac: 0.1

# The search bound for the provisioning.
max_provisioning_multiplier: 2.5

# Flag options for blueprint planning.
use_io_optimized_aurora: true
use_recorded_routing_if_available: true
ensure_tables_together_on_one_engine: true

# Loads used to prime the system when no information is available.
aurora_initialize_load_fraction: 0.25
redshift_initialize_load_fraction: 0.25

# BRAD will not reduce predicted load lower than these values. Raise these
# values to be more conservative against mispredictions.
aurora_min_load_removal_fraction: 0.8
redshift_min_load_removal_fraction: 0.8

# Blueprint planning performance ceilings.
query_latency_p90_ceiling_s: 360.0
txn_latency_p90_ceiling_s: 0.080

# If set to true, BRAD will attempt to use the specified preset Redshift
# clusters instead of resizing the main Redshift cluster.
use_preset_redshift_clusters: false

result_row_limit: 10

# Used for ordering blueprints during planning.
comparator:
type: benefit_perf_ceiling # or `perf_ceiling`

benefit_horizon: # Only used by the `benefit_perf_ceiling` comparator
weeks: 0
days: 0
hours: 24
minutes: 0

penalty_threshold: 0.8 # Only used by the `benefit_perf_ceiling` comparator
penalty_power: 8 # Only used by the `benefit_perf_ceiling` comparator

# Used for precomputed predictions.
std_datasets:
- name: chbenchmark
path: workloads/chbenchmark/

# Blueprint planning trigger configs.

triggers:
enabled: false
check_period_s: 90 # Triggers are checked every X seconds.
check_period_offset_s: 360 # Wait 6 mins before starting.

# Triggers will not fire for at least this many minutes after a new blueprint
# takes effect. Usually this should be greater than zero to give BRAD
# sufficient time to observe the effect of the blueprint on the workload. BRAD
# may wait longer to ensure metrics are also available for this many minutes.
observe_new_blueprint_mins: 5

elapsed_time:
disabled: true
multiplier: 60 # Multiplier over `planning_window`.

redshift_cpu:
lo: 15
hi: 85
sustained_epochs: 3

aurora_cpu:
lo: 15
hi: 85
sustained_epochs: 3

variable_costs:
disabled: true
threshold: 1.0

query_latency_ceiling:
ceiling_s: 360.0
sustained_epochs: 3

txn_latency_ceiling:
ceiling_s: 0.080
sustained_epochs: 3

recent_change:
delay_epochs: 5
Loading

0 comments on commit ccd45c4

Please sign in to comment.