Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tooling to gather TPC-C transactional data #500

Merged
merged 8 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions tools/calibration/transactions/chbenchmark/COND
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Duration of each transactional run, in seconds (passed as "run-for-s").
RUN_FOR_S = 3 * 60 + 30  # 3 minutes 30 seconds

# Client counts to run for each instance type (passed as "t-clients").
NUM_CLIENTS = {
    "db.r6g.xlarge": [1, 2, 4, 8, 16, 32, 64],
    "db.r6g.2xlarge": [1, 2, 4, 8, 16, 32, 64, 128],
    "db.r6g.4xlarge": [1, 2, 4, 8, 16, 32, 64, 128, 256],
    "db.r6g.large": [1, 2, 4, 8, 16, 32],
    "db.t4g.medium": [1, 2, 4, 8, 16, 32],
}

INSTANCES = list(NUM_CLIENTS)

# Maps each instance type to the name used for its experiment group: every "."
# is replaced by "_" (e.g. "db.r6g.xlarge" -> "db_r6g_xlarge").
# NOTE(review): this expression used to chain `.replace("db.", "")` after the
# "." -> "_" substitution. At that point no "db." substring is left, so the
# call never matched anything and has been removed. If the intent was to strip
# the "db." prefix, it must happen before the dots are replaced -- but that
# would rename every group, so confirm before changing it.
COND_INSTANCES = {instance: instance.replace(".", "_") for instance in INSTANCES}

# Aggregate target named "all". Each dependency is the name of one
# per-instance experiment group, prefixed with ":".
combine(
    name="all",
    deps=[":" + COND_INSTANCES[instance_type] for instance_type in INSTANCES],
)

# Declare one experiment group per instance type. Each group holds one
# experiment for every client count listed for that instance in NUM_CLIENTS;
# all other options are the same across experiments.
for instance, client_counts in NUM_CLIENTS.items():
    group_name = COND_INSTANCES[instance]
    group_experiments = [
        ExperimentInstance(
            name=f"{group_name}-{clients}",
            options={
                "t-clients": clients,
                "run-for-s": RUN_FOR_S,
                "system-config-file": "system_config_chbench.yml",
                "physical-config-file": "../../../../config/physical_config_chbench.yml",
                "txn-warehouses": 1740,
                "txn-config-file": "aurora.config",
                "schema-name": "chbenchmark",
                "instance": instance,
            },
        )
        for clients in client_counts
    ]
    run_experiment_group(
        name=group_name,
        run="./run_instance.sh",
        experiments=group_experiments,
    )
117 changes: 117 additions & 0 deletions tools/calibration/transactions/chbenchmark/retrieve_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import argparse
import asyncio
from datetime import timedelta
from typing import List

from brad.config.file import ConfigFile
from brad.provisioning.directory import Directory
from brad.daemon.metrics_def import MetricDef
from brad.daemon.perf_insights import PerfInsightsClient


# Operating-system level counters.
_OS_METRICS = [
    "os.loadAverageMinute.one",
    "os.loadAverageMinute.five",
    "os.loadAverageMinute.fifteen",
    "os.cpuUtilization.system",
    "os.cpuUtilization.total",
    "os.cpuUtilization.user",
    "os.diskIO.avgQueueLen",
    "os.diskIO.tps",
    "os.diskIO.util",
    "os.diskIO.readIOsPS",
    "os.diskIO.readKbPS",
    "os.diskIO.writeIOsPS",
    "os.diskIO.writeKbPS",
    "os.network.rx",
    "os.network.tx",
    "os.memory.active",
    "os.memory.dirty",
    "os.memory.free",
    "os.memory.writeback",
    "os.memory.total",
    "os.tasks.blocked",
    "os.tasks.running",
    "os.tasks.sleeping",
    "os.tasks.stopped",
    "os.tasks.total",
]

# Database level counters (SQL and transaction statistics).
_DB_METRICS = [
    "db.SQL.queries",
    "db.SQL.total_query_time",
    "db.SQL.tup_deleted",
    "db.SQL.tup_fetched",
    "db.SQL.tup_inserted",
    "db.SQL.tup_returned",
    "db.SQL.tup_updated",
    "db.Transactions.active_transactions",
    "db.Transactions.blocked_transactions",
    "db.Transactions.duration_commits",
    "db.Transactions.xact_commit",
    "db.Transactions.xact_rollback",
]

# NOTE: Aurora has specific storage metrics (probably because they use a custom storage engine)
# https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/USER_PerfInsights_Counters.html#USER_PerfInsights_Counters.Aurora_PostgreSQL
_AURORA_STORAGE_METRICS = [
    "os.diskIO.auroraStorage.auroraStorageBytesRx",
    "os.diskIO.auroraStorage.auroraStorageBytesTx",
    "os.diskIO.auroraStorage.diskQueueDepth",
    "os.diskIO.auroraStorage.readThroughput",
    "os.diskIO.auroraStorage.writeThroughput",
    "os.diskIO.auroraStorage.readLatency",
    "os.diskIO.auroraStorage.writeLatency",
    "os.diskIO.auroraStorage.readIOsPS",
    "os.diskIO.auroraStorage.writeIOsPS",
]

# Every counter name to retrieve, in a fixed order: OS, then DB, then Aurora
# storage.
BASE_METRICS = [*_OS_METRICS, *_DB_METRICS, *_AURORA_STORAGE_METRICS]

# Request only the "avg" statistic for every counter in BASE_METRICS.
# N.B. The metrics are reported no more than once a minute. So
# average/max/min will all report the same number.
ALL_METRICS: List[MetricDef] = [(metric_name, "avg") for metric_name in BASE_METRICS]


def main():
    """Fetch recent metrics for an Aurora instance and write them to a CSV file.

    The instance is selected with either ``--instance-id`` or
    ``--physical-config-file``; when both are given, ``--instance-id`` is used.
    All metrics in ``ALL_METRICS`` are fetched at a one-minute period for the
    last ``--num-prev-points`` data points and saved to ``--out-file``.

    Raises:
        RuntimeError: If neither ``--instance-id`` nor
            ``--physical-config-file`` was provided.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--instance-id",
        type=str,
        help="The Aurora instance's identifier.",
    )
    parser.add_argument(
        "--physical-config-file",
        type=str,
        help="Used to specify the Aurora instance instead of by an ID.",
    )
    parser.add_argument(
        "--out-file",
        type=str,
        required=True,
        help="The path where the results should be saved.",
    )
    parser.add_argument(
        "--num-prev-points",
        type=int,
        default=60,
        help="The number of metric data points to retrieve.",
    )
    args = parser.parse_args()

    if args.instance_id is not None:
        client = PerfInsightsClient.from_instance_identifier(
            instance_identifier=args.instance_id
        )
    elif args.physical_config_file is not None:
        # Look up the Aurora writer through the directory built from the
        # physical config; the directory must be refreshed before it is queried.
        config = ConfigFile.load_from_physical_config(args.physical_config_file)
        directory = Directory(config)
        asyncio.run(directory.refresh())
        client = PerfInsightsClient(resource_id=directory.aurora_writer().resource_id())
    else:
        # Fail with an actionable message instead of an empty exception.
        raise RuntimeError(
            "Cannot determine the Aurora instance: provide either "
            "--instance-id or --physical-config-file."
        )

    metrics = client.fetch_metrics(
        ALL_METRICS,
        period=timedelta(minutes=1),
        num_prev_points=args.num_prev_points,
    )
    metrics.to_csv(args.out_file)


if __name__ == "__main__":
    main()
84 changes: 84 additions & 0 deletions tools/calibration/transactions/chbenchmark/run_instance.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#! /bin/bash

function extract_named_arguments() {
  # Parses `--name=value` arguments and stores each recognized value in the
  # matching global shell variable:
  #   --t-clients            -> t_clients
  #   --run-for-s            -> run_for_s
  #   --system-config-file   -> system_config_file
  #   --physical-config-file -> physical_config_file
  #   --txn-warehouses       -> txn_warehouses
  #   --txn-config-file      -> txn_config_file
  #   --schema-name          -> schema_name
  #   --instance             -> instance
  # Unrecognized arguments, and options with an empty value, are ignored.
  #
  # Evaluates any environment variables in this script's arguments. This script
  # should only be run on trusted input.
  local raw_arg phys_arg
  # Iterate over "$@" directly so arguments are not re-split or glob-expanded.
  for raw_arg in "$@"; do
    phys_arg=$(eval "echo $raw_arg")

    # Patterns are anchored at the start of the argument and require a
    # non-empty value; ${var#prefix} strips the option name without relying on
    # hard-coded character offsets.
    case "$phys_arg" in
      --t-clients=?*)
        t_clients=${phys_arg#--t-clients=}
        ;;
      --run-for-s=?*)
        run_for_s=${phys_arg#--run-for-s=}
        ;;
      --system-config-file=?*)
        system_config_file=${phys_arg#--system-config-file=}
        ;;
      --physical-config-file=?*)
        physical_config_file=${phys_arg#--physical-config-file=}
        ;;
      --txn-warehouses=?*)
        txn_warehouses=${phys_arg#--txn-warehouses=}
        ;;
      --txn-config-file=?*)
        txn_config_file=${phys_arg#--txn-config-file=}
        ;;
      --schema-name=?*)
        schema_name=${phys_arg#--schema-name=}
        ;;
      --instance=?*)
        instance=${phys_arg#--instance=}
        ;;
    esac
  done
}

# Run from this script's own directory so the relative paths used below
# (config files, retrieve_metrics.py, the workload directory) resolve.
script_loc=$(cd "$(dirname "$0")" && pwd -P)
cd "$script_loc" || exit 1
extract_named_arguments "$@"

# Resolve the config paths now: the workload below runs from another directory.
abs_txn_config_file=$(realpath "$txn_config_file")
abs_physical_config_file=$(realpath "$physical_config_file")
abs_system_config_file=$(realpath "$system_config_file")

>&2 echo "Adjusting blueprint"
brad admin --debug modify_blueprint \
  --schema-name "$schema_name" \
  --physical-config-file "$abs_physical_config_file" \
  --system-config-file "$abs_system_config_file" \
  --aurora-instance-type "$instance" \
  --aurora-num-nodes 1

>&2 echo "Waiting 30 seconds before retrieving pre-metrics..."
sleep 30

# COND_OUT is read from the environment; it is not set anywhere in this script.
>&2 echo "Retrieving pre-metrics..."
python3 retrieve_metrics.py --out-file "$COND_OUT/pi_metrics_before.csv" --physical-config-file "$abs_physical_config_file"

>&2 echo "Running the transactional workload..."

# We run against Aurora directly.
# Stop if the workload directory is missing rather than running the module
# from the wrong place.
pushd ../../../../workloads/chbenchmark/py-tpcc/ || exit 1
RECORD_DETAILED_STATS=1 python3 -m pytpcc.tpcc aurora \
  --no-load \
  --config "$abs_txn_config_file" \
  --warehouses "$txn_warehouses" \
  --duration "$run_for_s" \
  --clients "$t_clients" \
  --scalefactor 1 \
  --lat-sample-prob 0.25
popd

>&2 echo "Waiting 10 seconds before retrieving metrics..."
sleep 10

>&2 echo "Retrieving metrics..."
python3 retrieve_metrics.py --out-file "$COND_OUT/pi_metrics.csv" --physical-config-file "$abs_physical_config_file"
Loading