Skip to content

Commit

Permalink
Adding chbench
Browse files Browse the repository at this point in the history
  • Loading branch information
amlatyrngom committed May 13, 2024
1 parent 6891b5f commit 2948b5e
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 14 deletions.
8 changes: 4 additions & 4 deletions load_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ def main():
with open("good_adhoc_queries.sql", "w", encoding="utf-8") as f:
f.write("\n".join(res))

if __name__ == "__main__":
main()
sys.exit(0)
# if __name__ == "__main__":
# main()
# sys.exit(0)

import yaml

Expand Down Expand Up @@ -164,7 +164,7 @@ def index_definition(table_name, index_columns):


def yaml_main():
with open("config/schemas/chbenchmark.yml", "r", encoding="utf-8") as f:
with open("config/schemas/imdb_extended.yml", "r", encoding="utf-8") as f:
tables = yaml.safe_load(f)
print(f"Tables: {tables}")

Expand Down
29 changes: 19 additions & 10 deletions workloads/IMDB_extended/workload_utils/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,15 +151,23 @@ def __init__(self, config_file="config/baseline.yml"):
cur.execute("SET GLOBAL local_infile = 1;")
self.conn.commit()

def manually_copy_s3_data(self, table_names, source_dir="chbenchmark", source_bucket="geoffxy-research", source_ext="tbl"):
    """Copy per-table data files from a source S3 bucket into this loader's bucket.

    For each table ``t`` the object ``{source_dir}/{t}/{t}.{source_ext}`` is
    copied to ``{source_dir}/test.{t}.csv`` in ``self.s3_bucket``. If that key
    is missing ("Not Found"), the alternate layout
    ``{source_dir}/{t}/test.{t}.{source_ext}`` is tried instead.

    Args:
        table_names: Iterable of table names to copy.
        source_dir: Key prefix inside the source bucket.
        source_bucket: Name of the bucket holding the source files.
        source_ext: File extension of the source objects (e.g. "tbl", "csv").
    """
    s3 = boto3.resource("s3")
    for t in table_names:
        source_key = f"{source_dir}/{t}/{t}.{source_ext}"
        target_key = f"{source_dir}/test.{t}.csv"
        print(f"Copying {t}")
        start_t = time.perf_counter()
        try:
            s3.meta.client.copy(
                {"Bucket": source_bucket, "Key": source_key},
                self.s3_bucket,
                target_key,
            )
        except Exception as err:
            # Only fall back to the alternate key layout when the object is
            # genuinely missing. Anything else (permissions, throttling, ...)
            # is re-raised instead of being silently swallowed — the original
            # code dropped such errors and still printed a success line.
            if "Not Found" not in str(err):
                raise
            fallback_key = f"{source_dir}/{t}/test.{t}.{source_ext}"
            s3.meta.client.copy(
                {"Bucket": source_bucket, "Key": fallback_key},
                self.s3_bucket,
                target_key,
            )
        print(f"Copied {t} in {time.perf_counter() - start_t:.2f} secs")

def fetch_metrics(self, start_time=None, end_time=None):
Expand All @@ -169,9 +177,9 @@ def load_database(self, schema_file, table_names):
with open(schema_file, "r", encoding="utf-8") as f:
schema = f.read()
self.submit_query(schema, until_success=True)
for t in table_names:
replica_cmd = f"ALTER TABLE {t} SET TIFLASH REPLICA 1"
self.submit_query(replica_cmd, until_success=True)
# for t in table_names:
# replica_cmd = f"ALTER TABLE {t} SET TIFLASH REPLICA 1"
# self.submit_query(replica_cmd, until_success=True)

# print("Creating Indexes")
# indexes_sql = load_schema_sql(dataset, "indexes.sql")
Expand Down Expand Up @@ -564,9 +572,10 @@ def run_query_with_results(self, sql: str):

if __name__ == "__main__":
baseline = TiDBLoader()
chtables = ['warehouse', 'item', 'stock', 'district', 'customer', 'history', 'orders', 'new_order', 'order_line', 'region', 'nation', 'supplier']
# baseline.load_database("tables.sql", chtables)
baseline.manually_copy_s3_data(chtables, source_dir="chbenchmark")
imtables = ['homes', 'theatres', 'showings', 'ticket_orders', 'aka_name', 'aka_title', 'cast_info', 'char_name', 'comp_cast_type', 'company_name', 'company_type', 'complete_cast', 'info_type', 'keyword', 'kind_type', 'link_type', 'movie_companies', 'movie_info_idx', 'movie_keyword', 'movie_link', 'name', 'role_type', 'title', 'movie_info', 'person_info']
# chtables = ['warehouse', 'item', 'stock', 'district', 'customer', 'history', 'orders', 'new_order', 'order_line', 'region', 'nation', 'supplier']
baseline.load_database("tables.sql", imtables)
# baseline.manually_copy_s3_data(imtables, source_bucket="brad-personal-data", source_dir="imdb_extended", source_ext="csv")
# baseline.manual_unload("imdb_extended", do_unload=False, start_chunk=-1, end_chunk=-1)
# baseline.manual_count_all("imdb_extended")
# import sys
Expand Down
3 changes: 3 additions & 0 deletions workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from .runtime import *
from .drivers.auroradriver import AuroraDriver
from .drivers.braddriver import BradDriver
from .drivers.tidbdriver import TiDBDriver

logging.basicConfig(
level=logging.INFO,
Expand All @@ -61,6 +62,8 @@ def createDriverClass(name):
return BradDriver
elif name == "aurora":
return AuroraDriver
elif name == "tidb":
return TiDBDriver
else:
raise NotImplementedError

Expand Down

0 comments on commit 2948b5e

Please sign in to comment.