From 940749a795b0bb33baf46c469a45715bb54af6c6 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sat, 13 Apr 2024 20:16:05 -0400 Subject: [PATCH] Check in TPC-C runner and add BRAD driver (#490) * Check in a fork of the py-tpcc code * Python 2 to 3 fixes * Black reformat * Implement BradDriver with delivery and new_order * Implement order_status, payment, and stock_level --- workloads/chbenchmark/py-tpcc/README.md | 32 + workloads/chbenchmark/py-tpcc/README.original | 52 + .../chbenchmark/py-tpcc/pytpcc/.gitignore | 3 + .../chbenchmark/py-tpcc/pytpcc/CONFIG_EXAMPLE | 17 + .../chbenchmark/py-tpcc/pytpcc/README_v1.1 | 8 + .../chbenchmark/py-tpcc/pytpcc/__init__.py | 1 + .../chbenchmark/py-tpcc/pytpcc/constants.py | 168 +++ .../chbenchmark/py-tpcc/pytpcc/coordinator.py | 277 ++++ .../py-tpcc/pytpcc/drivers/__init__.py | 1 + .../py-tpcc/pytpcc/drivers/abstractdriver.py | 189 +++ .../py-tpcc/pytpcc/drivers/braddriver.py | 469 +++++++ .../py-tpcc/pytpcc/drivers/couchdbdriver.py | 1198 +++++++++++++++++ .../py-tpcc/pytpcc/drivers/csvdriver.py | 108 ++ .../py-tpcc/pytpcc/drivers/mongodbdriver.py | 1031 ++++++++++++++ .../py-tpcc/pytpcc/drivers/sqlitedriver.py | 515 +++++++ .../chbenchmark/py-tpcc/pytpcc/message.py | 54 + .../py-tpcc/pytpcc/runtime/__init__.py | 3 + .../py-tpcc/pytpcc/runtime/executor.py | 316 +++++ .../py-tpcc/pytpcc/runtime/loader.py | 518 +++++++ workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py | 361 +++++ workloads/chbenchmark/py-tpcc/pytpcc/tpcc.sql | 140 ++ .../py-tpcc/pytpcc/util/__init__.py | 3 + .../chbenchmark/py-tpcc/pytpcc/util/nurand.py | 65 + .../chbenchmark/py-tpcc/pytpcc/util/rand.py | 188 +++ .../py-tpcc/pytpcc/util/results.py | 133 ++ .../py-tpcc/pytpcc/util/scaleparameters.py | 111 ++ .../chbenchmark/py-tpcc/pytpcc/worker.py | 154 +++ workloads/chbenchmark/py-tpcc/setup.py | 27 + 28 files changed, 6142 insertions(+) create mode 100644 workloads/chbenchmark/py-tpcc/README.md create mode 100644 workloads/chbenchmark/py-tpcc/README.original create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/.gitignore create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/CONFIG_EXAMPLE create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/README_v1.1 create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/__init__.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/constants.py create mode 100755 workloads/chbenchmark/py-tpcc/pytpcc/coordinator.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/drivers/__init__.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/drivers/abstractdriver.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/drivers/braddriver.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/drivers/couchdbdriver.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/drivers/csvdriver.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/drivers/mongodbdriver.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/drivers/sqlitedriver.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/message.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/runtime/__init__.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/runtime/loader.py create mode 100755 workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/tpcc.sql create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/util/__init__.py create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/util/nurand.py create mode 100644 
workloads/chbenchmark/py-tpcc/pytpcc/util/rand.py
 create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/util/results.py
 create mode 100644 workloads/chbenchmark/py-tpcc/pytpcc/util/scaleparameters.py
 create mode 100755 workloads/chbenchmark/py-tpcc/pytpcc/worker.py
 create mode 100644 workloads/chbenchmark/py-tpcc/setup.py

diff --git a/workloads/chbenchmark/py-tpcc/README.md b/workloads/chbenchmark/py-tpcc/README.md
new file mode 100644
index 00000000..249060a0
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/README.md
@@ -0,0 +1,32 @@
+# Python TPC-C implementation
+
+Source: https://github.com/apavlo/py-tpcc at commit
+`db36d72dfbb6bd800d257279be9bbc1a22095ff9`.
+
+See the individual files for the license. The license comment from `tpcc.py` is
+shown below.
+
+> -----------------------------------------------------------------------
+> Copyright (C) 2011
+> Andy Pavlo
+> http:##www.cs.brown.edu/~pavlo/
+>
+> Permission is hereby granted, free of charge, to any person obtaining
+> a copy of this software and associated documentation files (the
+> "Software"), to deal in the Software without restriction, including
+> without limitation the rights to use, copy, modify, merge, publish,
+> distribute, sublicense, and/or sell copies of the Software, and to
+> permit persons to whom the Software is furnished to do so, subject to
+> the following conditions:
+>
+> The above copyright notice and this permission notice shall be
+> included in all copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+> IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+> OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+> ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+> OTHER DEALINGS IN THE SOFTWARE.
+> -----------------------------------------------------------------------

diff --git a/workloads/chbenchmark/py-tpcc/README.original b/workloads/chbenchmark/py-tpcc/README.original
new file mode 100644
index 00000000..5c5d4b37
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/README.original
@@ -0,0 +1,52 @@
++ ----------------------------------------------- +
++ Python TPC-C +
++ ----------------------------------------------- +
+
+The basic idea is that you will need to create a new driver file that
+implements the functions defined in "abstractdriver.py". One function will
+load the tuples into your database for a given table. Then there are five
+separate functions that execute the given transaction based on a set of input
+parameters. All the work for generating the tuples and the input parameters
+for the transactions has been done for you.
+
+Here's what you need to do to get started:
+
+(1) Download the source code from Github:
+
+https://github.com/apavlo/py-tpcc/tree/master/pytpcc
+
+(2) Create a new file in the 'drivers' directory for your system that follows
+the proper naming convention. For example, if your system is 'MongoDB', then
+your new file will be called 'mongodbdriver.py' and that file will contain a
+new class called 'MongodbDriver' (note the capitalization).
+
+(3) Inside your class you will need to implement the required functions
+defined in AbstractDriver. There is documentation on what these need to do
+also available on Github:
+
+https://github.com/apavlo/py-tpcc/wiki
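+
+For reference, here is a minimal driver skeleton. This is only an
+illustrative sketch, not part of the original code: the class name, file
+name, and config key are made-up placeholders, while the method names come
+from abstractdriver.py.
+
+    # drivers/exampledriver.py (hypothetical)
+    from abstractdriver import AbstractDriver
+
+    class ExampleDriver(AbstractDriver):
+        DEFAULT_CONFIG = {
+            # parameter name -> (description, default value)
+            "host": ("Server hostname", "localhost"),
+        }
+
+        def __init__(self, ddl):
+            super(ExampleDriver, self).__init__("example", ddl)
+
+        def makeDefaultConfig(self):
+            return ExampleDriver.DEFAULT_CONFIG
+
+        def loadConfig(self, config):
+            pass  # open connections here; honor config["reset"]
+
+        def loadTuples(self, tableName, tuples):
+            pass  # bulk-insert `tuples` into the table `tableName`
+
+        # ...plus doDelivery, doNewOrder, doOrderStatus, doPayment, and
+        # doStockLevel for the five transactions.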
+
+(4) Try running your system. I would start by defining the configuration file
+that gets returned by the 'makeDefaultConfig' function in your driver and
+then implement the data loading part first, since that will guide how you
+actually execute the transactions. Using 'MongoDB' as an example again, you
+can print out the driver's configuration dict to a file:
+
+$ python ./tpcc.py --print-config mongodb > mongodb.config
+
+Make any changes you need to 'mongodb.config' (e.g., passwords, hostnames).
+Then test the loader:
+
+$ python ./tpcc.py --no-execute --config=mongodb.config mongodb
+
+You can use the CSV driver if you want to see what the data or transaction
+input parameters will look like. The following command will dump out just the
+input to the driver's functions to files in /tmp/tpcc-*
+
+$ python ./tpcc.py csv
+
+You can also look at my SqliteDriver implementation to get an idea of what
+your transaction implementation functions need to do:
+
+https://github.com/apavlo/py-tpcc/blob/master/pytpcc/drivers/sqlitedriver.py

diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/.gitignore b/workloads/chbenchmark/py-tpcc/pytpcc/.gitignore
new file mode 100644
index 00000000..499c008b
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/pytpcc/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+.#kate-*
+*.config
\ No newline at end of file

diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/CONFIG_EXAMPLE b/workloads/chbenchmark/py-tpcc/pytpcc/CONFIG_EXAMPLE
new file mode 100644
index 00000000..c0054bc0
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/pytpcc/CONFIG_EXAMPLE
@@ -0,0 +1,17 @@
+# HypertableDriver Configuration File
+# Created 2011-05-02 01:43:37.859545
+[hypertable]
+
+# hostname
+host = localhost
+
+# namespace name
+namespace = tpcc
+
+# port
+port = 38080
+
+# client nodes, split by spaces
+clients = u1 u2 192.168.3.21
+# directory of the code on the client nodes
+path = ./code/tpcc/py-tpcc/mtpcc

diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/README_v1.1 b/workloads/chbenchmark/py-tpcc/pytpcc/README_v1.1
new file mode 100644
index 00000000..c41e2c1f
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/pytpcc/README_v1.1
@@ -0,0 +1,8 @@
+1. There are 3 newly added files: message.py, worker.py, and coordinator.py. Copy them into the old pytpcc directory.
+2. The coordinator is the main part. Use it like the old tpcc.py, e.g., python coordinator.py --config hypertable.config --clientprocs 5 hypertable
+3. The old argument --clients is replaced with --clientprocs, which specifies how many worker processes you want to run on each client node.
+4. All client nodes (name or IP) must be specified in the configuration file.
+5. The directory of the code on the client side should be specified in the configuration file, too. It defaults to the user's home directory, which is the ssh default.
+   It should be the same on every client node, which is not a problem for now.
+6. The execnet Python module must be installed on each client. Here is how to install it:
http://codespeak.net/execnet/install.html + diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/__init__.py b/workloads/chbenchmark/py-tpcc/pytpcc/__init__.py new file mode 100644 index 00000000..792d6005 --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/__init__.py @@ -0,0 +1 @@ +# diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/constants.py b/workloads/chbenchmark/py-tpcc/pytpcc/constants.py new file mode 100644 index 00000000..52afd4dd --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/constants.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------- +# Copyright (C) 2011 +# Andy Pavlo +# http://www.cs.brown.edu/~pavlo/ +# +# Original Java Version: +# Copyright (C) 2008 +# Evan Jones +# Massachusetts Institute of Technology +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# ----------------------------------------------------------------------- + +MONEY_DECIMALS = 2 + +# Item constants +NUM_ITEMS = 100000 +MIN_IM = 1 +MAX_IM = 10000 +MIN_PRICE = 1.00 +MAX_PRICE = 100.00 +MIN_I_NAME = 14 +MAX_I_NAME = 24 +MIN_I_DATA = 26 +MAX_I_DATA = 50 + +# Warehouse constants +MIN_TAX = 0 +MAX_TAX = 0.2000 +TAX_DECIMALS = 4 +INITIAL_W_YTD = 300000.00 +MIN_NAME = 6 +MAX_NAME = 10 +MIN_STREET = 10 +MAX_STREET = 20 +MIN_CITY = 10 +MAX_CITY = 20 +STATE = 2 +ZIP_LENGTH = 9 +ZIP_SUFFIX = "11111" + +# Stock constants +MIN_QUANTITY = 10 +MAX_QUANTITY = 100 +DIST = 24 +STOCK_PER_WAREHOUSE = 100000 + +# District constants +DISTRICTS_PER_WAREHOUSE = 10 +INITIAL_D_YTD = 30000.00 # different from Warehouse +INITIAL_NEXT_O_ID = 3001 + +# Customer constants +CUSTOMERS_PER_DISTRICT = 3000 +INITIAL_CREDIT_LIM = 50000.00 +MIN_DISCOUNT = 0.0000 +MAX_DISCOUNT = 0.5000 +DISCOUNT_DECIMALS = 4 +INITIAL_BALANCE = -10.00 +INITIAL_YTD_PAYMENT = 10.00 +INITIAL_PAYMENT_CNT = 1 +INITIAL_DELIVERY_CNT = 0 +MIN_FIRST = 6 +MAX_FIRST = 10 +MIDDLE = "OE" +PHONE = 16 +MIN_C_DATA = 300 +MAX_C_DATA = 500 +GOOD_CREDIT = "GC" +BAD_CREDIT = "BC" + +# Order constants +MIN_CARRIER_ID = 1 +MAX_CARRIER_ID = 10 +# HACK: This is not strictly correct, but it works +NULL_CARRIER_ID = 0 +# o_id < than this value, carrier != null, >= -> carrier == null +NULL_CARRIER_LOWER_BOUND = 2101 +MIN_OL_CNT = 5 +MAX_OL_CNT = 15 +INITIAL_ALL_LOCAL = 1 +INITIAL_ORDERS_PER_DISTRICT = 3000 + +# Used to generate new order transactions +MAX_OL_QUANTITY = 10 + +# Order line constants +INITIAL_QUANTITY = 5 +MIN_AMOUNT = 0.01 + +# History constants +MIN_DATA = 12 +MAX_DATA = 24 +INITIAL_AMOUNT = 10.00 + +# New order constants +INITIAL_NEW_ORDERS_PER_DISTRICT = 900 + +# TPC-C 2.4.3.4 (page 31) says this must be displayed when new order rolls back. +INVALID_ITEM_MESSAGE = "Item number is not valid" + +# Used to generate stock level transactions +MIN_STOCK_LEVEL_THRESHOLD = 10 +MAX_STOCK_LEVEL_THRESHOLD = 20 + +# Used to generate payment transactions +MIN_PAYMENT = 1.0 +MAX_PAYMENT = 5000.0 + +# Indicates "brand" items and stock in i_data and s_data. 
+ORIGINAL_STRING = "ORIGINAL" + +# Table Names +TABLENAME_ITEM = "ITEM" +TABLENAME_WAREHOUSE = "WAREHOUSE" +TABLENAME_DISTRICT = "DISTRICT" +TABLENAME_CUSTOMER = "CUSTOMER" +TABLENAME_STOCK = "STOCK" +TABLENAME_ORDERS = "ORDERS" +TABLENAME_NEW_ORDER = "NEW_ORDER" +TABLENAME_ORDER_LINE = "ORDER_LINE" +TABLENAME_HISTORY = "HISTORY" + +ALL_TABLES = [ + TABLENAME_ITEM, + TABLENAME_WAREHOUSE, + TABLENAME_DISTRICT, + TABLENAME_CUSTOMER, + TABLENAME_STOCK, + TABLENAME_ORDERS, + TABLENAME_NEW_ORDER, + TABLENAME_ORDER_LINE, + TABLENAME_HISTORY, +] + + +# Transaction Types +def enum(*sequential, **named): + enums = dict(map(lambda x: (x, x), sequential)) + # dict(zip(sequential, range(len(sequential))), **named) + return type("Enum", (), enums) + + +TransactionTypes = enum( + "DELIVERY", + "NEW_ORDER", + "ORDER_STATUS", + "PAYMENT", + "STOCK_LEVEL", +) diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/coordinator.py b/workloads/chbenchmark/py-tpcc/pytpcc/coordinator.py new file mode 100755 index 00000000..11e12341 --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/coordinator.py @@ -0,0 +1,277 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------- +# Copyright (C) 2011 +# Andy Pavlo & Yang Lu +# http:##www.cs.brown.edu/~pavlo/ +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# -----------------------------------------------------------------------
+
+import sys
+import os
+import string
+import datetime
+import logging
+import re
+import argparse
+import glob
+import time
+import pickle
+import execnet
+import worker
+import message
+from configparser import ConfigParser
+from pprint import pprint, pformat
+
+from util import *
+from runtime import *
+import drivers
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(funcName)s:%(lineno)03d] %(levelname)-5s: %(message)s",
+    datefmt="%m-%d-%Y %H:%M:%S",
+    stream=sys.stdout,
+)
+
+
+## ==============================================
+## createDriverClass
+## ==============================================
+def createDriverClass(name):
+    full_name = "%sDriver" % name.title()
+    mod = __import__("drivers.%s" % full_name.lower(), globals(), locals(), [full_name])
+    klass = getattr(mod, full_name)
+    return klass
+
+
+## DEF
+
+
+## ==============================================
+## getDrivers
+## ==============================================
+def getDrivers():
+    drivers = []
+    for f in map(
+        lambda x: os.path.basename(x).replace("driver.py", ""),
+        glob.glob("./drivers/*driver.py"),
+    ):
+        if f != "abstract":
+            drivers.append(f)
+    return drivers
+
+
+## DEF
+
+
+## ==============================================
+## startLoading
+## ==============================================
+def startLoading(scaleParameters, args, config, channels):
+    # Split the warehouses into chunks
+    procs = len(channels)
+    w_ids = [[] for _ in range(procs)]
+    for w_id in range(
+        scaleParameters.starting_warehouse, scaleParameters.ending_warehouse + 1
+    ):
+        idx = w_id % procs
+        w_ids[idx].append(w_id)
+    print(w_ids)
+
+    load_start = time.time()
+    for i in range(len(channels)):
+        m = message.Message(
+            header=message.CMD_LOAD, data=[scaleParameters, args, config, w_ids[i]]
+        )
+        channels[i].send(pickle.dumps(m, -1))
+    for ch in channels:
+        ch.receive()
+    return time.time() - load_start
+
+
+## ==============================================
+## startExecution
+## ==============================================
+def startExecution(scaleParameters, args, config, channels):
+    procs = len(channels)
+    total_results = results.Results()
+
+    for ch in channels:
+        m = message.Message(
+            header=message.CMD_EXECUTE, data=[scaleParameters, args, config]
+        )
+        ch.send(pickle.dumps(m, -1))
+    for ch in channels:
+        r = pickle.loads(ch.receive()).data
+        total_results.append(r)
+    return total_results
+
+
+## DEF
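+
+# The two functions above implement the coordinator's side of a simple
+# message-passing protocol (see message.py and worker.py): each worker is
+# sent a pickled message.Message whose header is message.CMD_LOAD or
+# message.CMD_EXECUTE and whose data carries the scale parameters, command
+# line arguments, and driver config; the worker answers on the same execnet
+# channel (a bare acknowledgement for loading, a pickled Results object for
+# execution), and the per-worker results are merged via total_results.append().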
"tpcc.sql")), + help="Path to the TPC-C DDL SQL file", + ) + ## number of processes per node + aparser.add_argument( + "--clientprocs", + default=1, + type=int, + metavar="N", + help="Number of processes on each client node.", + ) + + aparser.add_argument( + "--stop-on-error", + action="store_true", + help="Stop the transaction execution when the driver throws an exception.", + ) + aparser.add_argument( + "--no-load", action="store_true", help="Disable loading the data" + ) + aparser.add_argument( + "--no-execute", action="store_true", help="Disable executing the workload" + ) + aparser.add_argument( + "--print-config", + action="store_true", + help="Print out the default configuration file for the system and exit", + ) + aparser.add_argument( + "--debug", action="store_true", help="Enable debug log messages" + ) + args = vars(aparser.parse_args()) + + if args["debug"]: + logging.getLogger().setLevel(logging.DEBUG) + + ## Arguments validation + assert ( + args["reset"] == False or args["no_load"] == False + ), "'--reset' and '--no-load' are incompatible with each other" + + ## Create a handle to the target client driver + driverClass = createDriverClass(args["system"]) + assert driverClass != None, "Failed to find '%s' class" % args["system"] + driver = driverClass(args["ddl"]) + assert driver != None, "Failed to create '%s' driver" % args["system"] + if args["print_config"]: + config = driver.makeDefaultConfig() + print(driver.formatConfig(config)) + print + sys.exit(0) + + ## Load Configuration file + if args["config"]: + logging.debug("Loading configuration file '%s'" % args["config"]) + cparser = SafeConfigParser() + cparser.read(os.path.realpath(args["config"].name)) + config = dict(cparser.items(args["system"])) + else: + logging.debug("Using default configuration for %s" % args["system"]) + defaultConfig = driver.makeDefaultConfig() + config = dict(map(lambda x: (x, defaultConfig[x][1]), defaultConfig.keys())) + config["reset"] = args["reset"] + config["load"] = False + config["execute"] = False + if config["reset"]: + logging.info("Reseting database") + driver.loadConfig(config) + logging.info("Initializing TPC-C benchmark using %s" % driver) + + ##Get a list of clientnodes from configuration file. + clients = [] + channels = [] + assert config["clients"] != "" + clients = re.split(r"\s+", str(config["clients"])) + # print clients, len(clients),args['clientprocs'] + ##Create ssh channels to client nodes + for node in clients: + cmd = "ssh=" + node + cmd += r"//chdir=" + cmd += config["path"] + # print cmd + for i in range(args["clientprocs"]): + gw = execnet.makegateway(cmd) + ch = gw.remote_exec(worker) + channels.append(ch) + + ## Create ScaleParameters + scaleParameters = scaleparameters.makeWithScaleFactor( + args["warehouses"], args["scalefactor"] + ) + nurand = rand.setNURand(nurand.makeForLoad()) + if args["debug"]: + logging.debug("Scale Parameters:\n%s" % scaleParameters) + + ## DATA LOADER!!! + load_time = None + if not args["no_load"]: + load_time = startLoading(scaleParameters, args, config, channels) + # print load_time + ## IF + + ## WORKLOAD DRIVER!!! 
+ if not args["no_execute"]: + results = startExecution(scaleParameters, args, config, channels) + assert results + print(results.show(load_time)) + ## IF + +## MAIN diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/__init__.py b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/__init__.py new file mode 100644 index 00000000..792d6005 --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/__init__.py @@ -0,0 +1 @@ +# diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/abstractdriver.py b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/abstractdriver.py new file mode 100644 index 00000000..9c87f6f8 --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/abstractdriver.py @@ -0,0 +1,189 @@ +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------- +# Copyright (C) 2011 +# Andy Pavlo +# http://www.cs.brown.edu/~pavlo/ +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# ----------------------------------------------------------------------- + +from datetime import datetime + +import constants + + +## ============================================== +## AbstractDriver +## ============================================== +class AbstractDriver(object): + def __init__(self, name, ddl): + self.name = name + self.driver_name = "%sDriver" % self.name.title() + self.ddl = ddl + + def __str__(self): + return self.driver_name + + def makeDefaultConfig(self): + """This function needs to be implemented by all sub-classes. + It should return the items that need to be in your implementation's configuration file. 
+        Each item in the list is a triplet containing: ( <PARAMETER NAME>, <DESCRIPTION>, <DEFAULT VALUE> )
+        """
+        raise NotImplementedError(
+            "%s does not implement makeDefaultConfig" % (self.driver_name)
+        )
+
+    def loadConfig(self, config):
+        """Initialize the driver using the given configuration dict"""
+        raise NotImplementedError(
+            "%s does not implement loadConfig" % (self.driver_name)
+        )
+
+    def formatConfig(self, config):
+        """Return a formatted version of the config dict that can be used with the --config command line argument"""
+        ret = "# %s Configuration File\n" % (self.driver_name)
+        ret += "# Created %s\n" % (datetime.now())
+        ret += "[%s]" % self.name
+
+        for name in config.keys():
+            desc, default = config[name]
+            if default is None:
+                default = ""
+            ret += "\n\n# %s\n%-20s = %s" % (desc, name, default)
+        return ret
+
+    def loadStart(self):
+        """Optional callback to indicate to the driver that the data loading phase is about to begin."""
+        return None
+
+    def loadFinish(self):
+        """Optional callback to indicate to the driver that the data loading phase is finished."""
+        return None
+
+    def loadFinishItem(self):
+        """Optional callback to indicate to the driver that the ITEM data has been passed to the driver."""
+        return None
+
+    def loadFinishWarehouse(self, w_id):
+        """Optional callback to indicate to the driver that the data for the given warehouse is finished."""
+        return None
+
+    def loadFinishDistrict(self, w_id, d_id):
+        """Optional callback to indicate to the driver that the data for the given district is finished."""
+        return None
+
+    def loadTuples(self, tableName, tuples):
+        """Load a list of tuples into the target table"""
+        raise NotImplementedError(
+            "%s does not implement loadTuples" % (self.driver_name)
+        )
+
+    def executeStart(self):
+        """Optional callback before the execution phase starts"""
+        return None
+
+    def executeFinish(self):
+        """Callback after the execution phase finishes"""
+        return None
+
+    def executeTransaction(self, txn, params):
+        """Execute a transaction based on the given name"""
+
+        if constants.TransactionTypes.DELIVERY == txn:
+            result = self.doDelivery(params)
+        elif constants.TransactionTypes.NEW_ORDER == txn:
+            result = self.doNewOrder(params)
+        elif constants.TransactionTypes.ORDER_STATUS == txn:
+            result = self.doOrderStatus(params)
+        elif constants.TransactionTypes.PAYMENT == txn:
+            result = self.doPayment(params)
+        elif constants.TransactionTypes.STOCK_LEVEL == txn:
+            result = self.doStockLevel(params)
+        else:
+            assert False, "Unexpected TransactionType: " + txn
+        return result
+
+    def doDelivery(self, params):
+        """Execute DELIVERY Transaction
+        Parameters Dict:
+            w_id
+            o_carrier_id
+            ol_delivery_d
+        """
+        raise NotImplementedError(
+            "%s does not implement doDelivery" % (self.driver_name)
+        )
+
+    def doNewOrder(self, params):
+        """Execute NEW_ORDER Transaction
+        Parameters Dict:
+            w_id
+            d_id
+            c_id
+            o_entry_d
+            i_ids
+            i_w_ids
+            i_qtys
+        """
+        raise NotImplementedError(
+            "%s does not implement doNewOrder" % (self.driver_name)
+        )
+
+    def doOrderStatus(self, params):
+        """Execute ORDER_STATUS Transaction
+        Parameters Dict:
+            w_id
+            d_id
+            c_id
+            c_last
+        """
+        raise NotImplementedError(
+            "%s does not implement doOrderStatus" % (self.driver_name)
+        )
+
+    def doPayment(self, params):
+        """Execute PAYMENT Transaction
+        Parameters Dict:
+            w_id
+            d_id
+            h_amount
+            c_w_id
+            c_d_id
+            c_id
+            c_last
+            h_date
+        """
+        raise NotImplementedError(
+            "%s does not implement doPayment" % (self.driver_name)
+        )
+
+    def doStockLevel(self, params):
+        """Execute STOCK_LEVEL Transaction
Parameters Dict: + w_id + d_id + threshold + """ + raise NotImplementedError( + "%s does not implement doStockLevel" % (self.driver_name) + ) + + +## CLASS diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/braddriver.py b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/braddriver.py new file mode 100644 index 00000000..a95c495a --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/braddriver.py @@ -0,0 +1,469 @@ +import logging +from typing import Dict, Tuple, Any, Optional, List + +from abstractdriver import * +import constants + +from brad.grpc_client import BradGrpcClient + +Config = Dict[str, Tuple[str, Any]] + +logger = logging.getLogger(__name__) + + +TXN_QUERIES = { + "DELIVERY": { + "getNewOrder": "SELECT no_o_id FROM new_order WHERE no_d_id = {} AND no_w_id = {} AND no_o_id > -1 LIMIT 1", # + "deleteNewOrder": "DELETE FROM new_order WHERE no_d_id = {} AND no_w_id = {} AND no_o_id = {}", # d_id, w_id, no_o_id + "getCId": "SELECT o_c_id FROM orders WHERE o_id = {} AND o_d_id = {} AND o_w_id = {}", # no_o_id, d_id, w_id + "updateOrders": "UPDATE orders SET o_carrier_id = {} WHERE o_id = {} AND o_d_id = {} AND o_w_id = {}", # o_carrier_id, no_o_id, d_id, w_id + "updateOrderLine": "UPDATE order_line SET ol_delivery_d = {} WHERE ol_o_id = {} AND ol_d_id = {} AND ol_w_id = {}", # o_entry_d, no_o_id, d_id, w_id + "sumOLAmount": "SELECT SUM(ol_amount) FROM order_line WHERE ol_o_id = {} AND ol_d_id = {} AND ol_w_id = {}", # no_o_id, d_id, w_id + "updateCustomer": "UPDATE customer SET c_balance = c_balance + {} WHERE c_id = {} AND c_d_id = {} AND c_w_id = {}", # ol_total, c_id, d_id, w_id + }, + "NEW_ORDER": { + "getWarehouseTaxRate": "SELECT w_tax FROM warehouse WHERE w_id = {}", # w_id + "getDistrict": "SELECT d_tax, d_next_o_id FROM district WHERE d_id = {} AND d_w_id = {}", # d_id, w_id + "incrementNextOrderId": "UPDATE district SET d_next_o_id = {} WHERE d_id = {} AND d_w_id = {}", # d_next_o_id, d_id, w_id + "getCustomer": "SELECT c_discount, c_last, c_credit FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # w_id, d_id, c_id + "createOrder": "INSERT INTO orders (o_id, o_d_id, o_w_id, o_c_id, o_entry_d, o_carrier_id, o_ol_cnt, o_all_local) VALUES ({}, {}, {}, {}, {}, {}, {}, {})", # d_next_o_id, d_id, w_id, c_id, o_entry_d, o_carrier_id, o_ol_cnt, o_all_local + "createNewOrder": "INSERT INTO new_order (no_o_id, no_d_id, no_w_id) VALUES ({}, {}, {})", # o_id, d_id, w_id + "getItemInfo": "SELECT i_price, i_name, i_data FROM item WHERE i_id = {}", # ol_i_id + "getStockInfo": "SELECT s_quantity, s_data, s_ytd, s_order_cnt, s_remote_cnt, s_dist_{:02d} FROM stock WHERE s_i_id = {} AND s_w_id = {}", # d_id, ol_i_id, ol_supply_w_id + "updateStock": "UPDATE stock SET s_quantity = {}, s_ytd = {}, s_order_cnt = {}, s_remote_cnt = {} WHERE s_i_id = {} AND s_w_id = {}", # s_quantity, s_order_cnt, s_remote_cnt, ol_i_id, ol_supply_w_id + "createOrderLine": "INSERT INTO order_line (ol_o_id, ol_d_id, ol_w_id, ol_number, ol_i_id, ol_supply_w_id, ol_delivery_d, ol_quantity, ol_amount, ol_dist_info) VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {})", # o_id, d_id, w_id, ol_number, ol_i_id, ol_supply_w_id, ol_quantity, ol_amount, ol_dist_info + }, + "ORDER_STATUS": { + "getCustomerByCustomerId": "SELECT c_id, c_first, c_middle, c_last, c_balance FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # w_id, d_id, c_id + "getCustomersByLastName": "SELECT c_id, c_first, c_middle, c_last, c_balance FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_last = {} ORDER BY 
c_first", # w_id, d_id, c_last + "getLastOrder": "SELECT o_id, o_carrier_id, o_entry_d FROM orders WHERE o_w_id = ? AND o_d_id = ? AND o_c_id = ? ORDER BY o_id DESC LIMIT 1", # w_id, d_id, c_id + "getOrderLines": "SELECT ol_supply_w_id, ol_i_id, ol_quantity, ol_amount, ol_delivery_d FROM order_line WHERE ol_w_id = ? AND ol_d_id = ? AND ol_o_id = ?", # w_id, d_id, o_id + }, + "PAYMENT": { + "getWarehouse": "SELECT w_name, w_street_1, w_street_2, w_city, w_state, w_zip FROM warehouse WHERE w_id = {}", # w_id + "updateWarehouseBalance": "UPDATE warehouse SET w_ytd = w_ytd + {} WHERE w_id = {}", # h_amount, w_id + "getDistrict": "SELECT d_name, d_street_1, d_street_2, d_city, d_state, d_zip FROM district WHERE d_w_id = {} AND d_id = {}", # w_id, d_id + "updateDistrictBalance": "UPDATE district SET d_ytd = d_ytd + {} WHERE d_w_id = {} AND d_id = {}", # h_amount, d_w_id, d_id + "getCustomerByCustomerId": "SELECT c_id, c_first, c_middle, c_last, c_street_1, c_street_2, c_city, c_state, c_zip, c_phone, c_since, c_credit, c_credit_lim, c_discount, c_balance, c_ytd_payment, c_payment_cnt, c_data FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # w_id, d_id, c_id + "getCustomersByLastName": "SELECT c_id, c_first, c_middle, c_last, c_street_1, c_street_2, c_city, c_state, c_zip, c_phone, c_since, c_credit, c_credit_lim, c_discount, c_balance, c_ytd_payment, c_payment_cnt, c_data FROM customer WHERE c_w_id = {} AND c_d_id = {} AND c_last = {} ORDER BY c_first", # w_id, d_id, c_last + "updateBCCustomer": "UPDATE customer SET c_balance = {}, c_ytd_payment = {}, c_payment_cnt = {}, c_data = {} WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # c_balance, c_ytd_payment, c_payment_cnt, c_data, c_w_id, c_d_id, c_id + "updateGCCustomer": "UPDATE customer SET c_balance = {}, c_ytd_payment = {}, c_payment_cnt = {} WHERE c_w_id = {} AND c_d_id = {} AND c_id = {}", # c_balance, c_ytd_payment, c_payment_cnt, c_w_id, c_d_id, c_id + "insertHistory": "INSERT INTO history VALUES ({}, {}, {}, {}, {}, {}, {}, {})", + }, + "STOCK_LEVEL": { + "getOId": "SELECT d_next_o_id FROM district WHERE d_w_id = {} AND d_id = {}", + "getStockCount": """ + SELECT COUNT(DISTINCT(ol_i_id)) FROM order_line, stock + WHERE ol_w_id = {} + AND ol_d_id = {} + AND ol_o_id < {} + AND ol_o_id >= {} + AND s_w_id = {} + AND s_i_id = ol_i_id + AND s_quantity < {} + """, + }, +} + + +class BradDriver(AbstractDriver): + DEFAULT_CONFIG = { + "host": ("Host running the BRAD front end.", "localhost"), + "port": ("Port on which the BRAD front end is listening.", 6583), + } + + def __init__(self, ddl: str) -> None: + super().__init__("BradDriver", ddl) + self._client: Optional[BradGrpcClient] = None + + def makeDefaultConfig(self) -> Config: + return BradDriver.DEFAULT_CONFIG + + def loadConfig(self, config: Config) -> None: + self._client = BradGrpcClient(host=config["host"], port=config["port"]) + self._client.connect() + + def loadTuples(self, tableName: str, tuples) -> None: + # We don't support data loading directly here. + pass + + def doDelivery(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: + assert self._client is not None + + q = TXN_QUERIES["DELIVERY"] + w_id = params["w_id"] + o_carrier_id = params["o_carrier_id"] + ol_delivery_d = params["ol_delivery_d"] + + result = [] + self._client.run_query_json("BEGIN") + for d_id in range(1, constants.DISTRICTS_PER_WAREHOUSE + 1): + r, _ = self._client.run_query_json(q["getNewOrder"].format(d_id, w_id)) + if len(r) == 0: + ## No orders for this district: skip it. 
        for d_id in range(1, constants.DISTRICTS_PER_WAREHOUSE + 1):
            r, _ = self._client.run_query_json(q["getNewOrder"].format(d_id, w_id))
            if len(r) == 0:
                ## No orders for this district: skip it.
                ## Note: This must be reported if > 1%
                continue
            no_o_id = r[0][0]

            r, _ = self._client.run_query_json(q["getCId"].format(no_o_id, d_id, w_id))
            c_id = r[0][0]

            r, _ = self._client.run_query_json(
                q["sumOLAmount"].format(no_o_id, d_id, w_id)
            )
            ol_total = r[0][0]

            self._client.run_query_json(q["deleteNewOrder"].format(d_id, w_id, no_o_id))
            self._client.run_query_json(
                q["updateOrders"].format(o_carrier_id, no_o_id, d_id, w_id)
            )
            self._client.run_query_json(
                q["updateOrderLine"].format(ol_delivery_d, no_o_id, d_id, w_id)
            )

            # These must be logged in the "result file" according to TPC-C 2.7.2.2 (page 39).
            # We remove the queued time, completed time, w_id, and o_carrier_id: the client
            # can figure them out.
            # If there are no order lines, SUM returns null. There should always be order lines.
            assert (
                ol_total is not None
            ), "ol_total is NULL: there are no order lines. This should not happen"
            assert ol_total > 0.0

            self._client.run_query_json(
                q["updateCustomer"].format(ol_total, c_id, d_id, w_id)
            )

            result.append((d_id, no_o_id))

        self._client.run_query_json("COMMIT")
        return result

    def doNewOrder(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]:
        assert self._client is not None

        q = TXN_QUERIES["NEW_ORDER"]
        w_id = params["w_id"]
        d_id = params["d_id"]
        c_id = params["c_id"]
        o_entry_d = params["o_entry_d"]
        i_ids = params["i_ids"]
        i_w_ids = params["i_w_ids"]
        i_qtys = params["i_qtys"]

        assert len(i_ids) > 0
        assert len(i_ids) == len(i_w_ids)
        assert len(i_ids) == len(i_qtys)

        self._client.run_query_json("BEGIN")
        all_local = True
        items = []
        for i in range(len(i_ids)):
            ## Determine if this is an all-local order or not
            all_local = all_local and i_w_ids[i] == w_id
            r, _ = self._client.run_query_json(q["getItemInfo"].format(i_ids[i]))
            # An invalid item id returns no rows; record an empty item so the
            # check below can roll the transaction back.
            items.append(r[0] if len(r) > 0 else [])
        assert len(items) == len(i_ids)

        ## TPC-C defines that 1% of New-Order transactions carry a wrong item id,
        ## causing a rollback. Note that this happens for 1% of transactions on purpose.
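        # TPC-C 2.4.2.3 requires the transaction to stop and roll back when an
        # unused item number is encountered; constants.INVALID_ITEM_MESSAGE is
        # the message the spec says must be displayed in that case.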
        for item in items:
            if len(item) == 0:
                self._client.run_query_json("ROLLBACK")
                return None
        ## FOR

        ## ----------------
        ## Collect Information from WAREHOUSE, DISTRICT, and CUSTOMER
        ## ----------------
        r, _ = self._client.run_query_json(q["getWarehouseTaxRate"].format(w_id))
        w_tax = r[0][0]

        r, _ = self._client.run_query_json(q["getDistrict"].format(d_id, w_id))
        district_info = r[0]
        d_tax = district_info[0]
        d_next_o_id = district_info[1]

        r, _ = self._client.run_query_json(q["getCustomer"].format(w_id, d_id, c_id))
        customer_info = r[0]
        c_discount = customer_info[0]

        ## ----------------
        ## Insert Order Information
        ## ----------------
        ol_cnt = len(i_ids)
        o_carrier_id = constants.NULL_CARRIER_ID

        self._client.run_query_json(
            q["incrementNextOrderId"].format(d_next_o_id + 1, d_id, w_id)
        )
        self._client.run_query_json(
            q["createOrder"].format(
                d_next_o_id,
                d_id,
                w_id,
                c_id,
                o_entry_d,
                o_carrier_id,
                ol_cnt,
                all_local,
            ),
        )
        self._client.run_query_json(q["createNewOrder"].format(d_next_o_id, d_id, w_id))

        ## ----------------
        ## Insert Order Item Information
        ## ----------------
        item_data = []
        total = 0
        for i in range(len(i_ids)):
            ol_number = i + 1
            ol_supply_w_id = i_w_ids[i]
            ol_i_id = i_ids[i]
            ol_quantity = i_qtys[i]

            itemInfo = items[i]
            i_name = itemInfo[1]
            i_data = itemInfo[2]
            i_price = itemInfo[0]

            r, _ = self._client.run_query_json(
                q["getStockInfo"].format(d_id, ol_i_id, ol_supply_w_id)
            )
            if len(r) == 0:
                logger.warning(
                    "No STOCK record for (ol_i_id=%d, ol_supply_w_id=%d)",
                    ol_i_id,
                    ol_supply_w_id,
                )
                continue
            stockInfo = r[0]
            s_quantity = stockInfo[0]
            s_ytd = stockInfo[2]
            s_order_cnt = stockInfo[3]
            s_remote_cnt = stockInfo[4]
            s_data = stockInfo[1]
            s_dist_xx = stockInfo[5]  # Fetches data from the s_dist_[d_id] column

            ## Update stock
            s_ytd += ol_quantity
            if s_quantity >= ol_quantity + 10:
                s_quantity = s_quantity - ol_quantity
            else:
                s_quantity = s_quantity + 91 - ol_quantity
            s_order_cnt += 1

            if ol_supply_w_id != w_id:
                s_remote_cnt += 1

            self._client.run_query_json(
                q["updateStock"].format(
                    s_quantity,
                    s_ytd,
                    s_order_cnt,
                    s_remote_cnt,
                    ol_i_id,
                    ol_supply_w_id,
                ),
            )

            if (
                i_data.find(constants.ORIGINAL_STRING) != -1
                and s_data.find(constants.ORIGINAL_STRING) != -1
            ):
                brand_generic = "B"
            else:
                brand_generic = "G"

            ## Transaction profile states to use "ol_quantity * i_price"
            ol_amount = ol_quantity * i_price
            total += ol_amount

            self._client.run_query_json(
                q["createOrderLine"].format(
                    d_next_o_id,
                    d_id,
                    w_id,
                    ol_number,
                    ol_i_id,
                    ol_supply_w_id,
                    o_entry_d,
                    ol_quantity,
                    ol_amount,
                    s_dist_xx,
                ),
            )

            ## Add the info to be returned
            item_data.append((i_name, s_quantity, brand_generic, i_price, ol_amount))
        ## FOR

        ## Commit!
        self._client.run_query_json("COMMIT")
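
        # Per the TPC-C new-order profile, the order total below is
        # sum(ol_amount) * (1 - c_discount) * (1 + w_tax + d_tax), using the
        # discount and tax rates fetched at the start of the transaction.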
+ self._client.run_query_json("COMMIT") + + ## Adjust the total for the discount + # print "c_discount:", c_discount, type(c_discount) + # print "w_tax:", w_tax, type(w_tax) + # print "d_tax:", d_tax, type(d_tax) + total *= (1 - c_discount) * (1 + w_tax + d_tax) + + ## Pack up values the client is missing (see TPC-C 2.4.3.5) + misc = [(w_tax, d_tax, d_next_o_id, total)] + + return [customer_info, misc, item_data] + + def doOrderStatus(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: + assert self._client is not None + + q = TXN_QUERIES["ORDER_STATUS"] + w_id = params["w_id"] + d_id = params["d_id"] + c_id = params["c_id"] + c_last = params["c_last"] + + self._client.run_query_json("BEGIN") + if c_id != None: + r, _ = self._client.run_query_json( + q["getCustomerByCustomerId"].format(w_id, d_id, c_id) + ) + customer = r[0] + else: + # Get the midpoint customer's id + r, _ = self._client.run_query_json( + q["getCustomersByLastName"].format(w_id, d_id, c_last) + ) + all_customers = r + assert len(all_customers) > 0 + namecnt = len(all_customers) + index = (namecnt - 1) / 2 + customer = all_customers[index] + c_id = customer[0] + assert len(customer) > 0 + assert c_id != None + + r, _ = self._client.run_query_json(q["getLastOrder"].format(w_id, d_id, c_id)) + order = r[0] + if order: + r, _ = self._client.run_query_json( + q["getOrderLines"].format(w_id, d_id, order[0]) + ) + orderLines = r + else: + orderLines = [] + + self._client.run_query_json("COMMIT") + return [customer, order, orderLines] + + def doPayment(self, params: Dict[str, Any]) -> List[Tuple[Any, ...]]: + assert self._client is not None + + q = TXN_QUERIES["PAYMENT"] + w_id = params["w_id"] + d_id = params["d_id"] + h_amount = params["h_amount"] + c_w_id = params["c_w_id"] + c_d_id = params["c_d_id"] + c_id = params["c_id"] + c_last = params["c_last"] + h_date = params["h_date"] + + self._client.run_query_json("BEGIN") + if c_id != None: + r, _ = self._client.run_query_json( + q["getCustomerByCustomerId"].format(w_id, d_id, c_id) + ) + customer = r[0] + else: + # Get the midpoint customer's id + r, _ = self._client.run_query_json( + q["getCustomersByLastName"].format(w_id, d_id, c_last) + ) + all_customers = r + assert len(all_customers) > 0 + namecnt = len(all_customers) + index = (namecnt - 1) / 2 + customer = all_customers[index] + c_id = customer[0] + assert len(customer) > 0 + c_balance = customer[14] - h_amount + c_ytd_payment = customer[15] + h_amount + c_payment_cnt = customer[16] + 1 + c_data = customer[17] + + r, _ = self._client.run_query_json(q["getWarehouse"].format(w_id)) + warehouse = r[0] + + r, _ = self._client.run_query_json(q["getDistrict"].format(w_id, d_id)) + district = r[0] + + self._client.run_query_json(q["updateWarehouseBalance"].format(h_amount, w_id)) + self._client.run_query_json( + q["updateDistrictBalance"].format(h_amount, w_id, d_id) + ) + + # Customer Credit Information + if customer[11] == constants.BAD_CREDIT: + newData = " ".join(map(str, [c_id, c_d_id, c_w_id, d_id, w_id, h_amount])) + c_data = newData + "|" + c_data + if len(c_data) > constants.MAX_C_DATA: + c_data = c_data[: constants.MAX_C_DATA] + self._client.run_query_json( + q["updateBCCustomer"].format( + c_balance, + c_ytd_payment, + c_payment_cnt, + c_data, + c_w_id, + c_d_id, + c_id, + ), + ) + else: + c_data = "" + self._client.run_query_json( + q["updateGCCustomer"].format( + c_balance, c_ytd_payment, c_payment_cnt, c_w_id, c_d_id, c_id + ), + ) + + # Concatenate w_name, four spaces, d_name + h_data = "%s %s" % 
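
    # Stock-level is implemented as two reads: fetch the district's next
    # order id, then count the distinct items, across the order lines of the
    # 20 most recent orders, whose stock quantity is below the threshold.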
    def doStockLevel(self, params: Dict[str, Any]) -> int:
        assert self._client is not None

        q = TXN_QUERIES["STOCK_LEVEL"]
        w_id = params["w_id"]
        d_id = params["d_id"]
        threshold = params["threshold"]

        self._client.run_query_json("BEGIN")
        r, _ = self._client.run_query_json(q["getOId"].format(w_id, d_id))
        result = r[0]
        assert result
        o_id = result[0]

        r, _ = self._client.run_query_json(
            q["getStockCount"].format(w_id, d_id, o_id, (o_id - 20), w_id, threshold)
        )
        result = r[0]

        self._client.run_query_json("COMMIT")
        return int(result[0])

diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/couchdbdriver.py b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/couchdbdriver.py
new file mode 100644
index 00000000..fbd5ee85
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/couchdbdriver.py
@@ -0,0 +1,1198 @@
+# -*- coding: utf-8 -*-
+# -----------------------------------------------------------------------
+# Copyright (C) 2011
+# Alex Kalinin
+# http://www.cs.brown.edu/~akalinin/
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+# ----------------------------------------------------------------------- + +import logging +from pprint import pformat + +import constants +from abstractdriver import * + +import couchdb +from uuid import uuid4 + +# for parallel view fetching +import threading + +# This describes our scheme: +# db -- the name of the corresponding db in CouchDB (we're using table per database approach) +# attrs -- attributes from the table (will become keys in JSON documents, one document per row) +# prim_key -- primary key from the original table (after being concatenated will become _id in the JSON document) +# distr_key -- defines sharding key (sharding is done in a round-robin manner) +# indexes -- will become CouchDB views (should be seen as CREATE INDEX in SQL; we maintain secondary indexes this way) +# +# To sum up: +# -- We use one CouchDB database per table, one JSON document per row, one key/value per column/value. +# -- Secondary indexes are emulated through CouchDB views. +# +TPCC_SCM = { + "WAREHOUSE": { + "db": "warehouse", + "attrs": [ + "W_ID", + "W_NAME", + "W_STREET_1", + "W_STREET_2", + "W_CITY", + "W_STATE", + "W_ZIP", + "W_TAX", + "W_YTD", + ], + "prim_key": ["W_ID"], + "distr_key": "W_ID", + }, + "DISTRICT": { + "db": "district", + "attrs": [ + "D_ID", + "D_W_ID", + "D_NAME", + "D_STREET_1", + "D_STREET_2", + "D_CITY", + "D_STATE", + "D_ZIP", + "D_TAX", + "D_YTD", + "D_NEXT_O_ID", + ], + "prim_key": ["D_W_ID", "D_ID"], + "distr_key": "D_W_ID", + }, + "ITEM": { + "db": "item", + "attrs": ["I_ID", "I_IM_ID", "I_NAME", "I_PRICE", "I_DATA"], + "prim_key": ["I_ID"], + }, + "CUSTOMER": { + "db": "customer", + "attrs": [ + "C_ID", + "C_D_ID", + "C_W_ID", + "C_FIRST", + "C_MIDDLE", + "C_LAST", + "C_STREET_1", + "C_STREET_2", + "C_CITY", + "C_STATE", + "C_ZIP", + "C_PHONE", + "C_SINCE", + "C_CREDIT", + "C_CREDIT_LIM", + "C_DISCOUNT", + "C_BALANCE", + "C_YTD_PAYMENT", + "C_PAYMENT_CNT", + "C_DELIVERY_CNT", + "C_DATA", + ], + "prim_key": ["C_W_ID", "C_D_ID", "C_ID"], + "distr_key": "C_W_ID", + "indexes": { + "w_d_last": { + "map": """ + function(doc) { + emit([doc.C_W_ID, doc.C_D_ID, doc.C_LAST], doc.C_FIRST); + } + """, + }, + }, + }, + "HISTORY": { + "db": "history", + "attrs": [ + "H_C_ID", + "H_C_D_ID", + "H_C_W_ID", + "H_D_ID", + "H_W_ID", + "H_DATE", + "H_AMOUNT", + "H_DATA", + ], + "prim_key": [], + "distr_key": "H_C_W_ID", + }, + "STOCK": { + "db": "stock", + "attrs": [ + "S_I_ID", + "S_W_ID", + "S_QUANTITY", + "S_DIST_01", + "S_DIST_02", + "S_DIST_03", + "S_DIST_04", + "S_DIST_05", + "S_DIST_06", + "S_DIST_07", + "S_DIST_08", + "S_DIST_09", + "S_DIST_10", + "S_YTD", + "S_ORDER_CNT", + "S_REMOTE_CNT", + "S_DATA", + ], + "prim_key": ["S_W_ID", "S_I_ID"], + "distr_key": "S_W_ID", + "indexes": { + "w_i": { + "map": """ + function(doc) { + emit([doc.S_W_ID, doc.S_I_ID], doc.S_QUANTITY); + } + """, + }, + }, + }, + "ORDERS": { + "db": "orders", + "attrs": [ + "O_ID", + "O_C_ID", + "O_D_ID", + "O_W_ID", + "O_ENTRY_D", + "O_CARRIER_ID", + "O_OL_CNT", + "O_ALL_LOCAL", + ], + "prim_key": ["O_W_ID", "O_D_ID", "O_ID"], + "distr_key": "O_W_ID", + "indexes": { + "w_d_c_o": { + "map": """ + function(doc) { + emit([doc.O_W_ID, doc.O_D_ID, doc.O_C_ID, doc.O_ID], null); + } + """, + }, + }, + }, + "NEW_ORDER": { + "db": "new_order", + "attrs": ["NO_O_ID", "NO_D_ID", "NO_W_ID"], + "prim_key": ["NO_D_ID", "NO_W_ID", "NO_O_ID"], + "distr_key": "NO_W_ID", + }, + "ORDER_LINE": { + "db": "order_line", + "attrs": [ + "OL_O_ID", + "OL_D_ID", + "OL_W_ID", + "OL_NUMBER", + "OL_I_ID", + "OL_SUPPLY_W_ID", + 
"OL_DELIVERY_D", + "OL_QUANTITY", + "OL_AMOUNT", + "OL_DIST_INFO", + ], + "prim_key": ["OL_W_ID", "OL_D_ID", "OL_O_ID", "OL_NUMBER"], + "distr_key": "OL_W_ID", + "indexes": { + "o_d_w": { + "map": """ + function(doc) { + emit([doc.OL_O_ID, doc.OL_D_ID, doc.OL_W_ID], doc.OL_AMOUNT); + } + """, + "reduce": """ + function(keys, values, rereduce) { + return sum(values); + } + """, + }, + "o_d_w_i": { + "map": """ + function(doc) { + emit([doc.OL_O_ID, doc.OL_D_ID, doc.OL_W_ID], doc.OL_I_ID); + } + """, + }, + }, + }, +} + + +def db_from_table(table_name): + """ + Converts the name of the table to the corresponding CouchDB database name. + Note, that CouchDB doesn't like CAPITAL database names. + """ + return TPCC_SCM[table_name]["db"] + + +def gen_pk_doc(table_name, doc): + """ + Generate primary key for the row-doc from the table_name. + It is done by just concatenating all 'prim_key' attributes of the table + + If we don't have a key in the primary table, then we just generate it via uuid4. + It is usually recommended to generate an id on the client side. + """ + table_schema = TPCC_SCM[table_name] + if len(table_schema["prim_key"]): + pk = "_".join([str(doc[attr]) for attr in table_schema["prim_key"]]) + else: + pk = uuid4().hex + + return pk + + +def touch_view(db, view_name): + """ + Touches the 'view_name' view from the given db object. + + The main point here is to make CouchDB actually create the view. Otherwise it would only + create it on the first query. We don't want that, since that would make things very slow during + the actual transaction processing! + """ + logging.debug( + "HACK: Fetching view '%s' from '%s' with 'limit = 1'" % (view_name, str(db)) + ) + # the result is unimportant here, just use limit=1 + db.view("tpcc/%s" % view_name, limit=1).rows + logging.debug( + "HACK: Fetched view '%s' from '%s' with 'limit = 1'" % (view_name, str(db)) + ) + + +class TouchThread(threading.Thread): + """ + This is a class to handle "touch-view" threads, which + are used to initialize views in the loadFinish function + + The main scheme here is that in case of several shards, we want to fetch the view from all + the shards simultaneously. 'n' shards equals 'n' threads. + + So, the thread just executes 'touch_view' function and then quits. 
+ """ + + def __init__(self, *args): + self._target = touch_view + self._args = args + threading.Thread.__init__(self) + + def run(self): + self._target(*self._args) + + +## ============================================== +## CouchdbDriver +## ============================================== +class CouchdbDriver(AbstractDriver): + DEFAULT_CONFIG = { + "node_urls": ( + "CouchDB URL:", + '["http://localhost:5984"]', + ), # usual "out-of-the-box" value + } + + def __init__(self, ddl): + super(CouchdbDriver, self).__init__("couchdb", ddl) + self.servers = [] # list of shards (couchdb server objects) + self.dbs = None # dict: 'db_name' -> (list of db_obj (shards)) + + ## ---------------------------------------------- + ## makeDefaultConfig + ## ---------------------------------------------- + def makeDefaultConfig(self): + return CouchdbDriver.DEFAULT_CONFIG + + ## ---------------------------------------------- + ## loadConfig + ## ---------------------------------------------- + def loadConfig(self, config): + for key in CouchdbDriver.DEFAULT_CONFIG.keys(): + assert key in config, "Missing parameter '%s' in %s configuration" % ( + key, + self.name, + ) + + # open servers + for srv_name in eval(config["node_urls"]): + logging.debug("Got a CouchDB node from config: '%s'" % srv_name) + # we use delayed commits here since we don't care much about durability + # note, that couchdb would commit the data once per several seconds anyway + self.servers.append(couchdb.Server(url=srv_name, full_commit=False)) + + db_names = [db_from_table(table) for table in TPCC_SCM.keys()] + + # delete the dbs if we're resetting + if config["reset"]: + for db in db_names: + for srv in self.servers: + if db in srv: + logging.debug( + "Deleting database '%s' on server '%s'" % (db, str(srv)) + ) + srv.delete(db) + + # creating databases + self.dbs = dict() + for db in db_names: + sdb = [] # list of shards for the db + for srv in self.servers: + if not db in srv: + logging.debug( + "Creating database '%s' on server '%s'" % (db, str(srv)) + ) + sdb.append(srv.create(db)) + else: + logging.debug( + "Database exists: '%s', server: '%s'" % (db, str(srv)) + ) + sdb.append(srv[db]) + + self.dbs[db] = sdb + + ## ---------------------------------------------- + ## tuples_to_docs + ## ---------------------------------------------- + def shard_from_id(self, key): + """ + Get the shard number from the key. Key is assumed to be integer. + + Just a dumb round-robin. + """ + return key % len(self.servers) + + ## ---------------------------------------------- + ## tuples_to_docs + ## ---------------------------------------------- + def tuples_to_docs(self, table_name, tuples): + """ + This function converts tuples belonging to the table_name to a list + of documents suitable for loading into CouchDB database with the name table_name + + This is actually not very well written and takes the most CPU time from the loader. + However, do we actually care? It's just loading. Fetching the views will probably kill us anyway... + """ + table_schema = TPCC_SCM[table_name] + + # create list of lists for documents (one list of docs per shard) + docs = [list() for s in self.servers] + tuple_len = len(tuples[0]) + + assert tuple_len == len(table_schema["attrs"]), ( + "Number of attributes and the tuple length differ: %s" % table_name + ) + + for tup in tuples: + doc = dict() + + # generate the doc as a simple dict + for i, attr in enumerate(table_schema["attrs"]): + doc[attr] = tup[i] + + # determine the shard number we want to put the doc into. 
+ # + # we use distr_key for that. + # + # if the table doesn't have a distr key, we assume it's + # replicated over all shard nodes + # + # it is assumed that the 'distr_key' is integer + if TPCC_SCM[table_name].has_key("distr_key"): + distr_key = int(doc[TPCC_SCM[table_name]["distr_key"]]) + shard = self.shard_from_id(distr_key) + else: + shard = -1 + + # emulate primary key with "id" or generate a random one + doc["_id"] = gen_pk_doc(table_name, doc) + + # put the doc to the proper list. + # '-1' means 'replicate to all' + if shard != -1: + docs[shard].append(doc) + else: + for l in docs: + l.append(doc) + + return docs + + ## ---------------------------------------------- + ## loadTuples + ## ---------------------------------------------- + def loadTuples(self, tableName, tuples): + if len(tuples) == 0: + return + + # create docs for tuples + docs = self.tuples_to_docs(tableName, tuples) + db_name = db_from_table(tableName) + + # load all documents in bulk on every node + for srv_num, srv in enumerate(self.servers): + if len(docs[srv_num]): + logging.debug( + "Loading tuples from the table '%s' into database '%s' on server '%s'" + % (tableName, db_name, str(srv)) + ) + # should we check the result here? we're assuming a fresh load. + self.dbs[db_name][srv_num].update(docs[srv_num]) + + ## ---------------------------------------------- + ## loadFinish + ## ---------------------------------------------- + def loadFinish(self): + """ + Creates some additional views to speed-up the execution and commits + + This is the tricky part. We want not only to create indexes (views), but also fetch them. Otherwise, + CouchDB would do it in a lazy way, during a first query. We don't want that at all! + """ + view_touch_jobs = [] + for table in TPCC_SCM.keys(): + if "indexes" in TPCC_SCM[table]: + for srv_num, srv in enumerate(self.servers): + # load the design doc: _design/tpcc + try: + logging.debug( + "Creating indexes for '%s' on server '%s'" + % (table, str(srv)) + ) + cdb = self.dbs[db_from_table(table)][srv_num] + design_doc = {"views": TPCC_SCM[table]["indexes"]} + cdb["_design/tpcc"] = design_doc + except couchdb.http.ResourceConflict: + # happens if we have multiple loaders. This is okay. The design doc is still the same. 
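+                        # (The design doc is derived purely from TPCC_SCM, so
+                        # a concurrent loader writes identical content and the
+                        # conflict is harmless.)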
+ pass + finally: + for view_name in TPCC_SCM[table]["indexes"].keys(): + view_touch_jobs.append((cdb, view_name)) + + # we want actually to initialize views in parallel on all shard nodes + # to speed-up loading times + touch_thread_pool = [] + logging.debug("We have %d views to touch" % len(view_touch_jobs)) + for job in view_touch_jobs: + t = TouchThread(job[0], job[1]) + t.start() + touch_thread_pool.append(t) + + logging.debug("Waiting for %d view touchers to finish" % len(touch_thread_pool)) + for t in touch_thread_pool: + t.join() + + ## ---------------------------------------------- + ## doDelivery + ## ---------------------------------------------- + def doDelivery(self, params): + w_id = params["w_id"] + o_carrier_id = params["o_carrier_id"] + ol_delivery_d = str(params["ol_delivery_d"]) + + # Note, we want to do this cycle ASAP, since we're deleting the 'NEW_ORDER' docs and + # are very vulnerable to conflicts + no_o_ids = [] + for d_id in range(1, constants.DISTRICTS_PER_WAREHOUSE + 1): + while True: + # fetch any 'NEW_ORDER' doc ('0' as the 'NO_O_ID') + newOrder = ( + self.dbs[db_from_table("NEW_ORDER")][self.shard_from_id(w_id)] + .view( + "_all_docs", + limit=1, + include_docs="true", + startkey=gen_pk_doc( + "NEW_ORDER", + {"NO_D_ID": d_id, "NO_W_ID": w_id, "NO_O_ID": 0}, + ), + ) + .rows + ) + + # it seems that we might fetch a deleted doc in case there are no more. Nice... + if ( + newOrder[0]["value"].has_key("deleted") + and newOrder[0]["value"]["deleted"] == True + ): + logging.debug( + "No documents: _all_docs returned a deleted one. Skipping..." + ) + newOrder = [] + + if len(newOrder) == 0: + ## No orders for this district: skip it. Note: This must be reported if > 1% + break + + newOrder = newOrder[0].doc + + try: + self.dbs[db_from_table("NEW_ORDER")][ + self.shard_from_id(w_id) + ].delete(newOrder) + no_o_ids.append((d_id, newOrder["NO_O_ID"])) + break + except couchdb.http.ResourceNotFound: + # in case somebody got this order first, try to fetch another one + logging.debug( + "Pessimistic concurrency control: Delete failed: Restarting..." + ) + pass + except couchdb.http.ResourceConflict: + # in case somebody got this order first, try to fetch another one + logging.debug( + "Pessimistic concurrency control: Delete failed: Restarting..." + ) + pass + + if len(newOrder) == 0: + ## No orders for this district: skip it. Note: This must be reported if > 1% + continue + ## FOR + + # Now we're "isolated" from concurrent transactions... + # We're trying to fetch all info using as least requests as possible + order_keys = [ + gen_pk_doc("ORDERS", {"O_ID": no_o_id, "O_W_ID": w_id, "O_D_ID": d_id}) + for d_id, no_o_id in no_o_ids + ] + order_docs = ( + self.dbs[db_from_table("ORDERS")][self.shard_from_id(w_id)] + .view("_all_docs", include_docs="true", keys=order_keys) + .rows + ) + order_docs = [od.doc for od in order_docs] + + # use the view for the sum aggregate + ol_totals = ( + self.dbs[db_from_table("ORDER_LINE")][self.shard_from_id(w_id)] + .view( + "tpcc/o_d_w", + group="true", + keys=[[no_o_id, d_id, w_id] for d_id, no_o_id in no_o_ids], + ) + .rows + ) + + # put the fetched information together for every client + c_ids = [] + for i in range(len(no_o_ids)): + # find the total for the current (order, district, warehouse) + # is there some way to find stuff in a list fast? 
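+            # (Python 3 note: filter() returns a lazy iterator, so the [0]
+            # index below fails; an equivalent form would be roughly
+            #   next(r for r in ol_totals
+            #        if r.key == [no_o_ids[i][1], no_o_ids[i][0], w_id]).value)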
+ ol_total = filter( + lambda x: x.key == [no_o_ids[i][1], no_o_ids[i][0], w_id], ol_totals + )[0].value + # These must be logged in the "result file" according to TPC-C 2.7.2.2 (page 39) + # We remove the queued time, completed time, w_id, and o_carrier_id: the client can figure + # them out + # If there are no order lines, SUM returns null. There should always be order lines. + assert ( + ol_total != None + ), "ol_total is NULL: there are no order lines. This should not happen" + assert ol_total > 0.0 + c_ids.append((order_docs[i]["O_C_ID"], no_o_ids[i][0], ol_total)) + + # this should be safe. no conflicts... + for order_doc in order_docs: + order_doc["O_CARRIER_ID"] = o_carrier_id + self.dbs[db_from_table("ORDERS")][self.shard_from_id(w_id)].update(order_docs) + + # ditto... + # we must do the second retrieval from ORDER_LINES, since now we need docs, not aggregates + order_lines = ( + self.dbs[db_from_table("ORDER_LINE")][self.shard_from_id(w_id)] + .view( + "tpcc/o_d_w", + keys=[[no_o_id, d_id, w_id] for d_id, no_o_id in no_o_ids], + reduce="false", + include_docs="true", + ) + .rows + ) + order_lines = [r.doc for r in order_lines] + + for ol in order_lines: + ol["OL_DELIVERY_D"] = ol_delivery_d + + self.dbs[db_from_table("ORDER_LINE")][self.shard_from_id(w_id)].update( + order_lines + ) + + # again, updating clients may introduce conflicts. another bottleneck.... + for c_id, d_id, ol_total in c_ids: + while True: + customer_info = self.dbs[db_from_table("CUSTOMER")][ + self.shard_from_id(w_id) + ].get( + gen_pk_doc( + "CUSTOMER", {"C_W_ID": w_id, "C_D_ID": d_id, "C_ID": c_id} + ) + ) + customer_info["C_BALANCE"] += ol_total + + try: + self.dbs[db_from_table("CUSTOMER")][self.shard_from_id(w_id)].save( + customer_info + ) + break + except couchdb.http.ResourceConflict: + # in case somebody updated the customer first, try again with the new revision + logging.debug( + "Pessimistic concurrency control: Update failed: Restarting..." + ) + pass + + result = no_o_ids + + return result + + ## ---------------------------------------------- + ## doNewOrder + ## ---------------------------------------------- + def doNewOrder(self, params): + w_id = params["w_id"] + d_id = params["d_id"] + c_id = params["c_id"] + o_entry_d = str(params["o_entry_d"]) + i_ids = params["i_ids"] + i_w_ids = params["i_w_ids"] + i_qtys = params["i_qtys"] + + assert len(i_ids) > 0 + assert len(i_ids) == len(i_w_ids) + assert len(i_ids) == len(i_qtys) + + all_local = True + items = [] + + # retrieve and store info about all the items + item_data = ( + self.dbs[db_from_table("ITEM")][self.shard_from_id(w_id)] + .view("_all_docs", include_docs="true", keys=[str(i) for i in i_ids]) + .rows + ) + + for i in range(len(i_ids)): + ## Determine if this is an all local order or not + all_local = all_local and i_w_ids[i] == w_id + + # get info about the item from the just retrieved bundle + # filter is just for finding an item in a list + doc = filter(lambda it: it.id == str(i_ids[i]), item_data)[0].doc + + ## TPCC defines 1% of neworder gives a wrong itemid, causing rollback. + ## Note that this will happen with 1% of transactions on purpose. + if doc is None: + ## TODO Abort here! 
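+                ## (Nothing has been written yet at this point, so simply
+                ## returning is a safe stand-in for a real abort.)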
+ return + + items.append((doc["I_PRICE"], doc["I_NAME"], doc["I_DATA"])) + assert len(items) == len(i_ids) + + ## ---------------- + ## Collect Information from WAREHOUSE, DISTRICT, and CUSTOMER + ## ---------------- + doc = self.dbs[db_from_table("WAREHOUSE")][self.shard_from_id(w_id)].get( + str(w_id) + ) + w_tax = doc["W_TAX"] + + # conflict is possible. this is a bottleneck... + while True: + district_info = self.dbs[db_from_table("DISTRICT")][ + self.shard_from_id(w_id) + ].get(gen_pk_doc("DISTRICT", {"D_ID": d_id, "D_W_ID": w_id})) + d_tax = district_info["D_TAX"] + d_next_o_id = district_info["D_NEXT_O_ID"] + + district_info["D_NEXT_O_ID"] += 1 + try: + self.dbs[db_from_table("DISTRICT")][self.shard_from_id(w_id)].save( + district_info + ) + break + except couchdb.http.ResourceConflict: + # want to get a unique order id! + logging.debug( + "Pessimistic concurrency control: Update failed: Restarting..." + ) + pass + + customer_info = self.dbs[db_from_table("CUSTOMER")][ + self.shard_from_id(w_id) + ].get(gen_pk_doc("CUSTOMER", {"C_W_ID": w_id, "C_D_ID": d_id, "C_ID": c_id})) + c_discount = customer_info["C_DISCOUNT"] + + ol_cnt = len(i_ids) + o_carrier_id = constants.NULL_CARRIER_ID + order_line_docs = [] + + ## ---------------- + ## Insert Order Item Information + ## ---------------- + item_data = [] + total = 0 + for i in range(len(i_ids)): + ol_number = i + 1 + ol_supply_w_id = i_w_ids[i] + ol_i_id = i_ids[i] + ol_quantity = i_qtys[i] + + itemInfo = items[i] + i_name = itemInfo[1] + i_data = itemInfo[2] + i_price = itemInfo[0] + + # we have potential conflict for every stock + while True: + stockInfo = self.dbs[db_from_table("STOCK")][ + self.shard_from_id(ol_supply_w_id) + ].get( + gen_pk_doc("STOCK", {"S_I_ID": ol_i_id, "S_W_ID": ol_supply_w_id}) + ) + s_quantity = stockInfo["S_QUANTITY"] + s_ytd = stockInfo["S_YTD"] + s_order_cnt = stockInfo["S_ORDER_CNT"] + s_remote_cnt = stockInfo["S_REMOTE_CNT"] + s_data = stockInfo["S_DATA"] + s_dist_xx = stockInfo[ + "S_DIST_%02d" % d_id + ] # Fetches data from the s_dist_[d_id] column + + ## Update stock + s_ytd += ol_quantity + if s_quantity >= ol_quantity + 10: + s_quantity = s_quantity - ol_quantity + else: + s_quantity = s_quantity + 91 - ol_quantity + s_order_cnt += 1 + + if ol_supply_w_id != w_id: + s_remote_cnt += 1 + + # update stock + stockInfo["S_QUANTITY"] = s_quantity + stockInfo["S_YTD"] = s_ytd + stockInfo["S_ORDER_CNT"] = s_order_cnt + stockInfo["S_REMOTE_CNT"] = s_remote_cnt + + try: + self.dbs[db_from_table("STOCK")][ + self.shard_from_id(ol_supply_w_id) + ].save(stockInfo) + break + except couchdb.http.ResourceConflict: + # if somebody had reserved the stock before us, repeat. + logging.debug( + "Pessimistic concurrency control: Update failed: Restarting..." 
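+                        # (Despite the label, this is optimistic concurrency
+                        # control: save() fails on a stale _rev and the loop
+                        # re-reads the stock doc and retries.)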
+ ) + pass + + if ( + i_data.find(constants.ORIGINAL_STRING) != -1 + and s_data.find(constants.ORIGINAL_STRING) != -1 + ): + brand_generic = "B" + else: + brand_generic = "G" + + ## Transaction profile states to use "ol_quantity * i_price" + ol_amount = ol_quantity * i_price + total += ol_amount + + # don't insert the order line right now + # we'll do it in bulk later + order_line_row = dict( + zip( + TPCC_SCM["ORDER_LINE"]["attrs"], + [ + d_next_o_id, + d_id, + w_id, + ol_number, + ol_i_id, + ol_supply_w_id, + o_entry_d, + ol_quantity, + ol_amount, + s_dist_xx, + ], + ) + ) + order_line_row["_id"] = gen_pk_doc("ORDER_LINE", order_line_row) + order_line_docs.append(order_line_row) + + ## Add the info to be returned + item_data.append((i_name, s_quantity, brand_generic, i_price, ol_amount)) + ## FOR + + ## ---------------- + ## Insert Order Information + ## ---------------- + self.dbs[db_from_table("ORDER_LINE")][self.shard_from_id(w_id)].update( + order_line_docs + ) + + orders_row = dict( + zip( + TPCC_SCM["ORDERS"]["attrs"], + [ + d_next_o_id, + c_id, + d_id, + w_id, + o_entry_d, + o_carrier_id, + ol_cnt, + all_local, + ], + ) + ) + orders_row["_id"] = gen_pk_doc("ORDERS", orders_row) + self.dbs[db_from_table("ORDERS")][self.shard_from_id(w_id)].save(orders_row) + + new_order_row = dict( + zip(TPCC_SCM["NEW_ORDER"]["attrs"], [d_next_o_id, d_id, w_id]) + ) + new_order_row["_id"] = gen_pk_doc("NEW_ORDER", new_order_row) + self.dbs[db_from_table("NEW_ORDER")][self.shard_from_id(w_id)].save( + new_order_row + ) + + ## Adjust the total for the discount + total *= (1 - c_discount) * (1 + w_tax + d_tax) + + ## Pack up values the client is missing (see TPC-C 2.4.3.5) + misc = [(w_tax, d_tax, d_next_o_id, total)] + customer_info = [ + ( + customer_info["C_DISCOUNT"], + customer_info["C_LAST"], + customer_info["C_CREDIT"], + ) + ] + return [customer_info, misc, item_data] + + ## ---------------------------------------------- + ## doOrderStatus + ## ---------------------------------------------- + def doOrderStatus(self, params): + w_id = params["w_id"] + d_id = params["d_id"] + c_id = params["c_id"] + c_last = params["c_last"] + + assert w_id, pformat(params) + assert d_id, pformat(params) + + if c_id != None: + customer = self.dbs[db_from_table("CUSTOMER")][ + self.shard_from_id(w_id) + ].get( + gen_pk_doc("CUSTOMER", {"C_W_ID": w_id, "C_D_ID": d_id, "C_ID": c_id}) + ) + else: + # Get the midpoint customer's id + all_customers = ( + self.dbs[db_from_table("CUSTOMER")][self.shard_from_id(w_id)] + .view("tpcc/w_d_last", key=[w_id, d_id, c_last], reduce="false") + .rows + ) + all_customers.sort(lambda x, y: cmp(x["value"], y["value"])) + + assert len(all_customers) > 0 + namecnt = len(all_customers) + index = (namecnt - 1) / 2 + customer = all_customers[index] + customer = self.dbs[db_from_table("CUSTOMER")][ + self.shard_from_id(w_id) + ].get(customer["id"]) + c_id = customer["C_ID"] + assert len(customer) > 0 + assert c_id != None + + # get the last order from the customer + order = ( + self.dbs[db_from_table("ORDERS")][self.shard_from_id(w_id)] + .view( + "tpcc/w_d_c_o", + limit=1, + include_docs="true", + startkey=[w_id, d_id, c_id, "a"], # 'a' is just to give all numbers + endkey=[w_id, d_id, c_id, -1], + descending="true", + reduce="false", + ) + .rows + ) + + if len(order) > 0: + order = order[0].doc + orderLines = ( + self.dbs[db_from_table("ORDER_LINE")][self.shard_from_id(w_id)] + .view( + "tpcc/o_d_w", + key=[order["O_ID"], d_id, w_id], + reduce="false", + include_docs="true", + ) + .rows 
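+            # (descending with startkey > endkey scans the
+            # [w_id, d_id, c_id, O_ID] view backwards; CouchDB sorts strings
+            # after numbers, so the 'a' bound covers every order id and
+            # limit=1 returns the customer's latest order.)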
+ ) + + orderLines = [ + ( + o.doc["OL_SUPPLY_W_ID"], + o.doc["OL_I_ID"], + o.doc["OL_QUANTITY"], + o.doc["OL_AMOUNT"], + o.doc["OL_DELIVERY_D"], + ) + for o in orderLines + ] + else: + orderLines = [] + + customer = ( + customer["C_ID"], + customer["C_FIRST"], + customer["C_MIDDLE"], + customer["C_LAST"], + customer["C_BALANCE"], + ) + order = (order["O_ID"], order["O_CARRIER_ID"], order["O_ENTRY_D"]) + return [customer, order, orderLines] + + ## ---------------------------------------------- + ## doPayment + ## ---------------------------------------------- + def doPayment(self, params): + w_id = params["w_id"] + d_id = params["d_id"] + h_amount = params["h_amount"] + c_w_id = params["c_w_id"] + c_d_id = params["c_d_id"] + c_id = params["c_id"] + c_last = params["c_last"] + h_date = str(params["h_date"]) + + if c_id != None: + cus_doc_id = gen_pk_doc( + "CUSTOMER", {"C_W_ID": w_id, "C_D_ID": d_id, "C_ID": c_id} + ) + else: + # Get the midpoint customer's id + all_customers = ( + self.dbs[db_from_table("CUSTOMER")][self.shard_from_id(w_id)] + .view("tpcc/w_d_last", key=[w_id, d_id, c_last], reduce="false") + .rows + ) + all_customers.sort(lambda x, y: cmp(x["value"], y["value"])) + + assert len(all_customers) > 0 + namecnt = len(all_customers) + index = (namecnt - 1) / 2 + customer = all_customers[index] + cus_doc_id = customer["id"] + + # try to update the customer record. conflicts expected. + while True: + customer = self.dbs[db_from_table("CUSTOMER")][ + self.shard_from_id(w_id) + ].get(cus_doc_id) + assert len(customer) > 0 + c_id = customer["C_ID"] + + c_balance = customer["C_BALANCE"] - h_amount + c_ytd_payment = customer["C_YTD_PAYMENT"] + h_amount + c_payment_cnt = customer["C_PAYMENT_CNT"] + 1 + + # Customer Credit Information + try: + if customer["C_CREDIT"] == constants.BAD_CREDIT: + c_data = customer["C_DATA"] + newData = " ".join( + map(str, [c_id, c_d_id, c_w_id, d_id, w_id, h_amount]) + ) + c_data = newData + "|" + c_data + if len(c_data) > constants.MAX_C_DATA: + c_data = c_data[: constants.MAX_C_DATA] + customer["C_DATA"] = c_data + + customer["C_BALANCE"] = c_balance + customer["C_YTD_PAYMENT"] = c_ytd_payment + customer["C_PAYMENT_CNT"] = c_payment_cnt + self.dbs[db_from_table("CUSTOMER")][self.shard_from_id(w_id)].save( + customer + ) + break + except couchdb.http.ResourceConflict: + logging.debug( + "Pessimistic concurrency control: Update failed: Restarting..." + ) + pass + + # conflicts when updating warehouse record and... + while True: + warehouse = self.dbs[db_from_table("WAREHOUSE")][ + self.shard_from_id(w_id) + ].get(str(w_id)) + warehouse["W_YTD"] += h_amount + + try: + self.dbs[db_from_table("WAREHOUSE")][self.shard_from_id(w_id)].save( + warehouse + ) + break + except couchdb.http.ResourceConflict: + # pessimistic concurrency control... + logging.debug( + "Pessimistic concurrency control: Update failed: Restarting..." + ) + pass + + # the district record + while True: + district = self.dbs[db_from_table("DISTRICT")][ + self.shard_from_id(w_id) + ].get(gen_pk_doc("DISTRICT", {"D_ID": d_id, "D_W_ID": w_id})) + district["D_YTD"] += h_amount + + try: + self.dbs[db_from_table("DISTRICT")][self.shard_from_id(w_id)].save( + district + ) + break + except couchdb.http.ResourceConflict: + # pessimistic concurrency control... + logging.debug( + "Pessimistic concurrency control: Update failed: Restarting..." 
+ ) + pass + + # Concatenate w_name, four spaces, d_name + h_data = "%s %s" % (warehouse["W_NAME"], district["D_NAME"]) + # Create the history record + hist = dict( + zip( + TPCC_SCM["HISTORY"]["attrs"], + [c_id, c_d_id, c_w_id, d_id, w_id, h_date, h_amount, h_data], + ) + ) + self.dbs[db_from_table("HISTORY")][self.shard_from_id(c_w_id)].save(hist) + + # TPC-C 2.5.3.3: Must display the following fields: + # W_ID, D_ID, C_ID, C_D_ID, C_W_ID, W_STREET_1, W_STREET_2, W_CITY, W_STATE, W_ZIP, + # D_STREET_1, D_STREET_2, D_CITY, D_STATE, D_ZIP, C_FIRST, C_MIDDLE, C_LAST, C_STREET_1, + # C_STREET_2, C_CITY, C_STATE, C_ZIP, C_PHONE, C_SINCE, C_CREDIT, C_CREDIT_LIM, + # C_DISCOUNT, C_BALANCE, the first 200 characters of C_DATA (only if C_CREDIT = "BC"), + # H_AMOUNT, and H_DATE. + + # Hand back all the warehouse, district, and customer data + warehouse = ( + warehouse["W_NAME"], + warehouse["W_STREET_1"], + warehouse["W_STREET_2"], + warehouse["W_CITY"], + warehouse["W_STATE"], + warehouse["W_ZIP"], + ) + district = ( + district["D_NAME"], + district["D_STREET_1"], + district["D_STREET_2"], + district["D_CITY"], + district["D_STATE"], + district["D_ZIP"], + ) + customer = ( + customer["C_ID"], + customer["C_FIRST"], + customer["C_MIDDLE"], + customer["C_LAST"], + customer["C_STREET_1"], + customer["C_STREET_2"], + customer["C_CITY"], + customer["C_STATE"], + customer["C_ZIP"], + customer["C_PHONE"], + customer["C_SINCE"], + customer["C_CREDIT"], + customer["C_CREDIT_LIM"], + customer["C_DISCOUNT"], + customer["C_BALANCE"], + customer["C_YTD_PAYMENT"], + customer["C_PAYMENT_CNT"], + customer["C_DATA"], + ) + + # Hand back all the warehouse, district, and customer data + return [warehouse, district, customer] + + ## ---------------------------------------------- + ## doStockLevel + ## ---------------------------------------------- + def doStockLevel(self, params): + w_id = params["w_id"] + d_id = params["d_id"] + threshold = params["threshold"] + + result = self.dbs[db_from_table("DISTRICT")][self.shard_from_id(w_id)].get( + gen_pk_doc("DISTRICT", {"D_ID": d_id, "D_W_ID": w_id}) + ) + assert result + o_id = result["D_NEXT_O_ID"] + + # note, that we might get only parts of some orders because of isolation issues with NewOrder on 'D_NEXT_O_ID' + # I really doubt anything can be done about it + orderLines = ( + self.dbs[db_from_table("ORDER_LINE")][self.shard_from_id(w_id)] + .view( + "tpcc/o_d_w_i", + startkey=[o_id - 20, d_id, w_id], + endkey=[o_id - 1, d_id, w_id], + reduce="false", + ) + .rows + ) + + # 'set' operation in the next line just filters out duplicates + stock_keys = [[w_id, i_id] for i_id in set([r["value"] for r in orderLines])] + # do an index scan join! 
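+        # (This is a client-side join: each [w_id, i_id] key probes the
+        # tpcc/w_i view once, pairing recent ORDER_LINE items with their
+        # STOCK quantities.)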
+ stock_items = ( + self.dbs[db_from_table("STOCK")][self.shard_from_id(w_id)] + .view("tpcc/w_i", keys=stock_keys) + .rows + ) + + count = 0 + for item in stock_items: + if item.value < threshold: + count += 1 + + return count + + +## CLASS diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/csvdriver.py b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/csvdriver.py new file mode 100644 index 00000000..1e0cc040 --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/csvdriver.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------- +# Copyright (C) 2011 +# Andy Pavlo +# http://www.cs.brown.edu/~pavlo/ +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# ----------------------------------------------------------------------- + +import os +import csv +from datetime import datetime +from pprint import pprint, pformat + +from abstractdriver import * + + +## ============================================== +## CSVDriver +## ============================================== +class CsvDriver(AbstractDriver): + DEFAULT_CONFIG = { + "table_directory": ( + "The path to the directory to store the table CSV files", + "/tmp/tpcc-tables", + ), + "txn_directory": ( + "The path to the directory to store the txn CSV files", + "/tmp/tpcc-txns", + ), + } + + def __init__(self, ddl): + super(CsvDriver, self).__init__("csv", ddl) + self.table_directory = None + self.table_outputs = {} + self.txn_directory = None + self.txn_outputs = {} + self.txn_params = {} + + ## DEF + + def makeDefaultConfig(self): + return CsvDriver.DEFAULT_CONFIG + + ## DEF + + def loadConfig(self, config): + for key in CsvDriver.DEFAULT_CONFIG.keys(): + assert key in config, "Missing parameter '%s' in %s configuration" % ( + key, + self.name, + ) + + self.table_directory = config["table_directory"] + assert self.table_directory + if not os.path.exists(self.table_directory): + os.makedirs(self.table_directory) + + self.txn_directory = config["txn_directory"] + assert self.txn_directory + if not os.path.exists(self.txn_directory): + os.makedirs(self.txn_directory) + + ## DEF + + def loadTuples(self, tableName, tuples): + if not tableName in self.table_outputs: + path = os.path.join(self.table_directory, "%s.csv" % tableName) + self.table_outputs[tableName] = csv.writer( + open(path, "wb"), quoting=csv.QUOTE_ALL + ) + ## IF + self.table_outputs[tableName].writerows(tuples) + + ## DEF + + def executeTransaction(self, txn, params): + if not txn in self.txn_outputs: + path = 
os.path.join(self.txn_directory, "%s.csv" % txn) + self.txn_outputs[txn] = csv.writer(open(path, "wb"), quoting=csv.QUOTE_ALL) + self.txn_params[txn] = params.keys()[:] + self.txn_outputs[txn].writerow(["Timestamp"] + self.txn_params[txn]) + ## IF + row = [datetime.now()] + [params[k] for k in self.txn_params[txn]] + self.txn_outputs[txn].writerow(row) + + ## DEF + + +## CLASS diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/mongodbdriver.py b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/mongodbdriver.py new file mode 100644 index 00000000..59841be0 --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/mongodbdriver.py @@ -0,0 +1,1031 @@ +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------- +# Copyright (C) 2011 +# Andy Pavlo +# http://www.cs.brown.edu/~pavlo/ +# +# Original Java Version: +# Copyright (C) 2008 +# Evan Jones +# Massachusetts Institute of Technology +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# ----------------------------------------------------------------------- + +from __future__ import with_statement + +import os +import sys +import logging +import pymongo +from pprint import pprint, pformat + +import constants +from abstractdriver import * + +TABLE_COLUMNS = { + constants.TABLENAME_ITEM: [ + "I_ID", # INTEGER + "I_IM_ID", # INTEGER + "I_NAME", # VARCHAR + "I_PRICE", # FLOAT + "I_DATA", # VARCHAR + ], + constants.TABLENAME_WAREHOUSE: [ + "W_ID", # SMALLINT + "W_NAME", # VARCHAR + "W_STREET_1", # VARCHAR + "W_STREET_2", # VARCHAR + "W_CITY", # VARCHAR + "W_STATE", # VARCHAR + "W_ZIP", # VARCHAR + "W_TAX", # FLOAT + "W_YTD", # FLOAT + ], + constants.TABLENAME_DISTRICT: [ + "D_ID", # TINYINT + "D_W_ID", # SMALLINT + "D_NAME", # VARCHAR + "D_STREET_1", # VARCHAR + "D_STREET_2", # VARCHAR + "D_CITY", # VARCHAR + "D_STATE", # VARCHAR + "D_ZIP", # VARCHAR + "D_TAX", # FLOAT + "D_YTD", # FLOAT + "D_NEXT_O_ID", # INT + ], + constants.TABLENAME_CUSTOMER: [ + "C_ID", # INTEGER + "C_D_ID", # TINYINT + "C_W_ID", # SMALLINT + "C_FIRST", # VARCHAR + "C_MIDDLE", # VARCHAR + "C_LAST", # VARCHAR + "C_STREET_1", # VARCHAR + "C_STREET_2", # VARCHAR + "C_CITY", # VARCHAR + "C_STATE", # VARCHAR + "C_ZIP", # VARCHAR + "C_PHONE", # VARCHAR + "C_SINCE", # TIMESTAMP + "C_CREDIT", # VARCHAR + "C_CREDIT_LIM", # FLOAT + "C_DISCOUNT", # FLOAT + "C_BALANCE", # FLOAT + "C_YTD_PAYMENT", # FLOAT + "C_PAYMENT_CNT", # INTEGER + "C_DELIVERY_CNT", # INTEGER + "C_DATA", # VARCHAR + ], + constants.TABLENAME_STOCK: [ + "S_I_ID", # INTEGER + "S_W_ID", # SMALLINT + "S_QUANTITY", # INTEGER + "S_DIST_01", # VARCHAR + "S_DIST_02", # VARCHAR + "S_DIST_03", # VARCHAR + "S_DIST_04", # VARCHAR + "S_DIST_05", # VARCHAR + "S_DIST_06", # VARCHAR + "S_DIST_07", # VARCHAR + "S_DIST_08", # VARCHAR + "S_DIST_09", # VARCHAR + "S_DIST_10", # VARCHAR + "S_YTD", # INTEGER + "S_ORDER_CNT", # INTEGER + "S_REMOTE_CNT", # INTEGER + "S_DATA", # VARCHAR + ], + constants.TABLENAME_ORDERS: [ + "O_ID", # INTEGER + "O_C_ID", # INTEGER + "O_D_ID", # TINYINT + "O_W_ID", # SMALLINT + "O_ENTRY_D", # TIMESTAMP + "O_CARRIER_ID", # INTEGER + "O_OL_CNT", # INTEGER + "O_ALL_LOCAL", # INTEGER + ], + constants.TABLENAME_NEW_ORDER: [ + "NO_O_ID", # INTEGER + "NO_D_ID", # TINYINT + "NO_W_ID", # SMALLINT + ], + constants.TABLENAME_ORDER_LINE: [ + "OL_O_ID", # INTEGER + "OL_D_ID", # TINYINT + "OL_W_ID", # SMALLINT + "OL_NUMBER", # INTEGER + "OL_I_ID", # INTEGER + "OL_SUPPLY_W_ID", # SMALLINT + "OL_DELIVERY_D", # TIMESTAMP + "OL_QUANTITY", # INTEGER + "OL_AMOUNT", # FLOAT + "OL_DIST_INFO", # VARCHAR + ], + constants.TABLENAME_HISTORY: [ + "H_C_ID", # INTEGER + "H_C_D_ID", # TINYINT + "H_C_W_ID", # SMALLINT + "H_D_ID", # TINYINT + "H_W_ID", # SMALLINT + "H_DATE", # TIMESTAMP + "H_AMOUNT", # FLOAT + "H_DATA", # VARCHAR + ], +} +TABLE_INDEXES = { + constants.TABLENAME_ITEM: [ + "I_ID", + ], + constants.TABLENAME_WAREHOUSE: [ + "W_ID", + ], + constants.TABLENAME_DISTRICT: [ + "D_ID", + "D_W_ID", + ], + constants.TABLENAME_CUSTOMER: [ + "C_ID", + "C_D_ID", + "C_W_ID", + ], + constants.TABLENAME_STOCK: [ + "S_I_ID", + "S_W_ID", + ], + constants.TABLENAME_ORDERS: [ + "O_ID", + "O_D_ID", + "O_W_ID", + "O_C_ID", + ], + constants.TABLENAME_NEW_ORDER: [ + "NO_O_ID", + "NO_D_ID", + "NO_W_ID", + ], + constants.TABLENAME_ORDER_LINE: [ + "OL_O_ID", + "OL_D_ID", + "OL_W_ID", + ], +} + + +## ============================================== +## MongodbDriver +## ============================================== +class MongodbDriver(AbstractDriver): + DEFAULT_CONFIG = { + "host": 
("The hostname to mongod", "localhost"), + "port": ("The port number to mongod", 27017), + "name": ("Collection name", "tpcc"), + "denormalize": ( + "If set to true, then the CUSTOMER data will be denormalized into a single document", + True, + ), + } + DENORMALIZED_TABLES = [ + constants.TABLENAME_CUSTOMER, + constants.TABLENAME_ORDERS, + constants.TABLENAME_ORDER_LINE, + constants.TABLENAME_HISTORY, + ] + + def __init__(self, ddl): + super(MongodbDriver, self).__init__("mongodb", ddl) + self.database = None + self.conn = None + self.denormalize = False + self.w_customers = {} + self.w_orders = {} + + ## Create member mapping to collections + for name in constants.ALL_TABLES: + self.__dict__[name.lower()] = None + + ## ---------------------------------------------- + ## makeDefaultConfig + ## ---------------------------------------------- + def makeDefaultConfig(self): + return MongodbDriver.DEFAULT_CONFIG + + ## ---------------------------------------------- + ## loadConfig + ## ---------------------------------------------- + def loadConfig(self, config): + for key in MongodbDriver.DEFAULT_CONFIG.keys(): + assert key in config, "Missing parameter '%s' in %s configuration" % ( + key, + self.name, + ) + + self.conn = pymongo.MongoClient(config["host"], int(config["port"])) + self.database = self.conn[str(config["name"])] + self.denormalize = config["denormalize"] + if self.denormalize: + logging.debug("Using denormalized data model") + + if config["reset"]: + logging.debug("Deleting database '%s'" % self.database.name) + for name in constants.ALL_TABLES: + if name in self.database.collection_names(): + self.database.drop_collection(name) + logging.debug("Dropped collection %s" % name) + ## IF + + ## Setup! + load_indexes = ("execute" in config and not config["execute"]) and ( + "load" in config and not config["load"] + ) + for name in constants.ALL_TABLES: + if self.denormalize and name in MongodbDriver.DENORMALIZED_TABLES[1:]: + continue + + ## Create member mapping to collections + self.__dict__[name.lower()] = self.database[name] + + ## Create Indexes + if ( + load_indexes + and name in TABLE_INDEXES + and ( + self.denormalize + or ( + self.denormalize == False + and not name in MongodbDriver.DENORMALIZED_TABLES[1:] + ) + ) + ): + logging.debug("Creating index for %s" % name) + for index in TABLE_INDEXES[name]: + self.database[name].create_index(index) + ## FOR + + ## ---------------------------------------------- + ## loadTuples + ## ---------------------------------------------- + def loadTuples(self, tableName, tuples): + if len(tuples) == 0: + return + logging.debug("Loading %d tuples for tableName %s" % (len(tuples), tableName)) + + assert tableName in TABLE_COLUMNS, "Unexpected table %s" % tableName + columns = TABLE_COLUMNS[tableName] + num_columns = range(len(columns)) + + tuple_dicts = [] + + ## We want to combine all of a CUSTOMER's ORDERS, ORDER_LINE, and HISTORY records + ## into a single document + if self.denormalize and tableName in MongodbDriver.DENORMALIZED_TABLES: + ## If this is the CUSTOMER table, then we'll just store the record locally for now + if tableName == constants.TABLENAME_CUSTOMER: + for t in tuples: + key = tuple(t[:3]) # C_ID, D_ID, W_ID + self.w_customers[key] = dict( + map(lambda i: (columns[i], t[i]), num_columns) + ) + ## FOR + + ## If this is an ORDER_LINE record, then we need to stick it inside of the + ## right ORDERS record + elif tableName == constants.TABLENAME_ORDER_LINE: + for t in tuples: + o_key = tuple(t[:3]) # O_ID, O_D_ID, O_W_ID + 
(c_key, o_idx) = self.w_orders[o_key] + c = self.w_customers[c_key] + assert o_idx >= 0 + assert o_idx < len(c[constants.TABLENAME_ORDERS]) + o = c[constants.TABLENAME_ORDERS][o_idx] + if not tableName in o: + o[tableName] = [] + o[tableName].append( + dict(map(lambda i: (columns[i], t[i]), num_columns[4:])) + ) + ## FOR + + ## Otherwise we have to find the CUSTOMER record for the other tables + ## and append ourselves to them + else: + if tableName == constants.TABLENAME_ORDERS: + key_start = 1 + cols = ( + num_columns[0:1] + num_columns[4:] + ) # Removes O_C_ID, O_D_ID, O_W_ID + else: + key_start = 0 + cols = num_columns[3:] # Removes H_C_ID, H_C_D_ID, H_C_W_ID + + for t in tuples: + c_key = tuple(t[key_start : key_start + 3]) # C_ID, D_ID, W_ID + assert ( + c_key in self.w_customers + ), "Customer Key: %s\nAll Keys:\n%s" % ( + str(c_key), + "\n".join(map(str, sorted(self.w_customers.keys()))), + ) + c = self.w_customers[c_key] + + if not tableName in c: + c[tableName] = [] + c[tableName].append(dict(map(lambda i: (columns[i], t[i]), cols))) + + ## Since ORDER_LINE doesn't have a C_ID, we have to store a reference to + ## this ORDERS record so that we can look it up later + if tableName == constants.TABLENAME_ORDERS: + o_key = (t[0], t[2], t[3]) # O_ID, O_D_ID, O_W_ID + self.w_orders[o_key] = ( + c_key, + len(c[tableName]) - 1, + ) # CUSTOMER, ORDER IDX + ## FOR + ## IF + + ## Otherwise just shove the tuples straight to the target collection + else: + for t in tuples: + tuple_dicts.append(dict(map(lambda i: (columns[i], t[i]), num_columns))) + ## FOR + self.database[tableName].insert(tuple_dicts) + ## IF + + return + + ## ---------------------------------------------- + ## loadFinishDistrict + ## ---------------------------------------------- + def loadFinishDistrict(self, w_id, d_id): + if self.denormalize: + logging.debug( + "Pushing %d denormalized CUSTOMER records for WAREHOUSE %d DISTRICT %d into MongoDB" + % (len(self.w_customers), w_id, d_id) + ) + self.database[constants.TABLENAME_CUSTOMER].insert( + self.w_customers.values() + ) + self.w_customers.clear() + self.w_orders.clear() + ## IF + + ## ---------------------------------------------- + ## loadFinish + ## ---------------------------------------------- + def loadFinish(self): + logging.info("Finished loading tables") + if logging.getLogger().isEnabledFor(logging.DEBUG): + for name in constants.ALL_TABLES: + if self.denormalize and name in MongodbDriver.DENORMALIZED_TABLES[1:]: + continue + logging.debug( + "%-12s%d records" % (name + ":", self.database[name].count()) + ) + ## IF + + ## ---------------------------------------------- + ## doDelivery + ## ---------------------------------------------- + def doDelivery(self, params): + w_id = params["w_id"] + o_carrier_id = params["o_carrier_id"] + ol_delivery_d = params["ol_delivery_d"] + + result = [] + for d_id in range(1, constants.DISTRICTS_PER_WAREHOUSE + 1): + ## getNewOrder + no = self.new_order.find_one( + {"NO_D_ID": d_id, "NO_W_ID": w_id}, {"NO_O_ID": 1} + ) + if no == None: + ## No orders for this district: skip it. 
Note: This must be reported if > 1% + continue + assert len(no) > 0 + o_id = no["NO_O_ID"] + + if self.denormalize: + ## getCId + c = self.customer.find_one( + {"ORDERS.O_ID": o_id, "C_D_ID": d_id, "C_W_ID": w_id}, + {"C_ID": 1, "ORDERS.O_ID": 1, "ORDERS.ORDER_LINE": 1}, + ) + assert c != None, "No customer record [O_ID=%d, D_ID=%d, W_ID=%d]" % ( + o_id, + d_id, + w_id, + ) + c_id = c["C_ID"] + + ## sumOLAmount + updateOrderLine + ol_total = 0 + for o in c["ORDERS"]: + if o["O_ID"] == o_id: + orderLines = o["ORDER_LINE"] + for ol in orderLines: + ol_total += ol["OL_AMOUNT"] + ## We have to do this here because we can't update the nested array atomically + ol["OL_DELIVERY_D"] = ol_delivery_d + break + ## FOR + + if ol_total == 0: + pprint(params) + pprint(no) + pprint(c) + sys.exit(1) + + ## updateOrders + updateCustomer + self.customer.update( + {"_id": c["_id"], "ORDERS.O_ID": o_id}, + { + "$set": { + "ORDERS.$.O_CARRIER_ID": o_carrier_id, + "ORDERS.$.ORDER_LINE": orderLines, + }, + "$inc": {"C_BALANCE": ol_total}, + }, + multi=False, + ) + + else: + ## getCId + o = self.orders.find_one( + {"O_ID": o_id, "O_D_ID": d_id, "O_W_ID": w_id}, {"O_C_ID": 1} + ) + assert o != None + c_id = o["O_C_ID"] + + ## sumOLAmount + orderLines = self.order_line.find( + {"OL_O_ID": o_id, "OL_D_ID": d_id, "OL_W_ID": w_id}, + {"OL_AMOUNT": 1}, + ) + assert orderLines != None + ol_total = sum([ol["OL_AMOUNT"] for ol in orderLines]) + + ## updateOrders + self.orders.update( + o, {"$set": {"O_CARRIER_ID": o_carrier_id}}, multi=False + ) + + ## updateOrderLine + self.order_line.update( + {"OL_O_ID": o_id, "OL_D_ID": d_id, "OL_W_ID": w_id}, + {"$set": {"OL_DELIVERY_D": ol_delivery_d}}, + multi=True, + ) + + ## updateCustomer + self.customer.update( + {"C_ID": c_id, "C_D_ID": d_id, "C_W_ID": w_id}, + {"$inc": {"C_BALANCE": ol_total}}, + ) + ## IF + + ## deleteNewOrder + self.new_order.remove({"_id": no["_id"]}) + + # These must be logged in the "result file" according to TPC-C 2.7.2.2 (page 39) + # We remove the queued time, completed time, w_id, and o_carrier_id: the client can figure + # them out + # If there are no order lines, SUM returns null. There should always be order lines. + assert ( + ol_total != None + ), "ol_total is NULL: there are no order lines. This should not happen" + assert ol_total > 0.0 + + result.append((d_id, o_id)) + ## FOR + return result + + ## ---------------------------------------------- + ## doNewOrder + ## ---------------------------------------------- + def doNewOrder(self, params): + w_id = params["w_id"] + d_id = params["d_id"] + c_id = params["c_id"] + o_entry_d = params["o_entry_d"] + i_ids = params["i_ids"] + i_w_ids = params["i_w_ids"] + i_qtys = params["i_qtys"] + s_dist_col = "S_DIST_%02d" % d_id + + assert len(i_ids) > 0 + assert len(i_ids) == len(i_w_ids) + assert len(i_ids) == len(i_qtys) + + ## http://stackoverflow.com/q/3844931/ + all_local = not i_w_ids or [w_id] * len(i_w_ids) == i_w_ids + + items = self.item.find( + {"I_ID": {"$in": i_ids}}, + {"I_ID": 1, "I_PRICE": 1, "I_NAME": 1, "I_DATA": 1}, + ) + ## TPCC defines 1% of neworder gives a wrong itemid, causing rollback. + ## Note that this will happen with 1% of transactions on purpose. + if items.count() != len(i_ids): + ## TODO Abort here! 
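+            ## (The transaction has performed only reads so far, so an early
+            ## return is equivalent to the rollback the spec calls for.)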
+ return + ## IF + + ## ---------------- + ## Collect Information from WAREHOUSE, DISTRICT, and CUSTOMER + ## ---------------- + + # getWarehouseTaxRate + w = self.warehouse.find_one({"W_ID": w_id}, {"W_TAX": 1}) + assert w + w_tax = w["W_TAX"] + + # getDistrict + d = self.district.find_one( + {"D_ID": d_id, "D_W_ID": w_id}, {"D_TAX": 1, "D_NEXT_O_ID": 1} + ) + assert d + d_tax = d["D_TAX"] + d_next_o_id = d["D_NEXT_O_ID"] + + # incrementNextOrderId + # HACK: This is not transactionally safe! + self.district.update(d, {"$inc": {"D_NEXT_O_ID": 1}}, multi=False) + + # getCustomer + c = self.customer.find_one( + {"C_ID": c_id, "C_D_ID": d_id, "C_W_ID": w_id}, + {"C_DISCOUNT": 1, "C_LAST": 1, "C_CREDIT": 1}, + ) + assert c + c_discount = c["C_DISCOUNT"] + + ## ---------------- + ## Insert Order Information + ## ---------------- + ol_cnt = len(i_ids) + o_carrier_id = constants.NULL_CARRIER_ID + + # createNewOrder + self.new_order.insert( + {"NO_O_ID": d_next_o_id, "NO_D_ID": d_id, "NO_W_ID": w_id} + ) + + o = { + "O_ID": d_next_o_id, + "O_ENTRY_D": o_entry_d, + "O_CARRIER_ID": o_carrier_id, + "O_OL_CNT": ol_cnt, + "O_ALL_LOCAL": all_local, + } + if self.denormalize: + o[constants.TABLENAME_ORDER_LINE] = [] + else: + o["O_D_ID"] = d_id + o["O_W_ID"] = w_id + o["O_C_ID"] = c_id + + # createOrder + self.orders.insert(o) + + ## ---------------- + ## OPTIMIZATION: + ## If all of the items are at the same warehouse, then we'll issue a single + ## request to get their information + ## ---------------- + stockInfos = None + if all_local and False: + # getStockInfo + allStocks = self.stock.find( + {"S_I_ID": {"$in": i_ids}, "S_W_ID": w_id}, + { + "S_I_ID": 1, + "S_QUANTITY": 1, + "S_DATA": 1, + "S_YTD": 1, + "S_ORDER_CNT": 1, + "S_REMOTE_CNT": 1, + s_dist_col: 1, + }, + ) + assert allStocks.count() == ol_cnt + stockInfos = {} + for si in allStocks: + stockInfos["S_I_ID"] = si # HACK + ## IF + + ## ---------------- + ## Insert Order Item Information + ## ---------------- + item_data = [] + total = 0 + for i in range(ol_cnt): + ol_number = i + 1 + ol_supply_w_id = i_w_ids[i] + ol_i_id = i_ids[i] + ol_quantity = i_qtys[i] + + itemInfo = items[i] + i_name = itemInfo["I_NAME"] + i_data = itemInfo["I_DATA"] + i_price = itemInfo["I_PRICE"] + + # getStockInfo + if all_local and stockInfos != None: + si = stockInfos[ol_i_id] + assert si["S_I_ID"] == ol_i_id, "S_I_ID should be %d\n%s" % ( + ol_i_id, + pformat(si), + ) + else: + si = self.stock.find_one( + {"S_I_ID": ol_i_id, "S_W_ID": w_id}, + { + "S_I_ID": 1, + "S_QUANTITY": 1, + "S_DATA": 1, + "S_YTD": 1, + "S_ORDER_CNT": 1, + "S_REMOTE_CNT": 1, + s_dist_col: 1, + }, + ) + assert si, "Failed to find S_I_ID: %d\n%s" % (ol_i_id, pformat(itemInfo)) + + s_quantity = si["S_QUANTITY"] + s_ytd = si["S_YTD"] + s_order_cnt = si["S_ORDER_CNT"] + s_remote_cnt = si["S_REMOTE_CNT"] + s_data = si["S_DATA"] + s_dist_xx = si[s_dist_col] # Fetches data from the s_dist_[d_id] column + + ## Update stock + s_ytd += ol_quantity + if s_quantity >= ol_quantity + 10: + s_quantity = s_quantity - ol_quantity + else: + s_quantity = s_quantity + 91 - ol_quantity + s_order_cnt += 1 + + if ol_supply_w_id != w_id: + s_remote_cnt += 1 + + # updateStock + self.stock.update( + si, + { + "$set": { + "S_QUANTITY": s_quantity, + "S_YTD": s_ytd, + "S_ORDER_CNT": s_order_cnt, + "S_REMOTE_CNT": s_remote_cnt, + } + }, + ) + + if ( + i_data.find(constants.ORIGINAL_STRING) != -1 + and s_data.find(constants.ORIGINAL_STRING) != -1 + ): + brand_generic = "B" + else: + brand_generic = "G" + ## Transaction 
profile states to use "ol_quantity * i_price" + ol_amount = ol_quantity * i_price + total += ol_amount + + ol = { + "OL_O_ID": d_next_o_id, + "OL_NUMBER": ol_number, + "OL_I_ID": ol_i_id, + "OL_SUPPLY_W_ID": ol_supply_w_id, + "OL_DELIVERY_D": o_entry_d, + "OL_QUANTITY": ol_quantity, + "OL_AMOUNT": ol_amount, + "OL_DIST_INFO": s_dist_xx, + } + + if self.denormalize: + # createOrderLine + o[constants.TABLENAME_ORDER_LINE].append(ol) + else: + ol["OL_D_ID"] = d_id + ol["OL_W_ID"] = w_id + + # createOrderLine + self.order_line.insert(ol) + ## IF + + ## Add the info to be returned + item_data.append((i_name, s_quantity, brand_generic, i_price, ol_amount)) + ## FOR + + ## Adjust the total for the discount + # print "c_discount:", c_discount, type(c_discount) + # print "w_tax:", w_tax, type(w_tax) + # print "d_tax:", d_tax, type(d_tax) + total *= (1 - c_discount) * (1 + w_tax + d_tax) + + # createOrder + self.customer.update({"_id": c["_id"]}, {"$push": {"ORDERS": o}}) + + ## Pack up values the client is missing (see TPC-C 2.4.3.5) + misc = [(w_tax, d_tax, d_next_o_id, total)] + + return [c, misc, item_data] + + ## ---------------------------------------------- + ## doOrderStatus + ## ---------------------------------------------- + def doOrderStatus(self, params): + w_id = params["w_id"] + d_id = params["d_id"] + c_id = params["c_id"] + c_last = params["c_last"] + + assert w_id, pformat(params) + assert d_id, pformat(params) + + search_fields = {"C_W_ID": w_id, "C_D_ID": d_id} + return_fields = { + "C_ID": 1, + "C_FIRST": 1, + "C_MIDDLE": 1, + "C_LAST": 1, + "C_BALANCE": 1, + } + if self.denormalize: + for f in ["O_ID", "O_CARRIER_ID", "O_ENTRY_D"]: + return_fields["%s.%s" % (constants.TABLENAME_ORDERS, f)] = 1 + for f in ["OL_SUPPLY_W_ID", "OL_I_ID", "OL_QUANTITY"]: + return_fields[ + "%s.%s.%s" + % (constants.TABLENAME_ORDERS, constants.TABLENAME_ORDER_LINE, f) + ] = 1 + ## IF + + if c_id != None: + # getCustomerByCustomerId + search_fields["C_ID"] = c_id + c = self.customer.find_one(search_fields, return_fields) + assert c + + else: + # getCustomersByLastName + # Get the midpoint customer's id + search_fields["C_LAST"] = c_last + + all_customers = self.customer.find(search_fields, return_fields) + namecnt = all_customers.count() + assert namecnt > 0 + index = (namecnt - 1) / 2 + c = all_customers[index] + c_id = c["C_ID"] + assert len(c) > 0 + assert c_id != None + + orderLines = [] + order = None + + if self.denormalize: + # getLastOrder + if constants.TABLENAME_ORDERS in c: + order = c[constants.TABLENAME_ORDERS][-1] + # getOrderLines + orderLines = order[constants.TABLENAME_ORDER_LINE] + else: + # getLastOrder + order = ( + self.orders.find( + {"O_W_ID": w_id, "O_D_ID": d_id, "O_C_ID": c_id}, + {"O_ID": 1, "O_CARRIER_ID": 1, "O_ENTRY_D": 1}, + ) + .sort("O_ID", direction=pymongo.DESCENDING) + .limit(1)[0] + ) + o_id = order["O_ID"] + + if order: + # getOrderLines + orderLines = self.order_line.find( + {"OL_W_ID": w_id, "OL_D_ID": d_id, "OL_O_ID": o_id}, + { + "OL_SUPPLY_W_ID": 1, + "OL_I_ID": 1, + "OL_QUANTITY": 1, + "OL_AMOUNT": 1, + "OL_DELIVERY_D": 1, + }, + ) + ## IF + + return [c, order, orderLines] + + ## ---------------------------------------------- + ## doPayment + ## ---------------------------------------------- + def doPayment(self, params): + w_id = params["w_id"] + d_id = params["d_id"] + h_amount = params["h_amount"] + c_w_id = params["c_w_id"] + c_d_id = params["c_d_id"] + c_id = params["c_id"] + c_last = params["c_last"] + h_date = params["h_date"] + + search_fields = 
{"C_W_ID": w_id, "C_D_ID": d_id} + return_fields = {"C_BALANCE": 0, "C_YTD_PAYMENT": 0, "C_PAYMENT_CNT": 0} + + if c_id != None: + # getCustomerByCustomerId + search_fields["C_ID"] = c_id + c = self.customer.find_one(search_fields, return_fields) + assert c + + else: + # getCustomersByLastName + # Get the midpoint customer's id + search_fields["C_LAST"] = c_last + all_customers = self.customer.find(search_fields, return_fields) + namecnt = all_customers.count() + assert namecnt > 0 + index = (namecnt - 1) / 2 + c = all_customers[index] + c_id = c["C_ID"] + assert len(c) > 0 + assert c_id != None + + if c_id != None: + # getCustomerByCustomerId + c = self.customer.find_one({"C_W_ID": w_id, "C_D_ID": d_id, "C_ID": c_id}) + else: + # getCustomersByLastName + # Get the midpoint customer's id + all_customers = self.customer.find( + {"C_W_ID": w_id, "C_D_ID": d_id, "C_LAST": c_last} + ) + namecnt = all_customers.count() + assert namecnt > 0 + index = (namecnt - 1) / 2 + c = all_customers[index] + c_id = c["C_ID"] + assert len(c) > 0 + assert c_id != None + c_data = c["C_DATA"] + + # getWarehouse + w = self.warehouse.find_one( + {"W_ID": w_id}, + { + "W_NAME": 1, + "W_STREET_1": 1, + "W_STREET_2": 1, + "W_CITY": 1, + "W_STATE": 1, + "W_ZIP": 1, + }, + ) + assert w + + # updateWarehouseBalance + self.warehouse.update({"_id": w["_id"]}, {"$inc": {"W_YTD": h_amount}}) + + # getDistrict + d = self.district.find_one( + {"D_W_ID": w_id, "D_ID": d_id}, + { + "D_NAME": 1, + "D_STREET_1": 1, + "D_STREET_2": 1, + "D_CITY": 1, + "D_STATE": 1, + "D_ZIP": 1, + }, + ) + assert d + + # updateDistrictBalance + self.district.update({"_id": d["_id"]}, {"$inc": {"D_YTD": h_amount}}) + + # Build CUSTOMER update command + customer_update = { + "$inc": { + "C_BALANCE": h_amount * -1, + "C_YTD_PAYMENT": h_amount, + "C_PAYMENT_CNT": 1, + } + } + + # Customer Credit Information + if c["C_CREDIT"] == constants.BAD_CREDIT: + newData = " ".join(map(str, [c_id, c_d_id, c_w_id, d_id, w_id, h_amount])) + c_data = newData + "|" + c_data + if len(c_data) > constants.MAX_C_DATA: + c_data = c_data[: constants.MAX_C_DATA] + customer_update["$set"] = {"C_DATA": c_data} + ## IF + + # Concatenate w_name, four spaces, d_name + h_data = "%s %s" % (w["W_NAME"], d["D_NAME"]) + h = { + "H_D_ID": d_id, + "H_W_ID": w_id, + "H_DATE": h_date, + "H_AMOUNT": h_amount, + "H_DATA": h_data, + } + if self.denormalize: + # insertHistory + updateCustomer + customer_update["$push"] = {constants.TABLENAME_HISTORY: h} + self.customer.update({"_id": c["_id"]}, customer_update) + else: + # updateCustomer + self.customer.update({"_id": c["_id"]}, customer_update) + + # insertHistory + self.history.insert(h) + + # TPC-C 2.5.3.3: Must display the following fields: + # W_ID, D_ID, C_ID, C_D_ID, C_W_ID, W_STREET_1, W_STREET_2, W_CITY, W_STATE, W_ZIP, + # D_STREET_1, D_STREET_2, D_CITY, D_STATE, D_ZIP, C_FIRST, C_MIDDLE, C_LAST, C_STREET_1, + # C_STREET_2, C_CITY, C_STATE, C_ZIP, C_PHONE, C_SINCE, C_CREDIT, C_CREDIT_LIM, + # C_DISCOUNT, C_BALANCE, the first 200 characters of C_DATA (only if C_CREDIT = "BC"), + # H_AMOUNT, and H_DATE. 
+ + # Hand back all the warehouse, district, and customer data + return [w, d, c] + + ## ---------------------------------------------- + ## doStockLevel + ## ---------------------------------------------- + def doStockLevel(self, params): + w_id = params["w_id"] + d_id = params["d_id"] + threshold = params["threshold"] + + # getOId + d = self.district.find_one({"D_W_ID": w_id, "D_ID": d_id}, {"D_NEXT_O_ID": 1}) + assert d + o_id = d["D_NEXT_O_ID"] + + # getStockCount + # Outer Table: ORDER_LINE + # Inner Table: STOCK + if self.denormalize: + c = self.customer.find( + { + "C_W_ID": w_id, + "C_D_ID": d_id, + "ORDERS.O_ID": {"$lt": o_id, "$gte": o_id - 20}, + }, + {"ORDERS.ORDER_LINE.OL_I_ID": 1}, + ) + assert c + orderLines = [] + for ol in c: + assert "ORDER_LINE" in ol["ORDERS"][0] + orderLines.extend(ol["ORDERS"][0]["ORDER_LINE"]) + else: + orderLines = self.order_line.find( + { + "OL_W_ID": w_id, + "OL_D_ID": d_id, + "OL_O_ID": {"$lt": o_id, "$gte": o_id - 20}, + }, + {"OL_I_ID": 1}, + ) + + assert orderLines + ol_ids = set() + for ol in orderLines: + ol_ids.add(ol["OL_I_ID"]) + ## FOR + result = self.stock.find( + { + "S_W_ID": w_id, + "S_I_ID": {"$in": list(ol_ids)}, + "S_QUANTITY": {"$lt": threshold}, + } + ).count() + + return int(result) + + +## CLASS diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/drivers/sqlitedriver.py b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/sqlitedriver.py new file mode 100644 index 00000000..623affd8 --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/drivers/sqlitedriver.py @@ -0,0 +1,515 @@ +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------- +# Copyright (C) 2011 +# Andy Pavlo +# http://www.cs.brown.edu/~pavlo/ +# +# Original Java Version: +# Copyright (C) 2008 +# Evan Jones +# Massachusetts Institute of Technology +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# ----------------------------------------------------------------------- + +from __future__ import with_statement + +import os +import sqlite3 +import logging +import commands +from pprint import pprint, pformat + +import constants +from abstractdriver import * + +TXN_QUERIES = { + "DELIVERY": { + "getNewOrder": "SELECT NO_O_ID FROM NEW_ORDER WHERE NO_D_ID = ? AND NO_W_ID = ? AND NO_O_ID > -1 LIMIT 1", # + "deleteNewOrder": "DELETE FROM NEW_ORDER WHERE NO_D_ID = ? AND NO_W_ID = ? AND NO_O_ID = ?", # d_id, w_id, no_o_id + "getCId": "SELECT O_C_ID FROM ORDERS WHERE O_ID = ? AND O_D_ID = ? 
AND O_W_ID = ?", # no_o_id, d_id, w_id + "updateOrders": "UPDATE ORDERS SET O_CARRIER_ID = ? WHERE O_ID = ? AND O_D_ID = ? AND O_W_ID = ?", # o_carrier_id, no_o_id, d_id, w_id + "updateOrderLine": "UPDATE ORDER_LINE SET OL_DELIVERY_D = ? WHERE OL_O_ID = ? AND OL_D_ID = ? AND OL_W_ID = ?", # o_entry_d, no_o_id, d_id, w_id + "sumOLAmount": "SELECT SUM(OL_AMOUNT) FROM ORDER_LINE WHERE OL_O_ID = ? AND OL_D_ID = ? AND OL_W_ID = ?", # no_o_id, d_id, w_id + "updateCustomer": "UPDATE CUSTOMER SET C_BALANCE = C_BALANCE + ? WHERE C_ID = ? AND C_D_ID = ? AND C_W_ID = ?", # ol_total, c_id, d_id, w_id + }, + "NEW_ORDER": { + "getWarehouseTaxRate": "SELECT W_TAX FROM WAREHOUSE WHERE W_ID = ?", # w_id + "getDistrict": "SELECT D_TAX, D_NEXT_O_ID FROM DISTRICT WHERE D_ID = ? AND D_W_ID = ?", # d_id, w_id + "incrementNextOrderId": "UPDATE DISTRICT SET D_NEXT_O_ID = ? WHERE D_ID = ? AND D_W_ID = ?", # d_next_o_id, d_id, w_id + "getCustomer": "SELECT C_DISCOUNT, C_LAST, C_CREDIT FROM CUSTOMER WHERE C_W_ID = ? AND C_D_ID = ? AND C_ID = ?", # w_id, d_id, c_id + "createOrder": "INSERT INTO ORDERS (O_ID, O_D_ID, O_W_ID, O_C_ID, O_ENTRY_D, O_CARRIER_ID, O_OL_CNT, O_ALL_LOCAL) VALUES (?, ?, ?, ?, ?, ?, ?, ?)", # d_next_o_id, d_id, w_id, c_id, o_entry_d, o_carrier_id, o_ol_cnt, o_all_local + "createNewOrder": "INSERT INTO NEW_ORDER (NO_O_ID, NO_D_ID, NO_W_ID) VALUES (?, ?, ?)", # o_id, d_id, w_id + "getItemInfo": "SELECT I_PRICE, I_NAME, I_DATA FROM ITEM WHERE I_ID = ?", # ol_i_id + "getStockInfo": "SELECT S_QUANTITY, S_DATA, S_YTD, S_ORDER_CNT, S_REMOTE_CNT, S_DIST_%02d FROM STOCK WHERE S_I_ID = ? AND S_W_ID = ?", # d_id, ol_i_id, ol_supply_w_id + "updateStock": "UPDATE STOCK SET S_QUANTITY = ?, S_YTD = ?, S_ORDER_CNT = ?, S_REMOTE_CNT = ? WHERE S_I_ID = ? AND S_W_ID = ?", # s_quantity, s_order_cnt, s_remote_cnt, ol_i_id, ol_supply_w_id + "createOrderLine": "INSERT INTO ORDER_LINE (OL_O_ID, OL_D_ID, OL_W_ID, OL_NUMBER, OL_I_ID, OL_SUPPLY_W_ID, OL_DELIVERY_D, OL_QUANTITY, OL_AMOUNT, OL_DIST_INFO) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", # o_id, d_id, w_id, ol_number, ol_i_id, ol_supply_w_id, ol_quantity, ol_amount, ol_dist_info + }, + "ORDER_STATUS": { + "getCustomerByCustomerId": "SELECT C_ID, C_FIRST, C_MIDDLE, C_LAST, C_BALANCE FROM CUSTOMER WHERE C_W_ID = ? AND C_D_ID = ? AND C_ID = ?", # w_id, d_id, c_id + "getCustomersByLastName": "SELECT C_ID, C_FIRST, C_MIDDLE, C_LAST, C_BALANCE FROM CUSTOMER WHERE C_W_ID = ? AND C_D_ID = ? AND C_LAST = ? ORDER BY C_FIRST", # w_id, d_id, c_last + "getLastOrder": "SELECT O_ID, O_CARRIER_ID, O_ENTRY_D FROM ORDERS WHERE O_W_ID = ? AND O_D_ID = ? AND O_C_ID = ? ORDER BY O_ID DESC LIMIT 1", # w_id, d_id, c_id + "getOrderLines": "SELECT OL_SUPPLY_W_ID, OL_I_ID, OL_QUANTITY, OL_AMOUNT, OL_DELIVERY_D FROM ORDER_LINE WHERE OL_W_ID = ? AND OL_D_ID = ? AND OL_O_ID = ?", # w_id, d_id, o_id + }, + "PAYMENT": { + "getWarehouse": "SELECT W_NAME, W_STREET_1, W_STREET_2, W_CITY, W_STATE, W_ZIP FROM WAREHOUSE WHERE W_ID = ?", # w_id + "updateWarehouseBalance": "UPDATE WAREHOUSE SET W_YTD = W_YTD + ? WHERE W_ID = ?", # h_amount, w_id + "getDistrict": "SELECT D_NAME, D_STREET_1, D_STREET_2, D_CITY, D_STATE, D_ZIP FROM DISTRICT WHERE D_W_ID = ? AND D_ID = ?", # w_id, d_id + "updateDistrictBalance": "UPDATE DISTRICT SET D_YTD = D_YTD + ? WHERE D_W_ID = ? 
AND D_ID = ?",  # h_amount, d_w_id, d_id
+        "getCustomerByCustomerId": "SELECT C_ID, C_FIRST, C_MIDDLE, C_LAST, C_STREET_1, C_STREET_2, C_CITY, C_STATE, C_ZIP, C_PHONE, C_SINCE, C_CREDIT, C_CREDIT_LIM, C_DISCOUNT, C_BALANCE, C_YTD_PAYMENT, C_PAYMENT_CNT, C_DATA FROM CUSTOMER WHERE C_W_ID = ? AND C_D_ID = ? AND C_ID = ?",  # w_id, d_id, c_id
+        "getCustomersByLastName": "SELECT C_ID, C_FIRST, C_MIDDLE, C_LAST, C_STREET_1, C_STREET_2, C_CITY, C_STATE, C_ZIP, C_PHONE, C_SINCE, C_CREDIT, C_CREDIT_LIM, C_DISCOUNT, C_BALANCE, C_YTD_PAYMENT, C_PAYMENT_CNT, C_DATA FROM CUSTOMER WHERE C_W_ID = ? AND C_D_ID = ? AND C_LAST = ? ORDER BY C_FIRST",  # w_id, d_id, c_last
+        "updateBCCustomer": "UPDATE CUSTOMER SET C_BALANCE = ?, C_YTD_PAYMENT = ?, C_PAYMENT_CNT = ?, C_DATA = ? WHERE C_W_ID = ? AND C_D_ID = ? AND C_ID = ?",  # c_balance, c_ytd_payment, c_payment_cnt, c_data, c_w_id, c_d_id, c_id
+        "updateGCCustomer": "UPDATE CUSTOMER SET C_BALANCE = ?, C_YTD_PAYMENT = ?, C_PAYMENT_CNT = ? WHERE C_W_ID = ? AND C_D_ID = ? AND C_ID = ?",  # c_balance, c_ytd_payment, c_payment_cnt, c_w_id, c_d_id, c_id
+        "insertHistory": "INSERT INTO HISTORY VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
+    },
+    "STOCK_LEVEL": {
+        "getOId": "SELECT D_NEXT_O_ID FROM DISTRICT WHERE D_W_ID = ? AND D_ID = ?",
+        "getStockCount": """
+            SELECT COUNT(DISTINCT(OL_I_ID)) FROM ORDER_LINE, STOCK
+            WHERE OL_W_ID = ?
+              AND OL_D_ID = ?
+              AND OL_O_ID < ?
+              AND OL_O_ID >= ?
+              AND S_W_ID = ?
+              AND S_I_ID = OL_I_ID
+              AND S_QUANTITY < ?
+        """,
+    },
+}
+
+
+## ==============================================
+## SqliteDriver
+## ==============================================
+class SqliteDriver(AbstractDriver):
+    DEFAULT_CONFIG = {
+        "database": ("The path to the SQLite database", "/tmp/tpcc.db"),
+    }
+
+    def __init__(self, ddl):
+        super(SqliteDriver, self).__init__("sqlite", ddl)
+        self.database = None
+        self.conn = None
+        self.cursor = None
+
+    ## ----------------------------------------------
+    ## makeDefaultConfig
+    ## ----------------------------------------------
+    def makeDefaultConfig(self):
+        return SqliteDriver.DEFAULT_CONFIG
+
+    ## ----------------------------------------------
+    ## loadConfig
+    ## ----------------------------------------------
+    def loadConfig(self, config):
+        for key in SqliteDriver.DEFAULT_CONFIG.keys():
+            assert key in config, "Missing parameter '%s' in %s configuration" % (
+                key,
+                self.name,
+            )
+
+        self.database = str(config["database"])
+
+        if config["reset"] and os.path.exists(self.database):
+            logging.debug("Deleting database '%s'" % self.database)
+            os.unlink(self.database)
+
+        if not os.path.exists(self.database):
+            logging.debug("Loading DDL file '%s'" % (self.ddl))
+            ## HACK
+            cmd = "sqlite3 %s < %s" % (self.database, self.ddl)
+            (result, output) = subprocess.getstatusoutput(cmd)
+            assert result == 0, cmd + "\n" + output
+        ## IF
+
+        self.conn = sqlite3.connect(self.database)
+        self.cursor = self.conn.cursor()
+
+    ## ----------------------------------------------
+    ## loadTuples
+    ## ----------------------------------------------
+    def loadTuples(self, tableName, tuples):
+        if len(tuples) == 0:
+            return
+
+        p = ["?"] * len(tuples[0])
+        sql = "INSERT INTO %s VALUES (%s)" % (tableName, ",".join(p))
+        self.cursor.executemany(sql, tuples)
+
+        logging.debug("Loaded %d tuples for tableName %s" % (len(tuples), tableName))
+        return
+
+    ## ----------------------------------------------
+    ## loadFinish
+    ## ----------------------------------------------
+    def loadFinish(self):
+        logging.info("Committing changes to database")
+
self.conn.commit() + + ## ---------------------------------------------- + ## doDelivery + ## ---------------------------------------------- + def doDelivery(self, params): + q = TXN_QUERIES["DELIVERY"] + + w_id = params["w_id"] + o_carrier_id = params["o_carrier_id"] + ol_delivery_d = params["ol_delivery_d"] + + result = [] + for d_id in range(1, constants.DISTRICTS_PER_WAREHOUSE + 1): + self.cursor.execute(q["getNewOrder"], [d_id, w_id]) + newOrder = self.cursor.fetchone() + if newOrder == None: + ## No orders for this district: skip it. Note: This must be reported if > 1% + continue + assert len(newOrder) > 0 + no_o_id = newOrder[0] + + self.cursor.execute(q["getCId"], [no_o_id, d_id, w_id]) + c_id = self.cursor.fetchone()[0] + + self.cursor.execute(q["sumOLAmount"], [no_o_id, d_id, w_id]) + ol_total = self.cursor.fetchone()[0] + + self.cursor.execute(q["deleteNewOrder"], [d_id, w_id, no_o_id]) + self.cursor.execute(q["updateOrders"], [o_carrier_id, no_o_id, d_id, w_id]) + self.cursor.execute( + q["updateOrderLine"], [ol_delivery_d, no_o_id, d_id, w_id] + ) + + # These must be logged in the "result file" according to TPC-C 2.7.2.2 (page 39) + # We remove the queued time, completed time, w_id, and o_carrier_id: the client can figure + # them out + # If there are no order lines, SUM returns null. There should always be order lines. + assert ( + ol_total != None + ), "ol_total is NULL: there are no order lines. This should not happen" + assert ol_total > 0.0 + + self.cursor.execute(q["updateCustomer"], [ol_total, c_id, d_id, w_id]) + + result.append((d_id, no_o_id)) + ## FOR + + self.conn.commit() + return result + + ## ---------------------------------------------- + ## doNewOrder + ## ---------------------------------------------- + def doNewOrder(self, params): + q = TXN_QUERIES["NEW_ORDER"] + + w_id = params["w_id"] + d_id = params["d_id"] + c_id = params["c_id"] + o_entry_d = params["o_entry_d"] + i_ids = params["i_ids"] + i_w_ids = params["i_w_ids"] + i_qtys = params["i_qtys"] + + assert len(i_ids) > 0 + assert len(i_ids) == len(i_w_ids) + assert len(i_ids) == len(i_qtys) + + all_local = True + items = [] + for i in range(len(i_ids)): + ## Determine if this is an all local order or not + all_local = all_local and i_w_ids[i] == w_id + self.cursor.execute(q["getItemInfo"], [i_ids[i]]) + items.append(self.cursor.fetchone()) + assert len(items) == len(i_ids) + + ## TPCC defines 1% of neworder gives a wrong itemid, causing rollback. + ## Note that this will happen with 1% of transactions on purpose. + for item in items: + if len(item) == 0: + ## TODO Abort here! 
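+                ## Minimal realization of the TODO above (an addition, not from
+                ## the upstream fork): the item lookups run before any
+                ## INSERT/UPDATE in this transaction, so a rollback here just
+                ## closes the transaction cleanly before bailing out.
+                self.conn.rollback()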
+ return + ## FOR + + ## ---------------- + ## Collect Information from WAREHOUSE, DISTRICT, and CUSTOMER + ## ---------------- + self.cursor.execute(q["getWarehouseTaxRate"], [w_id]) + w_tax = self.cursor.fetchone()[0] + + self.cursor.execute(q["getDistrict"], [d_id, w_id]) + district_info = self.cursor.fetchone() + d_tax = district_info[0] + d_next_o_id = district_info[1] + + self.cursor.execute(q["getCustomer"], [w_id, d_id, c_id]) + customer_info = self.cursor.fetchone() + c_discount = customer_info[0] + + ## ---------------- + ## Insert Order Information + ## ---------------- + ol_cnt = len(i_ids) + o_carrier_id = constants.NULL_CARRIER_ID + + self.cursor.execute(q["incrementNextOrderId"], [d_next_o_id + 1, d_id, w_id]) + self.cursor.execute( + q["createOrder"], + [d_next_o_id, d_id, w_id, c_id, o_entry_d, o_carrier_id, ol_cnt, all_local], + ) + self.cursor.execute(q["createNewOrder"], [d_next_o_id, d_id, w_id]) + + ## ---------------- + ## Insert Order Item Information + ## ---------------- + item_data = [] + total = 0 + for i in range(len(i_ids)): + ol_number = i + 1 + ol_supply_w_id = i_w_ids[i] + ol_i_id = i_ids[i] + ol_quantity = i_qtys[i] + + itemInfo = items[i] + i_name = itemInfo[1] + i_data = itemInfo[2] + i_price = itemInfo[0] + + self.cursor.execute(q["getStockInfo"] % (d_id), [ol_i_id, ol_supply_w_id]) + stockInfo = self.cursor.fetchone() + if len(stockInfo) == 0: + logging.warn( + "No STOCK record for (ol_i_id=%d, ol_supply_w_id=%d)" + % (ol_i_id, ol_supply_w_id) + ) + continue + s_quantity = stockInfo[0] + s_ytd = stockInfo[2] + s_order_cnt = stockInfo[3] + s_remote_cnt = stockInfo[4] + s_data = stockInfo[1] + s_dist_xx = stockInfo[5] # Fetches data from the s_dist_[d_id] column + + ## Update stock + s_ytd += ol_quantity + if s_quantity >= ol_quantity + 10: + s_quantity = s_quantity - ol_quantity + else: + s_quantity = s_quantity + 91 - ol_quantity + s_order_cnt += 1 + + if ol_supply_w_id != w_id: + s_remote_cnt += 1 + + self.cursor.execute( + q["updateStock"], + [s_quantity, s_ytd, s_order_cnt, s_remote_cnt, ol_i_id, ol_supply_w_id], + ) + + if ( + i_data.find(constants.ORIGINAL_STRING) != -1 + and s_data.find(constants.ORIGINAL_STRING) != -1 + ): + brand_generic = "B" + else: + brand_generic = "G" + + ## Transaction profile states to use "ol_quantity * i_price" + ol_amount = ol_quantity * i_price + total += ol_amount + + self.cursor.execute( + q["createOrderLine"], + [ + d_next_o_id, + d_id, + w_id, + ol_number, + ol_i_id, + ol_supply_w_id, + o_entry_d, + ol_quantity, + ol_amount, + s_dist_xx, + ], + ) + + ## Add the info to be returned + item_data.append((i_name, s_quantity, brand_generic, i_price, ol_amount)) + ## FOR + + ## Commit! 
+        self.conn.commit()
+
+        ## Adjust the total for the discount
+        # print "c_discount:", c_discount, type(c_discount)
+        # print "w_tax:", w_tax, type(w_tax)
+        # print "d_tax:", d_tax, type(d_tax)
+        total *= (1 - c_discount) * (1 + w_tax + d_tax)
+
+        ## Pack up values the client is missing (see TPC-C 2.4.3.5)
+        misc = [(w_tax, d_tax, d_next_o_id, total)]
+
+        return [customer_info, misc, item_data]
+
+    ## ----------------------------------------------
+    ## doOrderStatus
+    ## ----------------------------------------------
+    def doOrderStatus(self, params):
+        q = TXN_QUERIES["ORDER_STATUS"]
+
+        w_id = params["w_id"]
+        d_id = params["d_id"]
+        c_id = params["c_id"]
+        c_last = params["c_last"]
+
+        assert w_id, pformat(params)
+        assert d_id, pformat(params)
+
+        if c_id is not None:
+            self.cursor.execute(q["getCustomerByCustomerId"], [w_id, d_id, c_id])
+            customer = self.cursor.fetchone()
+        else:
+            # Get the midpoint customer's id
+            self.cursor.execute(q["getCustomersByLastName"], [w_id, d_id, c_last])
+            all_customers = self.cursor.fetchall()
+            assert len(all_customers) > 0
+            namecnt = len(all_customers)
+            index = (namecnt - 1) // 2
+            customer = all_customers[index]
+            c_id = customer[0]
+        assert len(customer) > 0
+        assert c_id is not None
+
+        self.cursor.execute(q["getLastOrder"], [w_id, d_id, c_id])
+        order = self.cursor.fetchone()
+        if order:
+            self.cursor.execute(q["getOrderLines"], [w_id, d_id, order[0]])
+            orderLines = self.cursor.fetchall()
+        else:
+            orderLines = []
+
+        self.conn.commit()
+        return [customer, order, orderLines]
+
+    ## ----------------------------------------------
+    ## doPayment
+    ## ----------------------------------------------
+    def doPayment(self, params):
+        q = TXN_QUERIES["PAYMENT"]
+
+        w_id = params["w_id"]
+        d_id = params["d_id"]
+        h_amount = params["h_amount"]
+        c_w_id = params["c_w_id"]
+        c_d_id = params["c_d_id"]
+        c_id = params["c_id"]
+        c_last = params["c_last"]
+        h_date = params["h_date"]
+
+        if c_id is not None:
+            self.cursor.execute(q["getCustomerByCustomerId"], [w_id, d_id, c_id])
+            customer = self.cursor.fetchone()
+        else:
+            # Get the midpoint customer's id
+            self.cursor.execute(q["getCustomersByLastName"], [w_id, d_id, c_last])
+            all_customers = self.cursor.fetchall()
+            assert len(all_customers) > 0
+            namecnt = len(all_customers)
+            index = (namecnt - 1) // 2
+            customer = all_customers[index]
+            c_id = customer[0]
+        assert len(customer) > 0
+        c_balance = customer[14] - h_amount
+        c_ytd_payment = customer[15] + h_amount
+        c_payment_cnt = customer[16] + 1
+        c_data = customer[17]
+
+        self.cursor.execute(q["getWarehouse"], [w_id])
+        warehouse = self.cursor.fetchone()
+
+        self.cursor.execute(q["getDistrict"], [w_id, d_id])
+        district = self.cursor.fetchone()
+
+        self.cursor.execute(q["updateWarehouseBalance"], [h_amount, w_id])
+        self.cursor.execute(q["updateDistrictBalance"], [h_amount, w_id, d_id])
+
+        # Customer Credit Information
+        if customer[11] == constants.BAD_CREDIT:
+            newData = " ".join(map(str, [c_id, c_d_id, c_w_id, d_id, w_id, h_amount]))
+            c_data = newData + "|" + c_data
+            if len(c_data) > constants.MAX_C_DATA:
+                c_data = c_data[: constants.MAX_C_DATA]
+            self.cursor.execute(
+                q["updateBCCustomer"],
+                [c_balance, c_ytd_payment, c_payment_cnt, c_data, c_w_id, c_d_id, c_id],
+            )
+        else:
+            c_data = ""
+            self.cursor.execute(
+                q["updateGCCustomer"],
+                [c_balance, c_ytd_payment, c_payment_cnt, c_w_id, c_d_id, c_id],
+            )
+
+        # Concatenate w_name, four spaces, d_name
+        h_data = "%s    %s" % (warehouse[0], district[0])
+        # Create the history record
+        self.cursor.execute(
+
q["insertHistory"], + [c_id, c_d_id, c_w_id, d_id, w_id, h_date, h_amount, h_data], + ) + + self.conn.commit() + + # TPC-C 2.5.3.3: Must display the following fields: + # W_ID, D_ID, C_ID, C_D_ID, C_W_ID, W_STREET_1, W_STREET_2, W_CITY, W_STATE, W_ZIP, + # D_STREET_1, D_STREET_2, D_CITY, D_STATE, D_ZIP, C_FIRST, C_MIDDLE, C_LAST, C_STREET_1, + # C_STREET_2, C_CITY, C_STATE, C_ZIP, C_PHONE, C_SINCE, C_CREDIT, C_CREDIT_LIM, + # C_DISCOUNT, C_BALANCE, the first 200 characters of C_DATA (only if C_CREDIT = "BC"), + # H_AMOUNT, and H_DATE. + + # Hand back all the warehouse, district, and customer data + return [warehouse, district, customer] + + ## ---------------------------------------------- + ## doStockLevel + ## ---------------------------------------------- + def doStockLevel(self, params): + q = TXN_QUERIES["STOCK_LEVEL"] + + w_id = params["w_id"] + d_id = params["d_id"] + threshold = params["threshold"] + + self.cursor.execute(q["getOId"], [w_id, d_id]) + result = self.cursor.fetchone() + assert result + o_id = result[0] + + self.cursor.execute( + q["getStockCount"], [w_id, d_id, o_id, (o_id - 20), w_id, threshold] + ) + result = self.cursor.fetchone() + + self.conn.commit() + + return int(result[0]) + + +## CLASS diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/message.py b/workloads/chbenchmark/py-tpcc/pytpcc/message.py new file mode 100644 index 00000000..336c64e2 --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/message.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------- +# Copyright (C) 2011 +# Andy Pavlo & Yang Lu +# http:##www.cs.brown.edu/~pavlo/ +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# ----------------------------------------------------------------------- + +import sys +import os +import string +import datetime +import logging +import re +import argparse +import glob +import time +from pprint import pprint, pformat + +from util import * +from runtime import * +import drivers + +EMPTY = 0 +CMD_LOAD = 1 +CMD_EXECUTE = 2 +CMD_STOP = 3 +LOAD_COMPLETED = 4 +EXECUTE_COMPLETED = 5 + + +class Message: + def __init__(self, header=EMPTY, data=None): + self.header = header + self.data = data diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/runtime/__init__.py b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/__init__.py new file mode 100644 index 00000000..0e480b16 --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/__init__.py @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +__all__ = ["executor", "loader"] diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py new file mode 100644 index 00000000..af25ba8d --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/executor.py @@ -0,0 +1,316 @@ +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------- +# Copyright (C) 2011 +# Andy Pavlo +# http://www.cs.brown.edu/~pavlo/ +# +# Original Java Version: +# Copyright (C) 2008 +# Evan Jones +# Massachusetts Institute of Technology +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# ----------------------------------------------------------------------- + +import sys +import multiprocessing +import time +import random +import traceback +import logging +from datetime import datetime +from pprint import pprint, pformat + +import constants +from util import * + + +class Executor: + + def __init__(self, driver, scaleParameters, stop_on_error=False): + self.driver = driver + self.scaleParameters = scaleParameters + self.stop_on_error = stop_on_error + + ## DEF + + def execute(self, duration): + r = results.Results() + assert r + logging.info("Executing benchmark for %d seconds" % duration) + start = r.startBenchmark() + debug = logging.getLogger().isEnabledFor(logging.DEBUG) + + while (time.time() - start) <= duration: + txn, params = self.doOne() + txn_id = r.startTransaction(txn) + + if debug: + logging.debug("Executing '%s' transaction" % txn) + try: + val = self.driver.executeTransaction(txn, params) + except KeyboardInterrupt: + return -1 + except (Exception, AssertionError) as ex: + logging.warn("Failed to execute Transaction '%s': %s" % (txn, ex)) + if debug: + traceback.print_exc(file=sys.stdout) + if self.stop_on_error: + raise + r.abortTransaction(txn_id) + continue + + # if debug: logging.debug("%s\nParameters:\n%s\nResult:\n%s" % (txn, pformat(params), pformat(val))) + + r.stopTransaction(txn_id) + ## WHILE + + r.stopBenchmark() + return r + + ## DEF + + def doOne(self): + """Selects and executes a transaction at random. The number of new order transactions executed per minute is the official "tpmC" metric. See TPC-C 5.4.2 (page 71).""" + + ## This is not strictly accurate: The requirement is for certain + ## *minimum* percentages to be maintained. This is close to the right + ## thing, but not precisely correct. See TPC-C 5.2.4 (page 68). 
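+        ## Transaction mix implemented by the deck below (TPC-C 5.2.3 states
+        ## these as minimum percentages; this uses the canonical values):
+        ##   x in  1..4   -> STOCK_LEVEL   (4%)
+        ##   x in  5..8   -> DELIVERY      (4%)
+        ##   x in  9..12  -> ORDER_STATUS  (4%)
+        ##   x in 13..55  -> PAYMENT       (43%)
+        ##   x in 56..100 -> NEW_ORDER     (45%)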
+ x = rand.number(1, 100) + params = None + txn = None + if x <= 4: ## 4% + txn, params = ( + constants.TransactionTypes.STOCK_LEVEL, + self.generateStockLevelParams(), + ) + elif x <= 4 + 4: ## 4% + txn, params = ( + constants.TransactionTypes.DELIVERY, + self.generateDeliveryParams(), + ) + elif x <= 4 + 4 + 4: ## 4% + txn, params = ( + constants.TransactionTypes.ORDER_STATUS, + self.generateOrderStatusParams(), + ) + elif x <= 43 + 4 + 4 + 4: ## 43% + txn, params = ( + constants.TransactionTypes.PAYMENT, + self.generatePaymentParams(), + ) + else: ## 45% + assert x > 100 - 45 + txn, params = ( + constants.TransactionTypes.NEW_ORDER, + self.generateNewOrderParams(), + ) + + return (txn, params) + + ## DEF + + ## ---------------------------------------------- + ## generateDeliveryParams + ## ---------------------------------------------- + def generateDeliveryParams(self): + """Return parameters for DELIVERY""" + w_id = self.makeWarehouseId() + o_carrier_id = rand.number(constants.MIN_CARRIER_ID, constants.MAX_CARRIER_ID) + ol_delivery_d = datetime.now() + return makeParameterDict(locals(), "w_id", "o_carrier_id", "ol_delivery_d") + + ## DEF + + ## ---------------------------------------------- + ## generateNewOrderParams + ## ---------------------------------------------- + def generateNewOrderParams(self): + """Return parameters for NEW_ORDER""" + w_id = self.makeWarehouseId() + d_id = self.makeDistrictId() + c_id = self.makeCustomerId() + ol_cnt = rand.number(constants.MIN_OL_CNT, constants.MAX_OL_CNT) + o_entry_d = datetime.now() + + ## 1% of transactions roll back + rollback = False # FIXME rand.number(1, 100) == 1 + + i_ids = [] + i_w_ids = [] + i_qtys = [] + for i in range(0, ol_cnt): + if rollback and i + 1 == ol_cnt: + i_ids.append(self.scaleParameters.items + 1) + else: + i_id = self.makeItemId() + while i_id in i_ids: + i_id = self.makeItemId() + i_ids.append(i_id) + + ## 1% of items are from a remote warehouse + remote = rand.number(1, 100) == 1 + if self.scaleParameters.warehouses > 1 and remote: + i_w_ids.append( + rand.numberExcluding( + self.scaleParameters.starting_warehouse, + self.scaleParameters.ending_warehouse, + w_id, + ) + ) + else: + i_w_ids.append(w_id) + + i_qtys.append(rand.number(1, constants.MAX_OL_QUANTITY)) + ## FOR + + return makeParameterDict( + locals(), "w_id", "d_id", "c_id", "o_entry_d", "i_ids", "i_w_ids", "i_qtys" + ) + + ## DEF + + ## ---------------------------------------------- + ## generateOrderStatusParams + ## ---------------------------------------------- + def generateOrderStatusParams(self): + """Return parameters for ORDER_STATUS""" + w_id = self.makeWarehouseId() + d_id = self.makeDistrictId() + c_last = None + c_id = None + + ## 60%: order status by last name + if rand.number(1, 100) <= 60: + c_last = rand.makeRandomLastName(self.scaleParameters.customersPerDistrict) + + ## 40%: order status by id + else: + c_id = self.makeCustomerId() + + return makeParameterDict(locals(), "w_id", "d_id", "c_id", "c_last") + + ## DEF + + ## ---------------------------------------------- + ## generatePaymentParams + ## ---------------------------------------------- + def generatePaymentParams(self): + """Return parameters for PAYMENT""" + x = rand.number(1, 100) + y = rand.number(1, 100) + + w_id = self.makeWarehouseId() + d_id = self.makeDistrictId() + c_w_id = None + c_d_id = None + c_id = None + c_last = None + h_amount = rand.fixedPoint(2, constants.MIN_PAYMENT, constants.MAX_PAYMENT) + h_date = datetime.now() + + ## 85%: paying through own 
warehouse (or there is only 1 warehouse) + if self.scaleParameters.warehouses == 1 or x <= 85: + c_w_id = w_id + c_d_id = d_id + ## 15%: paying through another warehouse: + else: + ## select in range [1, num_warehouses] excluding w_id + c_w_id = rand.numberExcluding( + self.scaleParameters.starting_warehouse, + self.scaleParameters.ending_warehouse, + w_id, + ) + assert c_w_id != w_id + c_d_id = self.makeDistrictId() + + ## 60%: payment by last name + if y <= 60: + c_last = rand.makeRandomLastName(self.scaleParameters.customersPerDistrict) + ## 40%: payment by id + else: + assert y > 60 + c_id = self.makeCustomerId() + + return makeParameterDict( + locals(), + "w_id", + "d_id", + "h_amount", + "c_w_id", + "c_d_id", + "c_id", + "c_last", + "h_date", + ) + + ## DEF + + ## ---------------------------------------------- + ## generateStockLevelParams + ## ---------------------------------------------- + def generateStockLevelParams(self): + """Returns parameters for STOCK_LEVEL""" + w_id = self.makeWarehouseId() + d_id = self.makeDistrictId() + threshold = rand.number( + constants.MIN_STOCK_LEVEL_THRESHOLD, constants.MAX_STOCK_LEVEL_THRESHOLD + ) + return makeParameterDict(locals(), "w_id", "d_id", "threshold") + + ## DEF + + def makeWarehouseId(self): + w_id = rand.number( + self.scaleParameters.starting_warehouse, + self.scaleParameters.ending_warehouse, + ) + assert w_id >= self.scaleParameters.starting_warehouse, ( + "Invalid W_ID: %d" % w_id + ) + assert w_id <= self.scaleParameters.ending_warehouse, "Invalid W_ID: %d" % w_id + return w_id + + ## DEF + + def makeDistrictId(self): + return rand.number(1, self.scaleParameters.districtsPerWarehouse) + + ## DEF + + def makeCustomerId(self): + return rand.NURand(1023, 1, self.scaleParameters.customersPerDistrict) + + ## DEF + + def makeItemId(self): + return rand.NURand(8191, 1, self.scaleParameters.items) + + ## DEF + + +## CLASS + + +def makeParameterDict(values, *args): + return dict(map(lambda x: (x, values[x]), args)) + + +## DEF diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/runtime/loader.py b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/loader.py new file mode 100644 index 00000000..1a196f58 --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/runtime/loader.py @@ -0,0 +1,518 @@ +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------- +# Copyright (C) 2011 +# Andy Pavlo +# http:##www.cs.brown.edu/~pavlo/ +# +# Original Java Version: +# Copyright (C) 2008 +# Evan Jones +# Massachusetts Institute of Technology +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+# -----------------------------------------------------------------------
+
+import os
+import sys
+
+import logging
+from datetime import datetime
+from random import shuffle
+from pprint import pprint, pformat
+
+import constants
+from util import *
+
+
+class Loader:
+
+    def __init__(self, handle, scaleParameters, w_ids, needLoadItems):
+        self.handle = handle
+        self.scaleParameters = scaleParameters
+        self.w_ids = w_ids
+        self.needLoadItems = needLoadItems
+        self.batch_size = 2500
+
+    ## ==============================================
+    ## execute
+    ## ==============================================
+    def execute(self):
+
+        ## Item Table
+        if self.needLoadItems:
+            logging.debug("Loading ITEM table")
+            self.loadItems()
+            self.handle.loadFinishItem()
+
+        ## Then create the warehouse-specific tuples
+        for w_id in self.w_ids:
+            self.loadWarehouse(w_id)
+            self.handle.loadFinishWarehouse(w_id)
+        ## FOR
+
+        return None
+
+    ## ==============================================
+    ## loadItems
+    ## ==============================================
+    def loadItems(self):
+        ## Select 10% of the rows to be marked "original"
+        originalRows = rand.selectUniqueIds(
+            self.scaleParameters.items // 10, 1, self.scaleParameters.items
+        )
+
+        ## Load all of the items
+        tuples = []
+        total_tuples = 0
+        for i in range(1, self.scaleParameters.items + 1):
+            original = i in originalRows
+            tuples.append(self.generateItem(i, original))
+            total_tuples += 1
+            if len(tuples) == self.batch_size:
+                logging.debug(
+                    "LOAD - %s: %5d / %d"
+                    % (
+                        constants.TABLENAME_ITEM,
+                        total_tuples,
+                        self.scaleParameters.items,
+                    )
+                )
+                self.handle.loadTuples(constants.TABLENAME_ITEM, tuples)
+                tuples = []
+        ## FOR
+        if len(tuples) > 0:
+            logging.debug(
+                "LOAD - %s: %5d / %d"
+                % (constants.TABLENAME_ITEM, total_tuples, self.scaleParameters.items)
+            )
+            self.handle.loadTuples(constants.TABLENAME_ITEM, tuples)
+
+    ## DEF
+
+    ## ==============================================
+    ## loadWarehouse
+    ## ==============================================
+    def loadWarehouse(self, w_id):
+        logging.debug(
+            "LOAD - %s: %d / %d"
+            % (constants.TABLENAME_WAREHOUSE, w_id, len(self.w_ids))
+        )
+
+        ## WAREHOUSE
+        w_tuples = [self.generateWarehouse(w_id)]
+        self.handle.loadTuples(constants.TABLENAME_WAREHOUSE, w_tuples)
+
+        ## DISTRICT
+        d_tuples = []
+        for d_id in range(1, self.scaleParameters.districtsPerWarehouse + 1):
+            d_next_o_id = self.scaleParameters.customersPerDistrict + 1
+            d_tuples = [self.generateDistrict(w_id, d_id, d_next_o_id)]
+
+            c_tuples = []
+            h_tuples = []
+
+            ## Select 10% of the customers to have bad credit
+            selectedRows = rand.selectUniqueIds(
+                self.scaleParameters.customersPerDistrict // 10,
+                1,
+                self.scaleParameters.customersPerDistrict,
+            )
+
+            ## TPC-C 4.3.3.1. says that o_c_id should be a permutation of [1, 3000]. But since it
+            ## is a c_id field, it seems to make sense to have it be a permutation of the
+            ## customers. For the "real" thing this will be equivalent
+            cIdPermutation = []
+
+            for c_id in range(1, self.scaleParameters.customersPerDistrict + 1):
+                badCredit = c_id in selectedRows
+                c_tuples.append(
+                    self.generateCustomer(w_id, d_id, c_id, badCredit, True)
+                )
+                h_tuples.append(self.generateHistory(w_id, d_id, c_id))
+                cIdPermutation.append(c_id)
+            ## FOR
+            assert cIdPermutation[0] == 1
+            assert (
+                cIdPermutation[self.scaleParameters.customersPerDistrict - 1]
+                == self.scaleParameters.customersPerDistrict
+            )
+            shuffle(cIdPermutation)
+
+            o_tuples = []
+            ol_tuples = []
+            no_tuples = []
+
+            for o_id in range(1, self.scaleParameters.customersPerDistrict + 1):
+                o_ol_cnt = rand.number(constants.MIN_OL_CNT, constants.MAX_OL_CNT)
+
+                ## The last newOrdersPerDistrict are new orders
+                newOrder = (
+                    self.scaleParameters.customersPerDistrict
+                    - self.scaleParameters.newOrdersPerDistrict
+                ) < o_id
+                o_tuples.append(
+                    self.generateOrder(
+                        w_id, d_id, o_id, cIdPermutation[o_id - 1], o_ol_cnt, newOrder
+                    )
+                )
+
+                ## Generate each OrderLine for the order
+                for ol_number in range(0, o_ol_cnt):
+                    ol_tuples.append(
+                        self.generateOrderLine(
+                            w_id,
+                            d_id,
+                            o_id,
+                            ol_number,
+                            self.scaleParameters.items,
+                            newOrder,
+                        )
+                    )
+                ## FOR
+
+                ## This is a new order: make one for it
+                if newOrder:
+                    no_tuples.append([o_id, d_id, w_id])
+            ## FOR
+
+            self.handle.loadTuples(constants.TABLENAME_DISTRICT, d_tuples)
+            self.handle.loadTuples(constants.TABLENAME_CUSTOMER, c_tuples)
+            self.handle.loadTuples(constants.TABLENAME_ORDERS, o_tuples)
+            self.handle.loadTuples(constants.TABLENAME_ORDER_LINE, ol_tuples)
+            self.handle.loadTuples(constants.TABLENAME_NEW_ORDER, no_tuples)
+            self.handle.loadTuples(constants.TABLENAME_HISTORY, h_tuples)
+            self.handle.loadFinishDistrict(w_id, d_id)
+        ## FOR
+
+        ## Select 10% of the stock to be marked "original"
+        s_tuples = []
+        selectedRows = rand.selectUniqueIds(
+            self.scaleParameters.items // 10, 1, self.scaleParameters.items
+        )
+        total_tuples = 0
+        for i_id in range(1, self.scaleParameters.items + 1):
+            original = i_id in selectedRows
+            s_tuples.append(self.generateStock(w_id, i_id, original))
+            if len(s_tuples) >= self.batch_size:
+                logging.debug(
+                    "LOAD - %s [W_ID=%d]: %5d / %d"
+                    % (
+                        constants.TABLENAME_STOCK,
+                        w_id,
+                        total_tuples,
+                        self.scaleParameters.items,
+                    )
+                )
+                self.handle.loadTuples(constants.TABLENAME_STOCK, s_tuples)
+                s_tuples = []
+            total_tuples += 1
+        ## FOR
+        if len(s_tuples) > 0:
+            logging.debug(
+                "LOAD - %s [W_ID=%d]: %5d / %d"
+                % (
+                    constants.TABLENAME_STOCK,
+                    w_id,
+                    total_tuples,
+                    self.scaleParameters.items,
+                )
+            )
+            self.handle.loadTuples(constants.TABLENAME_STOCK, s_tuples)
+
+    ## DEF
+
+    ## ==============================================
+    ## generateItem
+    ## ==============================================
+    def generateItem(self, id, original):
+        i_id = id
+        i_im_id = rand.number(constants.MIN_IM, constants.MAX_IM)
+        i_name = rand.astring(constants.MIN_I_NAME, constants.MAX_I_NAME)
+        i_price = rand.fixedPoint(
+            constants.MONEY_DECIMALS, constants.MIN_PRICE, constants.MAX_PRICE
+        )
+        i_data = rand.astring(constants.MIN_I_DATA, constants.MAX_I_DATA)
+        if original:
+            i_data = self.fillOriginal(i_data)
+
+        return [i_id, i_im_id, i_name, i_price, i_data]
+
+    ## DEF
+
+    ## ==============================================
+    ## generateWarehouse
+    ## ==============================================
+    def generateWarehouse(self, w_id):
+        w_tax = self.generateTax()
+        w_ytd = constants.INITIAL_W_YTD
+        w_address = self.generateAddress()
+        return [w_id] + w_address + [w_tax, w_ytd]
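+        ## The flattened row above follows the WAREHOUSE column order in
+        ## tpcc.sql: W_ID, W_NAME, W_STREET_1, W_STREET_2, W_CITY, W_STATE,
+        ## W_ZIP, W_TAX, W_YTD (generateAddress yields [name, street1, street2,
+        ## city, state, zip]).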
+
+    ## DEF
+
+    ## ==============================================
+    ## generateDistrict
+    ## ==============================================
+    def generateDistrict(self, d_w_id, d_id, d_next_o_id):
+        d_tax = self.generateTax()
+        d_ytd = constants.INITIAL_D_YTD
+        d_address = self.generateAddress()
+        return [d_id, d_w_id] + d_address + [d_tax, d_ytd, d_next_o_id]
+
+    ## DEF
+
+    ## ==============================================
+    ## generateCustomer
+    ## ==============================================
+    def generateCustomer(self, c_w_id, c_d_id, c_id, badCredit, doesReplicateName):
+        c_first = rand.astring(constants.MIN_FIRST, constants.MAX_FIRST)
+        c_middle = constants.MIDDLE
+
+        assert 1 <= c_id and c_id <= constants.CUSTOMERS_PER_DISTRICT
+        if c_id <= 1000:
+            c_last = rand.makeLastName(c_id - 1)
+        else:
+            c_last = rand.makeRandomLastName(constants.CUSTOMERS_PER_DISTRICT)
+
+        c_phone = rand.nstring(constants.PHONE, constants.PHONE)
+        c_since = datetime.now()
+        c_credit = constants.BAD_CREDIT if badCredit else constants.GOOD_CREDIT
+        c_credit_lim = constants.INITIAL_CREDIT_LIM
+        c_discount = rand.fixedPoint(
+            constants.DISCOUNT_DECIMALS, constants.MIN_DISCOUNT, constants.MAX_DISCOUNT
+        )
+        c_balance = constants.INITIAL_BALANCE
+        c_ytd_payment = constants.INITIAL_YTD_PAYMENT
+        c_payment_cnt = constants.INITIAL_PAYMENT_CNT
+        c_delivery_cnt = constants.INITIAL_DELIVERY_CNT
+        c_data = rand.astring(constants.MIN_C_DATA, constants.MAX_C_DATA)
+
+        c_street1 = rand.astring(constants.MIN_STREET, constants.MAX_STREET)
+        c_street2 = rand.astring(constants.MIN_STREET, constants.MAX_STREET)
+        c_city = rand.astring(constants.MIN_CITY, constants.MAX_CITY)
+        c_state = rand.astring(constants.STATE, constants.STATE)
+        c_zip = self.generateZip()
+
+        return [
+            c_id,
+            c_d_id,
+            c_w_id,
+            c_first,
+            c_middle,
+            c_last,
+            c_street1,
+            c_street2,
+            c_city,
+            c_state,
+            c_zip,
+            c_phone,
+            c_since,
+            c_credit,
+            c_credit_lim,
+            c_discount,
+            c_balance,
+            c_ytd_payment,
+            c_payment_cnt,
+            c_delivery_cnt,
+            c_data,
+        ]
+
+    ## DEF
+
+    ## ==============================================
+    ## generateOrder
+    ## ==============================================
+    def generateOrder(self, o_w_id, o_d_id, o_id, o_c_id, o_ol_cnt, newOrder):
+        """Returns a generated row for the ORDERS table."""
+        o_entry_d = datetime.now()
+        o_carrier_id = (
+            constants.NULL_CARRIER_ID
+            if newOrder
+            else rand.number(constants.MIN_CARRIER_ID, constants.MAX_CARRIER_ID)
+        )
+        o_all_local = constants.INITIAL_ALL_LOCAL
+        return [
+            o_id,
+            o_c_id,
+            o_d_id,
+            o_w_id,
+            o_entry_d,
+            o_carrier_id,
+            o_ol_cnt,
+            o_all_local,
+        ]
+
+    ## DEF
+
+    ## ==============================================
+    ## generateOrderLine
+    ## ==============================================
+    def generateOrderLine(
+        self, ol_w_id, ol_d_id, ol_o_id, ol_number, max_items, newOrder
+    ):
+        ol_i_id = rand.number(1, max_items)
+        ol_supply_w_id = ol_w_id
+        ol_delivery_d = datetime.now()
+        ol_quantity = constants.INITIAL_QUANTITY
+
+        ## 1% of items are from a remote warehouse
+        remote = rand.number(1, 100) == 1
+        if self.scaleParameters.warehouses > 1 and remote:
+            ol_supply_w_id = rand.numberExcluding(
+                self.scaleParameters.starting_warehouse,
+                self.scaleParameters.ending_warehouse,
+                ol_w_id,
+            )
+
+        if not newOrder:
+            ol_amount = 0.00
+        else:
+            ol_amount = rand.fixedPoint(
+                constants.MONEY_DECIMALS,
+                constants.MIN_AMOUNT,
+                constants.MAX_PRICE * constants.MAX_OL_QUANTITY,
+            )
+            ol_delivery_d = None
+        ol_dist_info = rand.astring(constants.DIST, constants.DIST)
+
+        return [
+            ol_o_id,
+            ol_d_id,
+            ol_w_id,
+            ol_number,
+            ol_i_id,
+            ol_supply_w_id,
+            ol_delivery_d,
+            ol_quantity,
+            ol_amount,
+            ol_dist_info,
+        ]
+
+    ## DEF
+
+    ## ==============================================
+    ## generateStock
+    ## ==============================================
+    def generateStock(self, s_w_id, s_i_id, original):
+        s_quantity = rand.number(constants.MIN_QUANTITY, constants.MAX_QUANTITY)
+        s_ytd = 0
+        s_order_cnt = 0
+        s_remote_cnt = 0
+
+        s_data = rand.astring(constants.MIN_I_DATA, constants.MAX_I_DATA)
+        if original:
+            s_data = self.fillOriginal(s_data)
+
+        s_dists = []
+        for i in range(0, constants.DISTRICTS_PER_WAREHOUSE):
+            s_dists.append(rand.astring(constants.DIST, constants.DIST))
+
+        return (
+            [s_i_id, s_w_id, s_quantity]
+            + s_dists
+            + [s_ytd, s_order_cnt, s_remote_cnt, s_data]
+        )
+
+    ## DEF
+
+    ## ==============================================
+    ## generateHistory
+    ## ==============================================
+    def generateHistory(self, h_c_w_id, h_c_d_id, h_c_id):
+        h_w_id = h_c_w_id
+        h_d_id = h_c_d_id
+        h_date = datetime.now()
+        h_amount = constants.INITIAL_AMOUNT
+        h_data = rand.astring(constants.MIN_DATA, constants.MAX_DATA)
+        return [h_c_id, h_c_d_id, h_c_w_id, h_d_id, h_w_id, h_date, h_amount, h_data]
+
+    ## DEF
+
+    ## ==============================================
+    ## generateAddress
+    ## ==============================================
+    def generateAddress(self):
+        """
+        Returns a name and a street address
+        Used by both generateWarehouse and generateDistrict.
+        """
+        name = rand.astring(constants.MIN_NAME, constants.MAX_NAME)
+        return [name] + self.generateStreetAddress()
+
+    ## DEF
+
+    ## ==============================================
+    ## generateStreetAddress
+    ## ==============================================
+    def generateStreetAddress(self):
+        """
+        Returns a list for a street address
+        Used for warehouses, districts and customers.
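+        The returned list is [street1, street2, city, state, zip].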
+ """ + street1 = rand.astring(constants.MIN_STREET, constants.MAX_STREET) + street2 = rand.astring(constants.MIN_STREET, constants.MAX_STREET) + city = rand.astring(constants.MIN_CITY, constants.MAX_CITY) + state = rand.astring(constants.STATE, constants.STATE) + zip = self.generateZip() + + return [street1, street2, city, state, zip] + + ## DEF + + ## ============================================== + ## generateTax + ## ============================================== + def generateTax(self): + return rand.fixedPoint( + constants.TAX_DECIMALS, constants.MIN_TAX, constants.MAX_TAX + ) + + ## DEF + + ## ============================================== + ## generateZip + ## ============================================== + def generateZip(self): + length = constants.ZIP_LENGTH - len(constants.ZIP_SUFFIX) + return rand.nstring(length, length) + constants.ZIP_SUFFIX + + ## DEF + + ## ============================================== + ## fillOriginal + ## ============================================== + def fillOriginal(self, data): + """ + a string with ORIGINAL_STRING at a random position + """ + originalLength = len(constants.ORIGINAL_STRING) + position = rand.number(0, len(data) - originalLength) + out = ( + data[:position] + + constants.ORIGINAL_STRING + + data[position + originalLength :] + ) + assert len(out) == len(data) + return out + + ## DEF + + +## CLASS diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py b/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py new file mode 100755 index 00000000..923bf6fe --- /dev/null +++ b/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# ----------------------------------------------------------------------- +# Copyright (C) 2011 +# Andy Pavlo +# http:##www.cs.brown.edu/~pavlo/ +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# -----------------------------------------------------------------------
+
+import sys
+import os
+import string
+import datetime
+import logging
+import re
+import argparse
+import glob
+import time
+import traceback
+import multiprocessing
+from configparser import ConfigParser
+from pprint import pprint, pformat
+
+from util import *
+from runtime import *
+import drivers
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(funcName)s:%(lineno)03d] %(levelname)-5s: %(message)s",
+    datefmt="%m-%d-%Y %H:%M:%S",
+    stream=sys.stdout,
+)
+
+
+## ==============================================
+## createDriverClass
+## ==============================================
+def createDriverClass(name):
+    full_name = "%sDriver" % name.title()
+    mod = __import__("drivers.%s" % full_name.lower(), globals(), locals(), [full_name])
+    klass = getattr(mod, full_name)
+    return klass
+
+
+## DEF
+
+
+## ==============================================
+## getDrivers
+## ==============================================
+def getDrivers():
+    drivers = []
+    for f in map(
+        lambda x: os.path.basename(x).replace("driver.py", ""),
+        glob.glob("./drivers/*driver.py"),
+    ):
+        if f != "abstract":
+            drivers.append(f)
+    return drivers
+
+
+## DEF
+
+
+## ==============================================
+## startLoading
+## ==============================================
+def startLoading(driverClass, scaleParameters, args, config):
+    logging.debug("Creating client pool with %d processes" % args["clients"])
+    pool = multiprocessing.Pool(args["clients"])
+    debug = logging.getLogger().isEnabledFor(logging.DEBUG)
+
+    # Split the warehouses into chunks
+    w_ids = [[] for _ in range(args["clients"])]
+    for w_id in range(
+        scaleParameters.starting_warehouse, scaleParameters.ending_warehouse + 1
+    ):
+        idx = w_id % args["clients"]
+        w_ids[idx].append(w_id)
+    ## FOR
+
+    loader_results = []
+    for i in range(args["clients"]):
+        r = pool.apply_async(
+            loaderFunc, (driverClass, scaleParameters, args, config, w_ids[i], True)
+        )
+        loader_results.append(r)
+    ## FOR
+
+    pool.close()
+    logging.debug("Waiting for %d loaders to finish" % args["clients"])
+    pool.join()
+
+
+## DEF
+
+
+## ==============================================
+## loaderFunc
+## ==============================================
+def loaderFunc(driverClass, scaleParameters, args, config, w_ids, debug):
+    driver = driverClass(args["ddl"])
+    assert driver is not None
+    logging.debug(
+        "Starting client execution: %s [warehouses=%d]" % (driver, len(w_ids))
+    )
+
+    config["load"] = True
+    config["execute"] = False
+    config["reset"] = False
+    driver.loadConfig(config)
+
+    try:
+        loadItems = 1 in w_ids
+        l = loader.Loader(driver, scaleParameters, w_ids, loadItems)
+        driver.loadStart()
+        l.execute()
+        driver.loadFinish()
+    except KeyboardInterrupt:
+        return -1
+    except (Exception, AssertionError) as ex:
+        logging.warning("Failed to load data: %s" % (ex))
+        # if debug:
+        traceback.print_exc(file=sys.stdout)
+        raise
+
+
+## DEF
+
+
+## ==============================================
+## startExecution
+## ==============================================
+def startExecution(driverClass, scaleParameters, args, config):
+    logging.debug("Creating client pool with %d processes" % args["clients"])
+    pool = multiprocessing.Pool(args["clients"])
+    debug = logging.getLogger().isEnabledFor(logging.DEBUG)
+
+    worker_results = []
+    for i in range(args["clients"]):
+        r = pool.apply_async(
+            executorFunc,
+            (
+                driverClass,
+                scaleParameters,
+                args,
+                config,
+                debug,
+            ),
+        )
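+        # apply_async hands back an AsyncResult; the handles are collected so
+        # the per-client Results objects can be fetched and merged below.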
+        worker_results.append(r)
+    ## FOR
+    pool.close()
+    pool.join()
+
+    total_results = results.Results()
+    for asyncr in worker_results:
+        asyncr.wait()
+        r = asyncr.get()
+        assert r is not None, "No results object returned!"
+        if type(r) == int and r == -1:
+            sys.exit(1)
+        total_results.append(r)
+    ## FOR
+
+    return total_results
+
+
+## DEF
+
+
+## ==============================================
+## executorFunc
+## ==============================================
+def executorFunc(driverClass, scaleParameters, args, config, debug):
+    driver = driverClass(args["ddl"])
+    assert driver is not None
+    logging.debug("Starting client execution: %s" % driver)
+
+    config["execute"] = True
+    config["reset"] = False
+    driver.loadConfig(config)
+
+    e = executor.Executor(driver, scaleParameters, stop_on_error=args["stop_on_error"])
+    driver.executeStart()
+    results = e.execute(args["duration"])
+    driver.executeFinish()
+
+    return results
+
+
+## DEF
+
+## ==============================================
+## main
+## ==============================================
+if __name__ == "__main__":
+    aparser = argparse.ArgumentParser(
+        description="Python implementation of the TPC-C Benchmark"
+    )
+    aparser.add_argument("system", choices=getDrivers(), help="Target system driver")
+    aparser.add_argument(
+        "--config",
+        type=argparse.FileType("r"),
+        help="Path to driver configuration file",
+    )
+    aparser.add_argument(
+        "--reset",
+        action="store_true",
+        help="Instruct the driver to reset the contents of the database",
+    )
+    aparser.add_argument(
+        "--scalefactor",
+        default=1,
+        type=float,
+        metavar="SF",
+        help="Benchmark scale factor",
+    )
+    aparser.add_argument(
+        "--warehouses", default=4, type=int, metavar="W", help="Number of Warehouses"
+    )
+    aparser.add_argument(
+        "--duration",
+        default=60,
+        type=int,
+        metavar="D",
+        help="How long to run the benchmark in seconds",
+    )
+    aparser.add_argument(
+        "--ddl",
+        default=os.path.realpath(os.path.join(os.path.dirname(__file__), "tpcc.sql")),
+        help="Path to the TPC-C DDL SQL file",
+    )
+    aparser.add_argument(
+        "--clients",
+        default=1,
+        type=int,
+        metavar="N",
+        help="The number of blocking clients to fork",
+    )
+    aparser.add_argument(
+        "--stop-on-error",
+        action="store_true",
+        help="Stop the transaction execution when the driver throws an exception.",
+    )
+    aparser.add_argument(
+        "--no-load", action="store_true", help="Disable loading the data"
+    )
+    aparser.add_argument(
+        "--no-execute", action="store_true", help="Disable executing the workload"
+    )
+    aparser.add_argument(
+        "--print-config",
+        action="store_true",
+        help="Print out the default configuration file for the system and exit",
+    )
+    aparser.add_argument(
+        "--debug", action="store_true", help="Enable debug log messages"
+    )
+    args = vars(aparser.parse_args())
+
+    if args["debug"]:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    ## Arguments validation
+    assert (
+        args["reset"] == False or args["no_load"] == False
+    ), "'--reset' and '--no-load' are incompatible with each other"
+
+    ## Create a handle to the target client driver
+    driverClass = createDriverClass(args["system"])
+    assert driverClass is not None, "Failed to find '%s' class" % args["system"]
+    driver = driverClass(args["ddl"])
+    assert driver is not None, "Failed to create '%s' driver" % args["system"]
+    if args["print_config"]:
+        config = driver.makeDefaultConfig()
+        print(driver.formatConfig(config))
+        print()
+        sys.exit(0)
+
+    ## Load Configuration file
+    if args["config"]:
+        logging.debug("Loading configuration file '%s'" % args["config"])
+        cparser = ConfigParser()
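+        # The configuration file must contain a section named after the target
+        # system (for example [sqlite]); its key/value pairs become the driver
+        # configuration dictionary.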
+        cparser.read(os.path.realpath(args["config"].name))
+        config = dict(cparser.items(args["system"]))
+    else:
+        logging.debug("Using default configuration for %s" % args["system"])
+        defaultConfig = driver.makeDefaultConfig()
+        config = dict(map(lambda x: (x, defaultConfig[x][1]), defaultConfig.keys()))
+    config["reset"] = args["reset"]
+    config["load"] = False
+    config["execute"] = False
+    if config["reset"]:
+        logging.info("Resetting database")
+    driver.loadConfig(config)
+    logging.info("Initializing TPC-C benchmark using %s" % driver)
+
+    ## Create ScaleParameters
+    scaleParameters = scaleparameters.makeWithScaleFactor(
+        args["warehouses"], args["scalefactor"]
+    )
+    rand.setNURand(nurand.makeForLoad())
+    if args["debug"]:
+        logging.debug("Scale Parameters:\n%s" % scaleParameters)
+
+    ## DATA LOADER!!!
+    load_time = None
+    if not args["no_load"]:
+        logging.info("Loading TPC-C benchmark data using %s" % (driver))
+        load_start = time.time()
+        if args["clients"] == 1:
+            l = loader.Loader(
+                driver,
+                scaleParameters,
+                range(
+                    scaleParameters.starting_warehouse,
+                    scaleParameters.ending_warehouse + 1,
+                ),
+                True,
+            )
+            driver.loadStart()
+            l.execute()
+            driver.loadFinish()
+        else:
+            startLoading(driverClass, scaleParameters, args, config)
+        load_time = time.time() - load_start
+    ## IF
+
+    ## WORKLOAD DRIVER!!!
+    if not args["no_execute"]:
+        if args["clients"] == 1:
+            e = executor.Executor(
+                driver, scaleParameters, stop_on_error=args["stop_on_error"]
+            )
+            driver.executeStart()
+            results = e.execute(args["duration"])
+            driver.executeFinish()
+        else:
+            results = startExecution(driverClass, scaleParameters, args, config)
+        assert results
+        print(results.show(load_time))
+    ## IF
+
+## MAIN
diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.sql b/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.sql
new file mode 100644
index 00000000..42f6dc28
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/pytpcc/tpcc.sql
@@ -0,0 +1,140 @@
+CREATE TABLE WAREHOUSE (
+    W_ID SMALLINT DEFAULT '0' NOT NULL,
+    W_NAME VARCHAR(16) DEFAULT NULL,
+    W_STREET_1 VARCHAR(32) DEFAULT NULL,
+    W_STREET_2 VARCHAR(32) DEFAULT NULL,
+    W_CITY VARCHAR(32) DEFAULT NULL,
+    W_STATE VARCHAR(2) DEFAULT NULL,
+    W_ZIP VARCHAR(9) DEFAULT NULL,
+    W_TAX FLOAT DEFAULT NULL,
+    W_YTD FLOAT DEFAULT NULL,
+    CONSTRAINT W_PK_ARRAY PRIMARY KEY (W_ID)
+);
+
+CREATE TABLE DISTRICT (
+    D_ID TINYINT DEFAULT '0' NOT NULL,
+    D_W_ID SMALLINT DEFAULT '0' NOT NULL REFERENCES WAREHOUSE (W_ID),
+    D_NAME VARCHAR(16) DEFAULT NULL,
+    D_STREET_1 VARCHAR(32) DEFAULT NULL,
+    D_STREET_2 VARCHAR(32) DEFAULT NULL,
+    D_CITY VARCHAR(32) DEFAULT NULL,
+    D_STATE VARCHAR(2) DEFAULT NULL,
+    D_ZIP VARCHAR(9) DEFAULT NULL,
+    D_TAX FLOAT DEFAULT NULL,
+    D_YTD FLOAT DEFAULT NULL,
+    D_NEXT_O_ID INT DEFAULT NULL,
+    PRIMARY KEY (D_W_ID,D_ID)
+);
+
+CREATE TABLE ITEM (
+    I_ID INTEGER DEFAULT '0' NOT NULL,
+    I_IM_ID INTEGER DEFAULT NULL,
+    I_NAME VARCHAR(32) DEFAULT NULL,
+    I_PRICE FLOAT DEFAULT NULL,
+    I_DATA VARCHAR(64) DEFAULT NULL,
+    CONSTRAINT I_PK_ARRAY PRIMARY KEY (I_ID)
+);
+
+CREATE TABLE CUSTOMER (
+    C_ID INTEGER DEFAULT '0' NOT NULL,
+    C_D_ID TINYINT DEFAULT '0' NOT NULL,
+    C_W_ID SMALLINT DEFAULT '0' NOT NULL,
+    C_FIRST VARCHAR(32) DEFAULT NULL,
+    C_MIDDLE VARCHAR(2) DEFAULT NULL,
+    C_LAST VARCHAR(32) DEFAULT NULL,
+    C_STREET_1 VARCHAR(32) DEFAULT NULL,
+    C_STREET_2 VARCHAR(32) DEFAULT NULL,
+    C_CITY VARCHAR(32) DEFAULT NULL,
+    C_STATE VARCHAR(2) DEFAULT NULL,
+    C_ZIP VARCHAR(9) DEFAULT NULL,
+    C_PHONE VARCHAR(32) DEFAULT NULL,
+    C_SINCE TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
+    C_CREDIT VARCHAR(2) DEFAULT NULL,
+    C_CREDIT_LIM FLOAT DEFAULT NULL,
+    C_DISCOUNT FLOAT DEFAULT NULL,
+    C_BALANCE FLOAT DEFAULT NULL,
+    C_YTD_PAYMENT FLOAT DEFAULT NULL,
+    C_PAYMENT_CNT INTEGER DEFAULT NULL,
+    C_DELIVERY_CNT INTEGER DEFAULT NULL,
+    C_DATA VARCHAR(500),
+    PRIMARY KEY (C_W_ID,C_D_ID,C_ID),
+    UNIQUE (C_W_ID,C_D_ID,C_LAST,C_FIRST),
+    CONSTRAINT C_FKEY_D FOREIGN KEY (C_D_ID, C_W_ID) REFERENCES DISTRICT (D_ID, D_W_ID)
+);
+CREATE INDEX IDX_CUSTOMER ON CUSTOMER (C_W_ID,C_D_ID,C_LAST);
+
+CREATE TABLE HISTORY (
+    H_C_ID INTEGER DEFAULT NULL,
+    H_C_D_ID TINYINT DEFAULT NULL,
+    H_C_W_ID SMALLINT DEFAULT NULL,
+    H_D_ID TINYINT DEFAULT NULL,
+    H_W_ID SMALLINT DEFAULT '0' NOT NULL,
+    H_DATE TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
+    H_AMOUNT FLOAT DEFAULT NULL,
+    H_DATA VARCHAR(32) DEFAULT NULL,
+    CONSTRAINT H_FKEY_C FOREIGN KEY (H_C_ID, H_C_D_ID, H_C_W_ID) REFERENCES CUSTOMER (C_ID, C_D_ID, C_W_ID),
+    CONSTRAINT H_FKEY_D FOREIGN KEY (H_D_ID, H_W_ID) REFERENCES DISTRICT (D_ID, D_W_ID)
+);
+
+CREATE TABLE STOCK (
+    S_I_ID INTEGER DEFAULT '0' NOT NULL REFERENCES ITEM (I_ID),
+    S_W_ID SMALLINT DEFAULT '0' NOT NULL REFERENCES WAREHOUSE (W_ID),
+    S_QUANTITY INTEGER DEFAULT '0' NOT NULL,
+    S_DIST_01 VARCHAR(32) DEFAULT NULL,
+    S_DIST_02 VARCHAR(32) DEFAULT NULL,
+    S_DIST_03 VARCHAR(32) DEFAULT NULL,
+    S_DIST_04 VARCHAR(32) DEFAULT NULL,
+    S_DIST_05 VARCHAR(32) DEFAULT NULL,
+    S_DIST_06 VARCHAR(32) DEFAULT NULL,
+    S_DIST_07 VARCHAR(32) DEFAULT NULL,
+    S_DIST_08 VARCHAR(32) DEFAULT NULL,
+    S_DIST_09 VARCHAR(32) DEFAULT NULL,
+    S_DIST_10 VARCHAR(32) DEFAULT NULL,
+    S_YTD INTEGER DEFAULT NULL,
+    S_ORDER_CNT INTEGER DEFAULT NULL,
+    S_REMOTE_CNT INTEGER DEFAULT NULL,
+    S_DATA VARCHAR(64) DEFAULT NULL,
+    PRIMARY KEY (S_W_ID,S_I_ID)
+);
+
+CREATE TABLE ORDERS (
+    O_ID INTEGER DEFAULT '0' NOT NULL,
+    O_C_ID INTEGER DEFAULT NULL,
+    O_D_ID TINYINT DEFAULT '0' NOT NULL,
+    O_W_ID SMALLINT DEFAULT '0' NOT NULL,
+    O_ENTRY_D TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
+    O_CARRIER_ID INTEGER DEFAULT NULL,
+    O_OL_CNT INTEGER DEFAULT NULL,
+    O_ALL_LOCAL INTEGER DEFAULT NULL,
+    PRIMARY KEY (O_W_ID,O_D_ID,O_ID),
+    UNIQUE (O_W_ID,O_D_ID,O_C_ID,O_ID),
+    CONSTRAINT O_FKEY_C FOREIGN KEY (O_C_ID, O_D_ID, O_W_ID) REFERENCES CUSTOMER (C_ID, C_D_ID, C_W_ID)
+);
+CREATE INDEX IDX_ORDERS ON ORDERS (O_W_ID,O_D_ID,O_C_ID);
+
+CREATE TABLE NEW_ORDER (
+    NO_O_ID INTEGER DEFAULT '0' NOT NULL,
+    NO_D_ID TINYINT DEFAULT '0' NOT NULL,
+    NO_W_ID SMALLINT DEFAULT '0' NOT NULL,
+    CONSTRAINT NO_PK_TREE PRIMARY KEY (NO_D_ID,NO_W_ID,NO_O_ID),
+    CONSTRAINT NO_FKEY_O FOREIGN KEY (NO_O_ID, NO_D_ID, NO_W_ID) REFERENCES ORDERS (O_ID, O_D_ID, O_W_ID)
+);
+
+CREATE TABLE ORDER_LINE (
+    OL_O_ID INTEGER DEFAULT '0' NOT NULL,
+    OL_D_ID TINYINT DEFAULT '0' NOT NULL,
+    OL_W_ID SMALLINT DEFAULT '0' NOT NULL,
+    OL_NUMBER INTEGER DEFAULT '0' NOT NULL,
+    OL_I_ID INTEGER DEFAULT NULL,
+    OL_SUPPLY_W_ID SMALLINT DEFAULT NULL,
+    OL_DELIVERY_D TIMESTAMP DEFAULT NULL,
+    OL_QUANTITY INTEGER DEFAULT NULL,
+    OL_AMOUNT FLOAT DEFAULT NULL,
+    OL_DIST_INFO VARCHAR(32) DEFAULT NULL,
+    PRIMARY KEY (OL_W_ID,OL_D_ID,OL_O_ID,OL_NUMBER),
+    CONSTRAINT OL_FKEY_O FOREIGN KEY (OL_O_ID, OL_D_ID, OL_W_ID) REFERENCES ORDERS (O_ID, O_D_ID, O_W_ID),
+    CONSTRAINT OL_FKEY_S FOREIGN KEY (OL_I_ID, OL_SUPPLY_W_ID) REFERENCES STOCK (S_I_ID, S_W_ID)
+);
+--CREATE INDEX IDX_ORDER_LINE_3COL ON ORDER_LINE (OL_W_ID,OL_D_ID,OL_O_ID);
+--CREATE INDEX IDX_ORDER_LINE_2COL ON ORDER_LINE (OL_W_ID,OL_D_ID);
+CREATE INDEX IDX_ORDER_LINE_TREE ON ORDER_LINE (OL_W_ID,OL_D_ID,OL_O_ID);
diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/util/__init__.py b/workloads/chbenchmark/py-tpcc/pytpcc/util/__init__.py
new file mode 100644
index 00000000..242a6b44
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/pytpcc/util/__init__.py
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+
+__all__ = ["scaleparameters", "rand", "nurand", "results"]
diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/util/nurand.py b/workloads/chbenchmark/py-tpcc/pytpcc/util/nurand.py
new file mode 100644
index 00000000..7127427a
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/pytpcc/util/nurand.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+# -----------------------------------------------------------------------
+# Copyright (C) 2011
+# Andy Pavlo
+# http://www.cs.brown.edu/~pavlo/
+#
+# Original Java Version:
+# Copyright (C) 2008
+# Evan Jones
+# Massachusetts Institute of Technology
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+# -----------------------------------------------------------------------
+
+from . import rand
+
+
+def makeForLoad():
+    """Create random NURand constants, appropriate for loading the database."""
+    cLast = rand.number(0, 255)
+    cId = rand.number(0, 1023)
+    orderLineItemId = rand.number(0, 8191)
+    return NURandC(cLast, cId, orderLineItemId)
+
+
+def validCRun(cRun, cLoad):
+    """Returns true if the cRun value is valid for running. See TPC-C 2.1.6.1 (page 20)"""
+    cDelta = abs(cRun - cLoad)
+    return 65 <= cDelta and cDelta <= 119 and cDelta != 96 and cDelta != 112
+
+
+def makeForRun(loadC):
+    """Create random NURand constants for running TPC-C. TPC-C 2.1.6.1. (page 20) specifies the valid range for these constants."""
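+    # The run-time constant must differ from the load-time constant by a
+    # delta in [65, 119], excluding 96 and 112 (see validCRun above), so we
+    # keep redrawing until we get one.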
+    cRun = rand.number(0, 255)
+    while not validCRun(cRun, loadC.cLast):
+        cRun = rand.number(0, 255)
+    assert validCRun(cRun, loadC.cLast)
+
+    cId = rand.number(0, 1023)
+    orderLineItemId = rand.number(0, 8191)
+    return NURandC(cRun, cId, orderLineItemId)
+
+
+class NURandC:
+    def __init__(self, cLast, cId, orderLineItemId):
+        self.cLast = cLast
+        self.cId = cId
+        self.orderLineItemId = orderLineItemId
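+
+
+# Illustrative usage sketch (comments only, not executed): the constant used
+# while loading and the constant used while running must differ by a delta in
+# [65, 119], excluding 96 and 112, per TPC-C 2.1.6.1. `makeForRun` simply
+# retries until `validCRun` accepts the pair:
+#
+#   from util import nurand
+#   load_c = nurand.makeForLoad()
+#   run_c = nurand.makeForRun(load_c)
+#   assert nurand.validCRun(run_c.cLast, load_c.cLast)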
(page 20).""" + global nurandVar + assert x <= y + if nurandVar is None: + setNURand(nurand.makeForLoad()) + + if a == 255: + c = nurandVar.cLast + elif a == 1023: + c = nurandVar.cId + elif a == 8191: + c = nurandVar.orderLineItemId + else: + raise Exception("a = " + a + " is not a supported value") + + return (((number(0, a) | number(x, y)) + c) % (y - x + 1)) + x + + +## DEF + + +def number(minimum, maximum): + value = random.randint(minimum, maximum) + assert minimum <= value and value <= maximum + return value + + +## DEF + + +def numberExcluding(minimum, maximum, excluding): + """An in the range [minimum, maximum], excluding excluding.""" + assert minimum < maximum + assert minimum <= excluding and excluding <= maximum + + ## Generate 1 less number than the range + num = number(minimum, maximum - 1) + + ## Adjust the numbers to remove excluding + if num >= excluding: + num += 1 + assert minimum <= num and num <= maximum and num != excluding + return num + + +## DEF + + +def fixedPoint(decimal_places, minimum, maximum): + assert decimal_places > 0 + assert minimum < maximum + + multiplier = 1 + for i in range(0, decimal_places): + multiplier *= 10 + + int_min = int(minimum * multiplier + 0.5) + int_max = int(maximum * multiplier + 0.5) + + return float(number(int_min, int_max) / float(multiplier)) + + +## DEF + + +def selectUniqueIds(numUnique, minimum, maximum): + rows = set() + for i in range(0, numUnique): + index = None + while index == None or index in rows: + index = number(minimum, maximum) + ## WHILE + rows.add(index) + ## FOR + assert len(rows) == numUnique + return rows + + +## DEF + + +def astring(minimum_length, maximum_length): + """A random alphabetic string with length in range [minimum_length, maximum_length].""" + return randomString(minimum_length, maximum_length, "a", 26) + + +## DEF + + +def nstring(minimum_length, maximum_length): + """A random numeric string with length in range [minimum_length, maximum_length].""" + return randomString(minimum_length, maximum_length, "0", 10) + + +## DEF + + +def randomString(minimum_length, maximum_length, base, numCharacters): + length = number(minimum_length, maximum_length) + baseByte = ord(base) + string = "" + for i in range(length): + string += chr(baseByte + number(0, numCharacters - 1)) + return string + + +## DEF + + +def makeLastName(number): + """A last name as defined by TPC-C 4.3.2.3. Not actually random.""" + global SYLLABLES + assert 0 <= number and number <= 999 + indicies = [number / 100, (number / 10) % 10, number % 10] + return "".join(map(lambda x: SYLLABLES[x], indicies)) + + +## DEF + + +def makeRandomLastName(maxCID): + """A non-uniform random last name, as defined by TPC-C 4.3.2.3. 
diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/util/results.py b/workloads/chbenchmark/py-tpcc/pytpcc/util/results.py
new file mode 100644
index 00000000..99e32bab
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/pytpcc/util/results.py
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+# -----------------------------------------------------------------------
+# Copyright (C) 2011
+# Andy Pavlo
+# http://www.cs.brown.edu/~pavlo/
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+# -----------------------------------------------------------------------
+
+import logging
+import time
+
+
+class Results:
+    def __init__(self):
+        self.start = None
+        self.stop = None
+        self.txn_id = 0
+
+        self.txn_counters = {}
+        self.txn_times = {}
+        self.running = {}
+
+    def startBenchmark(self):
+        """Mark the benchmark as having been started"""
+        assert self.start is None
+        logging.debug("Starting benchmark statistics collection")
+        self.start = time.time()
+        return self.start
+
+    def stopBenchmark(self):
+        """Mark the benchmark as having been stopped"""
+        assert self.start is not None
+        assert self.stop is None
+        logging.debug("Stopping benchmark statistics collection")
+        self.stop = time.time()
+
+    def startTransaction(self, txn):
+        self.txn_id += 1
+        id = self.txn_id
+        self.running[id] = (txn, time.time())
+        return id
+
+    def abortTransaction(self, id):
+        """Abort a transaction and discard its times"""
+        assert id in self.running
+        del self.running[id]
+
+    def stopTransaction(self, id):
+        """Record that the benchmark completed an invocation of the given transaction"""
+        assert id in self.running
+        txn_name, txn_start = self.running[id]
+        del self.running[id]
+
+        duration = time.time() - txn_start
+        total_time = self.txn_times.get(txn_name, 0)
+        self.txn_times[txn_name] = total_time + duration
+
+        total_cnt = self.txn_counters.get(txn_name, 0)
+        self.txn_counters[txn_name] = total_cnt + 1
+
+    def append(self, r):
+        for txn_name in r.txn_counters.keys():
+            orig_cnt = self.txn_counters.get(txn_name, 0)
+            orig_time = self.txn_times.get(txn_name, 0)
+
+            self.txn_counters[txn_name] = orig_cnt + r.txn_counters[txn_name]
+            self.txn_times[txn_name] = orig_time + r.txn_times[txn_name]
+            # logging.debug("%s [cnt=%d, time=%d]" % (txn_name, self.txn_counters[txn_name], self.txn_times[txn_name]))
+        ## HACK: adopt the appended result's start/stop window as our own
+        self.start = r.start
+        self.stop = r.stop
+
+    def __str__(self):
+        return self.show()
+
+    def show(self, load_time=None):
+        if self.start is None:
+            return "Benchmark not started"
+        if self.stop is None:
+            duration = time.time() - self.start
+        else:
+            duration = self.stop - self.start
+
+        col_width = 16
+        total_width = (col_width * 4) + 2
+        f = "\n  " + (("%-" + str(col_width) + "s") * 4)
+        line = "-" * total_width
+
+        ret = "" + "=" * total_width + "\n"
+        if load_time is not None:
+            ret += "Data Loading Time: %d seconds\n\n" % (load_time)
+
+        ret += "Execution Results after %d seconds\n%s" % (duration, line)
+        ret += f % ("", "Executed", "Time (µs)", "Rate")
+
+        total_time = 0
+        total_cnt = 0
+        for txn in sorted(self.txn_counters.keys()):
+            txn_time = self.txn_times[txn]
+            txn_cnt = self.txn_counters[txn]
+            rate = "%.02f txn/s" % ((txn_cnt / txn_time))
+            ret += f % (txn, str(txn_cnt), str(txn_time * 1000000), rate)
+
+            total_time += txn_time
+            total_cnt += txn_cnt
+        ret += "\n" + ("-" * total_width)
+        total_rate = "%.02f txn/s" % ((total_cnt / total_time))
+        ret += f % ("TOTAL", str(total_cnt), str(total_time * 1000000), total_rate)
+
+        # Return a str; encoding to UTF-8 here returned bytes, which broke
+        # __str__ (and any print of a Results object) under Python 3.
+        return ret
+
+
+## CLASS
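+
+
+# Illustrative lifecycle sketch (comments only; runtime/executor.py is the
+# real driver of this class):
+#
+#   r = Results()
+#   r.startBenchmark()
+#   txn_id = r.startTransaction("NEW_ORDER")
+#   ...                        # run the transaction against the database
+#   r.stopTransaction(txn_id)  # or r.abortTransaction(txn_id) on failure
+#   r.stopBenchmark()
+#   print(r.show())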
diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/util/scaleparameters.py b/workloads/chbenchmark/py-tpcc/pytpcc/util/scaleparameters.py
new file mode 100644
index 00000000..76a53028
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/pytpcc/util/scaleparameters.py
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+
+# -----------------------------------------------------------------------
+# Copyright (C) 2011
+# Andy Pavlo
+# http://www.cs.brown.edu/~pavlo/
+#
+# Original Java Version:
+# Copyright (C) 2008
+# Evan Jones
+# Massachusetts Institute of Technology
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+# -----------------------------------------------------------------------
+
+# Top-level module in the pytpcc directory; resolved because the scripts
+# run with that directory as the working directory.
+import constants
+
+
+def makeDefault(warehouses):
+    return ScaleParameters(
+        constants.NUM_ITEMS,
+        warehouses,
+        constants.DISTRICTS_PER_WAREHOUSE,
+        constants.CUSTOMERS_PER_DISTRICT,
+        constants.INITIAL_NEW_ORDERS_PER_DISTRICT,
+    )
+
+
+## DEF
+
+
+def makeWithScaleFactor(warehouses, scaleFactor):
+    assert scaleFactor >= 1.0
+
+    items = int(constants.NUM_ITEMS / scaleFactor)
+    if items <= 0:
+        items = 1
+    # Note that the number of districts per warehouse is never scaled down.
+    districts = int(max(constants.DISTRICTS_PER_WAREHOUSE, 1))
+    customers = int(max(constants.CUSTOMERS_PER_DISTRICT / scaleFactor, 1))
+    newOrders = int(max(constants.INITIAL_NEW_ORDERS_PER_DISTRICT / scaleFactor, 0))
+
+    return ScaleParameters(items, warehouses, districts, customers, newOrders)
+
+
+## DEF
+
+
+class ScaleParameters:
+    def __init__(
+        self,
+        items,
+        warehouses,
+        districtsPerWarehouse,
+        customersPerDistrict,
+        newOrdersPerDistrict,
+    ):
+        assert 1 <= items <= constants.NUM_ITEMS
+        self.items = items
+        assert warehouses > 0
+        self.warehouses = warehouses
+        self.starting_warehouse = 1
+        assert 1 <= districtsPerWarehouse <= constants.DISTRICTS_PER_WAREHOUSE
+        self.districtsPerWarehouse = districtsPerWarehouse
+        assert 1 <= customersPerDistrict <= constants.CUSTOMERS_PER_DISTRICT
+        self.customersPerDistrict = customersPerDistrict
+        assert 0 <= newOrdersPerDistrict <= constants.CUSTOMERS_PER_DISTRICT
+        assert newOrdersPerDistrict <= constants.INITIAL_NEW_ORDERS_PER_DISTRICT
+        self.newOrdersPerDistrict = newOrdersPerDistrict
+        self.ending_warehouse = self.warehouses + self.starting_warehouse - 1
+
+    ## DEF
+
+    def __str__(self):
+        out = "%d items\n" % self.items
+        out += "%d warehouses\n" % self.warehouses
+        out += "%d districts/warehouse\n" % self.districtsPerWarehouse
+        out += "%d customers/district\n" % self.customersPerDistrict
+        out += "%d initial new orders/district" % self.newOrdersPerDistrict
+        return out
+
+    ## DEF
+
+
+## CLASS
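+
+
+# Illustrative sketch (comments only; assumes the stock TPC-C values in
+# constants.py: 100,000 items, 10 districts/warehouse, 3,000
+# customers/district, 900 initial new orders/district). Scaling by 10x
+# keeps every district but shrinks the per-district populations:
+#
+#   from util import scaleparameters
+#   sp = scaleparameters.makeWithScaleFactor(4, 10.0)
+#   print(sp)  # 10000 items, 4 warehouses, 10 districts/warehouse,
+#              # 300 customers/district, 90 initial new orders/district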
diff --git a/workloads/chbenchmark/py-tpcc/pytpcc/worker.py b/workloads/chbenchmark/py-tpcc/pytpcc/worker.py
new file mode 100755
index 00000000..ee20516c
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/pytpcc/worker.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# -----------------------------------------------------------------------
+# Copyright (C) 2011
+# Andy Pavlo & Yang Lu
+# http://www.cs.brown.edu/~pavlo/
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+# -----------------------------------------------------------------------
+
+import sys
+import os
+import string
+import datetime
+import logging
+import re
+import argparse
+import glob
+import time
+import message
+import pickle
+import traceback
+from pprint import pprint, pformat
+
+from util import *
+from runtime import *
+import drivers
+
+
+## ==============================================
+## createDriverClass
+## ==============================================
+def createDriverClass(name):
+    full_name = "%sDriver" % name.title()
+    mod = __import__("drivers.%s" % full_name.lower(), globals(), locals(), [full_name])
+    klass = getattr(mod, full_name)
+    return klass
+
+
+## DEF
+
+
+## ==============================================
+## loaderFunc
+## ==============================================
+def loaderFunc(driverClass, scaleParameters, args, config, w_ids, debug):
+    driver = driverClass(args["ddl"])
+    assert driver is not None
+    logging.debug(
+        "Starting client execution: %s [warehouses=%d]" % (driver, len(w_ids))
+    )
+
+    config["load"] = True
+    config["execute"] = False
+    config["reset"] = False
+    driver.loadConfig(config)
+
+    try:
+        loadItems = 1 in w_ids
+        l = loader.Loader(driver, scaleParameters, w_ids, loadItems)
+        driver.loadStart()
+        l.execute()
+        driver.loadFinish()
+    except KeyboardInterrupt:
+        return -1
+    except (Exception, AssertionError) as ex:
+        logging.warning("Failed to load data: %s" % (ex))
+        # Always print the stack trace so load failures are visible,
+        # regardless of the debug flag.
+        traceback.print_exc(file=sys.stdout)
+        raise
+
+
+## DEF
+
+
+## ==============================================
+## executorFunc
+## ==============================================
+def executorFunc(driverClass, scaleParameters, args, config, debug):
+    driver = driverClass(args["ddl"])
+    assert driver is not None
+    logging.debug("Starting client execution: %s" % driver)
+
+    config["execute"] = True
+    config["reset"] = False
+    driver.loadConfig(config)
+
+    e = executor.Executor(driver, scaleParameters, stop_on_error=args["stop_on_error"])
+    driver.executeStart()
+    results = e.execute(args["duration"])
+    driver.executeFinish()
+
+    return results
+
+
+## DEF
+
+## MAIN
+if __name__ == "__channelexec__":
+    # `channel` is injected by execnet when this file is run via remote_exec.
+    driverClass = None
+    for item in channel:
+        command = pickle.loads(item)
+        if command.header == message.CMD_LOAD:
+            scaleParameters = command.data[0]
+            args = command.data[1]
+            config = command.data[2]
+            w_ids = command.data[3]
+
+            ## Create a handle to the target client driver at the client side
+            driverClass = createDriverClass(args["system"])
+            assert driverClass is not None, "Failed to find '%s' class" % args["system"]
+            driver = driverClass(args["ddl"])
+            assert driver is not None, "Failed to create '%s' driver" % args["system"]
+
+            loaderFunc(driverClass, scaleParameters, args, config, w_ids, True)
+            m = message.Message(header=message.LOAD_COMPLETED)
+            channel.send(pickle.dumps(m, -1))
+
+        elif command.header == message.CMD_EXECUTE:
+            scaleParameters = command.data[0]
+            args = command.data[1]
+            config = command.data[2]
+
+            ## Create a handle to the target client driver at the client side
+            if driverClass is None:
+                driverClass = createDriverClass(args["system"])
+                assert driverClass is not None, "Failed to find '%s' class" % args["system"]
+                driver = driverClass(args["ddl"])
+                assert driver is not None, "Failed to create '%s' driver" % args["system"]
+
+            results = executorFunc(driverClass, scaleParameters, args, config, True)
+            m = message.Message(header=message.EXECUTE_COMPLETED, data=results)
+            channel.send(pickle.dumps(m, -1))
+
+        elif command.header == message.CMD_STOP:
+            pass
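+
+
+# Illustrative sketch of the coordinator side of this channel protocol
+# (comments only; the real logic lives in coordinator.py, and the headers
+# come from message.py). With scaleParameters, args, config, and w_ids
+# prepared as loaderFunc above expects, a coordinator running under execnet
+# would do roughly:
+#
+#   import pickle
+#   import execnet
+#   import message
+#
+#   gw = execnet.makegateway()
+#   channel = gw.remote_exec(open("worker.py").read())
+#   m = message.Message(header=message.CMD_LOAD,
+#                       data=[scaleParameters, args, config, w_ids])
+#   channel.send(pickle.dumps(m, -1))
+#   reply = pickle.loads(channel.receive())
+#   assert reply.header == message.LOAD_COMPLETED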
diff --git a/workloads/chbenchmark/py-tpcc/setup.py b/workloads/chbenchmark/py-tpcc/setup.py
new file mode 100644
index 00000000..8a1779e5
--- /dev/null
+++ b/workloads/chbenchmark/py-tpcc/setup.py
@@ -0,0 +1,27 @@
+from setuptools import setup, find_packages
+
+version = "0.0"
+
+setup(
+    name="py-tpcc",
+    version=version,
+    description="Python implementation of the TPC-C benchmark",
+    long_description="""\
+""",
+    classifiers=[],  # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers
+    keywords="",
+    author="Andy Pavlo",
+    author_email="pavlo@cs.brown.edu",
+    url="http://www.cs.brown.edu/~pavlo/",
+    license="BSD",
+    packages=find_packages(exclude=["ez_setup", "examples", "tests"]),
+    include_package_data=True,
+    zip_safe=False,
+    install_requires=[
+        # -*- Extra requirements: -*-
+    ],
+    entry_points="""
+    # -*- Entry points: -*-
+    """,
+)
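+# Installation sketch (an assumption about intended use, not documented in
+# this file): an editable install from the repository root would be
+#   pip install -e workloads/chbenchmark/py-tpcc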