From 9474a89a42da42e6c19e0241737010a3aaa4b8cc Mon Sep 17 00:00:00 2001 From: Markos Markakis Date: Tue, 19 Dec 2023 19:24:43 +0200 Subject: [PATCH] Zipfian movie/theater IDs (#413) * Optionally pick zipfian movie and theater ids * Compare zipfian IDs to bounds * Add command line flags * Use store_true action --------- Co-authored-by: Geoffrey Yu --- workloads/IMDB_extended/run_transactions.py | 20 +++++++++++++++- .../workload_utils/transaction_worker.py | 24 +++++++++++++++---- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/workloads/IMDB_extended/run_transactions.py b/workloads/IMDB_extended/run_transactions.py index fcf2e8f9..2bc49621 100644 --- a/workloads/IMDB_extended/run_transactions.py +++ b/workloads/IMDB_extended/run_transactions.py @@ -42,7 +42,12 @@ def noop_handler(_signal, _frame): signal.signal(signal.SIGINT, noop_handler) worker = TransactionWorker( - worker_idx, args.seed ^ worker_idx, args.scale_factor, args.dataset_type + worker_idx, + args.seed ^ worker_idx, + args.scale_factor, + args.dataset_type, + args.use_zipfian_ids, + args.zipfian_alpha, ) txn_prng = random.Random(~(args.seed ^ worker_idx)) @@ -281,6 +286,19 @@ def main(): help="This controls the range of reads the transaction worker performs, " "depending on the dataset size.", ) + parser.add_argument( + "--use-zipfian-ids", + action="store_true", + help="Whether the transaction worker should draw movie and theatre IDs " + "from a Zipfian distribution.", + ) + parser.add_argument( + "--zipfian-alpha", + type=float, + default=1.1, + help="The alpha parameter for the Zipfian distribution. Only used if " + "--use-zipfian-ids is `True`. Must be strictly greater than 1. ", + ) # These three arguments are used for the day long experiment. parser.add_argument( "--num-client-path", diff --git a/workloads/IMDB_extended/workload_utils/transaction_worker.py b/workloads/IMDB_extended/workload_utils/transaction_worker.py index 7a202652..d73a5393 100644 --- a/workloads/IMDB_extended/workload_utils/transaction_worker.py +++ b/workloads/IMDB_extended/workload_utils/transaction_worker.py @@ -1,4 +1,5 @@ import random +import numpy as np import logging from datetime import datetime, timedelta from typing import List, Tuple, Any @@ -26,9 +27,15 @@ def __init__( seed: int, scale_factor: int, dataset_type: str = "original", + use_zipfian_ids: bool = False, + zipfian_alpha: float = 1.1, ) -> None: self.worker_id = worker_id self.prng = random.Random(seed) + self.use_zipfian_ids = use_zipfian_ids + if use_zipfian_ids: + self.zprng = np.random.default_rng(seed) + self.zipfian_alpha = zipfian_alpha self.min_movie_id = MIN_MOVIE_ID if dataset_type == "original": @@ -50,6 +57,15 @@ def __init__( self.loc_max = 1e6 self.showing_years = 2 + def _sample_id(self, min_id: int, max_id: int) -> int: + sampled = ( + self.zprng.zipf(self.zipfian_alpha) - 1 + min_id + if self.use_zipfian_ids + else self.prng.randint(min_id, max_id) + ) + + return min(max_id, sampled) + def edit_movie_note(self, db: Database) -> bool: """ Represents editing the "misc info" for a specific movie. @@ -64,7 +80,7 @@ def edit_movie_note(self, db: Database) -> bool: """ # 1. Select a random movie id. - movie_id = self.prng.randint(self.min_movie_id, self.max_movie_id) + movie_id = self._sample_id(self.min_movie_id, self.max_movie_id) try: # Start the transaction. @@ -114,10 +130,10 @@ def add_new_showing(self, db: Database) -> bool: - Insert into showing """ # 1. Select a random theatre id. - theatre_id = self.prng.randint(self.min_theatre_id, self.max_theatre_id) + theatre_id = self._sample_id(self.min_theatre_id, self.max_theatre_id) # 2. Select a random movie id. - movie_id = self.prng.randint(self.min_movie_id, self.max_movie_id) + movie_id = self._sample_id(self.min_movie_id, self.max_movie_id) showings_to_add = self.prng.randint(*self.showings_to_add) @@ -167,7 +183,7 @@ def purchase_tickets(self, db: Database, select_using_name: bool) -> bool: """ # 1. Select a random theatre number. - theatre_num = self.prng.randint(self.min_theatre_id, self.max_theatre_id) + theatre_num = self._sample_id(self.min_theatre_id, self.max_theatre_id) try: # Start the transaction.