Skip to content

Commit

Permalink
Zipfian movie/theater IDs (#413)
Browse files Browse the repository at this point in the history
* Optionally pick zipfian movie and theater ids

* Compare zipfian IDs to bounds

* Add command line flags

* Use store_true action

---------

Co-authored-by: Geoffrey Yu <[email protected]>
  • Loading branch information
mmarkakis and geoffxy authored Dec 19, 2023
1 parent c1b823e commit 9474a89
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 5 deletions.
20 changes: 19 additions & 1 deletion workloads/IMDB_extended/run_transactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,12 @@ def noop_handler(_signal, _frame):
signal.signal(signal.SIGINT, noop_handler)

worker = TransactionWorker(
worker_idx, args.seed ^ worker_idx, args.scale_factor, args.dataset_type
worker_idx,
args.seed ^ worker_idx,
args.scale_factor,
args.dataset_type,
args.use_zipfian_ids,
args.zipfian_alpha,
)

txn_prng = random.Random(~(args.seed ^ worker_idx))
Expand Down Expand Up @@ -281,6 +286,19 @@ def main():
help="This controls the range of reads the transaction worker performs, "
"depending on the dataset size.",
)
parser.add_argument(
"--use-zipfian-ids",
action="store_true",
help="Whether the transaction worker should draw movie and theatre IDs "
"from a Zipfian distribution.",
)
parser.add_argument(
"--zipfian-alpha",
type=float,
default=1.1,
help="The alpha parameter for the Zipfian distribution. Only used if "
"--use-zipfian-ids is `True`. Must be strictly greater than 1. ",
)
# These three arguments are used for the day long experiment.
parser.add_argument(
"--num-client-path",
Expand Down
24 changes: 20 additions & 4 deletions workloads/IMDB_extended/workload_utils/transaction_worker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import random
import numpy as np
import logging
from datetime import datetime, timedelta
from typing import List, Tuple, Any
Expand Down Expand Up @@ -26,9 +27,15 @@ def __init__(
seed: int,
scale_factor: int,
dataset_type: str = "original",
use_zipfian_ids: bool = False,
zipfian_alpha: float = 1.1,
) -> None:
self.worker_id = worker_id
self.prng = random.Random(seed)
self.use_zipfian_ids = use_zipfian_ids
if use_zipfian_ids:
self.zprng = np.random.default_rng(seed)
self.zipfian_alpha = zipfian_alpha

self.min_movie_id = MIN_MOVIE_ID
if dataset_type == "original":
Expand All @@ -50,6 +57,15 @@ def __init__(
self.loc_max = 1e6
self.showing_years = 2

def _sample_id(self, min_id: int, max_id: int) -> int:
    """
    Draws an ID from the inclusive range [`min_id`, `max_id`].

    When `self.use_zipfian_ids` is set, the ID is `min_id` plus a zero-based
    Zipfian rank drawn from `self.zprng` with parameter `self.zipfian_alpha`,
    so IDs close to `min_id` are selected most often. Otherwise the ID is
    drawn uniformly from the range using `self.prng`.

    A Zipfian sample can land past `max_id`, so the result is clamped to
    `max_id`. The returned value is always a built-in `int`, regardless of
    which generator produced it.
    """
    if self.use_zipfian_ids:
        # `zipf()` produces positive integers (starting at 1); shift the
        # sample so that the most likely value maps onto `min_id`.
        sampled = self.zprng.zipf(self.zipfian_alpha) - 1 + min_id
    else:
        sampled = self.prng.randint(min_id, max_id)

    # NumPy hands back its own integer scalar type. Convert it so that both
    # sampling modes return the same type and honor the `-> int` annotation.
    return int(min(max_id, sampled))

def edit_movie_note(self, db: Database) -> bool:
"""
Represents editing the "misc info" for a specific movie.
Expand All @@ -64,7 +80,7 @@ def edit_movie_note(self, db: Database) -> bool:
"""

# 1. Select a random movie id.
movie_id = self.prng.randint(self.min_movie_id, self.max_movie_id)
movie_id = self._sample_id(self.min_movie_id, self.max_movie_id)

try:
# Start the transaction.
Expand Down Expand Up @@ -114,10 +130,10 @@ def add_new_showing(self, db: Database) -> bool:
- Insert into showing
"""
# 1. Select a random theatre id.
theatre_id = self.prng.randint(self.min_theatre_id, self.max_theatre_id)
theatre_id = self._sample_id(self.min_theatre_id, self.max_theatre_id)

# 2. Select a random movie id.
movie_id = self.prng.randint(self.min_movie_id, self.max_movie_id)
movie_id = self._sample_id(self.min_movie_id, self.max_movie_id)

showings_to_add = self.prng.randint(*self.showings_to_add)

Expand Down Expand Up @@ -167,7 +183,7 @@ def purchase_tickets(self, db: Database, select_using_name: bool) -> bool:
"""

# 1. Select a random theatre number.
theatre_num = self.prng.randint(self.min_theatre_id, self.max_theatre_id)
theatre_num = self._sample_id(self.min_theatre_id, self.max_theatre_id)

try:
# Start the transaction.
Expand Down

0 comments on commit 9474a89

Please sign in to comment.