Skip to content

Commit

Permalink
Check in hardware/load model constants for CH-BenCHmark
Browse files (browse the repository at this point in the history)
  • Loading branch information
geoffxy committed May 7, 2024
1 parent d8b5404 commit cf35b38
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 24 deletions.
14 changes: 4 additions & 10 deletions src/brad/config/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,15 +255,12 @@ def aurora_scaling_coefs(self) -> npt.NDArray:
def aurora_txn_coefs(self, schema_name: str) -> Dict[str, float]:
return self._raw["aurora_txns"][schema_name]

def aurora_new_scaling_coefs(self) -> npt.NDArray:
def aurora_new_scaling_coefs(self, schema_name: str) -> npt.NDArray:
if self._aurora_new_scaling_coefs is None:
coefs = self._raw["aurora_scaling_new"]
coefs = self._raw["aurora_scaling_new"][schema_name]
self._aurora_new_scaling_coefs = np.array([coefs["coef1"], coefs["coef2"]])
return self._aurora_new_scaling_coefs

def aurora_new_scaling_alpha(self) -> float:
return self._raw["aurora_scaling_new"]["alpha"]

###
### Unified Redshift scaling
###
Expand All @@ -275,17 +272,14 @@ def redshift_scaling_coefs(self) -> npt.NDArray:
)
return self._redshift_scaling_coefs

def redshift_new_scaling_coefs(self) -> npt.NDArray:
def redshift_new_scaling_coefs(self, schema_name: str) -> npt.NDArray:
if self._redshift_new_scaling_coefs is None:
coefs = self._raw["redshift_scaling_new"]
coefs = self._raw["redshift_scaling_new"][schema_name]
self._redshift_new_scaling_coefs = np.array(
[coefs["coef1"], coefs["coef2"]]
)
return self._redshift_new_scaling_coefs

def redshift_new_scaling_alpha(self) -> float:
return self._raw["redshift_scaling_new"]["alpha"]

def use_io_optimized_aurora(self) -> bool:
if "use_io_optimized_aurora" not in self._raw:
# By default.
Expand Down
43 changes: 33 additions & 10 deletions src/brad/planner/constants.yml
Original file line number Diff line number Diff line change
Expand Up @@ -202,20 +202,43 @@ table_extract_bytes_per_row:
###

aurora_scaling_new:
# Wait time (from queuing theory)
# alpha * avg_query_time * (u / (1 - u)) + base
alpha: 0.0464553
imdb_extended_100g:
# Wait time (from queuing theory)
# alpha * avg_query_time * (u / (1 - u)) + base
alpha: 0.0464553

# Resources
# [coef1 (s/d) + coef2] * base
coef1: 0.75851053
coef2: 0.5486482

# Resources
# [coef1 (s/d) + coef2] * base
coef1: 0.75851053
coef2: 0.5486482
imdb_specialized_100g:
alpha: 0.0464553
coef1: 0.75851053
coef2: 0.5486482

chbenchmark:
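# Queries cannot complete in time on Aurora, so identity values are used:
# coef1 = 0 and coef2 = 1 make [coef1 (s/d) + coef2] * base equal to base.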
alpha: 1.0
coef1: 0.0
coef2: 1.0

redshift_scaling_new:
# Same model as above.
alpha: 0.730064
coef1: 0.89125617
coef2: 0.1139099
imdb_extended_100g:
alpha: 0.730064
coef1: 0.89125617
coef2: 0.1139099

imdb_specialized_100g:
alpha: 0.730064
coef1: 0.89125617
coef2: 0.1139099

chbenchmark:
alpha: 1.0 # Now unused
coef1: 0.16853629
coef2: 0.61977525

run_time_to_denorm_cpu:
aurora:
Expand Down
4 changes: 2 additions & 2 deletions src/brad/planner/scoring/performance/unified_aurora.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def predict_query_latency_resources_batch(
rf = np.array(resource_factors)
basis = np.stack([rf, np.ones_like(rf)])
basis = np.transpose(basis)
coefs = ctx.planner_config.aurora_new_scaling_coefs()
coefs = ctx.planner_config.aurora_new_scaling_coefs(ctx.schema_name)
coefs = np.multiply(coefs, basis)

num_coefs = coefs.shape[1]
Expand Down Expand Up @@ -467,7 +467,7 @@ def predict_base_latency(
return np.ones_like(latency) * np.inf
# Ideally we should adjust for load as well.
resource_factor = _AURORA_BASE_RESOURCE_VALUE / aurora_num_cpus(prov)
coefs = ctx.planner_config.aurora_new_scaling_coefs()
coefs = ctx.planner_config.aurora_new_scaling_coefs(ctx.schema_name)
coefs[0] *= resource_factor
return latency / coefs.sum()

Expand Down
4 changes: 2 additions & 2 deletions src/brad/planner/scoring/performance/unified_redshift.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ def predict_query_latency_resources_batch(
rf = np.array(resource_factors)
basis = np.stack([rf, np.ones_like(rf)])
basis = np.transpose(basis)
coefs = ctx.planner_config.redshift_new_scaling_coefs()
coefs = ctx.planner_config.redshift_new_scaling_coefs(ctx.schema_name)
coefs = np.multiply(coefs, basis)

num_coefs = coefs.shape[1]
Expand Down Expand Up @@ -415,7 +415,7 @@ def predict_base_latency(
resource_factor = _REDSHIFT_BASE_RESOURCE_VALUE / (
redshift_num_cpus(prov) * prov.num_nodes()
)
coefs = ctx.planner_config.redshift_new_scaling_coefs()
coefs = ctx.planner_config.redshift_new_scaling_coefs(ctx.schema_name)
coefs[0] *= resource_factor
return latency / coefs.sum()

Expand Down

0 comments on commit cf35b38

Please sign in to comment.