diff --git a/src/brad/config/planner.py b/src/brad/config/planner.py
index 0017b95a..7771f2c8 100644
--- a/src/brad/config/planner.py
+++ b/src/brad/config/planner.py
@@ -255,15 +255,12 @@ def aurora_scaling_coefs(self) -> npt.NDArray:
     def aurora_txn_coefs(self, schema_name: str) -> Dict[str, float]:
         return self._raw["aurora_txns"][schema_name]
 
-    def aurora_new_scaling_coefs(self) -> npt.NDArray:
-        if self._aurora_new_scaling_coefs is None:
-            coefs = self._raw["aurora_scaling_new"]
-            self._aurora_new_scaling_coefs = np.array([coefs["coef1"], coefs["coef2"]])
-        return self._aurora_new_scaling_coefs
+    def aurora_new_scaling_coefs(self, schema_name: str) -> npt.NDArray:
+        # Built on every call instead of memoized: the values depend on
+        # `schema_name`, and predict_base_latency() scales the result in place.
+        coefs = self._raw["aurora_scaling_new"][schema_name]
+        return np.array([coefs["coef1"], coefs["coef2"]])
 
-    def aurora_new_scaling_alpha(self) -> float:
-        return self._raw["aurora_scaling_new"]["alpha"]
-
     ###
     ### Unified Redshift scaling
     ###
@@ -275,17 +272,12 @@ def redshift_scaling_coefs(self) -> npt.NDArray:
             )
         return self._redshift_scaling_coefs
 
-    def redshift_new_scaling_coefs(self) -> npt.NDArray:
-        if self._redshift_new_scaling_coefs is None:
-            coefs = self._raw["redshift_scaling_new"]
-            self._redshift_new_scaling_coefs = np.array(
-                [coefs["coef1"], coefs["coef2"]]
-            )
-        return self._redshift_new_scaling_coefs
+    def redshift_new_scaling_coefs(self, schema_name: str) -> npt.NDArray:
+        # Built on every call instead of memoized: the values depend on
+        # `schema_name`, and predict_base_latency() scales the result in place.
+        coefs = self._raw["redshift_scaling_new"][schema_name]
+        return np.array([coefs["coef1"], coefs["coef2"]])
 
-    def redshift_new_scaling_alpha(self) -> float:
-        return self._raw["redshift_scaling_new"]["alpha"]
-
     def use_io_optimized_aurora(self) -> bool:
         if "use_io_optimized_aurora" not in self._raw:
             # By default.
diff --git a/src/brad/planner/constants.yml b/src/brad/planner/constants.yml index 87eb9e24..7b65852b 100644 --- a/src/brad/planner/constants.yml +++ b/src/brad/planner/constants.yml @@ -202,20 +202,43 @@ table_extract_bytes_per_row: ### aurora_scaling_new: - # Wait time (from queuing theory) - # alpha * avg_query_time * (u / (1 - u)) + base - alpha: 0.0464553 + imdb_extended_100g: + # Wait time (from queuing theory) + # alpha * avg_query_time * (u / (1 - u)) + base + alpha: 0.0464553 + + # Resources + # [coef1 (s/d) + coef2] * base + coef1: 0.75851053 + coef2: 0.5486482 - # Resources - # [coef1 (s/d) + coef2] * base - coef1: 0.75851053 - coef2: 0.5486482 + imdb_specialized_100g: + alpha: 0.0464553 + coef1: 0.75851053 + coef2: 0.5486482 + + chbenchmark: + # Queries cannot complete in time on Aurora. + alpha: 1.0 + coef1: 0.0 + coef2: 1.0 redshift_scaling_new: # Same model as above. - alpha: 0.730064 - coef1: 0.89125617 - coef2: 0.1139099 + imdb_extended_100g: + alpha: 0.730064 + coef1: 0.89125617 + coef2: 0.1139099 + + imdb_specialized_100g: + alpha: 0.730064 + coef1: 0.89125617 + coef2: 0.1139099 + + chbenchmark: + alpha: 1.0 # Now unused + coef1: 0.16853629 + coef2: 0.61977525 run_time_to_denorm_cpu: aurora: diff --git a/src/brad/planner/scoring/performance/unified_aurora.py b/src/brad/planner/scoring/performance/unified_aurora.py index eedb59ba..474aa033 100644 --- a/src/brad/planner/scoring/performance/unified_aurora.py +++ b/src/brad/planner/scoring/performance/unified_aurora.py @@ -376,7 +376,7 @@ def predict_query_latency_resources_batch( rf = np.array(resource_factors) basis = np.stack([rf, np.ones_like(rf)]) basis = np.transpose(basis) - coefs = ctx.planner_config.aurora_new_scaling_coefs() + coefs = ctx.planner_config.aurora_new_scaling_coefs(ctx.schema_name) coefs = np.multiply(coefs, basis) num_coefs = coefs.shape[1] @@ -467,7 +467,7 @@ def predict_base_latency( return np.ones_like(latency) * np.inf # Ideally we should adjust for load as well. 
resource_factor = _AURORA_BASE_RESOURCE_VALUE / aurora_num_cpus(prov) - coefs = ctx.planner_config.aurora_new_scaling_coefs() + coefs = ctx.planner_config.aurora_new_scaling_coefs(ctx.schema_name) coefs[0] *= resource_factor return latency / coefs.sum() diff --git a/src/brad/planner/scoring/performance/unified_redshift.py b/src/brad/planner/scoring/performance/unified_redshift.py index e509cc9d..2965b172 100644 --- a/src/brad/planner/scoring/performance/unified_redshift.py +++ b/src/brad/planner/scoring/performance/unified_redshift.py @@ -365,7 +365,7 @@ def predict_query_latency_resources_batch( rf = np.array(resource_factors) basis = np.stack([rf, np.ones_like(rf)]) basis = np.transpose(basis) - coefs = ctx.planner_config.redshift_new_scaling_coefs() + coefs = ctx.planner_config.redshift_new_scaling_coefs(ctx.schema_name) coefs = np.multiply(coefs, basis) num_coefs = coefs.shape[1] @@ -415,7 +415,7 @@ def predict_base_latency( resource_factor = _REDSHIFT_BASE_RESOURCE_VALUE / ( redshift_num_cpus(prov) * prov.num_nodes() ) - coefs = ctx.planner_config.redshift_new_scaling_coefs() + coefs = ctx.planner_config.redshift_new_scaling_coefs(ctx.schema_name) coefs[0] *= resource_factor return latency / coefs.sum()