Check in hardware/load model constants for CH-BenCHmark

mitdbg · May 7, 2024 · cf35b38
1 parent d8b5404
commit cf35b38
Show file tree

Hide file tree

Showing 4 changed files with 41 additions and 24 deletions.
diff --git a/src/brad/config/planner.py b/src/brad/config/planner.py
@@ -255,15 +255,12 @@ def aurora_scaling_coefs(self) -> npt.NDArray:
     def aurora_txn_coefs(self, schema_name: str) -> Dict[str, float]:
         return self._raw["aurora_txns"][schema_name]
 
-    def aurora_new_scaling_coefs(self) -> npt.NDArray:
+    def aurora_new_scaling_coefs(self, schema_name: str) -> npt.NDArray:
         if self._aurora_new_scaling_coefs is None:
-            coefs = self._raw["aurora_scaling_new"]
+            coefs = self._raw["aurora_scaling_new"][schema_name]
             self._aurora_new_scaling_coefs = np.array([coefs["coef1"], coefs["coef2"]])
         return self._aurora_new_scaling_coefs
 
-    def aurora_new_scaling_alpha(self) -> float:
-        return self._raw["aurora_scaling_new"]["alpha"]
-
     ###
     ### Unified Redshift scaling
     ###
@@ -275,17 +272,14 @@ def redshift_scaling_coefs(self) -> npt.NDArray:
             )
         return self._redshift_scaling_coefs
 
-    def redshift_new_scaling_coefs(self) -> npt.NDArray:
+    def redshift_new_scaling_coefs(self, schema_name: str) -> npt.NDArray:
         if self._redshift_new_scaling_coefs is None:
-            coefs = self._raw["redshift_scaling_new"]
+            coefs = self._raw["redshift_scaling_new"][schema_name]
             self._redshift_new_scaling_coefs = np.array(
                 [coefs["coef1"], coefs["coef2"]]
             )
         return self._redshift_new_scaling_coefs
 
-    def redshift_new_scaling_alpha(self) -> float:
-        return self._raw["redshift_scaling_new"]["alpha"]
-
     def use_io_optimized_aurora(self) -> bool:
         if "use_io_optimized_aurora" not in self._raw:
             # By default.

diff --git a/src/brad/planner/constants.yml b/src/brad/planner/constants.yml
@@ -202,20 +202,43 @@ table_extract_bytes_per_row:
 ###
 
 aurora_scaling_new:
-  # Wait time (from queuing theory)
-  # alpha * avg_query_time * (u / (1 - u)) + base
-  alpha: 0.0464553
+  imdb_extended_100g:
+    # Wait time (from queuing theory)
+    # alpha * avg_query_time * (u / (1 - u)) + base
+    alpha: 0.0464553
+
+    # Resources
+    # [coef1 (s/d) + coef2] * base
+    coef1: 0.75851053
+    coef2: 0.5486482
 
-  # Resources
-  # [coef1 (s/d) + coef2] * base
-  coef1: 0.75851053
-  coef2: 0.5486482
+  imdb_specialized_100g:
+    alpha: 0.0464553
+    coef1: 0.75851053
+    coef2: 0.5486482
+
+  chbenchmark:
+    # Queries cannot complete in time on Aurora.
+    alpha: 1.0
+    coef1: 0.0
+    coef2: 1.0
 
 redshift_scaling_new:
   # Same model as above.
-  alpha: 0.730064
-  coef1: 0.89125617
-  coef2: 0.1139099
+  imdb_extended_100g:
+    alpha: 0.730064
+    coef1: 0.89125617
+    coef2: 0.1139099
+
+  imdb_specialized_100g:
+    alpha: 0.730064
+    coef1: 0.89125617
+    coef2: 0.1139099
+
+  chbenchmark:
+    alpha: 1.0  # Now unused
+    coef1: 0.16853629
+    coef2: 0.61977525
 
 run_time_to_denorm_cpu:
   aurora:

diff --git a/src/brad/planner/scoring/performance/unified_aurora.py b/src/brad/planner/scoring/performance/unified_aurora.py
@@ -376,7 +376,7 @@ def predict_query_latency_resources_batch(
         rf = np.array(resource_factors)
         basis = np.stack([rf, np.ones_like(rf)])
         basis = np.transpose(basis)
-        coefs = ctx.planner_config.aurora_new_scaling_coefs()
+        coefs = ctx.planner_config.aurora_new_scaling_coefs(ctx.schema_name)
         coefs = np.multiply(coefs, basis)
 
         num_coefs = coefs.shape[1]
@@ -467,7 +467,7 @@ def predict_base_latency(
             return np.ones_like(latency) * np.inf
         # Ideally we should adjust for load as well.
         resource_factor = _AURORA_BASE_RESOURCE_VALUE / aurora_num_cpus(prov)
-        coefs = ctx.planner_config.aurora_new_scaling_coefs()
+        coefs = ctx.planner_config.aurora_new_scaling_coefs(ctx.schema_name)
         coefs[0] *= resource_factor
         return latency / coefs.sum()
 

diff --git a/src/brad/planner/scoring/performance/unified_redshift.py b/src/brad/planner/scoring/performance/unified_redshift.py
@@ -365,7 +365,7 @@ def predict_query_latency_resources_batch(
         rf = np.array(resource_factors)
         basis = np.stack([rf, np.ones_like(rf)])
         basis = np.transpose(basis)
-        coefs = ctx.planner_config.redshift_new_scaling_coefs()
+        coefs = ctx.planner_config.redshift_new_scaling_coefs(ctx.schema_name)
         coefs = np.multiply(coefs, basis)
 
         num_coefs = coefs.shape[1]
@@ -415,7 +415,7 @@ def predict_base_latency(
         resource_factor = _REDSHIFT_BASE_RESOURCE_VALUE / (
             redshift_num_cpus(prov) * prov.num_nodes()
         )
-        coefs = ctx.planner_config.redshift_new_scaling_coefs()
+        coefs = ctx.planner_config.redshift_new_scaling_coefs(ctx.schema_name)
         coefs[0] *= resource_factor
         return latency / coefs.sum()