From 68954e773116de8a90bc3bb5608f4e532b980abe Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Sat, 4 May 2024 10:14:53 -0400 Subject: [PATCH] Add table movement enabled scale down experiment and log movement progress (#507) Part of #487. --- .../15-e2e-scenarios-v2/scale_down/COND | 9 + .../scale_down/scale_down_config_tm.yml | 164 ++++++++++++++++++ .../scale_down/set_up_starting_blueprint.py | 3 +- src/brad/config/system_event.py | 6 + src/brad/daemon/transition_orchestrator.py | 35 ++++ 5 files changed, 216 insertions(+), 1 deletion(-) create mode 100644 experiments/15-e2e-scenarios-v2/scale_down/scale_down_config_tm.yml diff --git a/experiments/15-e2e-scenarios-v2/scale_down/COND b/experiments/15-e2e-scenarios-v2/scale_down/COND index 22bdb68a..87694f9e 100644 --- a/experiments/15-e2e-scenarios-v2/scale_down/COND +++ b/experiments/15-e2e-scenarios-v2/scale_down/COND @@ -58,6 +58,15 @@ run_experiment( }, ) +run_experiment( + name="brad_100g_tm", + run="./run_workload.sh", + options={ + "system-config-file": "scale_down_config_tm.yml", + **COMMON_100G_CONFIGS, + }, +) + run_command( name="brad_100g_debug", run="./run_workload_debug.sh", diff --git a/experiments/15-e2e-scenarios-v2/scale_down/scale_down_config_tm.yml b/experiments/15-e2e-scenarios-v2/scale_down/scale_down_config_tm.yml new file mode 100644 index 00000000..2bdf9bfa --- /dev/null +++ b/experiments/15-e2e-scenarios-v2/scale_down/scale_down_config_tm.yml @@ -0,0 +1,164 @@ +# This file contains configurations that are used by BRAD. These are default +# values and should be customized for specific situations. + +# BRAD's front end servers will listen for client connections on this interface +# and port. If `num_front_ends` is greater than one, subsequent front ends will +# listen on successive ports (e.g., 6584, 6585, etc.). +front_end_interface: "0.0.0.0" +front_end_port: 6583 +num_front_ends: 8 + +# Logging paths. 
If the value is in ALL_CAPS (with underscores), it is
# interpreted as an environment variable (BRAD will log to the path stored in
# the environment variable).

# Where BRAD's daemon process will write its logs.
daemon_log_file: COND_OUT

# Where BRAD's front end processes will write their logs.
front_end_log_path: COND_OUT

# Where BRAD's blueprint planner will write debug logs.
planner_log_path: COND_OUT

# Where BRAD's metrics loggers will write their logs.
metrics_log_path: COND_OUT

# Probability that each transactional query will be logged.
txn_log_prob: 0.01

# Set to a non-zero value to enable automatic data syncing. When this is set to 0,
# automatic syncing is disabled.
data_sync_period_seconds: 0

# BRAD's front end servers will report their metrics at regular intervals.
front_end_metrics_reporting_period_seconds: 30
front_end_query_latency_buffer_size: 100

# `default` means to use the policy encoded in the blueprint. Other values will
# override the blueprint.
routing_policy: default

# Whether to disable table movement for benchmark purposes (i.e., keep all
# tables on all engines).
disable_table_movement: false
skip_sync_before_table_movement: true

# Epoch length for metrics and forecasting. This is the granularity at which
# metrics/forecasting will be performed.
epoch_length:
  weeks: 0
  days: 0
  hours: 0
  minutes: 1

# Blueprint planning strategy.
strategy: fp_query_based_beam

# Used to specify the period of time over which to use data for planning.
# Currently, this is a "look behind" window for the workload.
planning_window:
  weeks: 0
  days: 0
  hours: 1
  minutes: 0

# Used to aggregate metrics collected in the planning window.
metrics_agg:
  method: ewm # 'mean' is another option
  alpha: 0.86466472 # 1 - 1 / e^2

# Used during planning.
reinterpret_second_as: 1

# The query distribution must change by at least this much for a new blueprint
# to be accepted. 
+query_dist_change_frac: 0.1 + +# The search bound for the provisioning. +max_provisioning_multiplier: 2.5 + +# Flag options for blueprint planning. +use_io_optimized_aurora: true +use_recorded_routing_if_available: true +ensure_tables_together_on_one_engine: true + +# Loads used to prime the system when no information is available. +aurora_initialize_load_fraction: 0.25 +redshift_initialize_load_fraction: 0.25 + +# BRAD will not reduce predicted load lower than these values. Raise these +# values to be more conservative against mispredictions. +aurora_min_load_removal_fraction: 0.8 +redshift_min_load_removal_fraction: 0.8 + +# Blueprint planning performance ceilings. +query_latency_p90_ceiling_s: 30.0 +txn_latency_p90_ceiling_s: 0.030 + +aurora_provisioning_search_distance: 900.0 +redshift_provisioning_search_distance: 900.0 + +# Used for ordering blueprints during planning. +comparator: + type: benefit_perf_ceiling # or `perf_ceiling` + + benefit_horizon: # Only used by the `benefit_perf_ceiling` comparator + weeks: 0 + days: 0 + hours: 24 + minutes: 0 + + penalty_threshold: 0.8 # Only used by the `benefit_perf_ceiling` comparator + penalty_power: 2 # Only used by the `benefit_perf_ceiling` comparator + +aurora_max_query_factor: 4.0 +aurora_max_query_factor_replace: 10000.0 +redshift_peak_load_threshold: 99.0 +redshift_peak_load_multiplier: 1.5 + +planner_max_workers: 16 + +# Used for precomputed predictions. +std_datasets: + - name: regular + path: workloads/IMDB_100GB/regular_test/ + - name: adhoc + path: workloads/IMDB_100GB/adhoc_test/ + +# Blueprint planning trigger configs. + +triggers: + enabled: true + check_period_s: 90 # Triggers are checked every X seconds. + check_period_offset_s: 360 # Wait 6 mins before starting. + observe_new_blueprint_mins: 3 + + elapsed_time: + disabled: true + multiplier: 60 # Multiplier over `planning_window`. 
+ + redshift_cpu: + lo: 15 + hi: 85 + sustained_epochs: 3 + + aurora_cpu: + lo: 15 + hi: 85 + sustained_epochs: 3 + + variable_costs: + disabled: true + threshold: 1.0 + + query_latency_ceiling: + ceiling_s: 30.0 + sustained_epochs: 3 + + txn_latency_ceiling: + ceiling_s: 0.030 + sustained_epochs: 3 + + recent_change: + delay_epochs: 5 diff --git a/experiments/15-e2e-scenarios-v2/scale_down/set_up_starting_blueprint.py b/experiments/15-e2e-scenarios-v2/scale_down/set_up_starting_blueprint.py index c96eec27..542c6afa 100644 --- a/experiments/15-e2e-scenarios-v2/scale_down/set_up_starting_blueprint.py +++ b/experiments/15-e2e-scenarios-v2/scale_down/set_up_starting_blueprint.py @@ -71,6 +71,7 @@ def main(): help="Comma separated list of indices.", default="99,56,32,92,91,49,30,83,94,38,87,86,76,37,31,46", ) + parser.add_argument("--place-tables-both", action="store_true") args = parser.parse_args() set_up_logging(debug_mode=True) @@ -130,7 +131,7 @@ def main(): new_placement = {} aurora_txn = ["theatres", "showings", "ticket_orders", "movie_info", "aka_title"] for table in blueprint.tables(): - if table.name in aurora_txn: + if args.place_tables_both or table.name in aurora_txn: new_placement[table.name] = [Engine.Aurora, Engine.Redshift] else: new_placement[table.name] = [Engine.Redshift] diff --git a/src/brad/config/system_event.py b/src/brad/config/system_event.py index 68c125f5..71574149 100644 --- a/src/brad/config/system_event.py +++ b/src/brad/config/system_event.py @@ -37,3 +37,9 @@ class SystemEvent(enum.Enum): # Used when a service level objective is changed while BRAD is running (used # for experiments). ChangedSlos = "changed_slos" + + # Used to mark table movement progress. 
+ PreTableMovementStarted = "pre_table_movement_started" + PreTableMovementCompleted = "pre_table_movement_completed" + PostTableMovementStarted = "post_table_movement_started" + PostTableMovementCompleted = "post_table_movement_completed" diff --git a/src/brad/daemon/transition_orchestrator.py b/src/brad/daemon/transition_orchestrator.py index 9c065721..8a94165e 100644 --- a/src/brad/daemon/transition_orchestrator.py +++ b/src/brad/daemon/transition_orchestrator.py @@ -143,6 +143,9 @@ async def run_prepare_then_transition( else: logger.info("Not running table sync before movement.") + if self._system_event_logger is not None: + self._system_event_logger.log(SystemEvent.PreTableMovementStarted) + # 3. Create tables in new locations as needed directory = self._blueprint_mgr.get_directory() @@ -192,6 +195,8 @@ async def run_prepare_then_transition( await asyncio.gather(*table_awaitables) logger.info("Table movement complete.") + if self._system_event_logger is not None: + self._system_event_logger.log(SystemEvent.PreTableMovementCompleted) # Close connections await self._cxns.close() @@ -480,6 +485,11 @@ async def _run_aurora_post_transition( and len(table_diffs) > 0 and self._config.disable_table_movement is False ): + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementStarted, "aurora" + ) + views_to_drop = [] triggers_to_drop = [] tables_to_drop = [] @@ -511,6 +521,11 @@ async def _run_aurora_post_transition( assert self._cxns is not None self._cxns.get_connection(Engine.Aurora).cursor_sync().commit_sync() + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementCompleted, "aurora" + ) + # Change the provisioning. 
if diff is not None: if new.num_nodes() == 0: @@ -611,6 +626,11 @@ async def _run_redshift_post_transition( ) -> None: # Drop removed tables if table_diffs is not None and self._config.disable_table_movement is False: + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementStarted, "redshift" + ) + to_drop = [] for table_diff in table_diffs: if Engine.Redshift in table_diff.removed_locations(): @@ -623,6 +643,11 @@ async def _run_redshift_post_transition( assert self._cxns is not None self._cxns.get_connection(Engine.Redshift).cursor_sync().commit_sync() + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementCompleted, "redshift" + ) + # Pause the cluster if we are transitioning to 0 nodes. if diff is not None: if diff.new_num_nodes() == 0: @@ -642,6 +667,11 @@ async def _run_athena_post_transition( and self._config.disable_table_movement is False and self._config.skip_athena_table_deletion is False ): + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementStarted, "athena" + ) + for table_diff in table_diffs: if Engine.Athena in table_diff.removed_locations(): to_drop.append(table_diff.table_name()) @@ -650,6 +680,11 @@ async def _run_athena_post_transition( ctx = self._new_execution_context() await d.execute(ctx) + if self._system_event_logger is not None: + self._system_event_logger.log( + SystemEvent.PostTableMovementCompleted, "athena" + ) + async def _enforce_table_diff_additions(self, diff: TableDiff) -> None: # Unload table to S3 table_name = diff.table_name()