diff --git a/config/system_config_demo.yml b/config/system_config_demo.yml new file mode 100644 index 00000000..720a4d7f --- /dev/null +++ b/config/system_config_demo.yml @@ -0,0 +1,169 @@ +# This file contains configurations that are used by BRAD. These are default +# values and should be customized for specific situations. + +# BRAD's front end servers will listen for client connections on this interface +# and port. If `num_front_ends` is greater than one, subsequent front ends will +# listen on successive ports (e.g., 6584, 6585, etc.). +front_end_interface: "0.0.0.0" +front_end_port: 6583 +num_front_ends: 1 + +# If installed and enabled, BRAD will serve its UI from a webserver that listens +# for connections on this network interface and port. +ui_interface: "0.0.0.0" +ui_port: 7583 + +# Logging paths. If the value is in ALL_CAPS (with underscores), it is +# interpreted as an environment variable (BRAD will log to the path stored in +# the environment variable). + +# Where BRAD's daemon process will write its logs. +daemon_log_file: /tmp + +# Where BRAD's front end processes will write their logs. +front_end_log_path: /tmp + +# Where BRAD's blueprint planner will write debug logs. +planner_log_path: /tmp + +# Where BRAD's metrics loggers will write their logs. +metrics_log_path: /tmp + +# Probability that each transactional query will be logged. +txn_log_prob: 0.01 + +# Set to a non-zero value enable automatic data syncing. When this is set to 0, +# automatic syncing is disabled. +data_sync_period_seconds: 0 + +# BRAD's front end servers will report their metrics at regular intervals. +front_end_metrics_reporting_period_seconds: 30 +front_end_query_latency_buffer_size: 100 + +# `default` means to use the policy encoded in the blueprint. Other values will +# override the blueprint. +routing_policy: default + +# Whether to disable table movement for benchmark purposes (i.e., keep all +# tables on all engines.) +disable_table_movement: true + +# Epoch length for metrics and forecasting. This is the granularity at which +# metrics/forecasting will be performed. +epoch_length: + weeks: 0 + days: 0 + hours: 0 + minutes: 1 + +# Blueprint planning strategy. +strategy: fp_query_based_beam + +# Used to specify the period of time over which to use data for planning. +# Currrently, this is a "look behind" window for the workload. +planning_window: + weeks: 0 + days: 0 + hours: 1 + minutes: 0 + +# Used to aggregate metrics collected in the planning window. +metrics_agg: + method: ewm # 'mean' is another option + alpha: 0.86466472 # 1 - 1 / e^2 + +# Used during planning. +reinterpret_second_as: 1 + +# The query distribution must change by at least this much for a new blueprint +# to be accepted. +query_dist_change_frac: 0.1 + +# The search bound for the provisioning. +max_provisioning_multiplier: 2.5 + +# Flag options for blueprint planning. +use_io_optimized_aurora: true +use_recorded_routing_if_available: true +ensure_tables_together_on_one_engine: true + +# Loads used to prime the system when no information is available. +aurora_initialize_load_fraction: 0.25 +redshift_initialize_load_fraction: 0.25 + +# BRAD will not reduce predicted load lower than these values. Raise these +# values to be more conservative against mispredictions. +aurora_min_load_removal_fraction: 0.8 +redshift_min_load_removal_fraction: 0.8 + +# Blueprint planning performance ceilings. +query_latency_p90_ceiling_s: 30.0 +txn_latency_p90_ceiling_s: 0.030 + +# If set to true, BRAD will attempt to use the specified preset Redshift +# clusters instead of resizing the main Redshift cluster. +use_preset_redshift_clusters: false + +# Used for ordering blueprints during planning. +comparator: + type: benefit_perf_ceiling # or `perf_ceiling` + + benefit_horizon: # Only used by the `benefit_perf_ceiling` comparator + weeks: 0 + days: 0 + hours: 1 + minutes: 0 + + penalty_threshold: 0.8 # Only used by the `benefit_perf_ceiling` comparator + penalty_power: 8 # Only used by the `benefit_perf_ceiling` comparator + +# Used for precomputed predictions. +std_datasets: + - name: regular + path: workloads/IMDB_100GB/regular_test/ + - name: adhoc + path: workloads/IMDB_100GB/adhoc_test/ + +bootstrap_vdbe_path: config/vdbe_demo/imdb_extended_vdbes.json + +# Blueprint planning trigger configs. + +triggers: + enabled: false + check_period_s: 90 # Triggers are checked every X seconds. + check_period_offset_s: 360 # Wait 6 mins before starting. + + # Triggers will not fire for at least this many minutes after a new blueprint + # takes effect. Usually this should be greater than zero to give BRAD + # sufficient time to observe the effect of the blueprint on the workload. BRAD + # may wait longer to ensure metrics are also available for this many minutes. + observe_new_blueprint_mins: 3 + + elapsed_time: + disabled: true + multiplier: 60 # Multiplier over `planning_window`. + + redshift_cpu: + lo: 15 + hi: 85 + sustained_epochs: 3 + + aurora_cpu: + lo: 15 + hi: 85 + sustained_epochs: 3 + + variable_costs: + disabled: true + threshold: 1.0 + + query_latency_ceiling: + ceiling_s: 30.0 + sustained_epochs: 3 + + txn_latency_ceiling: + ceiling_s: 0.030 + sustained_epochs: 3 + + recent_change: + delay_epochs: 5 diff --git a/config/vdbe_demo/imdb_extended_vdbes.json b/config/vdbe_demo/imdb_extended_vdbes.json new file mode 100644 index 00000000..f37a78cc --- /dev/null +++ b/config/vdbe_demo/imdb_extended_vdbes.json @@ -0,0 +1,220 @@ +{ + "schema_name": "imdb_extended", + "engines": [ + { + "name": "VDBE (T)", + "max_staleness_ms": 0, + "p90_latency_slo_ms": 30, + "interface": "postgresql", + "tables": [ + { + "name": "theatres", + "writable": true + }, + { + "name": "showings", + "writable": true + }, + { + "name": "ticket_orders", + "writable": true + }, + { + "name": "movie_info", + "writable": true + }, + { + "name": "aka_title", + "writable": true + } + ], + "mapped_to": "aurora" + }, + { + "name": "VDBE (A)", + "max_staleness_ms": 3600000, + "p90_latency_slo_ms": 30000, + "interface": "postgresql", + "tables": [ + { + "name": "homes", + "writable": false + }, + { + "name": "theatres", + "writable": false + }, + { + "name": "showings", + "writable": false + }, + { + "name": "ticket_orders", + "writable": false + }, + { + "name": "aka_name", + "writable": false + }, + { + "name": "aka_title", + "writable": false + }, + { + "name": "cast_info", + "writable": false + }, + { + "name": "char_name", + "writable": false + }, + { + "name": "comp_cast_type", + "writable": false + }, + { + "name": "company_name", + "writable": false + }, + { + "name": "company_type", + "writable": false + }, + { + "name": "complete_cast", + "writable": false + }, + { + "name": "info_type", + "writable": false + }, + { + "name": "keyword", + "writable": false + }, + { + "name": "kind_type", + "writable": false + }, + { + "name": "link_type", + "writable": false + }, + { + "name": "movie_companies", + "writable": false + }, + { + "name": "movie_info_idx", + "writable": false + }, + { + "name": "movie_keyword", + "writable": false + }, + { + "name": "movie_link", + "writable": false + }, + { + "name": "name", + "writable": false + }, + { + "name": "role_type", + "writable": false + }, + { + "name": "title", + "writable": false + }, + { + "name": "movie_info", + "writable": false + }, + { + "name": "person_info", + "writable": false + } + ], + "mapped_to": "redshift" + } + ], + "tables": [ + { + "name": "homes" + }, + { + "name": "theatres" + }, + { + "name": "showings" + }, + { + "name": "ticket_orders" + }, + { + "name": "aka_name" + }, + { + "name": "aka_title" + }, + { + "name": "cast_info" + }, + { + "name": "char_name" + }, + { + "name": "comp_cast_type" + }, + { + "name": "company_name" + }, + { + "name": "company_type" + }, + { + "name": "complete_cast" + }, + { + "name": "info_type" + }, + { + "name": "keyword" + }, + { + "name": "kind_type" + }, + { + "name": "link_type" + }, + { + "name": "movie_companies" + }, + { + "name": "movie_info_idx" + }, + { + "name": "movie_keyword" + }, + { + "name": "movie_link" + }, + { + "name": "name" + }, + { + "name": "role_type" + }, + { + "name": "title" + }, + { + "name": "movie_info" + }, + { + "name": "person_info" + } + ] +} diff --git a/setup.py b/setup.py index c321b6d7..34653397 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ "ddsketch", "tqdm", "psycopg[binary]", + "pydantic", ] DEV_REQUIRES = [ @@ -68,7 +69,6 @@ UI_REQUIRES = [ "fastapi", "uvicorn[standard]", - "pydantic", "requests", "types-requests", ] diff --git a/src/brad/admin/modify_blueprint.py b/src/brad/admin/modify_blueprint.py index 64ca6c75..2e3db4c8 100644 --- a/src/brad/admin/modify_blueprint.py +++ b/src/brad/admin/modify_blueprint.py @@ -257,6 +257,8 @@ def modify_blueprint(args) -> None: blueprint = blueprint_mgr.get_blueprint() if args.fetch_only: + active_version = blueprint_mgr.get_active_blueprint_version() + print(f"Active version: {active_version}") print(blueprint) return diff --git a/src/brad/config/file.py b/src/brad/config/file.py index fe781c23..7c26a0cf 100644 --- a/src/brad/config/file.py +++ b/src/brad/config/file.py @@ -294,6 +294,12 @@ def result_row_limit(self) -> Optional[int]: except KeyError: return None + def bootstrap_vdbe_path(self) -> Optional[pathlib.Path]: + try: + return pathlib.Path(self._raw["bootstrap_vdbe_path"]) + except KeyError: + return None + def _extract_log_path(self, config_key: str) -> Optional[pathlib.Path]: if config_key not in self._raw: return None diff --git a/src/brad/daemon/daemon.py b/src/brad/daemon/daemon.py index 56045c7b..1c37d947 100644 --- a/src/brad/daemon/daemon.py +++ b/src/brad/daemon/daemon.py @@ -70,6 +70,7 @@ from brad.row_list import RowList from brad.utils.time_periods import period_start, universal_now from brad.ui.manager import UiManager +from brad.vdbe.manager import VdbeManager logger = logging.getLogger(__name__) @@ -128,17 +129,30 @@ def __init__( self._system_event_logger = SystemEventLogger.create_if_requested(self._config) self._watchdog = BlueprintWatchdog(self._system_event_logger) + load_vdbe_path = self._config.bootstrap_vdbe_path() + if load_vdbe_path is not None: + self._vdbe_manager: Optional[VdbeManager] = VdbeManager.load_from( + load_vdbe_path + ) + else: + self._vdbe_manager = None + # This is used to hold references to internal command tasks we create. # https://docs.python.org/3/library/asyncio-task.html#asyncio.create_task self._internal_command_tasks: Set[asyncio.Task] = set() self._startup_timestamp = universal_now() - if self._start_ui and UiManager.is_supported(): + if ( + self._start_ui + and UiManager.is_supported() + and self._vdbe_manager is not None + ): self._ui_mgr: Optional[UiManager] = UiManager.create( self._config, self._monitor, self._blueprint_mgr, + self._vdbe_manager, self._system_event_logger, ) else: @@ -148,6 +162,10 @@ def __init__( "Cannot start the BRAD UI because it is not supported. " "Please make sure you install BRAD with the [ui] option." ) + if self._vdbe_manager is None: + logger.warning( + "Cannot start the BRAD UI because a VDBE definition is not available." + ) async def run_forever(self) -> None: """ diff --git a/src/brad/ui/manager.py b/src/brad/ui/manager.py index c9ec9ade..dfafda31 100644 --- a/src/brad/ui/manager.py +++ b/src/brad/ui/manager.py @@ -5,6 +5,7 @@ from brad.blueprint.manager import BlueprintManager from brad.daemon.system_event_logger import SystemEventLogger from brad.planner.abstract import BlueprintPlanner +from brad.vdbe.manager import VdbeManager class UiManager: @@ -33,11 +34,14 @@ def create( config: ConfigFile, monitor: Monitor, blueprint_mgr: BlueprintManager, + vdbe_mgr: VdbeManager, system_event_logger: Optional[SystemEventLogger], ) -> "UiManager": from brad.ui.manager_impl import UiManagerImpl - return cls(UiManagerImpl(config, monitor, blueprint_mgr, system_event_logger)) + return cls( + UiManagerImpl(config, monitor, blueprint_mgr, vdbe_mgr, system_event_logger) + ) # We hide away the implementation details to allow external code to import # `UiManager` without worrying about import errors (e.g., because the diff --git a/src/brad/ui/manager_impl.py b/src/brad/ui/manager_impl.py index 760fce03..b3854a29 100644 --- a/src/brad/ui/manager_impl.py +++ b/src/brad/ui/manager_impl.py @@ -22,15 +22,13 @@ TimestampedMetrics, DisplayableBlueprint, SystemState, - DisplayableVirtualEngine, - VirtualInfrastructure, - DisplayableTable, Status, ClientState, SetClientState, ) from brad.daemon.front_end_metrics import FrontEndMetric from brad.daemon.system_event_logger import SystemEventLogger, SystemEventRecord +from brad.vdbe.manager import VdbeManager logger = logging.getLogger(__name__) @@ -41,11 +39,13 @@ def __init__( config: ConfigFile, monitor: Monitor, blueprint_mgr: BlueprintManager, + vdbe_mgr: VdbeManager, system_event_logger: Optional[SystemEventLogger], ) -> None: self.config = config self.monitor = monitor self.blueprint_mgr = blueprint_mgr + self.vdbe_mgr = vdbe_mgr self.system_event_logger = system_event_logger self.planner: Optional[BlueprintPlanner] = None @@ -109,7 +109,6 @@ def get_system_state(filter_tables_for_demo: bool = False) -> SystemState: # TODO: Hardcoded virtualized infrasturcture and writers. txn_tables = ["theatres", "showings", "ticket_orders", "movie_info", "aka_title"] - txn_only = ["theatres", "showings", "ticket_orders"] if filter_tables_for_demo: # To improve how the UI looks in a screenshot, we filter out some tables @@ -134,47 +133,7 @@ def get_system_state(filter_tables_for_demo: bool = False) -> SystemState: ) dbp = DisplayableBlueprint.from_blueprint(blueprint) - vdbe1 = DisplayableVirtualEngine( - name="VDBE 1", - freshness="No staleness (SI)", - dialect="PostgreSQL SQL", - peak_latency_s=0.030, - tables=[ - DisplayableTable(name=name, is_writer=True, mapped_to=["Aurora"]) - for name in [ - "theatres", - "showings", - "ticket_orders", - "movie_info", - "aka_title", - ] - ], - ) - vdbe1.tables.sort(key=lambda t: t.name) - vdbe2 = DisplayableVirtualEngine( - name="VDBE 2", - freshness="≤ 10 minutes stale (SI)", - dialect="PostgreSQL SQL", - peak_latency_s=30.0, - tables=[ - DisplayableTable( - name=table.name, - is_writer=False, - mapped_to=_analytics_table_mapper_temp(table.name, blueprint), - ) - for table in blueprint.tables() - if table.name not in txn_only - ], - ) - vdbe2.tables.sort(key=lambda t: t.name) - for engine in dbp.engines: - if engine.name != "Aurora": - continue - for t in engine.tables: - if t.name in txn_tables: - t.is_writer = True - virtual_infra = VirtualInfrastructure(engines=[vdbe1, vdbe2]) - + virtual_infra = manager.vdbe_mgr.infra() status = _determine_current_status(manager) if status is Status.Transitioning: next_blueprint = manager.blueprint_mgr.get_transition_metadata().next_blueprint @@ -188,7 +147,6 @@ def get_system_state(filter_tables_for_demo: bool = False) -> SystemState: blueprint=dbp, next_blueprint=next_dbp, ) - _add_reverse_mapping_temp(system_state) return system_state diff --git a/src/brad/ui/models.py b/src/brad/ui/models.py index 7389b663..276a7d2e 100644 --- a/src/brad/ui/models.py +++ b/src/brad/ui/models.py @@ -4,6 +4,7 @@ from brad.blueprint import Blueprint from brad.config.engine import Engine +from brad.vdbe.models import VirtualInfrastructure class TimestampedMetrics(BaseModel): @@ -84,18 +85,6 @@ def from_blueprint(cls, blueprint: Blueprint) -> "DisplayableBlueprint": return cls(engines=engines) -class DisplayableVirtualEngine(BaseModel): - name: str - freshness: str - dialect: str - peak_latency_s: Optional[float] = None - tables: List[DisplayableTable] = [] - - -class VirtualInfrastructure(BaseModel): - engines: List[DisplayableVirtualEngine] - - class Status(enum.Enum): Running = "running" Planning = "planning" diff --git a/src/brad/vdbe/__init__.py b/src/brad/vdbe/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/brad/vdbe/manager.py b/src/brad/vdbe/manager.py new file mode 100644 index 00000000..7e8cc692 --- /dev/null +++ b/src/brad/vdbe/manager.py @@ -0,0 +1,24 @@ +import pathlib +from typing import List +from brad.vdbe.models import VirtualInfrastructure, VirtualEngine + + +class VdbeManager: + """ + Used to manage the current VDBE state. + """ + + @classmethod + def load_from(cls, serialized_infra_json: pathlib.Path) -> "VdbeManager": + with open(serialized_infra_json, "r", encoding="utf-8") as f: + infra = VirtualInfrastructure.model_validate_json(f.read()) + return cls(infra) + + def __init__(self, infra: VirtualInfrastructure) -> None: + self._infra = infra + + def infra(self) -> VirtualInfrastructure: + return self._infra + + def engines(self) -> List[VirtualEngine]: + return self._infra.engines diff --git a/src/brad/vdbe/models.py b/src/brad/vdbe/models.py new file mode 100644 index 00000000..42f644da --- /dev/null +++ b/src/brad/vdbe/models.py @@ -0,0 +1,38 @@ +import enum +from typing import List +from pydantic import BaseModel + +from brad.config.engine import Engine + +# This is a simple implementation of a Virtual Database Engine (VDBE) metadata +# model meant for demonstration purposes only. + + +class VirtualTable(BaseModel): + name: str + writable: bool + + +class SchemaTable(BaseModel): + name: str + + +class QueryInterface(enum.Enum): + Common = "common" + PostgreSQL = "postgresql" + Athena = "athena" + + +class VirtualEngine(BaseModel): + name: str + max_staleness_ms: int + p90_latency_slo_ms: int + interface: QueryInterface + tables: List[VirtualTable] + mapped_to: Engine + + +class VirtualInfrastructure(BaseModel): + schema_name: str + engines: List[VirtualEngine] + tables: List[SchemaTable] diff --git a/tools/serialize_vdbes.py b/tools/serialize_vdbes.py new file mode 100644 index 00000000..363c1aa3 --- /dev/null +++ b/tools/serialize_vdbes.py @@ -0,0 +1,75 @@ +import argparse +import yaml +from typing import Any, Dict +from brad.vdbe.models import ( + VirtualInfrastructure, + VirtualEngine, + VirtualTable, + SchemaTable, + QueryInterface, +) +from brad.config.engine import Engine + + +# Define your virtual infrastructure here. +def to_serialize(schema: Dict[str, Any]) -> VirtualInfrastructure: + all_table_names = [tbl["table_name"] for tbl in schema["tables"]] + t_tables = [ + VirtualTable(name=name, writable=True) + for name in [ + "theatres", + "showings", + "ticket_orders", + "movie_info", + "aka_title", + ] + ] + a_tables = [VirtualTable(name=name, writable=False) for name in all_table_names] + t_engine = VirtualEngine( + name="VDBE (T)", + max_staleness_ms=0, + p90_latency_slo_ms=30, + interface=QueryInterface.PostgreSQL, + tables=t_tables, + mapped_to=Engine.Aurora, + ) + a_engine = VirtualEngine( + name="VDBE (A)", + max_staleness_ms=60 * 60 * 1000, # 1 hour + p90_latency_slo_ms=30 * 1000, + interface=QueryInterface.PostgreSQL, + tables=a_tables, + mapped_to=Engine.Redshift, + ) + return VirtualInfrastructure( + schema_name=schema["schema_name"], + engines=[t_engine, a_engine], + tables=[SchemaTable(name=name) for name in all_table_names], + ) + + +def main(): + parser = argparse.ArgumentParser( + description="Tool used to serialize VDBE defintions." + ) + parser.add_argument("--out-file", type=str, help="Output file path.", required=True) + parser.add_argument("--compact", action="store_true", help="Compact JSON output.") + parser.add_argument( + "--schema-file", type=str, required=True, help="Schema file path." + ) + args = parser.parse_args() + + with open(args.schema_file, "r", encoding="utf-8") as f: + schema = yaml.load(f, Loader=yaml.Loader) + + infra = to_serialize(schema) + indent = None if args.compact else 2 + out_str = infra.model_dump_json(indent=indent) + + with open(args.out_file, "w", encoding="utf-8") as f: + f.write(out_str) + f.write("\n") + + +if __name__ == "__main__": + main() diff --git a/ui/package.json b/ui/package.json index bd782bcb..d54c71cc 100644 --- a/ui/package.json +++ b/ui/package.json @@ -8,7 +8,7 @@ "build": "vite build", "lint": "eslint .", "preview": "vite preview", - "watch": "vite build --watch", + "watch": "vite build --mode development --watch", "format": "prettier --write .", "check": "prettier --check . ; eslint .", "fcheck": "prettier --check ." diff --git a/ui/src/App.jsx b/ui/src/App.jsx index 9b0e147a..4fa17b16 100644 --- a/ui/src/App.jsx +++ b/ui/src/App.jsx @@ -65,20 +65,20 @@ function App() { }); }; - // Fetch updated system state periodically. - useEffect(async () => { - const refreshData = async () => { - const newSystemState = await fetchSystemState( - /*filterTablesForDemo=*/ false, - ); - // TODO: Not the best way to check for equality. - if (JSON.stringify(systemState) !== JSON.stringify(newSystemState)) { - setSystemState(newSystemState); - } - }; + const refreshData = async () => { + const newSystemState = await fetchSystemState( + /*filterTablesForDemo=*/ false, + ); + // TODO: Not the best way to check for equality. + if (JSON.stringify(systemState) !== JSON.stringify(newSystemState)) { + setSystemState(newSystemState); + } + }; + // Fetch updated system state periodically. + useEffect(() => { // Run first fetch immediately. - await refreshData(); + refreshData(); const intervalId = setInterval(refreshData, REFRESH_INTERVAL_MS); return () => { if (intervalId === null) { diff --git a/ui/src/components/PerfView.jsx b/ui/src/components/PerfView.jsx index f4697792..62aa2d1d 100644 --- a/ui/src/components/PerfView.jsx +++ b/ui/src/components/PerfView.jsx @@ -78,25 +78,25 @@ function PerfView({ virtualInfra }) { return metricsManagerRef.current; } - useEffect(async () => { - const refreshData = async () => { - const rawMetrics = await fetchMetrics(60, /*useGenerated=*/ false); - const fetchedMetrics = parseMetrics(rawMetrics); - const metricsManager = getMetricsManager(); - const addedNewMetrics = metricsManager.mergeInMetrics(fetchedMetrics); - if (addedNewMetrics) { - setMetricsData({ + const refreshData = async () => { + const rawMetrics = await fetchMetrics(60, /*useGenerated=*/ false); + const fetchedMetrics = parseMetrics(rawMetrics); + const metricsManager = getMetricsManager(); + const addedNewMetrics = metricsManager.mergeInMetrics(fetchedMetrics); + if (addedNewMetrics) { + setMetricsData({ + windowSizeMinutes, + metrics: metricsManager.getMetricsInWindow( windowSizeMinutes, - metrics: metricsManager.getMetricsInWindow( - windowSizeMinutes, - /*extendForward=*/ true, - ), - }); - } - }; + /*extendForward=*/ true, + ), + }); + } + }; + useEffect(() => { // Run first fetch immediately. - await refreshData(); + refreshData(); const intervalId = setInterval(refreshData, REFRESH_INTERVAL_MS); return () => { if (intervalId === null) { diff --git a/ui/src/components/PhysDbView.jsx b/ui/src/components/PhysDbView.jsx index 509106a6..06eda4f8 100644 --- a/ui/src/components/PhysDbView.jsx +++ b/ui/src/components/PhysDbView.jsx @@ -47,11 +47,11 @@ function PhysDbView({ )}