Skip to content

Commit

Permalink
Add hot configuration singleton to simplify changing configs during a…
Browse files Browse the repository at this point in the history
…n experiment (#498)

Part of #487.
  • Loading branch information
geoffxy authored Apr 24, 2024
1 parent d751324 commit b0acff8
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 7 deletions.
4 changes: 3 additions & 1 deletion src/brad/connection/sqlite_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ def connect(cls, db_path: str, autocommit: bool) -> Connection:
@classmethod
def connect_sync(cls, db_path: str, autocommit: bool) -> Connection:
# Note in Python 3.12, the `autocommit` parameter becomes available.
conn = sqlite3.connect(db_path, isolation_level=None if autocommit else "")
conn = sqlite3.connect(
db_path, isolation_level=None if autocommit else "DEFERRED"
)
return cls(conn)

def __init__(self, connection_impl: sqlite3.Connection) -> None:
Expand Down
13 changes: 11 additions & 2 deletions src/brad/daemon/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from brad.config.system_event import SystemEvent
from brad.config.temp_config import TempConfig
from brad.connection.factory import ConnectionFactory
from brad.daemon.hot_config import HotConfig
from brad.daemon.messages import (
ShutdownFrontEnd,
Sentinel,
Expand Down Expand Up @@ -234,10 +235,14 @@ async def _run_setup(self) -> None:
athena_accessed_bytes_path=self._temp_config.athena_data_access_path(),
)

query_lat_p90 = self._temp_config.query_latency_p90_ceiling_s()
txn_lat_p90 = self._temp_config.txn_latency_p90_ceiling_s()
comparator_provider = self._get_comparator_provider(
self._temp_config.query_latency_p90_ceiling_s(),
self._temp_config.txn_latency_p90_ceiling_s(),
query_lat_p90, txn_lat_p90
)
hot_config = HotConfig.instance()
hot_config.set_value("query_lat_p90", query_lat_p90)
hot_config.set_value("txn_lat_p90", txn_lat_p90)

else:
logger.warning(
Expand Down Expand Up @@ -757,6 +762,10 @@ async def _handle_internal_command(self, command: str) -> RowList:
elif isinstance(t, TransactionLatencyCeiling):
t.set_latency_ceiling(txn_p90_s)

hot_config = HotConfig.instance()
hot_config.set_value("query_lat_p90", query_p90_s)
hot_config.set_value("txn_lat_p90", txn_p90_s)

if self._system_event_logger is not None:
self._system_event_logger.log(
SystemEvent.ChangedSlos,
Expand Down
36 changes: 36 additions & 0 deletions src/brad/daemon/hot_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from typing import Any, Dict, Optional


class HotConfig:
    """
    A key/value store for configuration values that are meant to be changed
    while the daemon is running (hence "hot"). This is meant to be a
    singleton; obtain the shared object through `HotConfig.instance()`.

    This exists to work around invasive changes to the codebase for
    exploratory experiments. Longer-term features should not be implemented
    using this class.
    """

    def __init__(self) -> None:
        # Maps a configuration key to whatever value was last stored for it.
        self._config: Dict[str, Any] = {}

    @classmethod
    def instance(cls) -> "HotConfig":
        """
        Returns the shared `HotConfig`, creating it on the first call. The
        shared object is held in the module-level `_INSTANCE` variable.
        """
        global _INSTANCE  # pylint: disable=global-statement
        shared = _INSTANCE
        if shared is not None:
            return shared
        _INSTANCE = cls()
        return _INSTANCE

    def set_value(self, key: str, value: Any) -> None:
        """
        Stores `value` under `key`, replacing any value previously stored
        under the same key.
        """
        self._config.update({key: value})

    def get_value(self, key: str, default: Optional[Any] = None) -> Any:
        """
        Returns the value stored under `key`.

        If `key` has not been set, `default` is returned instead, provided it
        is not `None`. A `default` of `None` is treated as "no default": the
        `KeyError` from the failed lookup propagates to the caller. A stored
        value always takes precedence over `default`, even when the stored
        value is itself `None`.
        """
        try:
            stored = self._config[key]
        except KeyError:
            # `None` cannot be used as a fallback value; it signals that the
            # caller wants the lookup failure to surface.
            if default is None:
                raise
            return default
        return stored


# Backing storage for `HotConfig.instance()`. It stays `None` until the first
# call to `instance()` creates the shared object.
_INSTANCE: Optional[HotConfig] = None
9 changes: 6 additions & 3 deletions src/brad/planner/scoring/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
from typing import Dict, List, Optional, Tuple
from datetime import timedelta

from brad.config.engine import Engine
from brad.blueprint import Blueprint
from brad.config.engine import Engine
from brad.config.planner import PlannerConfig
from brad.daemon.hot_config import HotConfig
from brad.planner.enumeration.provisioning import ProvisioningEnumerator
from brad.planner.metrics import Metrics
from brad.planner.workload import Workload
Expand Down Expand Up @@ -267,6 +268,8 @@ def correct_predictions_based_on_observations(self) -> None:
logger.info("No queries in the workload.")
return

query_lat_p90 = HotConfig.instance().get_value("query_lat_p90", default=30.0)

# Process Redshift.
is_redshift = np.where(obs_locs == Workload.EngineLatencyIndex[Engine.Redshift])
if is_redshift[0].sum() > 0:
Expand All @@ -279,7 +282,7 @@ def correct_predictions_based_on_observations(self) -> None:
ratio = redshift_preds / base
# Queries where we have observations where the predictions are probably
# 5x larger and the predictions violate the SLOs.
hes = np.where((ratio > 3.0) & (redshift_preds > 30.0))
hes = np.where((ratio > 3.0) & (redshift_preds > query_lat_p90))
redshift_to_replace = redshift_qidx[hes]
logger.info(
"[Redshift Prediction Corrections] Replacing %d base predictions.",
Expand Down Expand Up @@ -311,7 +314,7 @@ def correct_predictions_based_on_observations(self) -> None:
aurora_obs, self.current_blueprint.aurora_provisioning(), self
)
aurora_ratio = aurora_preds / aurora_base
ahes = np.where((aurora_ratio > 3.0) & (aurora_preds > 30.0))
ahes = np.where((aurora_ratio > 3.0) & (aurora_preds > query_lat_p90))
aurora_to_replace = aurora_qidx[ahes]
logger.info(
"[Aurora Prediction Corrections] Replacing %d base predictions.",
Expand Down
6 changes: 5 additions & 1 deletion src/brad/planner/scoring/performance/unified_redshift.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Dict, TYPE_CHECKING, Optional, Iterator, List, Tuple, Any

from brad.config.engine import Engine
from brad.daemon.hot_config import HotConfig
from brad.blueprint.provisioning import Provisioning
from brad.planner.scoring.provisioning import redshift_num_cpus
from brad.planner.scoring.performance.queuing import predict_mm1_wait_time
Expand Down Expand Up @@ -57,8 +58,11 @@ def compute(
# This won't be used. This is actually max.
avg_cpu = ctx.metrics.redshift_cpu_avg

gamma_norm_factor = HotConfig.instance().get_value(
"query_lat_p90", default=30.0
)
gamma = (
min(ctx.metrics.query_lat_s_p90 / 30.0 + 0.35, 1.0)
min(ctx.metrics.query_lat_s_p90 / gamma_norm_factor + 0.35, 1.0)
if avg_cpu >= 90.0
else 1.0
)
Expand Down

0 comments on commit b0acff8

Please sign in to comment.