-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add utilities to support the specialized scenario (#383)
* Add utility to add tables to an existing blueprint
* Add the watchdog check
* Check in the watchdog
* Lint/type fixes
* Fixes to the alter schema script
* More fixes
- Loading branch information
Showing 9 changed files with 717 additions and 1 deletion.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
import asyncio | ||
import logging | ||
|
||
from brad.asset_manager import AssetManager | ||
from brad.blueprint import Blueprint | ||
from brad.blueprint.user import UserProvidedBlueprint | ||
from brad.blueprint.sql_gen.table import TableSqlGenerator | ||
from brad.blueprint.manager import BlueprintManager | ||
from brad.config.engine import Engine | ||
from brad.config.file import ConfigFile | ||
from brad.front_end.engine_connections import EngineConnections | ||
from brad.planner.data import bootstrap_blueprint | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def register_admin_action(subparser) -> None:
    """
    Registers the `alter_schema` admin sub-command and its arguments.

    `subparser` is the object returned by `ArgumentParser.add_subparsers()`;
    the new sub-parser dispatches to `alter_schema` through the
    `admin_action` default.
    """
    parser = subparser.add_parser(
        "alter_schema", help="Alters an existing schema on BRAD."
    )
    parser.add_argument(
        "--config-file",
        type=str,
        required=True,
        help="Path to BRAD's configuration file.",
    )
    parser.add_argument(
        "--schema-name",
        type=str,
        required=True,
        help="The name of the schema.",
    )
    # The new schema file is always loaded by the action, so reject a missing
    # value at argument parsing time instead of failing later on `None`.
    parser.add_argument(
        "--new-schema-file",
        type=str,
        required=True,
        help="Path to the database schema to bootstrap.",
    )
    parser.add_argument(
        "--skip-persisting-blueprint",
        action="store_true",
        help="Set this flag to avoid persisting the blueprint. "
        "Only meant to be used if you know what you are doing!",
    )
    parser.add_argument(
        "--engines",
        nargs="+",
        default=["aurora", "redshift", "athena"],
        help="The engines on which the new tables should be created.",
    )
    parser.add_argument(
        "--take-action",
        action="store_true",
        help="Set this flag to actually make the schema changes. "
        "Without it, the tables that would be created are only logged.",
    )
    parser.set_defaults(admin_action=alter_schema)
|
||
|
||
def _run_queries(cxns, queries, db_type) -> None:
    # Runs each statement in `queries` on the connection for `db_type`.
    # Nothing is committed here; the caller decides when to commit.
    cursor = cxns.get_connection(db_type).cursor_sync()
    for q in queries:
        logger.debug("Running on %s: %s", str(db_type), q)
        cursor.execute_sync(q)


async def alter_schema_impl(args):
    """
    Adds the tables that appear in the new schema file but are missing from
    the current blueprint.

    This alter schema is primitive for now (only to support experiments). It
    only adds tables that are missing from the current blueprint; existing
    tables are never modified or dropped.

    `args` is the parsed command line namespace produced by the parser set up
    in `register_admin_action`. Unless `args.take_action` is set, the tables
    that would be created are logged and nothing is changed.

    Raises `ValueError` if no new schema file was provided.
    """
    # Fail early with a clear message instead of handing `None` to the loader.
    if args.new_schema_file is None:
        raise ValueError(
            "A new schema file must be provided using --new-schema-file."
        )

    # 1. Load the config.
    config = ConfigFile.load(args.config_file)
    assets = AssetManager(config)
    blueprint_mgr = BlueprintManager(config, assets, args.schema_name)
    await blueprint_mgr.load()
    current_blueprint = blueprint_mgr.get_blueprint()

    # 2. Load and validate the user-provided schema.
    user = UserProvidedBlueprint.load_from_yaml_file(args.new_schema_file)
    user.validate()

    # 3. Get the bootstrapped blueprint.
    altered_blueprint = bootstrap_blueprint(user)

    # 4. Connect to the engines.
    engines_filter = {Engine.from_str(engine_str) for engine_str in args.engines}
    cxns = EngineConnections.connect_sync(
        config,
        blueprint_mgr.get_directory(),
        schema_name=args.schema_name,
        autocommit=False,
        specific_engines=engines_filter,
    )

    # 5. Figure out which tables are new. These will be created.
    # The names are kept in the order they appear in the new blueprint (with
    # duplicates dropped) so that the statements we run and the persisted
    # blueprint do not depend on set iteration order.
    existing_tables = {table.name for table in current_blueprint.tables()}
    tables_to_create = list(
        dict.fromkeys(
            table.name
            for table in altered_blueprint.tables()
            if table.name not in existing_tables
        )
    )
    new_table_names = set(tables_to_create)

    logger.info("Will create the following tables: %s", str(tables_to_create))
    if not args.take_action:
        logger.info("Set --take-action to make the schema changes.")
        return

    # 6. Install the required extensions.
    if Engine.Aurora in engines_filter:
        aurora = cxns.get_connection(Engine.Aurora)
        cursor = aurora.cursor_sync()
        cursor.execute_sync("CREATE EXTENSION IF NOT EXISTS vector")
        cursor.commit_sync()

    # 7. Set up the new tables.
    sql_gen = TableSqlGenerator(config, altered_blueprint)

    for table in altered_blueprint.tables():
        if table.name not in new_table_names:
            continue

        table_locations = altered_blueprint.get_table_locations(table.name)
        for location in table_locations:
            if location not in engines_filter:
                logger.info(
                    "Skipping creating '%s' on %s because the engine was not "
                    "specified using --engines.",
                    table.name,
                    location,
                )
                continue
            logger.info(
                "Creating table '%s' on %s...",
                table.name,
                location,
            )
            queries, db_type = sql_gen.generate_create_table_sql(table, location)
            _run_queries(cxns, queries, db_type)

    # 8. Update the extraction progress table.
    if Engine.Aurora in engines_filter:
        for table_name in tables_to_create:
            queries, db_type = sql_gen.generate_extraction_progress_init(table_name)
            _run_queries(cxns, queries, db_type)

    # 9. Commit the changes.
    # N.B. Athena does not support the notion of committing a transaction.
    if Engine.Aurora in engines_filter:
        cxns.get_connection(Engine.Aurora).cursor_sync().commit_sync()
    if Engine.Redshift in engines_filter:
        cxns.get_connection(Engine.Redshift).cursor_sync().commit_sync()

    # 10. Persist the data blueprint.
    if not args.skip_persisting_blueprint:
        # Work on copies so the currently loaded blueprint is left untouched.
        merged_tables = current_blueprint.tables().copy()
        merged_table_locations = current_blueprint.table_locations().copy()

        # Append the new table metadata to the blueprint.
        for table_name in tables_to_create:
            merged_tables.append(altered_blueprint.get_table(table_name))
            merged_table_locations[table_name] = altered_blueprint.get_table_locations(
                table_name
            )

        # Everything other than the tables and their locations is carried
        # over from the current blueprint.
        merged_blueprint = Blueprint(
            current_blueprint.schema_name(),
            merged_tables,
            merged_table_locations,
            current_blueprint.aurora_provisioning(),
            current_blueprint.redshift_provisioning(),
            current_blueprint.get_routing_policy(),
        )
        blueprint_mgr.force_new_blueprint_sync(merged_blueprint, score=None)

    logger.info("Done!")
|
||
|
||
# Entry point invoked by `brad.exec.admin.main`.
def alter_schema(args):
    """Drives the asynchronous alter schema workflow to completion."""
    workflow = alter_schema_impl(args)
    asyncio.run(workflow)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
from typing import Optional | ||
|
||
from brad.config.engine import Engine | ||
from brad.blueprint import Blueprint | ||
from brad.config.system_event import SystemEvent | ||
from brad.daemon.system_event_logger import SystemEventLogger | ||
|
||
|
||
class BlueprintWatchdog:
    """
    Used to prevent selecting blueprints that would cause issues. If the
    watchdog fires, we must redo the experiment (this is meant as a backstop).
    """

    def __init__(self, event_logger: Optional[SystemEventLogger]) -> None:
        # When no logger is given, the watchdog still rejects blueprints but
        # does not record an event.
        self._event_logger = event_logger

    def reject_blueprint(self, blueprint: Blueprint) -> bool:
        """
        Returns True if `blueprint` must not be used, and False otherwise.

        A blueprint is rejected when the "telemetry" table is placed on
        Aurora, or when the "embeddings" table is placed anywhere other than
        only on Aurora. Tables that are not part of the schema are ignored.
        """
        # Only the lookup is guarded: a `ValueError` raised while logging
        # must not be mistaken for "table not in this schema", which would
        # silently let a bad blueprint through.

        # Telemetry table should not go onto Aurora.
        try:
            telemetry_locations = blueprint.get_table_locations("telemetry")
        except ValueError:
            # Indicates the table is not used in this schema - no problem.
            pass
        else:
            if Engine.Aurora in telemetry_locations:
                self._log_fired(
                    f"telemetry_placed_on_aurora: {str(telemetry_locations)}"
                )
                return True

        # Embedding table should not leave Aurora.
        try:
            embedding_locations = blueprint.get_table_locations("embeddings")
        except ValueError:
            # Indicates the table is not used in this schema - no problem.
            pass
        else:
            if embedding_locations != [Engine.Aurora]:
                self._log_fired(
                    f"embedding_left_aurora: {str(embedding_locations)}"
                )
                return True

        # All ok.
        return False

    def _log_fired(self, message: str) -> None:
        # Records a `WatchdogFired` event if an event logger was provided.
        if self._event_logger is not None:
            self._event_logger.log(SystemEvent.WatchdogFired, message)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters