diff --git a/experiments/15-e2e-scenarios-v2/specialized/set_up_vector_blueprint.py b/experiments/15-e2e-scenarios-v2/specialized/set_up_vector_blueprint.py index cd79ecb3..53a834a5 100644 --- a/experiments/15-e2e-scenarios-v2/specialized/set_up_vector_blueprint.py +++ b/experiments/15-e2e-scenarios-v2/specialized/set_up_vector_blueprint.py @@ -133,8 +133,10 @@ def main(): new_placement[table.name] = [Engine.Aurora, Engine.Athena, Engine.Redshift] if table.name == "telemetry": new_placement[table.name] = [Engine.Athena] - if table.name == "embeddings" or table.name == "title": + if table.name == "title": new_placement[table.name] = [Engine.Aurora, Engine.Athena] + if table.name == "embeddings": + new_placement[table.name] = [Engine.Aurora] enum_blueprint.set_table_locations(new_placement) # 6. Transition to the new blueprint. diff --git a/src/brad/blueprint/sql_gen/table.py b/src/brad/blueprint/sql_gen/table.py index 5ae0ab6d..59b2d082 100644 --- a/src/brad/blueprint/sql_gen/table.py +++ b/src/brad/blueprint/sql_gen/table.py @@ -318,5 +318,12 @@ def _type_for(data_type: str, for_db: Engine) -> str: return "BIGINT" elif data_type_upper.startswith("VARCHAR") and for_db == Engine.Athena: return "STRING" + elif data_type_upper.startswith("VECTOR"): + if for_db == Engine.Athena: + return "BINARY" + elif for_db == Engine.Redshift: + return "VARBYTE" + else: + return data_type else: return data_type diff --git a/src/brad/planner/beam/fpqb.py b/src/brad/planner/beam/fpqb.py index d61c4718..8ac887c3 100644 --- a/src/brad/planner/beam/fpqb.py +++ b/src/brad/planner/beam/fpqb.py @@ -74,7 +74,11 @@ async def _run_replan_impl( # on at least one engine. This ensures that arbitrary unseen join # templates can always be immediately handled. all_tables = ", ".join( - [table.name for table in self._current_blueprint.tables()] + [ + table.name + for table in self._current_blueprint.tables() + if table.name != "embeddings" + ] ) next_workload.add_priming_analytical_query( f"SELECT 1 FROM {all_tables} LIMIT 1" diff --git a/src/brad/planner/beam/query_based.py b/src/brad/planner/beam/query_based.py index 7f6c8e95..eebfa834 100644 --- a/src/brad/planner/beam/query_based.py +++ b/src/brad/planner/beam/query_based.py @@ -75,7 +75,11 @@ async def _run_replan_impl( # on at least one engine. This ensures that arbitrary unseen join # templates can always be immediately handled. all_tables = ", ".join( - [table.name for table in self._current_blueprint.tables()] + [ + table.name + for table in self._current_blueprint.tables() + if table.name != "embeddings" + ] ) next_workload.add_priming_analytical_query( f"SELECT 1 FROM {all_tables} LIMIT 1" diff --git a/src/brad/planner/beam/table_based.py b/src/brad/planner/beam/table_based.py index 36cc6138..2040598f 100644 --- a/src/brad/planner/beam/table_based.py +++ b/src/brad/planner/beam/table_based.py @@ -75,7 +75,11 @@ async def _run_replan_impl( # on at least one engine. This ensures that arbitrary unseen join # templates can always be immediately handled. all_tables = ", ".join( - [table.name for table in self._current_blueprint.tables()] + [ + table.name + for table in self._current_blueprint.tables() + if table.name != "embeddings" + ] ) next_workload.add_priming_analytical_query( f"SELECT 1 FROM {all_tables} LIMIT 1"