From 1a8ca927b00f5b7172f34352ea964e6c8850c891 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Thu, 12 Sep 2024 10:50:25 -0400 Subject: [PATCH 1/6] First pass at updated VDBE/blueprint definitions --- proto/interface.proto | 13 +++++++++++++ proto/interface/blueprint.proto | 34 +++++++++++++++++++++++++++++++++ proto/interface/schema.proto | 23 ++++++++++++++++++++++ proto/interface/vdbe.proto | 27 ++++++++++++++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 proto/interface.proto create mode 100644 proto/interface/blueprint.proto create mode 100644 proto/interface/schema.proto create mode 100644 proto/interface/vdbe.proto diff --git a/proto/interface.proto b/proto/interface.proto new file mode 100644 index 00000000..67874ff7 --- /dev/null +++ b/proto/interface.proto @@ -0,0 +1,13 @@ +syntax = "proto3"; + +package vdbe; + +import "interface/blueprint.proto"; +import "interface/schema.proto"; +import "interface/vdbe.proto"; + +message SystemState { + repeated Table tables = 1; + repeated VirtualEngine vdbes = 2; + Blueprint blueprint = 3; +} diff --git a/proto/interface/blueprint.proto b/proto/interface/blueprint.proto new file mode 100644 index 00000000..8a7a1012 --- /dev/null +++ b/proto/interface/blueprint.proto @@ -0,0 +1,34 @@ +syntax = "proto3"; + +package vdbe; + +message Blueprint { + Provisioning aurora = 1; + Provisioning redshift = 2; + RoutingPolicy policy = 3; +} + +message RoutingPolicy { + // A serialized form of the routing policy. + bytes policy = 1; +} + +message PhysicalSnapshot { + // The VDBEs that are mapped to this physical snapshot. + repeated string vdbes = 1; + // The tables in this physical snapshot. + repeated string tables = 2; + // Where the data in this physical snapshot resides. + Engine location = 3; +} + +// Used to indicate the location of data. +enum Engine { + UNKNOWN = 0; + // The data is stored in Aurora. + AURORA = 1; + // The data is stored in Redshift. + REDSHIFT = 2; + // The data is stored on S3 in an Iceberg table and registered with Athena. + ATHENA = 3; +} diff --git a/proto/interface/schema.proto b/proto/interface/schema.proto new file mode 100644 index 00000000..73498308 --- /dev/null +++ b/proto/interface/schema.proto @@ -0,0 +1,23 @@ +// This file contains definitions for table schema information. + +syntax = "proto3"; + +package vdbe; + +message Table { + string name = 1; + repeated TableColumn columns = 2; +} + +message TableColumn { + string name = 1; + DataType type = 2; + bool nullable = 3; +} + +enum DataType { + UNKNOWN = 0; + INT_32 = 1; + INT_64 = 2; + STRING = 3; +} diff --git a/proto/interface/vdbe.proto b/proto/interface/vdbe.proto new file mode 100644 index 00000000..177ded69 --- /dev/null +++ b/proto/interface/vdbe.proto @@ -0,0 +1,27 @@ +syntax = "proto3"; + +message VirtualEngine { + string name = 1; + QueryInterface qi = 2; + repeated VirtualTable tables = 3; + uint64 max_staleness_ms = 4; + // TODO: Meaningful representation for performance properties. +} + +message VirtualTable { + // Name must reference a table that has a schema definition. + string name = 1; + bool writable = 2; +} + +enum QueryInterface { + UNKNOWN = 0; + + // 1xx - Open source SQL DBMSes. + SQL_POSTGRESQL = 101; + SQL_MYSQL = 102; + + // 2xx - AWS SQL DBMSes. + SQL_AWS_REDSHIFT = 201; + SQL_AWS_ATHENA = 202; +} From b8200fb83aa2df221e84933538b55a979461b772 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Thu, 12 Sep 2024 14:00:53 -0400 Subject: [PATCH 2/6] Protobuf definition fixes, update generate script --- proto/interface/blueprint.proto | 13 +++++++++---- proto/interface/vdbe.proto | 13 ++++++++----- tools/generate_proto.sh | 14 ++++++++++++++ 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/proto/interface/blueprint.proto b/proto/interface/blueprint.proto index 8a7a1012..c06024bf 100644 --- a/proto/interface/blueprint.proto +++ b/proto/interface/blueprint.proto @@ -24,11 +24,16 @@ message PhysicalSnapshot { // Used to indicate the location of data. enum Engine { - UNKNOWN = 0; + ENGINE_UNKNOWN = 0; // The data is stored in Aurora. - AURORA = 1; + ENGINE_AURORA = 1; // The data is stored in Redshift. - REDSHIFT = 2; + ENGINE_REDSHIFT = 2; // The data is stored on S3 in an Iceberg table and registered with Athena. - ATHENA = 3; + ENGINE_ATHENA = 3; +} + +message Provisioning { + string instance_type = 1; + uint32 num_nodes = 2; } diff --git a/proto/interface/vdbe.proto b/proto/interface/vdbe.proto index 177ded69..f286b29c 100644 --- a/proto/interface/vdbe.proto +++ b/proto/interface/vdbe.proto @@ -1,11 +1,14 @@ syntax = "proto3"; +package vdbe; + message VirtualEngine { string name = 1; QueryInterface qi = 2; repeated VirtualTable tables = 3; uint64 max_staleness_ms = 4; // TODO: Meaningful representation for performance properties. + // TODO: Flag set for feature support. } message VirtualTable { @@ -15,13 +18,13 @@ message VirtualTable { } enum QueryInterface { - UNKNOWN = 0; + QI_UNKNOWN = 0; // 1xx - Open source SQL DBMSes. - SQL_POSTGRESQL = 101; - SQL_MYSQL = 102; + QI_SQL_POSTGRESQL = 101; + QI_SQL_MYSQL = 102; // 2xx - AWS SQL DBMSes. - SQL_AWS_REDSHIFT = 201; - SQL_AWS_ATHENA = 202; + QI_SQL_AWS_REDSHIFT = 201; + QI_SQL_AWS_ATHENA = 202; } diff --git a/tools/generate_proto.sh b/tools/generate_proto.sh index 0c32bc86..13211666 100755 --- a/tools/generate_proto.sh +++ b/tools/generate_proto.sh @@ -4,6 +4,20 @@ SCRIPT_PATH=$(cd $(dirname $0) && pwd -P) cd $SCRIPT_PATH source shared.sh +# NOTE: You need to manually add `# type: ignore` to the .pyi stubs, since the +# generated code causes an error in mypy's type check. +python3 -m grpc_tools.protoc \ + -I../proto \ + --python_out=../src/brad/proto_gen \ + --pyi_out=../src/brad/proto_gen \ + ../proto/interface.proto \ + ../proto/interface/blueprint.proto \ + ../proto/interface/schema.proto \ + ../proto/interface/vdbe.proto + +# Fix the import path. +sed -i -e "s/from interface import/from brad.proto_gen.interface import/g" ../src/brad/proto_gen/interface_pb2.py + python3 -m grpc_tools.protoc \ -I../proto \ --python_out=../src/brad/proto_gen \ From 3a705275670769707bdb63e350323a18ab82c8fc Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Thu, 12 Sep 2024 14:24:14 -0400 Subject: [PATCH 3/6] Update protobuf definitions --- proto/interface.proto | 13 +++-- proto/interface/blueprint.proto | 9 ++-- proto/interface/schema.proto | 11 ++-- proto/interface/vdbe.proto | 3 ++ src/brad/proto_gen/interface/blueprint_pb2.py | 34 ++++++++++++ .../proto_gen/interface/blueprint_pb2.pyi | 52 +++++++++++++++++++ src/brad/proto_gen/interface/schema_pb2.py | 30 +++++++++++ src/brad/proto_gen/interface/schema_pb2.pyi | 36 +++++++++++++ src/brad/proto_gen/interface/vdbe_pb2.py | 30 +++++++++++ src/brad/proto_gen/interface/vdbe_pb2.pyi | 40 ++++++++++++++ src/brad/proto_gen/interface_pb2.py | 29 +++++++++++ src/brad/proto_gen/interface_pb2.pyi | 21 ++++++++ 12 files changed, 298 insertions(+), 10 deletions(-) create mode 100644 src/brad/proto_gen/interface/blueprint_pb2.py create mode 100644 src/brad/proto_gen/interface/blueprint_pb2.pyi create mode 100644 src/brad/proto_gen/interface/schema_pb2.py create mode 100644 src/brad/proto_gen/interface/schema_pb2.pyi create mode 100644 src/brad/proto_gen/interface/vdbe_pb2.py create mode 100644 src/brad/proto_gen/interface/vdbe_pb2.pyi create mode 100644 src/brad/proto_gen/interface_pb2.py create mode 100644 src/brad/proto_gen/interface_pb2.pyi diff --git a/proto/interface.proto b/proto/interface.proto index 67874ff7..32705505 100644 --- a/proto/interface.proto +++ b/proto/interface.proto @@ -1,3 +1,7 @@ +// This file is the entrypoint for the virtual database engine (VDBE) interface +// definitions. We have split the definitions across multiple files to make them +// managable to read. + syntax = "proto3"; package vdbe; @@ -6,8 +10,11 @@ import "interface/blueprint.proto"; import "interface/schema.proto"; import "interface/vdbe.proto"; +// All the information needed to describe a data infrastructure deployment. message SystemState { - repeated Table tables = 1; - repeated VirtualEngine vdbes = 2; - Blueprint blueprint = 3; + // Used to uniquely identify the system state. + string schema_name = 1; + repeated Table tables = 2; + repeated VirtualEngine vdbes = 3; + Blueprint blueprint = 4; } diff --git a/proto/interface/blueprint.proto b/proto/interface/blueprint.proto index c06024bf..3b2af1a7 100644 --- a/proto/interface/blueprint.proto +++ b/proto/interface/blueprint.proto @@ -1,11 +1,14 @@ +// This file contains definitions for the blueprint (physical realization of a +// set of VDBEs). + syntax = "proto3"; package vdbe; message Blueprint { - Provisioning aurora = 1; - Provisioning redshift = 2; - RoutingPolicy policy = 3; + Provisioning aurora = 101; + Provisioning redshift = 102; + RoutingPolicy policy = 201; } message RoutingPolicy { diff --git a/proto/interface/schema.proto b/proto/interface/schema.proto index 73498308..776d2ab9 100644 --- a/proto/interface/schema.proto +++ b/proto/interface/schema.proto @@ -4,20 +4,23 @@ syntax = "proto3"; package vdbe; +// Represents a relational table. message Table { string name = 1; repeated TableColumn columns = 2; } +// Represents a column in a relational table. message TableColumn { string name = 1; DataType type = 2; bool nullable = 3; } +// The data types we currently support. enum DataType { - UNKNOWN = 0; - INT_32 = 1; - INT_64 = 2; - STRING = 3; + DT_UNKNOWN = 0; + DT_INT_32 = 1; + DT_INT_64 = 2; + DT_STRING = 3; } diff --git a/proto/interface/vdbe.proto b/proto/interface/vdbe.proto index f286b29c..eb6b4569 100644 --- a/proto/interface/vdbe.proto +++ b/proto/interface/vdbe.proto @@ -1,3 +1,5 @@ +// This file contains definitions for virtual database engines (VDBEs). + syntax = "proto3"; package vdbe; @@ -17,6 +19,7 @@ message VirtualTable { bool writable = 2; } +// These are the query interfaces we currently support. enum QueryInterface { QI_UNKNOWN = 0; diff --git a/src/brad/proto_gen/interface/blueprint_pb2.py b/src/brad/proto_gen/interface/blueprint_pb2.py new file mode 100644 index 00000000..1b185070 --- /dev/null +++ b/src/brad/proto_gen/interface/blueprint_pb2.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: interface/blueprint.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x19interface/blueprint.proto\x12\x04vdbe\"{\n\tBlueprint\x12\"\n\x06\x61urora\x18\x65 \x01(\x0b\x32\x12.vdbe.Provisioning\x12$\n\x08redshift\x18\x66 \x01(\x0b\x32\x12.vdbe.Provisioning\x12$\n\x06policy\x18\xc9\x01 \x01(\x0b\x32\x13.vdbe.RoutingPolicy\"\x1f\n\rRoutingPolicy\x12\x0e\n\x06policy\x18\x01 \x01(\x0c\"Q\n\x10PhysicalSnapshot\x12\r\n\x05vdbes\x18\x01 \x03(\t\x12\x0e\n\x06tables\x18\x02 \x03(\t\x12\x1e\n\x08location\x18\x03 \x01(\x0e\x32\x0c.vdbe.Engine\"8\n\x0cProvisioning\x12\x15\n\rinstance_type\x18\x01 \x01(\t\x12\x11\n\tnum_nodes\x18\x02 \x01(\r*W\n\x06\x45ngine\x12\x12\n\x0e\x45NGINE_UNKNOWN\x10\x00\x12\x11\n\rENGINE_AURORA\x10\x01\x12\x13\n\x0f\x45NGINE_REDSHIFT\x10\x02\x12\x11\n\rENGINE_ATHENA\x10\x03\x62\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'interface.blueprint_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _globals['_ENGINE']._serialized_start=334 + _globals['_ENGINE']._serialized_end=421 + _globals['_BLUEPRINT']._serialized_start=35 + _globals['_BLUEPRINT']._serialized_end=158 + _globals['_ROUTINGPOLICY']._serialized_start=160 + _globals['_ROUTINGPOLICY']._serialized_end=191 + _globals['_PHYSICALSNAPSHOT']._serialized_start=193 + _globals['_PHYSICALSNAPSHOT']._serialized_end=274 + _globals['_PROVISIONING']._serialized_start=276 + _globals['_PROVISIONING']._serialized_end=332 +# @@protoc_insertion_point(module_scope) diff --git a/src/brad/proto_gen/interface/blueprint_pb2.pyi b/src/brad/proto_gen/interface/blueprint_pb2.pyi new file mode 100644 index 00000000..8f66b2ec --- /dev/null +++ b/src/brad/proto_gen/interface/blueprint_pb2.pyi @@ -0,0 +1,52 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union + +DESCRIPTOR: _descriptor.FileDescriptor + +class Engine(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = [] # type: ignore + ENGINE_UNKNOWN: _ClassVar[Engine] + ENGINE_AURORA: _ClassVar[Engine] + ENGINE_REDSHIFT: _ClassVar[Engine] + ENGINE_ATHENA: _ClassVar[Engine] +ENGINE_UNKNOWN: Engine +ENGINE_AURORA: Engine +ENGINE_REDSHIFT: Engine +ENGINE_ATHENA: Engine + +class Blueprint(_message.Message): + __slots__ = ["aurora", "redshift", "policy"] + AURORA_FIELD_NUMBER: _ClassVar[int] + REDSHIFT_FIELD_NUMBER: _ClassVar[int] + POLICY_FIELD_NUMBER: _ClassVar[int] + aurora: Provisioning + redshift: Provisioning + policy: RoutingPolicy + def __init__(self, aurora: _Optional[_Union[Provisioning, _Mapping]] = ..., redshift: _Optional[_Union[Provisioning, _Mapping]] = ..., policy: _Optional[_Union[RoutingPolicy, _Mapping]] = ...) -> None: ... + +class RoutingPolicy(_message.Message): + __slots__ = ["policy"] + POLICY_FIELD_NUMBER: _ClassVar[int] + policy: bytes + def __init__(self, policy: _Optional[bytes] = ...) -> None: ... + +class PhysicalSnapshot(_message.Message): + __slots__ = ["vdbes", "tables", "location"] + VDBES_FIELD_NUMBER: _ClassVar[int] + TABLES_FIELD_NUMBER: _ClassVar[int] + LOCATION_FIELD_NUMBER: _ClassVar[int] + vdbes: _containers.RepeatedScalarFieldContainer[str] + tables: _containers.RepeatedScalarFieldContainer[str] + location: Engine + def __init__(self, vdbes: _Optional[_Iterable[str]] = ..., tables: _Optional[_Iterable[str]] = ..., location: _Optional[_Union[Engine, str]] = ...) -> None: ... + +class Provisioning(_message.Message): + __slots__ = ["instance_type", "num_nodes"] + INSTANCE_TYPE_FIELD_NUMBER: _ClassVar[int] + NUM_NODES_FIELD_NUMBER: _ClassVar[int] + instance_type: str + num_nodes: int + def __init__(self, instance_type: _Optional[str] = ..., num_nodes: _Optional[int] = ...) -> None: ... diff --git a/src/brad/proto_gen/interface/schema_pb2.py b/src/brad/proto_gen/interface/schema_pb2.py new file mode 100644 index 00000000..3fe5a7ae --- /dev/null +++ b/src/brad/proto_gen/interface/schema_pb2.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: interface/schema.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x16interface/schema.proto\x12\x04vdbe\"9\n\x05Table\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\"\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x11.vdbe.TableColumn\"K\n\x0bTableColumn\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1c\n\x04type\x18\x02 \x01(\x0e\x32\x0e.vdbe.DataType\x12\x10\n\x08nullable\x18\x03 \x01(\x08*G\n\x08\x44\x61taType\x12\x0e\n\nDT_UNKNOWN\x10\x00\x12\r\n\tDT_INT_32\x10\x01\x12\r\n\tDT_INT_64\x10\x02\x12\r\n\tDT_STRING\x10\x03\x62\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'interface.schema_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _globals['_DATATYPE']._serialized_start=168 + _globals['_DATATYPE']._serialized_end=239 + _globals['_TABLE']._serialized_start=32 + _globals['_TABLE']._serialized_end=89 + _globals['_TABLECOLUMN']._serialized_start=91 + _globals['_TABLECOLUMN']._serialized_end=166 +# @@protoc_insertion_point(module_scope) diff --git a/src/brad/proto_gen/interface/schema_pb2.pyi b/src/brad/proto_gen/interface/schema_pb2.pyi new file mode 100644 index 00000000..c125c7d3 --- /dev/null +++ b/src/brad/proto_gen/interface/schema_pb2.pyi @@ -0,0 +1,36 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union + +DESCRIPTOR: _descriptor.FileDescriptor + +class DataType(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = [] # type: ignore + DT_UNKNOWN: _ClassVar[DataType] + DT_INT_32: _ClassVar[DataType] + DT_INT_64: _ClassVar[DataType] + DT_STRING: _ClassVar[DataType] +DT_UNKNOWN: DataType +DT_INT_32: DataType +DT_INT_64: DataType +DT_STRING: DataType + +class Table(_message.Message): + __slots__ = ["name", "columns"] + NAME_FIELD_NUMBER: _ClassVar[int] + COLUMNS_FIELD_NUMBER: _ClassVar[int] + name: str + columns: _containers.RepeatedCompositeFieldContainer[TableColumn] + def __init__(self, name: _Optional[str] = ..., columns: _Optional[_Iterable[_Union[TableColumn, _Mapping]]] = ...) -> None: ... + +class TableColumn(_message.Message): + __slots__ = ["name", "type", "nullable"] + NAME_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + NULLABLE_FIELD_NUMBER: _ClassVar[int] + name: str + type: DataType + nullable: bool + def __init__(self, name: _Optional[str] = ..., type: _Optional[_Union[DataType, str]] = ..., nullable: bool = ...) -> None: ... diff --git a/src/brad/proto_gen/interface/vdbe_pb2.py b/src/brad/proto_gen/interface/vdbe_pb2.py new file mode 100644 index 00000000..173c2976 --- /dev/null +++ b/src/brad/proto_gen/interface/vdbe_pb2.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: interface/vdbe.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x14interface/vdbe.proto\x12\x04vdbe\"}\n\rVirtualEngine\x12\x0c\n\x04name\x18\x01 \x01(\t\x12 \n\x02qi\x18\x02 \x01(\x0e\x32\x14.vdbe.QueryInterface\x12\"\n\x06tables\x18\x03 \x03(\x0b\x32\x12.vdbe.VirtualTable\x12\x18\n\x10max_staleness_ms\x18\x04 \x01(\x04\".\n\x0cVirtualTable\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08writable\x18\x02 \x01(\x08*{\n\x0eQueryInterface\x12\x0e\n\nQI_UNKNOWN\x10\x00\x12\x15\n\x11QI_SQL_POSTGRESQL\x10\x65\x12\x10\n\x0cQI_SQL_MYSQL\x10\x66\x12\x18\n\x13QI_SQL_AWS_REDSHIFT\x10\xc9\x01\x12\x16\n\x11QI_SQL_AWS_ATHENA\x10\xca\x01\x62\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'interface.vdbe_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _globals['_QUERYINTERFACE']._serialized_start=205 + _globals['_QUERYINTERFACE']._serialized_end=328 + _globals['_VIRTUALENGINE']._serialized_start=30 + _globals['_VIRTUALENGINE']._serialized_end=155 + _globals['_VIRTUALTABLE']._serialized_start=157 + _globals['_VIRTUALTABLE']._serialized_end=203 +# @@protoc_insertion_point(module_scope) diff --git a/src/brad/proto_gen/interface/vdbe_pb2.pyi b/src/brad/proto_gen/interface/vdbe_pb2.pyi new file mode 100644 index 00000000..01107ed6 --- /dev/null +++ b/src/brad/proto_gen/interface/vdbe_pb2.pyi @@ -0,0 +1,40 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union + +DESCRIPTOR: _descriptor.FileDescriptor + +class QueryInterface(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = [] # type: ignore + QI_UNKNOWN: _ClassVar[QueryInterface] + QI_SQL_POSTGRESQL: _ClassVar[QueryInterface] + QI_SQL_MYSQL: _ClassVar[QueryInterface] + QI_SQL_AWS_REDSHIFT: _ClassVar[QueryInterface] + QI_SQL_AWS_ATHENA: _ClassVar[QueryInterface] +QI_UNKNOWN: QueryInterface +QI_SQL_POSTGRESQL: QueryInterface +QI_SQL_MYSQL: QueryInterface +QI_SQL_AWS_REDSHIFT: QueryInterface +QI_SQL_AWS_ATHENA: QueryInterface + +class VirtualEngine(_message.Message): + __slots__ = ["name", "qi", "tables", "max_staleness_ms"] + NAME_FIELD_NUMBER: _ClassVar[int] + QI_FIELD_NUMBER: _ClassVar[int] + TABLES_FIELD_NUMBER: _ClassVar[int] + MAX_STALENESS_MS_FIELD_NUMBER: _ClassVar[int] + name: str + qi: QueryInterface + tables: _containers.RepeatedCompositeFieldContainer[VirtualTable] + max_staleness_ms: int + def __init__(self, name: _Optional[str] = ..., qi: _Optional[_Union[QueryInterface, str]] = ..., tables: _Optional[_Iterable[_Union[VirtualTable, _Mapping]]] = ..., max_staleness_ms: _Optional[int] = ...) -> None: ... + +class VirtualTable(_message.Message): + __slots__ = ["name", "writable"] + NAME_FIELD_NUMBER: _ClassVar[int] + WRITABLE_FIELD_NUMBER: _ClassVar[int] + name: str + writable: bool + def __init__(self, name: _Optional[str] = ..., writable: bool = ...) -> None: ... diff --git a/src/brad/proto_gen/interface_pb2.py b/src/brad/proto_gen/interface_pb2.py new file mode 100644 index 00000000..dbd1dac2 --- /dev/null +++ b/src/brad/proto_gen/interface_pb2.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: interface.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from brad.proto_gen.interface import blueprint_pb2 as interface_dot_blueprint__pb2 +from brad.proto_gen.interface import schema_pb2 as interface_dot_schema__pb2 +from brad.proto_gen.interface import vdbe_pb2 as interface_dot_vdbe__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0finterface.proto\x12\x04vdbe\x1a\x19interface/blueprint.proto\x1a\x16interface/schema.proto\x1a\x14interface/vdbe.proto\"\x87\x01\n\x0bSystemState\x12\x13\n\x0bschema_name\x18\x01 \x01(\t\x12\x1b\n\x06tables\x18\x02 \x03(\x0b\x32\x0b.vdbe.Table\x12\"\n\x05vdbes\x18\x03 \x03(\x0b\x32\x13.vdbe.VirtualEngine\x12\"\n\tblueprint\x18\x04 \x01(\x0b\x32\x0f.vdbe.Blueprintb\x06proto3') + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'interface_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _globals['_SYSTEMSTATE']._serialized_start=99 + _globals['_SYSTEMSTATE']._serialized_end=234 +# @@protoc_insertion_point(module_scope) diff --git a/src/brad/proto_gen/interface_pb2.pyi b/src/brad/proto_gen/interface_pb2.pyi new file mode 100644 index 00000000..05ca2629 --- /dev/null +++ b/src/brad/proto_gen/interface_pb2.pyi @@ -0,0 +1,21 @@ +from interface import blueprint_pb2 as _blueprint_pb2 +from interface import schema_pb2 as _schema_pb2 +from interface import vdbe_pb2 as _vdbe_pb2 +from google.protobuf.internal import containers as _containers +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union + +DESCRIPTOR: _descriptor.FileDescriptor + +class SystemState(_message.Message): + __slots__ = ["schema_name", "tables", "vdbes", "blueprint"] + SCHEMA_NAME_FIELD_NUMBER: _ClassVar[int] + TABLES_FIELD_NUMBER: _ClassVar[int] + VDBES_FIELD_NUMBER: _ClassVar[int] + BLUEPRINT_FIELD_NUMBER: _ClassVar[int] + schema_name: str + tables: _containers.RepeatedCompositeFieldContainer[_schema_pb2.Table] + vdbes: _containers.RepeatedCompositeFieldContainer[_vdbe_pb2.VirtualEngine] + blueprint: _blueprint_pb2.Blueprint + def __init__(self, schema_name: _Optional[str] = ..., tables: _Optional[_Iterable[_Union[_schema_pb2.Table, _Mapping]]] = ..., vdbes: _Optional[_Iterable[_Union[_vdbe_pb2.VirtualEngine, _Mapping]]] = ..., blueprint: _Optional[_Union[_blueprint_pb2.Blueprint, _Mapping]] = ...) -> None: ... From 3ec6e687ff74a4178f9537d3f37bbf784365b893 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Fri, 13 Sep 2024 17:50:48 -0400 Subject: [PATCH 4/6] Implement example, start building new bootstrap tool --- config/vdbes/quickflix.yml | 50 +++++++++++++++++++ proto/interface/blueprint.proto | 1 + setup.py | 2 +- src/brad/admin/bootstrap_vdbes.py | 27 ++++++++++ src/brad/blueprint/vdbe.py | 25 ++++++++++ src/brad/exec/admin.py | 2 + src/brad/proto_gen/interface/blueprint_pb2.py | 22 ++++---- .../proto_gen/interface/blueprint_pb2.pyi | 6 ++- 8 files changed, 121 insertions(+), 14 deletions(-) create mode 100644 config/vdbes/quickflix.yml create mode 100644 src/brad/admin/bootstrap_vdbes.py create mode 100644 src/brad/blueprint/vdbe.py diff --git a/config/vdbes/quickflix.yml b/config/vdbes/quickflix.yml new file mode 100644 index 00000000..0665247a --- /dev/null +++ b/config/vdbes/quickflix.yml @@ -0,0 +1,50 @@ +schema_name: quickflix + +vdbes: + - name: order_flow + tables: + - name: tickets + writable: true + - name: customers + writable: true + max_staleness_ms: 0 + query_interface: SQL_POSTGRESQL + + - name: transform + tables: + - name: tickets + - name: customers + - name: sales_history + writable: true + max_staleness_ms: 3600000 # 1 hour + query_interface: SQL_AWS_REDSHIFT + + - name: analysis + tables: + - name: sales_history + max_staleness_ms: 0 + query_interface: SQL_AWS_REDSHIFT + +tables: + - name: tickets + columns: + - name: t_customer_id + type: INT_64 + - name: t_movie_id + type: INT_64 + + - name: customers + columns: + - name: c_customer_id + type: INT_64 + - name: c_name + type: STRING + + - name: sales_history + columns: + - name: sh_customer_id + type: INT_64 + - name: sh_movie_id + type: INT_64 + - name: sh_name + type: STRING diff --git a/proto/interface/blueprint.proto b/proto/interface/blueprint.proto index 3b2af1a7..daa21835 100644 --- a/proto/interface/blueprint.proto +++ b/proto/interface/blueprint.proto @@ -9,6 +9,7 @@ message Blueprint { Provisioning aurora = 101; Provisioning redshift = 102; RoutingPolicy policy = 201; + repeated PhysicalSnapshot snapshots = 301; } message RoutingPolicy { diff --git a/setup.py b/setup.py index c321b6d7..34653397 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ "ddsketch", "tqdm", "psycopg[binary]", + "pydantic", ] DEV_REQUIRES = [ @@ -68,7 +69,6 @@ UI_REQUIRES = [ "fastapi", "uvicorn[standard]", - "pydantic", "requests", "types-requests", ] diff --git a/src/brad/admin/bootstrap_vdbes.py b/src/brad/admin/bootstrap_vdbes.py new file mode 100644 index 00000000..f709da62 --- /dev/null +++ b/src/brad/admin/bootstrap_vdbes.py @@ -0,0 +1,27 @@ +import logging + +logger = logging.getLogger(__name__) + + +def register_admin_action(subparser) -> None: + parser = subparser.add_parser( + "bootstrap_vdbes", help="Set up a new virtual infrastructure on BRAD." + ) + parser.add_argument( + "--physical-config-file", + type=str, + required=True, + help="Path to BRAD's physical configuration file.", + ) + parser.add_argument( + "--vdbe-file", + type=str, + required=True, + help="Path to the virtual infrastructure definition to boostrap.", + ) + parser.set_defaults(admin_action=bootstrap_vdbes) + + +# This method is called by `brad.exec.admin.main`. +def bootstrap_vdbes(_args) -> None: + logger.info("Running VDBE boostrap.") diff --git a/src/brad/blueprint/vdbe.py b/src/brad/blueprint/vdbe.py new file mode 100644 index 00000000..e2d1a235 --- /dev/null +++ b/src/brad/blueprint/vdbe.py @@ -0,0 +1,25 @@ +import enum + +from pydantic import BaseModel +from typing import Set + +# This class contains Python definitions + + +class QueryInterface(enum.Enum): + SqlPostgresql = "SQL_POSTGRESQL" + SqlMysql = "SQL_MYSQL" + SqlAwsRedshift = "AWS_REDSHIFT" + SqlAwsAthena = "AWS_ATHENA" + + +class VirtualTable(BaseModel): + name: str + writable: bool + + +class VirtualEngine(BaseModel): + name: str + query_interface: QueryInterface + max_staleness_ms: int + tables: Set[VirtualTable] diff --git a/src/brad/exec/admin.py b/src/brad/exec/admin.py index d70e7d7d..dde894b2 100644 --- a/src/brad/exec/admin.py +++ b/src/brad/exec/admin.py @@ -16,6 +16,7 @@ import brad.admin.clean_dataset as clean_dataset import brad.admin.alter_schema as alter_schema import brad.admin.table_adjustments as table_adjustments +import brad.admin.bootstrap_vdbes as bootstrap_vdbes logger = logging.getLogger(__name__) @@ -31,6 +32,7 @@ def register_command(subparsers) -> None: help="Set to enable debug logging.", ) admin_subparsers = parser.add_subparsers(title="Administrative Actions") + bootstrap_vdbes.register_admin_action(admin_subparsers) bootstrap_schema.register_admin_action(admin_subparsers) drop_schema.register_admin_action(admin_subparsers) bulk_load.register_admin_action(admin_subparsers) diff --git a/src/brad/proto_gen/interface/blueprint_pb2.py b/src/brad/proto_gen/interface/blueprint_pb2.py index 1b185070..b1037462 100644 --- a/src/brad/proto_gen/interface/blueprint_pb2.py +++ b/src/brad/proto_gen/interface/blueprint_pb2.py @@ -13,7 +13,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x19interface/blueprint.proto\x12\x04vdbe\"{\n\tBlueprint\x12\"\n\x06\x61urora\x18\x65 \x01(\x0b\x32\x12.vdbe.Provisioning\x12$\n\x08redshift\x18\x66 \x01(\x0b\x32\x12.vdbe.Provisioning\x12$\n\x06policy\x18\xc9\x01 \x01(\x0b\x32\x13.vdbe.RoutingPolicy\"\x1f\n\rRoutingPolicy\x12\x0e\n\x06policy\x18\x01 \x01(\x0c\"Q\n\x10PhysicalSnapshot\x12\r\n\x05vdbes\x18\x01 \x03(\t\x12\x0e\n\x06tables\x18\x02 \x03(\t\x12\x1e\n\x08location\x18\x03 \x01(\x0e\x32\x0c.vdbe.Engine\"8\n\x0cProvisioning\x12\x15\n\rinstance_type\x18\x01 \x01(\t\x12\x11\n\tnum_nodes\x18\x02 \x01(\r*W\n\x06\x45ngine\x12\x12\n\x0e\x45NGINE_UNKNOWN\x10\x00\x12\x11\n\rENGINE_AURORA\x10\x01\x12\x13\n\x0f\x45NGINE_REDSHIFT\x10\x02\x12\x11\n\rENGINE_ATHENA\x10\x03\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x19interface/blueprint.proto\x12\x04vdbe\"\xa7\x01\n\tBlueprint\x12\"\n\x06\x61urora\x18\x65 \x01(\x0b\x32\x12.vdbe.Provisioning\x12$\n\x08redshift\x18\x66 \x01(\x0b\x32\x12.vdbe.Provisioning\x12$\n\x06policy\x18\xc9\x01 \x01(\x0b\x32\x13.vdbe.RoutingPolicy\x12*\n\tsnapshots\x18\xad\x02 \x03(\x0b\x32\x16.vdbe.PhysicalSnapshot\"\x1f\n\rRoutingPolicy\x12\x0e\n\x06policy\x18\x01 \x01(\x0c\"Q\n\x10PhysicalSnapshot\x12\r\n\x05vdbes\x18\x01 \x03(\t\x12\x0e\n\x06tables\x18\x02 \x03(\t\x12\x1e\n\x08location\x18\x03 \x01(\x0e\x32\x0c.vdbe.Engine\"8\n\x0cProvisioning\x12\x15\n\rinstance_type\x18\x01 \x01(\t\x12\x11\n\tnum_nodes\x18\x02 \x01(\r*W\n\x06\x45ngine\x12\x12\n\x0e\x45NGINE_UNKNOWN\x10\x00\x12\x11\n\rENGINE_AURORA\x10\x01\x12\x13\n\x0f\x45NGINE_REDSHIFT\x10\x02\x12\x11\n\rENGINE_ATHENA\x10\x03\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -21,14 +21,14 @@ if _descriptor._USE_C_DESCRIPTORS == False: DESCRIPTOR._options = None - _globals['_ENGINE']._serialized_start=334 - _globals['_ENGINE']._serialized_end=421 - _globals['_BLUEPRINT']._serialized_start=35 - _globals['_BLUEPRINT']._serialized_end=158 - _globals['_ROUTINGPOLICY']._serialized_start=160 - _globals['_ROUTINGPOLICY']._serialized_end=191 - _globals['_PHYSICALSNAPSHOT']._serialized_start=193 - _globals['_PHYSICALSNAPSHOT']._serialized_end=274 - _globals['_PROVISIONING']._serialized_start=276 - _globals['_PROVISIONING']._serialized_end=332 + _globals['_ENGINE']._serialized_start=379 + _globals['_ENGINE']._serialized_end=466 + _globals['_BLUEPRINT']._serialized_start=36 + _globals['_BLUEPRINT']._serialized_end=203 + _globals['_ROUTINGPOLICY']._serialized_start=205 + _globals['_ROUTINGPOLICY']._serialized_end=236 + _globals['_PHYSICALSNAPSHOT']._serialized_start=238 + _globals['_PHYSICALSNAPSHOT']._serialized_end=319 + _globals['_PROVISIONING']._serialized_start=321 + _globals['_PROVISIONING']._serialized_end=377 # @@protoc_insertion_point(module_scope) diff --git a/src/brad/proto_gen/interface/blueprint_pb2.pyi b/src/brad/proto_gen/interface/blueprint_pb2.pyi index 8f66b2ec..975f3ef3 100644 --- a/src/brad/proto_gen/interface/blueprint_pb2.pyi +++ b/src/brad/proto_gen/interface/blueprint_pb2.pyi @@ -18,14 +18,16 @@ ENGINE_REDSHIFT: Engine ENGINE_ATHENA: Engine class Blueprint(_message.Message): - __slots__ = ["aurora", "redshift", "policy"] + __slots__ = ["aurora", "redshift", "policy", "snapshots"] AURORA_FIELD_NUMBER: _ClassVar[int] REDSHIFT_FIELD_NUMBER: _ClassVar[int] POLICY_FIELD_NUMBER: _ClassVar[int] + SNAPSHOTS_FIELD_NUMBER: _ClassVar[int] aurora: Provisioning redshift: Provisioning policy: RoutingPolicy - def __init__(self, aurora: _Optional[_Union[Provisioning, _Mapping]] = ..., redshift: _Optional[_Union[Provisioning, _Mapping]] = ..., policy: _Optional[_Union[RoutingPolicy, _Mapping]] = ...) -> None: ... + snapshots: _containers.RepeatedCompositeFieldContainer[PhysicalSnapshot] + def __init__(self, aurora: _Optional[_Union[Provisioning, _Mapping]] = ..., redshift: _Optional[_Union[Provisioning, _Mapping]] = ..., policy: _Optional[_Union[RoutingPolicy, _Mapping]] = ..., snapshots: _Optional[_Iterable[_Union[PhysicalSnapshot, _Mapping]]] = ...) -> None: ... class RoutingPolicy(_message.Message): __slots__ = ["policy"] From 2b9ff0a7bffe64bb4c69bc812e2dfd27db30395a Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Thu, 26 Sep 2024 10:44:09 -0400 Subject: [PATCH 5/6] Remove utils --- setup.py | 1 - src/brad/admin/bootstrap_vdbes.py | 27 --------------------------- src/brad/blueprint/vdbe.py | 25 ------------------------- src/brad/exec/admin.py | 2 -- 4 files changed, 55 deletions(-) delete mode 100644 src/brad/admin/bootstrap_vdbes.py delete mode 100644 src/brad/blueprint/vdbe.py diff --git a/setup.py b/setup.py index 34653397..d6a4cd2b 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,6 @@ "ddsketch", "tqdm", "psycopg[binary]", - "pydantic", ] DEV_REQUIRES = [ diff --git a/src/brad/admin/bootstrap_vdbes.py b/src/brad/admin/bootstrap_vdbes.py deleted file mode 100644 index f709da62..00000000 --- a/src/brad/admin/bootstrap_vdbes.py +++ /dev/null @@ -1,27 +0,0 @@ -import logging - -logger = logging.getLogger(__name__) - - -def register_admin_action(subparser) -> None: - parser = subparser.add_parser( - "bootstrap_vdbes", help="Set up a new virtual infrastructure on BRAD." - ) - parser.add_argument( - "--physical-config-file", - type=str, - required=True, - help="Path to BRAD's physical configuration file.", - ) - parser.add_argument( - "--vdbe-file", - type=str, - required=True, - help="Path to the virtual infrastructure definition to boostrap.", - ) - parser.set_defaults(admin_action=bootstrap_vdbes) - - -# This method is called by `brad.exec.admin.main`. -def bootstrap_vdbes(_args) -> None: - logger.info("Running VDBE boostrap.") diff --git a/src/brad/blueprint/vdbe.py b/src/brad/blueprint/vdbe.py deleted file mode 100644 index e2d1a235..00000000 --- a/src/brad/blueprint/vdbe.py +++ /dev/null @@ -1,25 +0,0 @@ -import enum - -from pydantic import BaseModel -from typing import Set - -# This class contains Python definitions - - -class QueryInterface(enum.Enum): - SqlPostgresql = "SQL_POSTGRESQL" - SqlMysql = "SQL_MYSQL" - SqlAwsRedshift = "AWS_REDSHIFT" - SqlAwsAthena = "AWS_ATHENA" - - -class VirtualTable(BaseModel): - name: str - writable: bool - - -class VirtualEngine(BaseModel): - name: str - query_interface: QueryInterface - max_staleness_ms: int - tables: Set[VirtualTable] diff --git a/src/brad/exec/admin.py b/src/brad/exec/admin.py index dde894b2..d70e7d7d 100644 --- a/src/brad/exec/admin.py +++ b/src/brad/exec/admin.py @@ -16,7 +16,6 @@ import brad.admin.clean_dataset as clean_dataset import brad.admin.alter_schema as alter_schema import brad.admin.table_adjustments as table_adjustments -import brad.admin.bootstrap_vdbes as bootstrap_vdbes logger = logging.getLogger(__name__) @@ -32,7 +31,6 @@ def register_command(subparsers) -> None: help="Set to enable debug logging.", ) admin_subparsers = parser.add_subparsers(title="Administrative Actions") - bootstrap_vdbes.register_admin_action(admin_subparsers) bootstrap_schema.register_admin_action(admin_subparsers) drop_schema.register_admin_action(admin_subparsers) bulk_load.register_admin_action(admin_subparsers) From 93d8d698248bfc48d54084bba54522975de1bf81 Mon Sep 17 00:00:00 2001 From: Geoffrey Yu Date: Thu, 26 Sep 2024 10:46:16 -0400 Subject: [PATCH 6/6] Undo removal --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index d6a4cd2b..c321b6d7 100644 --- a/setup.py +++ b/setup.py @@ -68,6 +68,7 @@ UI_REQUIRES = [ "fastapi", "uvicorn[standard]", + "pydantic", "requests", "types-requests", ]