-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Extract schema information from the underlying connections, add Arrow schema conversion utility (#497)

This PR modifies BRAD's connection code to extract schema information (the column names and types) from the underlying database connections. Each database has slightly different types, so we unify them in a "best effort" way for now. Later on we will have better support for specific SQL dialects.

**High-level summary of the changes**
- Add `result_schema()` to BRAD cursors
- Define BRAD-specific `DataType`, `Field` and `Schema` classes
- Add a C++ helper function that converts a Python BRAD `Schema` into an Arrow schema
- Various minor modifications to pass the Python schema to the Flight SQL code
- Loading branch information
Showing
14 changed files
with
420 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
#include "python_utils.h" | ||
|
||
#include <vector> | ||
#include <arrow/type.h> | ||
#include <iostream> | ||
|
||
namespace py = pybind11; | ||
|
||
namespace { | ||
|
||
// Maps a Python `brad.connection.schema.DataType` enum member onto the Arrow
// data type used to represent it.
//
// `data_type` must expose an integer `value` attribute. Any value that is not
// one of the codes listed below (including 0) maps to `arrow::null()`.
std::shared_ptr<arrow::DataType> ArrowDataTypeFromBradDataType(const pybind11::object& data_type) {
  // NOTE: These codes mirror `brad.connection.schema.DataType`. If you change
  // a value here, make sure to change it there as well.
  constexpr int64_t kInteger = 1;    // DataType.Integer
  constexpr int64_t kFloat = 2;      // DataType.Float
  constexpr int64_t kDecimal = 3;    // DataType.Decimal
  constexpr int64_t kString = 4;     // DataType.String
  constexpr int64_t kTimestamp = 5;  // DataType.Timestamp

  const int64_t type_code = py::cast<int64_t>(data_type.attr("value"));

  if (type_code == kInteger) {
    return arrow::int64();
  }
  if (type_code == kFloat) {
    // NOTE(review): single precision; confirm that the underlying engines
    // never report double-precision columns under DataType.Float.
    return arrow::float32();
  }
  if (type_code == kDecimal) {
    // Ideally the precision and scale should be stored with the data type
    // instead of being hardcoded here.
    return arrow::decimal(/*precision=*/10, /*scale=*/2);
  }
  if (type_code == kString) {
    return arrow::utf8();
  }
  if (type_code == kTimestamp) {
    // NOTE(review): `date64` is a date type; confirm whether a timestamp
    // type was intended before relying on sub-day precision.
    return arrow::date64();
  }

  // Everything else (including code 0) has no Arrow equivalent here.
  return arrow::null();
}
|
||
} // namespace | ||
|
||
namespace brad { | ||
|
||
// Builds an `arrow::Schema` from a Python `brad.connection.schema.Schema`.
//
// Reads `schema.num_fields` to size the output, then iterates `schema` and
// converts each element's `name` and `data_type` attributes into an Arrow
// field, preserving iteration order. The GIL must be held by the caller.
std::shared_ptr<arrow::Schema> ArrowSchemaFromBradSchema(const pybind11::object& schema) {
  std::vector<std::shared_ptr<arrow::Field>> arrow_fields;
  // Pre-size the vector so that appending the fields does not reallocate.
  arrow_fields.reserve(py::cast<size_t>(schema.attr("num_fields")));

  for (const auto& column : schema) {
    // Read the name before the type to keep the attribute access order stable.
    std::string column_name = py::cast<std::string>(column.attr("name"));
    auto column_type = ArrowDataTypeFromBradDataType(column.attr("data_type"));
    arrow_fields.emplace_back(arrow::field(std::move(column_name), std::move(column_type)));
  }

  return arrow::schema(std::move(arrow_fields));
}
|
||
} // namespace brad |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#pragma once | ||
|
||
#include <memory> | ||
#include <arrow/type.h> | ||
#include <pybind11/pybind11.h> | ||
|
||
namespace brad { | ||
|
||
// Converts a `brad.connection.schema.Schema` Python object into an | ||
// `arrow::Schema`. The passed-in `schema` must be an instance of | ||
// `brad.connection.schema.Schema`. | ||
// | ||
// The implementation reads `schema.num_fields`, iterates over `schema`, and | ||
// reads the `name` and `data_type` attributes of each element. One Arrow | ||
// field is produced per element, in iteration order. A `data_type` whose | ||
// integer `value` is not recognized is mapped to `arrow::null()`. | ||
// | ||
// The implementation does not catch errors from attribute access or from | ||
// Python-to-C++ casts, so presumably pybind11 exceptions propagate to the | ||
// caller (verify against the pybind11 version in use). | ||
// | ||
// NOTE: The GIL must be held while running this function. | ||
std::shared_ptr<arrow::Schema> ArrowSchemaFromBradSchema(const pybind11::object& schema); | ||
|
||
} // namespace brad |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.