diff --git a/java_based_implementation/tests/test_data_types.py b/java_based_implementation/tests/test_data_types.py index 8f09fae..4919a2d 100644 --- a/java_based_implementation/tests/test_data_types.py +++ b/java_based_implementation/tests/test_data_types.py @@ -25,7 +25,7 @@ import unittest from java_based_implementation.api_impl import Catalog -from java_based_implementation.util import setup_utils +from java_based_implementation.util import setup_utils, java_utils from paimon_python_api.table import Schema @@ -80,6 +80,19 @@ def test_bool(self): expected_types = ['BOOLEAN'] self._test_impl(pa_schema, expected_types) + def test_null(self): + pa_schema = pa.schema([('_null', pa.null())]) + expected_types = ['STRING'] + self._test_impl(pa_schema, expected_types) + + def test_unsupported_type(self): + pa_schema = pa.schema([('_array', pa.list_(pa.int32()))]) + schema = Schema(pa_schema) + with self.assertRaises(ValueError) as e: + java_utils.to_paimon_schema(schema) + self.assertEqual( + str(e.exception), 'Found unsupported data type list for field _array.') + def _test_impl(self, pa_schema, expected_types): scheme = Schema(pa_schema) letters = string.ascii_letters diff --git a/java_based_implementation/util/java_utils.py b/java_based_implementation/util/java_utils.py index adad49d..052a537 100644 --- a/java_based_implementation/util/java_utils.py +++ b/java_based_implementation/util/java_utils.py @@ -48,7 +48,7 @@ def to_paimon_schema(schema: Schema): for field in schema.pa_schema: column_name = field.name - column_type = _to_j_type(field.type) + column_type = _to_j_type(column_name, field.type) j_schema_builder.column(column_name, column_type) return j_schema_builder.build() @@ -61,7 +61,7 @@ def check_batch_write(j_table): raise TypeError("Doesn't support writing dynamic bucket or cross partition table.") -def _to_j_type(pa_type): +def _to_j_type(name, pa_type): jvm = get_gateway().jvm # int if pa.types.is_int8(pa_type): @@ -83,5 +83,11 @@ def _to_j_type(pa_type): # bool elif pa.types.is_boolean(pa_type): return jvm.DataTypes.BOOLEAN() + elif pa.types.is_null(pa_type): + print(f"WARN: The type of column '{name}' is null, " + "and it will be converted to string type by default. " + "Please check if the original type is string. " + f"If not, please manually specify the type of '{name}'.") + return jvm.DataTypes.STRING() else: - raise ValueError('Unsupported pyarrow DataType:' + pa_type) + raise ValueError(f'Found unsupported data type {str(pa_type)} for field {name}.')