Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
yuzelin committed Sep 19, 2024
1 parent df957ae commit 3e3c354
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 4 deletions.
15 changes: 14 additions & 1 deletion java_based_implementation/tests/test_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import unittest

from java_based_implementation.api_impl import Catalog
from java_based_implementation.util import setup_utils
from java_based_implementation.util import setup_utils, java_utils
from paimon_python_api.table import Schema


Expand Down Expand Up @@ -80,6 +80,19 @@ def test_bool(self):
expected_types = ['BOOLEAN']
self._test_impl(pa_schema, expected_types)

def test_null(self):
pa_schema = pa.schema([('_null', pa.null())])
expected_types = ['STRING']
self._test_impl(pa_schema, expected_types)

def test_unsupported_type(self):
pa_schema = pa.schema([('_array', pa.list_(pa.int32()))])
schema = Schema(pa_schema)
with self.assertRaises(ValueError) as e:
java_utils.to_paimon_schema(schema)
self.assertEqual(
str(e.exception), 'Found unsupported data type list<item: int32> for field _array.')

def _test_impl(self, pa_schema, expected_types):
scheme = Schema(pa_schema)
letters = string.ascii_letters
Expand Down
12 changes: 9 additions & 3 deletions java_based_implementation/util/java_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def to_paimon_schema(schema: Schema):

for field in schema.pa_schema:
column_name = field.name
column_type = _to_j_type(field.type)
column_type = _to_j_type(column_name, field.type)
j_schema_builder.column(column_name, column_type)
return j_schema_builder.build()

Expand All @@ -61,7 +61,7 @@ def check_batch_write(j_table):
raise TypeError("Doesn't support writing dynamic bucket or cross partition table.")


def _to_j_type(pa_type):
def _to_j_type(name, pa_type):
jvm = get_gateway().jvm
# int
if pa.types.is_int8(pa_type):
Expand All @@ -83,5 +83,11 @@ def _to_j_type(pa_type):
# bool
elif pa.types.is_boolean(pa_type):
return jvm.DataTypes.BOOLEAN()
elif pa.types.is_null(pa_type):
print(f"WARN: The type of column '{name}' is null, "
"and it will be converted to string type by default. "
"Please check if the original type is string. "
f"If not, please manually specify the type of '{name}'.")
return jvm.DataTypes.STRING()
else:
raise ValueError('Unsupported pyarrow DataType:' + pa_type)
raise ValueError(f'Found unsupported data type {str(pa_type)} for field {name}.')

0 comments on commit 3e3c354

Please sign in to comment.