Skip to content

Commit

Permalink
Add linter fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
ndrluis committed Mar 7, 2024
1 parent 7b5c156 commit 47b2320
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 124 deletions.
26 changes: 12 additions & 14 deletions tests/catalog/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,20 +92,18 @@ def create_table(
self.__namespaces[namespace] = {}

new_location = location or f's3://warehouse/{"/".join(identifier)}/data'
metadata = TableMetadataV1(
**{
"format-version": 1,
"table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
"location": new_location,
"last-updated-ms": 1602638573874,
"last-column-id": schema.highest_field_id,
"schema": schema.model_dump(),
"partition-spec": partition_spec.model_dump()["fields"],
"properties": properties,
"current-snapshot-id": -1,
"snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}],
}
)
metadata = TableMetadataV1(**{
"format-version": 1,
"table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
"location": new_location,
"last-updated-ms": 1602638573874,
"last-column-id": schema.highest_field_id,
"schema": schema.model_dump(),
"partition-spec": partition_spec.model_dump()["fields"],
"properties": properties,
"current-snapshot-id": -1,
"snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}],
})
table = Table(
identifier=identifier,
metadata=metadata,
Expand Down
10 changes: 4 additions & 6 deletions tests/catalog/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -903,12 +903,10 @@ def test_write_and_evolve(catalog: SqlCatalog, format_version: int) -> None:
'foo': ['a', None, 'z'],
'bar': [19, None, 25],
},
schema=pa.schema(
[
pa.field("foo", pa.string(), nullable=True),
pa.field("bar", pa.int32(), nullable=True),
]
),
schema=pa.schema([
pa.field("foo", pa.string(), nullable=True),
pa.field("bar", pa.int32(), nullable=True),
]),
)

with tbl.transaction() as txn:
Expand Down
48 changes: 22 additions & 26 deletions tests/integration/test_writes.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,26 +97,24 @@

@pytest.fixture(scope="session")
def pa_schema() -> pa.Schema:
return pa.schema(
[
("bool", pa.bool_()),
("string", pa.string()),
("string_long", pa.string()),
("int", pa.int32()),
("long", pa.int64()),
("float", pa.float32()),
("double", pa.float64()),
("timestamp", pa.timestamp(unit="us")),
("timestamptz", pa.timestamp(unit="us", tz="UTC")),
("date", pa.date32()),
# Not supported by Spark
# ("time", pa.time64("us")),
# Not natively supported by Arrow
# ("uuid", pa.fixed(16)),
("binary", pa.large_binary()),
("fixed", pa.binary(16)),
]
)
return pa.schema([
("bool", pa.bool_()),
("string", pa.string()),
("string_long", pa.string()),
("int", pa.int32()),
("long", pa.int64()),
("float", pa.float32()),
("double", pa.float64()),
("timestamp", pa.timestamp(unit="us")),
("timestamptz", pa.timestamp(unit="us", tz="UTC")),
("date", pa.date32()),
# Not supported by Spark
# ("time", pa.time64("us")),
# Not natively supported by Arrow
# ("uuid", pa.fixed(16)),
("binary", pa.large_binary()),
("fixed", pa.binary(16)),
])


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -619,12 +617,10 @@ def test_write_and_evolve(session_catalog: Catalog, format_version: int) -> None
'foo': ['a', None, 'z'],
'bar': [19, None, 25],
},
schema=pa.schema(
[
pa.field("foo", pa.string(), nullable=True),
pa.field("bar", pa.int32(), nullable=True),
]
),
schema=pa.schema([
pa.field("foo", pa.string(), nullable=True),
pa.field("bar", pa.int32(), nullable=True),
]),
)

with tbl.transaction() as txn:
Expand Down
144 changes: 66 additions & 78 deletions tests/table/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,56 +930,52 @@ def test_assert_default_sort_order_id(table_v2: Table) -> None:


def test_correct_schema() -> None:
table_metadata = TableMetadataV2(
**{
"format-version": 2,
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
"location": "s3://bucket/test/location",
"last-sequence-number": 34,
"last-updated-ms": 1602638573590,
"last-column-id": 3,
"current-schema-id": 1,
"schemas": [
{"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]},
{
"type": "struct",
"schema-id": 1,
"identifier-field-ids": [1, 2],
"fields": [
{"id": 1, "name": "x", "required": True, "type": "long"},
{"id": 2, "name": "y", "required": True, "type": "long"},
{"id": 3, "name": "z", "required": True, "type": "long"},
],
},
],
"default-spec-id": 0,
"partition-specs": [
{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}
],
"last-partition-id": 1000,
"default-sort-order-id": 0,
"sort-orders": [],
"current-snapshot-id": 123,
"snapshots": [
{
"snapshot-id": 234,
"timestamp-ms": 1515100955770,
"sequence-number": 0,
"summary": {"operation": "append"},
"manifest-list": "s3://a/b/1.avro",
"schema-id": 10,
},
{
"snapshot-id": 123,
"timestamp-ms": 1515100955770,
"sequence-number": 0,
"summary": {"operation": "append"},
"manifest-list": "s3://a/b/1.avro",
"schema-id": 0,
},
],
}
)
table_metadata = TableMetadataV2(**{
"format-version": 2,
"table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1",
"location": "s3://bucket/test/location",
"last-sequence-number": 34,
"last-updated-ms": 1602638573590,
"last-column-id": 3,
"current-schema-id": 1,
"schemas": [
{"type": "struct", "schema-id": 0, "fields": [{"id": 1, "name": "x", "required": True, "type": "long"}]},
{
"type": "struct",
"schema-id": 1,
"identifier-field-ids": [1, 2],
"fields": [
{"id": 1, "name": "x", "required": True, "type": "long"},
{"id": 2, "name": "y", "required": True, "type": "long"},
{"id": 3, "name": "z", "required": True, "type": "long"},
],
},
],
"default-spec-id": 0,
"partition-specs": [{"spec-id": 0, "fields": [{"name": "x", "transform": "identity", "source-id": 1, "field-id": 1000}]}],
"last-partition-id": 1000,
"default-sort-order-id": 0,
"sort-orders": [],
"current-snapshot-id": 123,
"snapshots": [
{
"snapshot-id": 234,
"timestamp-ms": 1515100955770,
"sequence-number": 0,
"summary": {"operation": "append"},
"manifest-list": "s3://a/b/1.avro",
"schema-id": 10,
},
{
"snapshot-id": 123,
"timestamp-ms": 1515100955770,
"sequence-number": 0,
"summary": {"operation": "append"},
"manifest-list": "s3://a/b/1.avro",
"schema-id": 0,
},
],
})

t = Table(
identifier=("default", "t1"),
Expand Down Expand Up @@ -1018,13 +1014,11 @@ def test_correct_schema() -> None:


def test_schema_mismatch_type(table_schema_simple: Schema) -> None:
other_schema = pa.schema(
(
pa.field("foo", pa.string(), nullable=True),
pa.field("bar", pa.decimal128(18, 6), nullable=False),
pa.field("baz", pa.bool_(), nullable=True),
)
)
other_schema = pa.schema((
pa.field("foo", pa.string(), nullable=True),
pa.field("bar", pa.decimal128(18, 6), nullable=False),
pa.field("baz", pa.bool_(), nullable=True),
))

expected = r"""Mismatch in fields:
┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
Expand All @@ -1041,13 +1035,11 @@ def test_schema_mismatch_type(table_schema_simple: Schema) -> None:


def test_schema_mismatch_nullability(table_schema_simple: Schema) -> None:
other_schema = pa.schema(
(
pa.field("foo", pa.string(), nullable=True),
pa.field("bar", pa.int32(), nullable=True),
pa.field("baz", pa.bool_(), nullable=True),
)
)
other_schema = pa.schema((
pa.field("foo", pa.string(), nullable=True),
pa.field("bar", pa.int32(), nullable=True),
pa.field("baz", pa.bool_(), nullable=True),
))

expected = """Mismatch in fields:
┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓
Expand All @@ -1064,12 +1056,10 @@ def test_schema_mismatch_nullability(table_schema_simple: Schema) -> None:


def test_schema_mismatch_missing_field(table_schema_simple: Schema) -> None:
other_schema = pa.schema(
(
pa.field("foo", pa.string(), nullable=True),
pa.field("baz", pa.bool_(), nullable=True),
)
)
other_schema = pa.schema((
pa.field("foo", pa.string(), nullable=True),
pa.field("baz", pa.bool_(), nullable=True),
))

expected = """Mismatch in fields:
┏━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓
Expand All @@ -1086,14 +1076,12 @@ def test_schema_mismatch_missing_field(table_schema_simple: Schema) -> None:


def test_schema_mismatch_additional_field(table_schema_simple: Schema) -> None:
other_schema = pa.schema(
(
pa.field("foo", pa.string(), nullable=True),
pa.field("bar", pa.int32(), nullable=True),
pa.field("baz", pa.bool_(), nullable=True),
pa.field("new_field", pa.date32(), nullable=True),
)
)
other_schema = pa.schema((
pa.field("foo", pa.string(), nullable=True),
pa.field("bar", pa.int32(), nullable=True),
pa.field("baz", pa.bool_(), nullable=True),
pa.field("new_field", pa.date32(), nullable=True),
))

expected = r"PyArrow table contains more columns: new_field. Update the schema first \(hint, use union_by_name\)."

Expand Down

0 comments on commit 47b2320

Please sign in to comment.