Skip to content

Commit

Permalink
explicitly check for schema_id
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinjqliu committed Mar 2, 2024
1 parent 36b56eb commit f9711b3
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 22 deletions.
8 changes: 4 additions & 4 deletions tests/catalog/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,9 +614,9 @@ def test_add_column(catalog: InMemoryCatalog) -> None:
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
NestedField(field_id=4, name="new_column1", field_type=IntegerType(), required=False),
schema_id=0,
identifier_field_ids=[],
)
assert given_table.schema().schema_id == 1

transaction = given_table.transaction()
transaction.update_schema().add_column(path="new_column2", field_type=IntegerType(), doc="doc").commit()
Expand All @@ -628,9 +628,9 @@ def test_add_column(catalog: InMemoryCatalog) -> None:
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
NestedField(field_id=4, name="new_column1", field_type=IntegerType(), required=False),
NestedField(field_id=5, name="new_column2", field_type=IntegerType(), required=False, doc="doc"),
schema_id=0,
identifier_field_ids=[],
)
assert given_table.schema().schema_id == 2


def test_add_column_with_statement(catalog: InMemoryCatalog) -> None:
Expand All @@ -644,9 +644,9 @@ def test_add_column_with_statement(catalog: InMemoryCatalog) -> None:
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
NestedField(field_id=4, name="new_column1", field_type=IntegerType(), required=False),
schema_id=0,
identifier_field_ids=[],
)
assert given_table.schema().schema_id == 1

with given_table.transaction() as tx:
tx.update_schema().add_column(path="new_column2", field_type=IntegerType(), doc="doc").commit()
Expand All @@ -657,9 +657,9 @@ def test_add_column_with_statement(catalog: InMemoryCatalog) -> None:
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
NestedField(field_id=4, name="new_column1", field_type=IntegerType(), required=False),
NestedField(field_id=5, name="new_column2", field_type=IntegerType(), required=False, doc="doc"),
schema_id=0,
identifier_field_ids=[],
)
assert given_table.schema().schema_id == 2


def test_catalog_repr(catalog: InMemoryCatalog) -> None:
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_partition_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,9 +419,9 @@ def test_change_specs_and_schema_transaction(catalog: Catalog) -> None:
NestedField(field_id=2, name='event_ts', field_type=TimestampType(), required=False),
NestedField(field_id=3, name='str', field_type=StringType(), required=False),
NestedField(field_id=4, name='col_string', field_type=StringType(), required=False),
schema_id=1,
identifier_field_ids=[],
)
assert table.schema().schema_id == 1


@pytest.mark.integration
Expand Down
1 change: 0 additions & 1 deletion tests/integration/test_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ def create_table(catalog: Catalog) -> Table:
NestedField(field_id=2, name="int", field_type=IntegerType(), required=True),
NestedField(field_id=3, name="bool", field_type=BooleanType(), required=False),
NestedField(field_id=4, name="datetime", field_type=TimestampType(), required=False),
schema_id=1,
)

return catalog.create_table(identifier=TABLE_NAME, schema=schema)
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_rest_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,19 +361,19 @@ def test_revert_changes(simple_table: Table, table_schema_simple: Schema) -> Non
NestedField(field_id=1, name='foo', field_type=StringType(), required=False),
NestedField(field_id=2, name='bar', field_type=IntegerType(), required=True),
NestedField(field_id=3, name='baz', field_type=BooleanType(), required=False),
schema_id=0,
identifier_field_ids=[2],
),
1: Schema(
NestedField(field_id=1, name='foo', field_type=StringType(), required=False),
NestedField(field_id=2, name='bar', field_type=IntegerType(), required=True),
NestedField(field_id=3, name='baz', field_type=BooleanType(), required=False),
NestedField(field_id=4, name='data', field_type=IntegerType(), required=False),
schema_id=1,
identifier_field_ids=[2],
),
}
assert simple_table.schema().schema_id == 0
assert simple_table.schemas()[0].schema_id == 0
assert simple_table.schemas()[1].schema_id == 1


@pytest.mark.integration
Expand Down
31 changes: 18 additions & 13 deletions tests/table/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,26 +107,26 @@ def test_schema(table_v2: Table) -> None:
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
schema_id=1,
identifier_field_ids=[1, 2],
)
assert table_v2.schema().schema_id == 1


def test_schemas(table_v2: Table) -> None:
assert table_v2.schemas() == {
0: Schema(
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
schema_id=0,
identifier_field_ids=[],
),
1: Schema(
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
schema_id=1,
identifier_field_ids=[1, 2],
),
}
assert table_v2.schemas()[0].schema_id == 0
assert table_v2.schemas()[1].schema_id == 1


def test_spec(table_v2: Table) -> None:
Expand Down Expand Up @@ -266,31 +266,34 @@ def test_table_scan_ref_does_not_exists(table_v2: Table) -> None:

def test_table_scan_projection_full_schema(table_v2: Table) -> None:
scan = table_v2.scan()
assert scan.select("x", "y", "z").projection() == Schema(
projection_schema = scan.select("x", "y", "z").projection()
assert projection_schema == Schema(
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
schema_id=1,
identifier_field_ids=[1, 2],
)
assert projection_schema.schema_id == 1


def test_table_scan_projection_single_column(table_v2: Table) -> None:
scan = table_v2.scan()
assert scan.select("y").projection() == Schema(
projection_schema = scan.select("y").projection()
assert projection_schema == Schema(
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
schema_id=1,
identifier_field_ids=[2],
)
assert projection_schema.schema_id == 1


def test_table_scan_projection_single_column_case_sensitive(table_v2: Table) -> None:
scan = table_v2.scan()
assert scan.with_case_sensitive(False).select("Y").projection() == Schema(
projection_schema = scan.with_case_sensitive(False).select("Y").projection()
assert projection_schema == Schema(
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
schema_id=1,
identifier_field_ids=[2],
)
assert projection_schema.schema_id == 1


def test_table_scan_projection_unknown_column(table_v2: Table) -> None:
Expand Down Expand Up @@ -983,20 +986,22 @@ def test_correct_schema() -> None:
)

# Should use the current schema, instead of the one from the snapshot
assert t.scan().projection() == Schema(
projection_schema = t.scan().projection()
assert projection_schema == Schema(
NestedField(field_id=1, name='x', field_type=LongType(), required=True),
NestedField(field_id=2, name='y', field_type=LongType(), required=True),
NestedField(field_id=3, name='z', field_type=LongType(), required=True),
schema_id=1,
identifier_field_ids=[1, 2],
)
assert projection_schema.schema_id == 1

# When we explicitly filter on the commit, we want to have the schema that's linked to the snapshot
assert t.scan(snapshot_id=123).projection() == Schema(
projection_schema = t.scan(snapshot_id=123).projection()
assert projection_schema == Schema(
NestedField(field_id=1, name='x', field_type=LongType(), required=True),
schema_id=0,
identifier_field_ids=[],
)
assert projection_schema.schema_id == 0

with pytest.warns(UserWarning, match="Metadata does not contain schema with id: 10"):
t.scan(snapshot_id=234).projection()
Expand Down
1 change: 0 additions & 1 deletion tests/table/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ def test_v1_metadata_parsing_directly(example_table_metadata_v1: Dict[str, Any])
NestedField(field_id=1, name="x", field_type=LongType(), required=True),
NestedField(field_id=2, name="y", field_type=LongType(), required=True, doc="comment"),
NestedField(field_id=3, name="z", field_type=LongType(), required=True),
schema_id=0,
identifier_field_ids=[],
)
]
Expand Down

0 comments on commit f9711b3

Please sign in to comment.