Skip to content

Commit 85b36e4

Browse files
committed
Test for passing empty values as hints
1 parent 9beabb7 commit 85b36e4

File tree

3 files changed

+49
-5
lines changed

3 files changed

+49
-5
lines changed

dlt/common/schema/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -528,9 +528,9 @@ def diff_table(
528528
else:
529529
new_columns.append(col_b)
530530

531-
if respect_merge_type:
532-
for col_a in tab_a_columns.values():
533-
remove_column_props_with_merge_type(col_a, "remove_if_empty")
531+
# if respect_merge_type:
532+
# for col_a in tab_a_columns.values():
533+
# remove_column_props_with_merge_type(col_a, "remove_if_empty")
534534

535535
# return partial table containing only name and properties that differ (column, filters etc.)
536536
table_name = tab_a["name"]

tests/normalize/test_model_item_normalizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def test_selected_column_names_normalized(
294294
parsed_norm_select_query = sqlglot.parse_one(normalized_select_query, read=dialect)
295295

296296
# Ensure the normalized model query contains a subquery in the FROM clause
297-
from_clause = parsed_norm_select_query.args.get("from")
297+
from_clause = parsed_norm_select_query.find(sqlglot.exp.From)
298298
assert isinstance(from_clause, sqlglot.exp.From)
299299
assert isinstance(from_clause.this, sqlglot.exp.Subquery)
300300
assert isinstance(from_clause.this.this, sqlglot.exp.Select)

tests/pipeline/test_pipeline.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import shutil
1111
import threading
1212
from time import sleep
13-
from typing import Any, List, Tuple, cast
13+
from typing import Any, List, Tuple, cast, Union
1414
from tenacity import retry_if_exception, Retrying, stop_after_attempt
1515
from unittest.mock import patch
1616
import pytest
@@ -1837,6 +1837,50 @@ def infer():
18371837
# print(pipeline.default_schema.to_pretty_yaml())
18381838

18391839

1840+
@pytest.mark.parametrize(
1841+
"empty_value",
1842+
["", []],
1843+
ids=["empty_string", "empty_list"],
1844+
)
1845+
def test_apply_hints_with_empty_values(empty_value: Union[str, List[Any]]) -> None:
1846+
@dlt.resource
1847+
def some_data():
1848+
yield {"id": 1, "val": "some_data"}
1849+
1850+
s = some_data()
1851+
pipeline = dlt.pipeline(pipeline_name="empty_value_hints", destination=DUMMY_COMPLETE)
1852+
1853+
# check initial schema
1854+
pipeline.run(s)
1855+
table = pipeline.default_schema.get_table("some_data")
1856+
assert table["columns"]["id"] == {
1857+
"name": "id",
1858+
"data_type": "bigint",
1859+
"nullable": True,
1860+
}
1861+
1862+
# check schema after setting primary key
1863+
s.apply_hints(primary_key=["id"])
1864+
pipeline.run(s)
1865+
table = pipeline.default_schema.get_table("some_data")
1866+
assert table["columns"]["id"] == {
1867+
"name": "id",
1868+
"data_type": "bigint",
1869+
"nullable": False,
1870+
"primary_key": True,
1871+
}
1872+
1873+
# check schema after passin an empty value as hints, which should remove primary
1874+
s.apply_hints(primary_key="")
1875+
pipeline.run(s)
1876+
table = pipeline.default_schema.get_table("some_data")
1877+
assert table["columns"]["id"] == {
1878+
"name": "id",
1879+
"data_type": "bigint",
1880+
"nullable": False,
1881+
}
1882+
1883+
18401884
def test_invalid_data_edge_cases() -> None:
18411885
# pass lambda directly to run, allowed now because functions can be extracted too
18421886
pipeline = dlt.pipeline(pipeline_name="invalid", destination=DUMMY_COMPLETE)

0 commit comments

Comments
 (0)