Test for passing empty values as hints

anuunchin · anuunchin · commit 85b36e4f21d9 · 2025-11-20T11:09:50.000+01:00
diff --git a/dlt/common/schema/utils.py b/dlt/common/schema/utils.py
@@ -528,9 +528,9 @@ def diff_table(
         else:
             new_columns.append(col_b)
 
-    if respect_merge_type:
-        for col_a in tab_a_columns.values():
-            remove_column_props_with_merge_type(col_a, "remove_if_empty")
+    #    if respect_merge_type:
+    #        for col_a in tab_a_columns.values():
+    #            remove_column_props_with_merge_type(col_a, "remove_if_empty")
 
     # return partial table containing only name and properties that differ (column, filters etc.)
     table_name = tab_a["name"]
diff --git a/tests/normalize/test_model_item_normalizer.py b/tests/normalize/test_model_item_normalizer.py
@@ -294,7 +294,7 @@ def test_selected_column_names_normalized(
     parsed_norm_select_query = sqlglot.parse_one(normalized_select_query, read=dialect)
 
     # Ensure the normalized model query contains a subquery in the FROM clause
-    from_clause = parsed_norm_select_query.args.get("from")
+    from_clause = parsed_norm_select_query.find(sqlglot.exp.From)
     assert isinstance(from_clause, sqlglot.exp.From)
     assert isinstance(from_clause.this, sqlglot.exp.Subquery)
     assert isinstance(from_clause.this.this, sqlglot.exp.Select)
diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py
@@ -10,7 +10,7 @@
 import shutil
 import threading
 from time import sleep
-from typing import Any, List, Tuple, cast
+from typing import Any, List, Tuple, cast, Union
 from tenacity import retry_if_exception, Retrying, stop_after_attempt
 from unittest.mock import patch
 import pytest
@@ -1837,6 +1837,50 @@ def infer():
     # print(pipeline.default_schema.to_pretty_yaml())
 
 
+@pytest.mark.parametrize("empty_value", ["", []], ids=["empty_string", "empty_list"])
+def test_apply_hints_with_empty_values(empty_value: Union[str, List[Any]]) -> None:
+    """Check that an empty `primary_key` hint removes a previously applied primary key.
+
+    Parametrized over both supported empty values: the empty string and the empty list.
+    """
+    @dlt.resource
+    def some_data():
+        yield {"id": 1, "val": "some_data"}
+
+    s = some_data()
+    pipeline = dlt.pipeline(pipeline_name="empty_value_hints", destination=DUMMY_COMPLETE)
+
+    # check initial schema
+    pipeline.run(s)
+    table = pipeline.default_schema.get_table("some_data")
+    assert table["columns"]["id"] == {
+        "name": "id",
+        "data_type": "bigint",
+        "nullable": True,
+    }
+
+    # check schema after setting primary key
+    s.apply_hints(primary_key=["id"])
+    pipeline.run(s)
+    table = pipeline.default_schema.get_table("some_data")
+    assert table["columns"]["id"] == {
+        "name": "id",
+        "data_type": "bigint",
+        "nullable": False,
+        "primary_key": True,
+    }
+
+    # check schema after passing the parametrized empty value, which should remove primary_key
+    s.apply_hints(primary_key=empty_value)
+    pipeline.run(s)
+    table = pipeline.default_schema.get_table("some_data")
+    assert table["columns"]["id"] == {
+        "name": "id",
+        "data_type": "bigint",
+        "nullable": False,
+    }
+
+
 def test_invalid_data_edge_cases() -> None:
     # pass lambda directly to run, allowed now because functions can be extracted too
     pipeline = dlt.pipeline(pipeline_name="invalid", destination=DUMMY_COMPLETE)