import dlt
from dlt.sources.rest_api import rest_api_resources


def test_transformation_decorator() -> None:
    # ... (definition of get_even_rows elided in this excerpt) ...

    # get instance without license
    transformation = get_even_rows(dlt.dataset("duckdb", "mock_dataset"))
    assert transformation.name == "get_even_rows"


def test_missing_columns_bug() -> None:
    """Regression test: bare Identifier nodes were not properly type-annotated in
    dlt.dataset.lineage.compute_columns_schema, causing success_count and success_rate
    to have an UNKNOWN type and be excluded by dlt as incomplete columns.
    """
    import dlthub.data_quality as dq
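    # Per the docstring above, dq.prepare_checks builds a query whose UNION branches
    # reference some result columns (success_count, success_rate) as bare, unqualified
    # identifiers; lineage previously failed to type those and dropped them as
    # incomplete columns.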

    @dlt.source
    def jaffleshop():
        jaffle_rest_resources = rest_api_resources(
            {
                "client": {
                    "base_url": "https://jaffle-shop.dlthub.com/api/v1",
                    "paginator": {"type": "header_link"},
                },
                "resources": [
                    "customers",
                    "products",
                    "orders",
                ],
                "resource_defaults": {
                    "endpoint": {
                        "params": {
                            "start_date": "2017-01-01",
                            "end_date": "2017-01-15",
                        },
                    },
                },
            }
        )

        return jaffle_rest_resources

    @dlt.hub.transformation
    def jaffle_checks(dataset: dlt.Dataset) -> dlt.Relation:
        checks = {"orders": [dq.checks.is_unique("id"), dq.checks.case("subtotal > 0")]}
        return dq.prepare_checks(dataset, checks=checks)  # type: ignore
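    # jaffle_checks returns a single relation summarizing the configured checks;
    # pipeline.run below materializes it as the jaffle_checks table, whose columns
    # are asserted further down.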

    pipeline = dlt.pipeline("test_missing_columns", destination="duckdb")
    pipeline.run([jaffleshop()])
    pipeline.run(jaffle_checks(pipeline.dataset()))

    expected_column_names = [
        "table_name",
        "check_qualified_name",
        "row_count",
        "success_count",  # was missing due to unqualified UNION columns
        "success_rate",  # was missing due to unqualified UNION columns
    ]

    # direct query execution returns raw select output (no dlt columns)
    query = dq.prepare_checks(
        pipeline.dataset(),
        checks={
            "orders": [dq.checks.is_unique("id"), dq.checks.case("subtotal > 0")],  # type: ignore
        },
    )
    assert query.arrow().column_names == expected_column_names

    # materialized table includes _dlt_load_id added by pipeline
    with pipeline.sql_client() as client:
        with client.execute_query(
            f"SELECT * FROM {pipeline.pipeline_name}.{pipeline.dataset_name}.jaffle_checks"
        ) as cursor:
            df = cursor.df()
            columns = list(df.columns)
            assert columns == expected_column_names + ["_dlt_load_id"]