Skip to content

Commit 28822f1

Browse files
authored
Merge pull request #47 from dbt-msft/shim_dbt-expectations_macros
Shim dbt-expectations macros
2 parents c162fba + bc7fe8f commit 28822f1

15 files changed

+620
-20
lines changed

.circleci/config.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ workflows:
146146
context: DBT_SYNAPSE_PROFILE
147147
- integration-dbt-utils-azuresql: *dbt-context
148148
- integration-dbt-audit-helper-azuresql: *dbt-context
149-
# - integration-dbt-expectations-azuresql: *dbt-context
149+
- integration-dbt-expectations-azuresql: *dbt-context
150150
- integration-dbt-date-azuresql: *dbt-context
151151
- integration-dbt-utils-synapse: &syn-step
152152
<<: *dbt-context
@@ -156,12 +156,12 @@ workflows:
156156
<<: *dbt-context
157157
requires:
158158
- start-synapse
159-
# - integration-dbt-expectations-synapse: *syn-step
159+
- integration-dbt-expectations-synapse: *syn-step
160160
- integration-dbt-date-synapse: *syn-step
161161
- pause-synapse:
162162
<<: *dbt-context
163163
requires:
164164
- integration-dbt-utils-synapse
165165
- integration-dbt-audit-helper-synapse
166-
# - integration-dbt-expectations-synapse
166+
- integration-dbt-expectations-synapse
167167
- integration-dbt-date-synapse

.gitmodules

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,5 @@
1212
branch = main
1313
[submodule "dbt-audit-helper"]
1414
path = dbt-audit-helper
15-
url = https://github.com/alieus/dbt-audit-helper
16-
branch = adapter_dispatch
17-
15+
url = https://github.com/fishtown-analytics/dbt-audit-helper
16+
branch = master

dbt-audit-helper

dbt-expectations

Submodule dbt-expectations updated 44 files

dbt-utils

integration_tests/dbt_expectations/dbt_project.yml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,15 @@ vars:
2424

2525
models:
2626
dbt_expectations_integration_tests:
27-
data_test_factored:
28-
+materialized: table
27+
schema_tests:
28+
data_test_factored:
29+
+materialized: table
30+
emails: &disabled #TODO
31+
+enabled: false
32+
timeseries_data: *disabled
33+
# anders's weird NoneType error
34+
# (see https://github.com/calogica/dbt-expectations/pull/63)
35+
data_test: *disabled
36+
# Need to implement for synapse
37+
data_test_factored: *disabled
38+
timeseries_data_extended: *disabled
Lines changed: 99 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,108 @@
11
{%- macro sqlserver__get_select(model, expression, row_condition, group_by) %}
22
select
3-
{% for g in group_by -%}
4-
{{ g }} as col_{{ loop.index }},
5-
{% endfor -%}
3+
{# {%- if group_by %} #}
4+
{% for g in group_by or [] -%}
5+
{{ g }} as col_{{ loop.index }},
6+
{% endfor -%}
7+
{# {% endif %} #}
68
{{ expression }} as expression
79
from
810
{{ model }}
911
{%- if row_condition %}
1012
where
1113
{{ row_condition }}
1214
{% endif %}
13-
group by
14-
{% for g in group_by -%}
15-
{{ g }}{% if not loop.last %}, {% endif %}
16-
{% endfor %}
17-
{% endmacro -%}
15+
{%- if group_by %}
16+
{%- if group_by|length > 1 or group_by[0] != "'col'" %}
17+
group by
18+
{% for g in group_by -%}
19+
{{ g }}{% if not loop.last %},{% endif %}
20+
{% endfor %}
21+
{% endif %}
22+
{% endif %}
23+
{% endmacro -%}
24+
25+
{%- macro sqlserver__test_equal_expression(model, expression,
26+
compare_model,
27+
compare_expression,
28+
group_by,
29+
compare_group_by,
30+
row_condition,
31+
compare_row_condition,
32+
tolerance,
33+
tolerance_percent,
34+
return_difference) -%}
35+
36+
{%- set compare_model = model if not compare_model else compare_model -%}
37+
{%- set compare_expression = expression if not compare_expression else compare_expression -%}
38+
{%- set compare_row_condition = row_condition if not compare_row_condition else compare_row_condition -%}
39+
{%- set compare_group_by = group_by if not compare_group_by else compare_group_by -%}
40+
41+
{%- set n_cols = group_by|length if group_by else 0 %}
42+
with a as (
43+
{{ dbt_expectations.get_select(model, expression, row_condition, group_by) }}
44+
),
45+
b as (
46+
{{ dbt_expectations.get_select(compare_model, compare_expression, compare_row_condition, compare_group_by) }}
47+
),
48+
final as (
49+
50+
select
51+
{% for i in range(1, n_cols + 1) -%}
52+
coalesce(a.col_{{ i }}, b.col_{{ i }}) as col_{{ i }},
53+
{% endfor %}
54+
a.expression,
55+
b.expression as compare_expression,
56+
abs(coalesce(a.expression, 0) - coalesce(b.expression, 0)) as expression_difference,
57+
abs(coalesce(a.expression, 0) - coalesce(b.expression, 0))/
58+
nullif(a.expression, 0) as expression_difference_percent
59+
from
60+
{% if n_cols > 0 %}
61+
a
62+
full outer join
63+
b on
64+
{% for i in range(1, n_cols + 1) -%}
65+
a.col_{{ i }} = b.col_{{ i }} {% if not loop.last %}and{% endif %}
66+
{% endfor -%}
67+
{% else %}
68+
a cross join b
69+
{% endif %}
70+
)
71+
-- DEBUG:
72+
-- select * from final
73+
select
74+
{% if return_difference %}
75+
coalesce(sum(expression_difference), 0)
76+
{% else %}
77+
count(*)
78+
{% endif %}
79+
from final
80+
where
81+
{% if tolerance_percent %}
82+
expression_difference_percent > {{ tolerance_percent }}
83+
{% else %}
84+
expression_difference > {{ tolerance }}
85+
{% endif %}
86+
{%- endmacro -%}
87+
88+
{%- macro synapse__test_equal_expression(model, expression,
89+
compare_model,
90+
compare_expression,
91+
group_by,
92+
compare_group_by,
93+
row_condition,
94+
compare_row_condition,
95+
tolerance,
96+
tolerance_percent,
97+
return_difference) -%}
98+
{% do return( tsql_utils.sqlserver__test_equal_expression(model, expression,
99+
compare_model,
100+
compare_expression,
101+
group_by,
102+
compare_group_by,
103+
row_condition,
104+
compare_row_condition,
105+
tolerance,
106+
tolerance_percent,
107+
return_difference)) -%}
108+
{%- endmacro -%}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
{% macro sqlserver__expression_is_true(model, expression, test_condition, group_by_columns, row_condition) %}
2+
3+
{% if test_condition == "= true" %}
4+
{% set test_condition = "= 1" %}
5+
{% endif %}
6+
7+
8+
with grouped_expression as (
9+
10+
select
11+
{% if group_by_columns %}
12+
{% for group_by_column in group_by_columns -%}
13+
{{ group_by_column }} as col_{{ loop.index }},
14+
{% endfor -%}
15+
{% endif %}
16+
case when {{ expression }} then 1 else 0 end as expression
17+
from {{ model }}
18+
{%- if row_condition %}
19+
where
20+
{{ row_condition }}
21+
{% endif %}
22+
{% if group_by_columns %}
23+
group by
24+
{% for group_by_column in group_by_columns -%}
25+
{{ group_by_column }}{% if not loop.last %},{% endif %}
26+
{% endfor %}
27+
{% endif %}
28+
29+
),
30+
validation_errors as (
31+
32+
select
33+
*
34+
from
35+
grouped_expression
36+
where
37+
not(expression {{ test_condition }})
38+
39+
)
40+
41+
select count(*)
42+
from validation_errors
43+
44+
{% endmacro %}
45+
46+
{% macro synapse__expression_is_true(model, expression, test_condition, group_by_columns, row_condition) %}
47+
{% do return( tsql_utils.sqlserver__expression_is_true(model, expression, test_condition, group_by_columns, row_condition)) %}
48+
{% endmacro %}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
{% macro sqlserver__test_expect_column_most_common_value_to_be_in_set(model, column_name,
2+
value_set,
3+
top_n,
4+
quote_values=False,
5+
data_type="decimal",
6+
row_condition=None
7+
) -%}
8+
9+
with value_counts as (
10+
11+
select
12+
{% if quote_values -%}
13+
{{ column_name }}
14+
{%- else -%}
15+
cast({{ column_name }} as {{ data_type }})
16+
{%- endif %} as value_field,
17+
count(*) as value_count
18+
19+
from {{ model }}
20+
{% if row_condition %}
21+
where {{ row_condition }}
22+
{% endif %}
23+
24+
group by {% if quote_values -%}
25+
{{ column_name }}
26+
{%- else -%}
27+
cast({{ column_name }} as {{ data_type }})
28+
{%- endif %}
29+
30+
),
31+
value_counts_ranked as (
32+
33+
select
34+
*,
35+
row_number() over(order by value_count desc) as value_count_rank
36+
from
37+
value_counts
38+
39+
),
40+
value_count_top_n as (
41+
42+
select
43+
value_field
44+
from
45+
value_counts_ranked
46+
where
47+
value_count_rank = {{ top_n }}
48+
49+
),
50+
set_values as (
51+
52+
{% for value in value_set -%}
53+
select
54+
{% if quote_values -%}
55+
'{{ value }}'
56+
{%- else -%}
57+
cast({{ value }} as {{ data_type }})
58+
{%- endif %} as value_field
59+
{% if not loop.last %}union all{% endif %}
60+
{% endfor %}
61+
62+
),
63+
unique_set_values as (
64+
65+
select distinct value_field
66+
from
67+
set_values
68+
69+
),
70+
validation_errors as (
71+
-- values from the model that are not in the set
72+
select
73+
value_field
74+
from
75+
value_count_top_n
76+
where
77+
value_field not in (select value_field from unique_set_values)
78+
79+
)
80+
81+
select count(*) as validation_errors
82+
from validation_errors
83+
84+
{% endmacro %}
85+
86+
{% macro synapse__test_expect_column_most_common_value_to_be_in_set(model, column_name,
87+
value_set,
88+
top_n,
89+
quote_values,
90+
data_type,
91+
row_condition
92+
) -%}
93+
{% do return( tsql_utils.sqlserver__test_expect_column_most_common_value_to_be_in_set(model, column_name,
94+
value_set,
95+
top_n,
96+
quote_values=False,
97+
data_type="decimal",
98+
row_condition=None
99+
)) -%}
100+
{% endmacro %}

0 commit comments

Comments
 (0)