Skip to content

Commit 838522a

Browse files
committed
Merge remote-tracking branch 'private/release/0.10.0'
2 parents 82c61bf + f5b8e51 commit 838522a

17 files changed

+387
-150
lines changed

LICENSE

+1-1
Original file line numberDiff line numberDiff line change
@@ -198,4 +198,4 @@
198198
distributed under the License is distributed on an "AS IS" BASIS,
199199
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200200
See the License for the specific language governing permissions and
201-
limitations under the License.
201+
limitations under the License.

NOTICE

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@ Unless required by applicable law or agreed to in writing, software
1111
distributed under the License is distributed on an "AS IS" BASIS,
1212
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1313
See the License for the specific language governing permissions and
14-
limitations under the License.
14+
limitations under the License.

dbt_project.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: automate_dv
2-
version: 0.9.7
2+
version: 0.10.0
33
require-dbt-version: [">=1.0.0", "<2.0.0"]
44
config-version: 2
55

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright (c) Business Thinking Ltd. 2019-2023
3+
* This software includes code developed by the AutomateDV (f.k.a dbtvault) Team at Business Thinking Ltd. Trading as Datavault
4+
*/
5+
6+
7+
{% macro wrap_warning(warning_message) %}

    {#- Frames a multi-line warning between two '====WARNING====' border lines.

        warning_message: text to frame; it is split on newline characters and
                         every resulting line is stripped of surrounding whitespace.

        Returns a string made of: a newline, the border, the trimmed lines joined
        by newlines, a newline and the border again. -#}

    {%- set label = 'WARNING' -%}
    {%- set new_message = [] -%}
    {%- set length_list = [] -%}

    {%- for raw_line in warning_message.split('\n') -%}
        {%- set clean_line = raw_line | trim -%}
        {%- do new_message.append(clean_line) -%}
        {#- Measure the trimmed line, i.e. exactly what is displayed, so that
            indentation inside the caller's set-block does not inflate the border -#}
        {%- do length_list.append(clean_line | length) -%}
    {%- endfor -%}

    {%- set max_line_length = length_list | max -%}

    {#- Half of the width left over once the label is placed in the middle.
        A zero or negative value yields no padding at all. -#}
    {%- set padding_length = (max_line_length - (label | length)) // 2 -%}
    {%- set padding = modules.itertools.repeat('=', padding_length) | join('') -%}

    {%- set border = padding ~ label ~ padding -%}

    {%- set wrapped_message = '\n' ~ border ~ '\n' ~ new_message | join('\n') ~ '\n' ~ border -%}

    {%- do return(wrapped_message) -%}

{% endmacro %}
27+
28+
29+
{%- macro datepart_too_small_error(period) -%}

    {#- Raises a compiler error stating that the given datepart is too small.

        period: the datepart name interpolated into the error text.

        The error is only raised when `execute` is truthy. The wording says
        "cannot be used" rather than "not recommended" because this macro aborts
        compilation; the softer wording belongs to the warning variant. -#}

    {%- set message -%}
This datepart ({{ period }}) is too small and cannot be used for this purpose, consider using a different datepart value (e.g. day) or rank column.
'vault_insert_by_x' materialisations are intended for experimental or testing purposes only. They are not intended for use in production.

Please see: https://automate-dv.readthedocs.io/en/latest/materialisations/
    {%- endset -%}

    {%- if execute -%}
        {{- exceptions.raise_compiler_error(message) -}}
    {%- endif -%}

{%- endmacro -%}
43+
44+
45+
{%- macro datepart_not_recommended_warning(period) -%}

    {#- Emits a non-fatal warning that the given datepart is smaller than recommended.

        period: the datepart name interpolated into the warning text.

        The text is framed with automate_dv.wrap_warning and passed to
        exceptions.warn, and only when `execute` is truthy. -#}

    {%- set warning_text -%}
This datepart ({{ period }}) is too small and not recommended, consider using a different datepart value (e.g. day) or rank column.
'vault_insert_by_x' materialisations are intended for experimental or testing purposes only. They are not intended for use in production.

Please see: https://automate-dv.readthedocs.io/en/latest/materialisations/
    {%- endset -%}

    {%- if execute -%}
        {%- set framed_warning = automate_dv.wrap_warning(warning_text) -%}
        {{- exceptions.warn(framed_warning) -}}
    {%- endif -%}

{%- endmacro -%}
59+
60+
61+
{%- macro max_iterations_error(func_name=none) -%}

    {#- Raises a compiler error stating that the 100,000 iteration limit was exceeded.

        func_name: optional name of the calling materialisation. When provided it
                   replaces the generic 'vault_insert_by_x' in the error text.
                   Declaring it is required because a caller passes it as a keyword
                   argument, and Jinja rejects keyword arguments that a macro does
                   not declare. Calls without arguments produce the same text as before.

        This macro performs no threshold check itself; it raises whenever it is
        called and `execute` is truthy. -#}

    {%- set materialisation_name = func_name if func_name else 'vault_insert_by_x' -%}

    {%- set message -%}
Max iterations is 100,000. Consider using a different datepart value (e.g. day), rank column or loading data for a shorter time period.
'{{ materialisation_name }}' materialisations are intended for experimental or testing purposes only. They are not intended for use in production.

Please see: https://automate-dv.readthedocs.io/en/latest/materialisations/
    {%- endset -%}

    {%- if execute -%}
        {{- exceptions.raise_compiler_error(message) -}}
    {%- endif -%}

{%- endmacro -%}
75+
76+
77+
{%- macro experimental_not_recommended_warning(func_name) -%}

    {#- Emits a non-fatal warning that the named functionality is experimental.

        func_name: name of the functionality, interpolated into the warning text.

        The text is framed with automate_dv.wrap_warning and passed to
        exceptions.warn, and only when `execute` is truthy. -#}

    {%- set warning_text -%}
This functionality ({{ func_name }}) is intended for experimental or testing purposes only.
Its behavior, reliability, and performance have not been thoroughly vetted for production environments.
Using this functionality in a live production setting may result in unpredictable outcomes, data loss, or system instability.
    {%- endset -%}

    {%- if execute -%}
        {%- set framed_warning = automate_dv.wrap_warning(warning_text) -%}
        {{- exceptions.warn(framed_warning) -}}
    {%- endif -%}

{%- endmacro -%}

macros/materialisations/period_mat_helpers/check_datediff.sql

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
period=period) %}
1212

1313
{%- if num_periods > 100000 -%}
14-
{{ automate_dv.sqlserver_max_iterations_error() }}
14+
{{ automate_dv.max_iterations_error() }}
1515
{%- endif -%}
1616

1717
{% do return(num_periods) %}

macros/materialisations/period_mat_helpers/get_period_boundaries.sql

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@
8484

8585
{% macro sqlserver__get_period_boundaries(target_relation, timestamp_field, start_date, stop_date, period) -%}
8686
{%- if period is in ['microsecond', 'millisecond', 'second'] -%}
87-
{{ automate_dv.sqlserver_datepart_too_small_error(period=period) }}
87+
{{ automate_dv.datepart_too_small_error(period=period) }}
8888
{%- endif -%}
8989

9090
{# MSSQL cannot CAST datetime2 strings with more than 7 decimal places #}

macros/materialisations/shared_helpers.sql

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88
{%- if model_sql.find(placeholder) == -1 -%}
99
{%- set error_message -%}
1010
Model '{{ model.unique_id }}' does not include the required string '{{ placeholder }}' in its sql
11-
{%- endset -%}
12-
{{- exceptions.raise_compiler_error(error_message) -}}
11+
{%- endset -%}
12+
13+
{{- exceptions.raise_compiler_error(error_message) -}}
1314
{%- endif -%}
1415

1516
{%- endmacro -%}

macros/materialisations/vault_insert_by_period_materialization.sql

+14-20
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,23 @@
77

88
{%- set full_refresh_mode = (should_full_refresh()) -%}
99

10+
{%- set period = config.get('period', default='day') -%}
11+
12+
{#- Raise the errors/warnings in this order so that we do not get both -#}
13+
{%- if period == 'microsecond' -%}
14+
{{ automate_dv.datepart_too_small_error(period=period) }}
15+
{%- elif period is in ['millisecond', 'second', 'minute', 'hour'] -%}
16+
{{ automate_dv.datepart_not_recommended_warning(period=period) }}
17+
{%- endif -%}
18+
19+
{{ automate_dv.experimental_not_recommended_warning(func_name='vault_insert_by_period') }}
20+
1021
{% if target.type == "sqlserver" %}
1122
{%- set target_relation = this.incorporate(type='table') -%}
1223
{% else %}
1324
{%- set target_relation = this -%}
1425
{% endif %}
26+
1527
{%- set existing_relation = load_relation(this) -%}
1628
{%- set tmp_relation = make_temp_relation(target_relation) -%}
1729

@@ -20,25 +32,6 @@
2032

2133
{%- set start_stop_dates = automate_dv.get_start_stop_dates(timestamp_field, date_source_models) | as_native -%}
2234

23-
{%- set period = config.get('period', default='day') -%}
24-
{%- if period == 'microsecond' -%}
25-
{%- set error_message -%}
26-
'This datepart ({{ period }}) is too small and cannot be used for this purpose, consider using a different datepart value (e.g. day).
27-
Vault_insert_by materialisations are not intended for this purpose,
28-
please see https://automate-dv.readthedocs.io/en/latest/materialisations/'
29-
{%- endset -%}
30-
31-
{{- exceptions.raise_compiler_error(error_message) -}}
32-
{%- elif period is in ['millisecond', 'second', 'minute', 'hour'] -%}
33-
{%- set warn_message -%}
34-
'WARNING: The use of this datepart ({{ period }}) is not recommended, consider using a different datepart value (e.g. day).
35-
Vault_insert_by materialisations are not intended for this purpose,
36-
please see https://automate-dv.readthedocs.io/en/latest/materialisations/'
37-
{%- endset -%}
38-
39-
{{- exceptions.warn(warn_message) -}}
40-
{%- endif -%}
41-
4235
{%- set to_drop = [] -%}
4336

4437
{%- do automate_dv.check_placeholder(sql) -%}
@@ -76,9 +69,10 @@
7669
start_timestamp=start_stop_dates.start_date,
7770
stop_timestamp=start_stop_dates.stop_date,
7871
offset=0, period=period) %}
79-
{% if target.type == "postgres" %}
72+
{% if target.type in ['postgres', 'sqlserver'] %}
8073
{{ automate_dv.drop_temporary_special(target_relation) }}
8174
{% endif %}
75+
8276
{% set build_sql = create_table_as(False, target_relation, filtered_sql) %}
8377
{% else %}
8478
{% set period_boundaries = automate_dv.get_period_boundaries(target_relation,

macros/materialisations/vault_insert_by_rank_materialization.sql

+76-79
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
{% else %}
1313
{%- set target_relation = this -%}
1414
{% endif %}
15+
1516
{%- set existing_relation = load_relation(this) -%}
1617
{%- set tmp_relation = make_temp_relation(target_relation) -%}
1718

@@ -20,6 +21,14 @@
2021

2122
{%- set min_max_ranks = automate_dv.get_min_max_ranks(rank_column, rank_source_models) | as_native -%}
2223

24+
{#- Raise the errors/warnings in this order so that we do not get both -#}
25+
{% if min_max_ranks.max_rank | int > 100000 %}
26+
{{ automate_dv.max_iterations_error(func_name='vault_insert_by_rank') }}
27+
{% endif %}
28+
29+
{{ automate_dv.experimental_not_recommended_warning(func_name='vault_insert_by_rank') }}
30+
31+
2332
{%- set to_drop = [] -%}
2433

2534
{%- do automate_dv.check_placeholder(sql, "__RANK_FILTER__") -%}
@@ -46,92 +55,80 @@
4655

4756
{% elif full_refresh_mode %}
4857
{% set filtered_sql = automate_dv.replace_placeholder_with_rank_filter(sql, rank_column, 1) %}
49-
{% if target.type == "postgres" %}
58+
{% if target.type in ['postgres', 'sqlserver'] %}
5059
{{ automate_dv.drop_temporary_special(target_relation) }}
5160
{% endif %}
5261
{% set build_sql = create_table_as(False, target_relation, filtered_sql) %}
5362
{% else %}
5463

55-
{% if min_max_ranks.max_rank | int > 100000 %}
56-
{%- set error_message -%}
57-
'Max iterations is 100,000. Consider using a different rank column
58-
or loading a smaller amount of data.
59-
vault_insert_by materialisations are not intended for this purpose,
60-
please see https://automate-dv.readthedocs.io/en/latest/materialisations/'
61-
{%- endset -%}
62-
63-
{{- exceptions.raise_compiler_error(error_message) -}}
64-
{% else %}
65-
{% set target_columns = adapter.get_columns_in_relation(target_relation) %}
66-
{%- set target_cols_csv = target_columns | map(attribute='quoted') | join(', ') -%}
67-
{%- set loop_vars = {'sum_rows_inserted': 0} -%}
68-
69-
{% for i in range(min_max_ranks.max_rank | int ) -%}
70-
71-
{%- set iteration_number = i + 1 -%}
72-
73-
{%- set filtered_sql = automate_dv.replace_placeholder_with_rank_filter(sql, rank_column, iteration_number) -%}
74-
75-
{{ dbt_utils.log_info("Running for {} {} of {} on column '{}' [{}]".format('rank', iteration_number, min_max_ranks.max_rank, rank_column, model.unique_id)) }}
76-
77-
{% set tmp_relation = make_temp_relation(target_relation) %}
78-
79-
{# This call statement drops and then creates a temporary table #}
80-
{# but MSSQL will fail to drop any temporary table created by a previous loop iteration #}
81-
{# See MSSQL note and drop code below #}
82-
{% call statement() -%}
83-
{{ create_table_as(True, tmp_relation, filtered_sql) }}
84-
{%- endcall %}
85-
86-
{{ adapter.expand_target_column_types(from_relation=tmp_relation,
87-
to_relation=target_relation) }}
88-
89-
{%- set insert_query_name = 'main-' ~ i -%}
90-
{% call statement(insert_query_name, fetch_result=True) -%}
91-
INSERT INTO {{ target_relation }} ({{ target_cols_csv }})
92-
(
93-
SELECT {{ target_cols_csv }}
94-
FROM {{ tmp_relation.include(schema=True) }}
95-
);
96-
{%- endcall %}
97-
98-
{% set result = load_result(insert_query_name) %}
99-
{% if 'response' in result.keys() %} {# added in v0.19.0 #}
100-
{# Investigate for Databricks #}
101-
{%- if result['response']['rows_affected'] == None %}
102-
{% set rows_inserted = 0 %}
103-
{%- else %}
104-
{% set rows_inserted = result['response']['rows_affected'] %}
105-
{%- endif %}
106-
107-
{% else %} {# older versions #}
108-
{% set rows_inserted = result['status'].split(" ")[2] | int %}
109-
{% endif %}
110-
111-
{%- set sum_rows_inserted = loop_vars['sum_rows_inserted'] + rows_inserted -%}
112-
{%- do loop_vars.update({'sum_rows_inserted': sum_rows_inserted}) %}
113-
114-
{{ dbt_utils.log_info("Ran for {} {} of {}; {} records inserted [{}]".format('rank', iteration_number,
115-
min_max_ranks.max_rank,
116-
rows_inserted,
117-
model.unique_id)) }}
118-
119-
{# In databricks and sqlserver a temporary view/table can only be dropped by #}
120-
{# the connection or session that created it so drop it now before the commit below closes this session #} model.unique_id)) }}
121-
{% if target.type in ['databricks', 'sqlserver'] %}
122-
{{ automate_dv.drop_temporary_special(tmp_relation) }}
123-
{% else %}
124-
{% do to_drop.append(tmp_relation) %}
125-
{% endif %}
126-
127-
{% do adapter.commit() %}
128-
129-
{% endfor %}
130-
{% call noop_statement('main', "INSERT {}".format(loop_vars['sum_rows_inserted']) ) -%}
131-
{{ filtered_sql }}
64+
{% set target_columns = adapter.get_columns_in_relation(target_relation) %}
65+
{%- set target_cols_csv = target_columns | map(attribute='quoted') | join(', ') -%}
66+
{%- set loop_vars = {'sum_rows_inserted': 0} -%}
67+
68+
{% for i in range(min_max_ranks.max_rank | int ) -%}
69+
70+
{%- set iteration_number = i + 1 -%}
71+
72+
{%- set filtered_sql = automate_dv.replace_placeholder_with_rank_filter(sql, rank_column, iteration_number) -%}
73+
74+
{{ dbt_utils.log_info("Running for {} {} of {} on column '{}' [{}]".format('rank', iteration_number, min_max_ranks.max_rank, rank_column, model.unique_id)) }}
75+
76+
{% set tmp_relation = make_temp_relation(target_relation) %}
77+
78+
{# This call statement drops and then creates a temporary table #}
79+
{# but MSSQL will fail to drop any temporary table created by a previous loop iteration #}
80+
{# See MSSQL note and drop code below #}
81+
{% call statement() -%}
82+
{{ create_table_as(True, tmp_relation, filtered_sql) }}
13283
{%- endcall %}
133-
{% endif %}
13484

85+
{{ adapter.expand_target_column_types(from_relation=tmp_relation,
86+
to_relation=target_relation) }}
87+
88+
{%- set insert_query_name = 'main-' ~ i -%}
89+
{% call statement(insert_query_name, fetch_result=True) -%}
90+
INSERT INTO {{ target_relation }} ({{ target_cols_csv }})
91+
(
92+
SELECT {{ target_cols_csv }}
93+
FROM {{ tmp_relation.include(schema=True) }}
94+
);
95+
{%- endcall %}
96+
97+
{% set result = load_result(insert_query_name) %}
98+
{% if 'response' in result.keys() %} {# added in v0.19.0 #}
99+
{# Investigate for Databricks #}
100+
{%- if result['response']['rows_affected'] == None %}
101+
{% set rows_inserted = 0 %}
102+
{%- else %}
103+
{% set rows_inserted = result['response']['rows_affected'] %}
104+
{%- endif %}
105+
106+
{% else %} {# older versions #}
107+
{% set rows_inserted = result['status'].split(" ")[2] | int %}
108+
{% endif %}
109+
110+
{%- set sum_rows_inserted = loop_vars['sum_rows_inserted'] + rows_inserted -%}
111+
{%- do loop_vars.update({'sum_rows_inserted': sum_rows_inserted}) %}
112+
113+
{{ dbt_utils.log_info("Ran for {} {} of {}; {} records inserted [{}]".format('rank', iteration_number,
114+
min_max_ranks.max_rank,
115+
rows_inserted,
116+
model.unique_id)) }}
117+
118+
{# In databricks and sqlserver a temporary view/table can only be dropped by #}
119+
{# the connection or session that created it so drop it now before the commit below closes this session #}
120+
{% if target.type in ['databricks', 'sqlserver'] %}
121+
{{ automate_dv.drop_temporary_special(tmp_relation) }}
122+
{% else %}
123+
{% do to_drop.append(tmp_relation) %}
124+
{% endif %}
125+
126+
{% do adapter.commit() %}
127+
128+
{% endfor %}
129+
{% call noop_statement('main', "INSERT {}".format(loop_vars['sum_rows_inserted']) ) -%}
130+
{{ filtered_sql }}
131+
{%- endcall %}
135132
{% endif %}
136133

137134
{% if build_sql is defined %}

0 commit comments

Comments
 (0)