Skip to content

Commit bd7408b

Browse files
committed
Merge branch 'main'
2 parents bd3c5c5 + e9f2ac8 commit bd7408b

14 files changed

+223
-118
lines changed

dbt_project.yml

+1-10
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: dbtvault
2-
version: 0.9.2
2+
version: 0.9.5
33
require-dbt-version: [">=1.0.0", "<2.0.0"]
44
config-version: 2
55

@@ -14,12 +14,3 @@ target-path: "target"
1414
clean-targets:
1515
- "target"
1616
- "dbt_packages"
17-
18-
vars:
19-
hash: MD5
20-
hash_content_casing: 'UPPER' # Default UPPER, alternatively DISABLED
21-
null_key_required: '-1' # Default -1, allows user to configure
22-
null_key_optional: '-2' # Default -2, allows user to configure
23-
enable_ghost_records: false # Default false; ghost records are disabled unless this is set to true
24-
system_record_value: 'DBTVAULT_SYSTEM' # Default DBTVAULT_SYSTEM, allows user to configure
25-

macros/internal/helpers/logging/log_relation_sources.sql

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
{% macro default__log_relation_sources(relation, source_count) %}
1111

12-
{%- if execute -%}
12+
{%- if 'docs' not in invocation_args_dict['rpc_method'] and execute -%}
1313

1414
{%- do dbt_utils.log_info('Loading {} from {} source(s)'.format("{}.{}.{}".format(relation.database, relation.schema, relation.identifier),
1515
source_count)) -%}
@@ -18,7 +18,7 @@
1818

1919
{% macro databricks__log_relation_sources(relation, source_count) %}
2020

21-
{%- if execute -%}
21+
{%- if 'docs' not in invocation_args_dict['rpc_method'] and execute -%}
2222

2323
{%- do dbt_utils.log_info('Loading {} from {} source(s)'.format("{}.{}".format(relation.schema, relation.identifier),
2424
source_count)) -%}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Copyright (c) Business Thinking Ltd. 2019-2023
3+
* This software includes code developed by the dbtvault Team at Business Thinking Ltd. Trading as Datavault
4+
*/
5+
6+
{%- macro check_num_periods(start_date, stop_date, period) -%}
7+
8+
{% set num_periods = adapter.dispatch('check_num_periods',
9+
'dbtvault')(start_date=start_date,
10+
stop_date=stop_date,
11+
period=period) %}
12+
13+
{%- if num_periods > 100000 -%}
14+
{%- set error_message -%}
15+
'Max iterations is 100,000. Consider using a different datepart value (e.g. day)
16+
or loading data for a shorter time period.
17+
vault_insert_by materialisations are not intended for this purpose,
18+
please see https://dbtvault.readthedocs.io/en/latest/materialisations/'
19+
{%- endset -%}
20+
21+
{{- exceptions.raise_compiler_error(error_message) -}}
22+
{%- endif -%}
23+
24+
{% do return(num_periods) %}
25+
26+
{%- endmacro %}
27+
28+
{% macro default__check_num_periods(start_date, stop_date, period) %}
29+
30+
{% set num_periods_check_sql %}
31+
SELECT {{ datediff('start_timestamp', 'stop_timestamp', period) }} AS NUM_PERIODS
32+
FROM
33+
(SELECT CAST('{{ start_date }}' AS {{ dbt.type_timestamp() }}) AS start_timestamp,
34+
CAST(NULLIF('{{ stop_date | lower }}', 'none') AS {{ dbt.type_timestamp() }}) AS stop_timestamp)
35+
{% endset %}
36+
{% set num_periods_dict = dbtvault.get_query_results_as_dict(num_periods_check_sql) %}
37+
{% set num_periods = num_periods_dict['NUM_PERIODS'][0] | int %}
38+
39+
{% do return(num_periods) %}
40+
41+
{% endmacro %}
42+
43+
{% macro sqlserver__check_num_periods(start_date, stop_date, period) %}
44+
45+
{% set num_periods_check_sql %}
46+
SELECT DATEDIFF_BIG({{ period }}, CAST('{{ start_date }}' AS DATETIME2),
47+
CAST(NULLIF('{{ stop_date | lower }}', 'none') AS DATETIME2)) AS NUM_PERIODS
48+
{% endset %}
49+
{% set num_periods_dict = dbtvault.get_query_results_as_dict(num_periods_check_sql) %}
50+
{% set num_periods = num_periods_dict['NUM_PERIODS'][0] | int %}
51+
52+
{% do return(num_periods) %}
53+
54+
{% endmacro %}

macros/materialisations/period_mat_helpers/get_period_boundaries.sql

+2-2
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@
9292
WITH period_data AS (
9393
SELECT
9494
CAST(COALESCE(MAX({{ timestamp_field }}), CAST('{{ start_date }}' AS DATETIME2)) AS DATETIME2) AS start_timestamp,
95-
COALESCE({{ dbtvault.dateadd('millisecond', 86399999, from_date_or_timestamp) }},
96-
{{ current_timestamp() }} ) AS stop_timestamp
95+
CAST(COALESCE({{ dbtvault.dateadd('millisecond', 86399999, from_date_or_timestamp) }},
96+
{{ current_timestamp() }} ) AS DATETIME2) AS stop_timestamp
9797
FROM {{ target_relation }}
9898
)
9999
SELECT

macros/materialisations/vault_insert_by_period_materialization.sql

+2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929

3030
{%- do dbtvault.check_placeholder(sql) -%}
3131

32+
{%- do dbtvault.check_num_periods(start_stop_dates.start_date, start_stop_dates.stop_date, period) -%}
33+
3234
{{ run_hooks(pre_hooks, inside_transaction=False) }}
3335

3436
-- `BEGIN` happens here:

macros/materialisations/vault_insert_by_rank_materialization.sql

+78-67
Original file line numberDiff line numberDiff line change
@@ -53,74 +53,85 @@
5353
{% set build_sql = create_table_as(False, target_relation, filtered_sql) %}
5454
{% else %}
5555

56-
{% set target_columns = adapter.get_columns_in_relation(target_relation) %}
57-
{%- set target_cols_csv = target_columns | map(attribute='quoted') | join(', ') -%}
58-
{%- set loop_vars = {'sum_rows_inserted': 0} -%}
59-
60-
{% for i in range(min_max_ranks.max_rank | int ) -%}
61-
62-
{%- set iteration_number = i + 1 -%}
63-
64-
{%- set filtered_sql = dbtvault.replace_placeholder_with_rank_filter(sql, rank_column, iteration_number) -%}
65-
66-
{{ dbt_utils.log_info("Running for {} {} of {} on column '{}' [{}]".format('rank', iteration_number, min_max_ranks.max_rank, rank_column, model.unique_id)) }}
67-
68-
{% set tmp_relation = make_temp_relation(target_relation) %}
69-
70-
{# This call statement drops and then creates a temporary table #}
71-
{# but MSSQL will fail to drop any temporary table created by a previous loop iteration #}
72-
{# See MSSQL note and drop code below #}
73-
{% call statement() -%}
74-
{{ create_table_as(True, tmp_relation, filtered_sql) }}
56+
{% if min_max_ranks.max_rank | int > 100000 %}
57+
{%- set error_message -%}
58+
'Max iterations is 100,000. Consider using a different rank column
59+
or loading a smaller amount of data.
60+
vault_insert_by materialisations are not intended for this purpose,
61+
please see https://dbtvault.readthedocs.io/en/latest/materialisations/'
62+
{%- endset -%}
63+
64+
{{- exceptions.raise_compiler_error(error_message) -}}
65+
{% else %}
66+
{% set target_columns = adapter.get_columns_in_relation(target_relation) %}
67+
{%- set target_cols_csv = target_columns | map(attribute='quoted') | join(', ') -%}
68+
{%- set loop_vars = {'sum_rows_inserted': 0} -%}
69+
70+
{% for i in range(min_max_ranks.max_rank | int ) -%}
71+
72+
{%- set iteration_number = i + 1 -%}
73+
74+
{%- set filtered_sql = dbtvault.replace_placeholder_with_rank_filter(sql, rank_column, iteration_number) -%}
75+
76+
{{ dbt_utils.log_info("Running for {} {} of {} on column '{}' [{}]".format('rank', iteration_number, min_max_ranks.max_rank, rank_column, model.unique_id)) }}
77+
78+
{% set tmp_relation = make_temp_relation(target_relation) %}
79+
80+
{# This call statement drops and then creates a temporary table #}
81+
{# but MSSQL will fail to drop any temporary table created by a previous loop iteration #}
82+
{# See MSSQL note and drop code below #}
83+
{% call statement() -%}
84+
{{ create_table_as(True, tmp_relation, filtered_sql) }}
85+
{%- endcall %}
86+
87+
{{ adapter.expand_target_column_types(from_relation=tmp_relation,
88+
to_relation=target_relation) }}
89+
90+
{%- set insert_query_name = 'main-' ~ i -%}
91+
{% call statement(insert_query_name, fetch_result=True) -%}
92+
INSERT INTO {{ target_relation }} ({{ target_cols_csv }})
93+
(
94+
SELECT {{ target_cols_csv }}
95+
FROM {{ tmp_relation.include(schema=True) }}
96+
);
97+
{%- endcall %}
98+
99+
{% set result = load_result(insert_query_name) %}
100+
{% if 'response' in result.keys() %} {# added in v0.19.0 #}
101+
{# Investigate for Databricks #}
102+
{%- if result['response']['rows_affected'] == None %}
103+
{% set rows_inserted = 0 %}
104+
{%- else %}
105+
{% set rows_inserted = result['response']['rows_affected'] %}
106+
{%- endif %}
107+
108+
{% else %} {# older versions #}
109+
{% set rows_inserted = result['status'].split(" ")[2] | int %}
110+
{% endif %}
111+
112+
{%- set sum_rows_inserted = loop_vars['sum_rows_inserted'] + rows_inserted -%}
113+
{%- do loop_vars.update({'sum_rows_inserted': sum_rows_inserted}) %}
114+
115+
{{ dbt_utils.log_info("Ran for {} {} of {}; {} records inserted [{}]".format('rank', iteration_number,
116+
min_max_ranks.max_rank,
117+
rows_inserted,
118+
model.unique_id)) }}
119+
120+
{# In databricks and sqlserver a temporary view/table can only be dropped by #}
121+
{# the connection or session that created it so drop it now before the commit below closes this session #}
122+
{% if target.type in ['databricks', 'sqlserver'] %}
123+
{{ dbtvault.drop_temporary_special(tmp_relation) }}
124+
{% else %}
125+
{% do to_drop.append(tmp_relation) %}
126+
{% endif %}
127+
128+
{% do adapter.commit() %}
129+
130+
{% endfor %}
131+
{% call noop_statement('main', "INSERT {}".format(loop_vars['sum_rows_inserted']) ) -%}
132+
{{ filtered_sql }}
75133
{%- endcall %}
76-
77-
{{ adapter.expand_target_column_types(from_relation=tmp_relation,
78-
to_relation=target_relation) }}
79-
80-
{%- set insert_query_name = 'main-' ~ i -%}
81-
{% call statement(insert_query_name, fetch_result=True) -%}
82-
INSERT INTO {{ target_relation }} ({{ target_cols_csv }})
83-
(
84-
SELECT {{ target_cols_csv }}
85-
FROM {{ tmp_relation.include(schema=True) }}
86-
);
87-
{%- endcall %}
88-
89-
{% set result = load_result(insert_query_name) %}
90-
{% if 'response' in result.keys() %} {# added in v0.19.0 #}
91-
{# Investigate for Databricks #}
92-
{%- if result['response']['rows_affected'] == None %}
93-
{% set rows_inserted = 0 %}
94-
{%- else %}
95-
{% set rows_inserted = result['response']['rows_affected'] %}
96-
{%- endif %}
97-
98-
{% else %} {# older versions #}
99-
{% set rows_inserted = result['status'].split(" ")[2] | int %}
100-
{% endif %}
101-
102-
{%- set sum_rows_inserted = loop_vars['sum_rows_inserted'] + rows_inserted -%}
103-
{%- do loop_vars.update({'sum_rows_inserted': sum_rows_inserted}) %}
104-
105-
{{ dbt_utils.log_info("Ran for {} {} of {}; {} records inserted [{}]".format('rank', iteration_number,
106-
min_max_ranks.max_rank,
107-
rows_inserted,
108-
model.unique_id)) }}
109-
110-
{# In databricks and sqlserver a temporary view/table can only be dropped by #}
111-
{# the connection or session that created it so drop it now before the commit below closes this session #}
112-
{% if target.type in ['databricks', 'sqlserver'] %}
113-
{{ dbtvault.drop_temporary_special(tmp_relation) }}
114-
{% else %}
115-
{% do to_drop.append(tmp_relation) %}
116-
{% endif %}
117-
118-
{% do adapter.commit() %}
119-
120-
{% endfor %}
121-
{% call noop_statement('main', "INSERT {}".format(loop_vars['sum_rows_inserted']) ) -%}
122-
{{ filtered_sql }}
123-
{%- endcall %}
134+
{% endif %}
124135

125136
{% endif %}
126137

macros/supporting/data_types/type_string.sql

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121

2222
{%- macro databricks__type_string(is_hash=false, char_length=255) -%}
2323
{%- if is_hash -%}
24-
{%- if var('hash') | lower == 'md5' -%}
24+
{%- if var('hash', 'MD5') | lower == 'md5' -%}
2525
VARCHAR(16)
26-
{%- elif var('hash') | lower == 'sha' -%}
26+
{%- elif var('hash', 'MD5') | lower == 'sha' -%}
2727
VARCHAR(32)
2828
{%- endif -%}
2929
{%- else -%}

macros/supporting/ghost_records/binary_ghost.sql

+5-3
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@
2020

2121
{%- macro sqlserver__binary_ghost(alias, hash) -%}
2222
{%- if hash | lower == 'md5' -%}
23-
CAST(REPLICATE(CAST(CAST('0' AS tinyint) AS BINARY(16)), 16) AS BINARY(16)) AS {{ alias }}
23+
CAST(REPLICATE(CAST(CAST('0' AS tinyint) AS BINARY(16)), 16) AS BINARY(16))
2424
{%- elif hash | lower == 'sha' -%}
25-
CAST(REPLICATE(CAST(CAST('0' AS tinyint) AS BINARY(32)), 32) AS BINARY(32)) AS {{ alias }}
25+
CAST(REPLICATE(CAST(CAST('0' AS tinyint) AS BINARY(32)), 32) AS BINARY(32))
2626
{%- else -%}
27-
CAST(REPLICATE(CAST(CAST('0' AS tinyint) AS BINARY(16)), 16) AS BINARY(16)) AS {{ alias }}
27+
CAST(REPLICATE(CAST(CAST('0' AS tinyint) AS BINARY(16)), 16) AS BINARY(16))
2828
{%- endif -%}
29+
30+
{%- if alias %} AS {{ alias }} {%- endif -%}
2931
{%- endmacro -%}

macros/supporting/ghost_records/create_ghost_record.sql

+1-5
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,7 @@
4040
{%- do col_definitions.append(col_sql) -%}
4141

4242
{%- elif ((col_name | lower) == (src_eff | lower)) or ((col_name | lower) == (src_ldts | lower))-%}
43-
{%- if (col.dtype | lower) == 'date' -%}
44-
{%- set col_sql = dbtvault.cast_date('1900-01-01', as_string=true, datetime=false, alias=col_name)-%}
45-
{%- else -%}
46-
{%- set col_sql = dbtvault.cast_date('1900-01-01 00:00:00', as_string=true, datetime=true, alias=col_name, date_type=col.dtype)-%}
47-
{%- endif -%}
43+
{% set col_sql = dbtvault.date_ghost(date_type = (col.dtype | lower), alias=col_name) -%}
4844
{%- do col_definitions.append(col_sql) -%}
4945

5046
{%- elif (col_name | lower) == (src_source | lower) -%}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{%- macro date_ghost(date_type, alias) -%}
2+
{{ adapter.dispatch('date_ghost', 'dbtvault')(date_type=date_type, alias=alias) }}
3+
{%- endmacro -%}
4+
5+
{%- macro default__date_ghost(date_type, alias=none) -%}
6+
7+
{%- if date_type == 'date' -%}
8+
{{ dbtvault.cast_date('1900-01-01', as_string=true, datetime=false, alias=alias) }}
9+
{%- else -%}
10+
{{ dbtvault.cast_date('1900-01-01 00:00:00', as_string=true, datetime=true, alias=alias, date_type=date_type) }}
11+
{%- endif -%}
12+
13+
{%- endmacro -%}

macros/tables/bigquery/eff_sat.sql

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ new_closed_records AS (
140140
h.{{ src_ldts }},
141141
lo.{{ src_source }}
142142
FROM source_data AS h
143-
LEFT JOIN Latest_open AS lo
143+
LEFT JOIN latest_open AS lo
144144
ON lo.{{ src_pk }} = h.{{ src_pk }}
145145
LEFT JOIN latest_closed AS lc
146146
ON lc.{{ src_pk }} = h.{{ src_pk }}

0 commit comments

Comments
 (0)