# dbt_project.yml
name: 'snowplow_normalize'
version: '0.4.0'
config-version: 2
require-dbt-version: [">=1.4.0", "<2.0.0"]
profile: 'default'
model-paths: ["models"]
docs-paths: ["docs"]
macro-paths: ["macros"]
target-path: "target"
clean-targets:
  - "target"
  - "dbt_packages"
dispatch:
  - macro_namespace: dbt
    search_order: ['snowplow_utils', 'dbt']
vars:
  snowplow_normalize:
    # Sources
    # snowplow__atomic_schema: 'atomic_data_sample' # Only set if not using 'atomic' schema for Snowplow events data
    # snowplow__database: # Only set if not using target.database for Snowplow events data -- WILL BE IGNORED FOR DATABRICKS
    snowplow__events: "{{ source('atomic', 'events') }}"
    snowplow__grant_select_to: []
    snowplow__grant_schema_usage: true
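    # For example, to grant select on the derived tables to specific roles (hypothetical role names):
    # snowplow__grant_select_to: ['analyst_role', 'reporting_role']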
    # Variables - Standard Config
    snowplow__start_date: '2020-01-01'
    snowplow__backfill_limit_days: 30
    snowplow__app_id: []
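    # For example, to limit processing to events from specific applications (hypothetical app_ids):
    # snowplow__app_id: ['website', 'mobile_app']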
    snowplow__derived_tstamp_partitioned: true
    # Variables - Advanced Config
    snowplow__lookback_window_hours: 6
    snowplow__days_late_allowed: 3
    snowplow__upsert_lookback_days: 30
    snowplow__query_tag: "snowplow_dbt"
    snowplow__dev_target_name: 'dev'
    snowplow__allow_refresh: false
    snowplow__session_timestamp: 'collector_tstamp'
    snowplow__partition_tstamp: 'collector_tstamp' # The column used to partition the derived tables; it must be a timestamp column present in the data
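    # For example, to partition the derived tables on the event's derived timestamp instead:
    # snowplow__partition_tstamp: 'derived_tstamp'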
    # Variables - Databricks Only
    # Add the following variable to your own dbt project's dbt_project.yml file.
    # Depending on your setup, set it either to the catalog (for Unity Catalog users on the Databricks connector 1.1.1 onwards) or to the same value as snowplow__atomic_schema (which is 'atomic' unless changed).
    # snowplow__databricks_catalog: 'hive_metastore'
# Completely or partially remove models from the manifest during run start.
on-run-start:
  - "{{ snowplow_utils.snowplow_delete_from_manifest(var('models_to_remove',[]), ref('snowplow_normalize_incremental_manifest')) }}"
# Update the manifest table with the last event consumed per successfully executed node/model
on-run-end:
  - "{{ snowplow_utils.snowplow_incremental_post_hook('snowplow_normalize', 'snowplow_normalize_incremental_manifest', 'snowplow_normalize_base_events_this_run', var('snowplow__session_timestamp')) }}"
  - "{{ snowplow_utils.grant_usage_on_schemas_built_into(var('snowplow__grant_schemas', true)) }}"
# The 'snowplow_normalize_incremental' tag allows snowplow_incremental_post_hook to identify Snowplow models
# and add their last successful collector_tstamp to the manifest.
models:
  snowplow_normalize:
    +materialized: table
    +file_format: "{{ 'delta' if target.type not in ['spark'] else 'iceberg'}}"
    +bind: false
    base:
      manifest:
        +schema: "snowplow_manifest"
      scratch:
        +schema: "scratch"
        +tags: "scratch"
        +enabled: "{{ target.type in ['bigquery', 'databricks', 'spark', 'snowflake'] | as_bool() }}"
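# Configs set here can be overridden from the root dbt project; for example, to relocate the scratch tables (hypothetical schema name):
# models:
#   snowplow_normalize:
#     base:
#       scratch:
#         +schema: "my_scratch_schema"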