Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add pre-hooks and modified macro #56

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# pre-commit configuration: generic file hygiene, SQL linting via sqlfluff
# (dbt templater) and dbt project checks via dbt-checkpoint.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace

  - repo: https://github.com/sqlfluff/sqlfluff
    rev: 3.1.1  # ensure version matches your Pipfile
    hooks:
      - id: sqlfluff-lint
        additional_dependencies:
          - 'dbt-snowflake==1.8.3'
          - 'sqlfluff-templater-dbt==3.1.1'  # ensure version matches your Pipfile
      - id: sqlfluff-fix
        stages: [manual]  # this command is available only to run manually
        additional_dependencies:
          - 'dbt-snowflake==1.8.3'
          - 'sqlfluff-templater-dbt==3.1.1'  # ensure version matches your Pipfile

  - repo: https://github.com/dbt-checkpoint/dbt-checkpoint
    rev: v2.0.3
    hooks:
      - id: dbt-compile  # Compiles dbt (necessary for future hooks)
      - id: dbt-docs-generate  # Generates the dbt docs (necessary for some future hooks)
      - id: check-source-table-has-description  # Ensures all source tables have descriptions
      - id: check-column-desc-are-same  # Ensures all models have the same descriptions for the same column names.
      - id: check-model-has-tests  # Ensures all models have at least 2 tests
        args: ["--test-cnt", "2", "--"]
        files: ^models/
      - id: check-script-semicolon  # Ensure that the model does not have a semicolon at the end of the file.
      - id: check-script-has-no-table-name  # Ensures models only use source or ref macro to specify the table name.
      - id: check-model-has-all-columns  # Ensures that mart models have all columns in the database also specified in the .yml
        files: ^models/marts
      - id: check-model-tags  # Ensures that models inside the core folder have specified tags
        # --tags takes one argument per allowed tag; a single comma-joined
        # string would be treated as one tag literally named
        # "hourly,daily,weekly,monthly".
        args: ["--tags", "hourly", "daily", "weekly", "monthly", "--"]
        files: ^models/marts/core
14 changes: 14 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Package index used to resolve every dependency below.
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

# All tools are pinned to exact versions. The dbt-snowflake and
# sqlfluff-templater-dbt pins are repeated in .pre-commit-config.yaml
# (additional_dependencies) and must be kept in sync with it.
[packages]
dbt-core = "==1.8.3"
dbt-snowflake = "==1.8.3"
pre-commit = "==3.8.0"
sqlfluff = "==3.1.1"
sqlfluff-templater-dbt = "==3.1.1"

[requires]
python_version = "3.12"
1,378 changes: 1,378 additions & 0 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ seeds:
dbt_project_evaluator_exceptions:
+enabled: false

# on-run-end: "{{ dbt_project_evaluator.print_dbt_project_evaluator_issues() }}"
on-run-end: "{{ dbt_project_evaluator.print_dbt_project_evaluator_issues() }}"
18 changes: 18 additions & 0 deletions macros/lagging_over_column.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{% macro lagging_over_column(
    column_name,
    partition_by_first_column,
    order_by,
    coalesce_value,
    new_name_column,
    partition_by_second_column=none) %}
    {#
        Renders a select-list expression of the form
            COALESCE(LAG(<column_name>) OVER (...), <coalesce_value>) AS <new_name_column>

        The window is partitioned by partition_by_first_column and, only when
        partition_by_second_column is not none, additionally by that second
        column. Rows inside each partition are ordered by order_by.
        Every argument except partition_by_second_column is required.
    #}
    {% set partition_columns = (
        partition_by_first_column
        if partition_by_second_column is none
        else (partition_by_first_column ~ ', ' ~ partition_by_second_column)
    ) %}

    COALESCE(
        LAG({{ column_name }}) OVER (
            PARTITION BY {{ partition_columns }}
            ORDER BY {{ order_by }}
        ),
        {{ coalesce_value }}
    ) AS {{ new_name_column }}
{% endmacro %}
24 changes: 24 additions & 0 deletions macros/lagging_over_column.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
version: 2

# Documentation for the lagging_over_column macro. Only
# partition_by_second_column has a default in the macro signature; every
# other argument is required.
macros:
  - name: lagging_over_column
    description: >-
      A macro that applies the LAG window function to the specified column
      and coalesces the resulting null with a defined value.
    arguments:
      - name: column_name
        type: string
        description: The name of the column that needs to be lagged.
      - name: partition_by_first_column
        type: string
        description: The first grouping column.
      - name: order_by
        type: string
        description: Ordering column of the window (required, the macro defines no default).
      - name: coalesce_value
        type: string
        description: Value that should be used instead of null.
      - name: new_name_column
        type: string
        description: Name of the column returned by the macro.
      - name: partition_by_second_column
        type: string
        description: The second grouping column (optional, none by default).
17 changes: 17 additions & 0 deletions macros/rolling_aggregations_over_time.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{% macro rolling_aggregations_over_time(
    column_name,
    aggregation_type='avg',
    time_window=7,
    partition_by=none,
    order_by='created_at') %}
    {#
        Renders a windowed aggregate as a select-list expression:
            <aggregation_type>(<column_name>) OVER (...)
                AS <aggregation_type>_<time_window>_periods_<column_name>

        Arguments:
          column_name       column the aggregate is applied to; it is also
                            embedded in the output alias.
          aggregation_type  name of the SQL aggregate function. The default
                            must be the quoted string 'avg': an unquoted avg
                            is an undefined Jinja name, not the string "avg".
          time_window       number of PRECEDING rows in the frame. The frame
                            also contains the current row, so it spans
                            time_window + 1 rows.
                            NOTE(review): confirm this is the intended size.
          partition_by      optional PARTITION BY expression; the clause is
                            omitted when none.
          order_by          ORDER BY expression of the window.
    #}

    {{ aggregation_type }}({{ column_name }}) OVER (
        {% if partition_by is not none %}
        PARTITION BY {{ partition_by }}
        {% endif %}
        ORDER BY {{ order_by }}
        ROWS BETWEEN {{ time_window }} PRECEDING AND CURRENT ROW
    ) AS {{ aggregation_type }}_{{ time_window }}_periods_{{ column_name }}
{% endmacro %}
21 changes: 21 additions & 0 deletions macros/rolling_aggregations_over_time.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
version: 2

# Documentation for the rolling_aggregations_over_time macro. The window frame
# is row-based (ROWS BETWEEN n PRECEDING AND CURRENT ROW), not a calendar
# interval.
macros:
  - name: rolling_aggregations_over_time
    description: >-
      A macro that calculates an aggregation over a rolling window made of the
      current row and a specified number of preceding rows.
    arguments:
      - name: column_name
        type: string
        description: The name of the column to which an aggregation function is applied.
      - name: aggregation_type
        type: string
        description: Name of the SQL aggregate function to apply (avg by default).
      - name: time_window
        type: integer
        description: Number of preceding rows included in the window in addition to the current row (7 by default).
      - name: partition_by
        type: string
        description: Grouping column (none value as a default).
      - name: order_by
        type: string
        description: Ordering column (created_at as a default value).
127 changes: 126 additions & 1 deletion models/docs/docs_bingeflix.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,134 @@
# Bingeflix Docs
This file contains documentation for Bingeflix data sources.

## Events

This section contains documentation on events.

{% docs bingeflix_events %}
This table contains information about the behavioral events of users while they interact with the platform.
{% enddocs %}

{% docs bingeflix_column_session_id %}
The unique identifier of the session in the Bingeflix application.
{% enddocs %}

{% docs bingeflix_column_event_created_at %}
When the event was logged.
{% enddocs %}

{% docs bingeflix_column_event_name %}
The name of the event.
{% enddocs %}

{% docs bingeflix_column_event_id %}
The unique identifier of the event.
{% enddocs %}

## Subscription plans

This section contains documentation on subscription plans.

{% docs bingeflix_subscription_plans %}
This table contains information on users' subscription plans.
{% enddocs %}

{% docs bingeflix_column_subscription_plan_id %}
The unique identifier of the subscription plan.
{% enddocs %}

{% docs bingeflix_column_plan_name %}
The name of the subscription plan.
{% enddocs %}

{% docs bingeflix_column_payment_period %}
The user's payment period.
{% enddocs %}

## Subscriptions

This section contains documentation on subscriptions.

{% docs bingeflix_subscriptions %}
This table contains information on users' subscriptions.
{% enddocs %}

{% docs bingeflix_column_subscription_id %}
The unique identifier of the subscription.
{% enddocs %}

{% docs bingeflix_column_starts_at %}
When the subscription started.
{% enddocs %}

{% docs bingeflix_column_ends_at %}
When the subscription ends.
{% enddocs %}

{% docs bingeflix_column_pricing %}
The monthly cost of the subscription.
{% enddocs %}

{% docs bingeflix_column_billing_period %}
The cadence of the billing period.
{% enddocs %}


## Users
This section contains documentation from the Bingeflix Users table.

{% docs bingeflix_users %}
This table contains information on all users.
{% enddocs %}

{% docs bingeflix_column_user_id %}
The unique identifier of the Bingeflix user. A user is created when...
The unique identifier of the Bingeflix user.
{% enddocs %}

{% docs bingeflix_column_created_at %}
When the user account was created.
{% enddocs %}

{% docs bingeflix_column_phone_number %}
The user's phone number.
{% enddocs %}

{% docs bingeflix_column_deleted_at %}
When the user's account was deleted.
{% enddocs %}

{% docs bingeflix_column_username %}
The username associated with the user.
{% enddocs %}

{% docs bingeflix_column_name %}
The full name of the user (first and last).
{% enddocs %}

{% docs bingeflix_column_sex %}
The user's gender.
{% enddocs %}

{% docs bingeflix_column_email %}
The user's email address.
{% enddocs %}

{% docs bingeflix_column_birthdate %}
The user's birthdate.
{% enddocs %}

{% docs bingeflix_column_current_age %}
The user's current age.
{% enddocs %}

{% docs bingeflix_column_age_at_acquisition %}
The age of the user when they became a Bingeflix user.
{% enddocs %}

{% docs bingeflix_column_region %}
The region where the user lives.
{% enddocs %}

{% docs bingeflix_column_country %}
The country where the user lives.
{% enddocs %}
55 changes: 55 additions & 0 deletions models/marts/core/dim_users_max_14_days.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
-- Per user row: region, country, account creation time and a rolling MAX of
-- the user's subscription count, computed by rolling_aggregations_over_time
-- within each region and ordered by the macro's default order_by.
WITH

users AS (
    SELECT *
    FROM {{ ref('stg_bingeflix__users') }}
),

-- One row per user that has at least one subscription.
users_subscription_facts AS (
    SELECT
        user_id,
        MIN(starts_at) AS first_subscription_starts_at,
        COUNT(DISTINCT subscription_id) AS count_of_subscriptions
    FROM {{ ref('stg_bingeflix__subscriptions') }}
    GROUP BY user_id
),

-- LEFT JOIN keeps users without subscriptions; their subscription facts are NULL.
final AS (
    SELECT
        u.user_id,
        u.created_at,
        u.phone_number,
        u.deleted_at,
        u.username,
        u.name,
        u.sex,
        u.email,
        u.birthdate,
        TRUNCATE(DATEDIFF(MONTH, u.birthdate, CURRENT_DATE) / 12) AS current_age,
        TRUNCATE(DATEDIFF(MONTH, u.birthdate, u.created_at) / 12) AS age_at_acquisition,
        u.region,
        u.country,
        usf.first_subscription_starts_at,
        usf.count_of_subscriptions
    FROM users AS u
    LEFT JOIN users_subscription_facts AS usf
        ON u.user_id = usf.user_id
)

SELECT
    region,
    country,
    created_at,
    {{ rolling_aggregations_over_time(
        column_name='count_of_subscriptions',
        aggregation_type='max',
        time_window=14,
        partition_by='region'
    ) }}
FROM final
24 changes: 24 additions & 0 deletions models/marts/core/dim_users_max_14_days.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
models:
  - name: dim_users_max_14_days
    description: >-
      This model contains, for every user row, the rolling maximum number of
      subscriptions within the user's region, ordered by account creation
      time and computed over the current row and the 14 preceding rows.
    columns:

      - name: created_at
        description: '{{ doc("bingeflix_column_created_at") }}'
        data_tests:
          - not_null

      - name: region
        description: '{{ doc("bingeflix_column_region") }}'
        data_tests:
          - not_null

      - name: country
        description: '{{ doc("bingeflix_column_country") }}'
        data_tests:
          - not_null

      - name: max_14_periods_count_of_subscriptions
        description: >-
          Maximum number of subscriptions over the current user row and the
          14 preceding rows of the same region (row-based window, not
          calendar days).
        # NOTE(review): count_of_subscriptions comes from a LEFT JOIN, so this
        # value is NULL when every row in the window belongs to a user without
        # subscriptions - confirm not_null holds for the real data.
        data_tests:
          - not_null
31 changes: 31 additions & 0 deletions models/marts/core/fct_events.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Column documentation and tests for fct_events. Every description is pulled
# from a docs block through doc().
models:
  - name: fct_events
    description: This model contains information about Bingeflix events.
    columns:
      - name: session_id
        description: "{{ doc('bingeflix_column_session_id') }}"
        data_tests:
          - not_null

      - name: created_at
        description: "{{ doc('bingeflix_column_event_created_at') }}"
        data_tests:
          - not_null

      - name: user_id
        description: "{{ doc('bingeflix_column_user_id') }}"
        data_tests:
          - not_null

      - name: event_name
        description: "{{ doc('bingeflix_column_event_name') }}"
        data_tests:
          - not_null

      # event_id is the only column tested for uniqueness.
      - name: event_id
        description: "{{ doc('bingeflix_column_event_id') }}"
        data_tests:
          - unique
          - not_null


Loading