-
Notifications
You must be signed in to change notification settings - Fork 134
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit d42d611
Showing
10 changed files
with
120 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Cumulative Table Design Example | ||
|
||
This repo shows how to build a robust cumulative table design with an example of monthly active users. | ||
|
||
In this repo, we'll be using **2022-01-01** as 'today' and **2021-12-31** as 'yesterday' | ||
|
||
|
||
The steps for this design are: | ||
|
||
- The Daily Table step | ||
- In this step we aggregate just the events of today to see who is daily active | ||
- This query is pretty simple and straightforward
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
-- Builds the 2022-01-01 snapshot of active_users_cumulated from yesterday's
-- snapshot (2021-12-31) plus today's rows in active_users_daily.
-- activity_array holds at most 30 entries, most recent day first
-- (index 1 = today), each entry being 1 (active) or 0 (not active).
INSERT INTO active_users_cumulated (
    user_id,
    is_daily_active,
    is_weekly_active,
    is_monthly_active,
    activity_array,
    snapshot_date
)

-- First read in yesterday from the cumulated table
WITH yesterday AS (
    SELECT
        user_id,
        activity_array
    FROM active_users_cumulated
    WHERE snapshot_date = DATE '2021-12-31'
),

-- Read in the daily active user numbers for just today from the daily table
today AS (
    SELECT
        user_id,
        is_active_today
    FROM active_users_daily
    WHERE snapshot_date = DATE '2022-01-01'
),

-- We FULL OUTER JOIN today and yesterday so that we keep users that appear on
-- only one side. We need COALESCE because:
--   * activity_array may not exist yet for a given user (i.e. they are brand new)
--   * is_active_today is NULL on days when a user didn't generate an event
combined AS (
    SELECT
        -- Either side of the join may be missing, so t.user_id or y.user_id may be NULL
        COALESCE(y.user_id, t.user_id) AS user_id,
        CASE
            -- Brand new user: start the history with just today
            WHEN y.activity_array IS NULL
                THEN ARRAY[COALESCE(t.is_active_today, 0)]
            -- Prepend today, then keep only the 30 most recent days.
            -- SLICE is 1-based; a negative start would count from the END of the array.
            ELSE SLICE(ARRAY[COALESCE(t.is_active_today, 0)] || y.activity_array, 1, 30)
        END AS activity_array
    FROM yesterday AS y
    FULL OUTER JOIN today AS t
        ON y.user_id = t.user_id
)

SELECT
    user_id,
    -- the first array element is today's activity flag
    activity_array[1] AS is_daily_active,
    -- if any of the first 7 array values are non-zero, then the user was active in the last week
    CASE WHEN ARRAY_SUM(SLICE(activity_array, 1, 7)) > 0 THEN 1 ELSE 0 END AS is_weekly_active,
    -- if any of the array values are 1, then the user was active in the last month
    CASE WHEN ARRAY_SUM(activity_array) > 0 THEN 1 ELSE 0 END AS is_monthly_active,
    activity_array,
    -- Use the literal run date: t.snapshot_date is NULL for users with no events today
    DATE '2022-01-01' AS snapshot_date
FROM combined
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- Populates active_users_daily for 2022-01-01 with one row per user_id that
-- has at least one row in events on that date.
-- The column list matches the daily table (user_id, is_active_today, snapshot_date),
-- which is what the cumulated query reads from.
INSERT INTO active_users_daily (
    user_id,
    is_active_today,
    snapshot_date
)

SELECT
    user_id,
    -- If the user_id has at least 1 event, they are daily active
    IF(COUNT(user_id) > 0, 1, 0) AS is_active_today,
    -- Stamp every row with the run date so the cumulated step can filter on it
    DATE '2022-01-01' AS snapshot_date
FROM events
-- NOTE(review): the events schema is not visible here; if event_date is a DATE
-- column, use DATE '2022-01-01' instead of the string literal.
WHERE event_date = '2022-01-01'
GROUP BY user_id
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- Cumulated table: one row per user per snapshot_date.
CREATE TABLE active_users_cumulated (
    user_id INTEGER,
    -- 1/0 flags derived from activity_array by the cumulation query
    is_daily_active INTEGER,
    is_weekly_active INTEGER,
    is_monthly_active INTEGER,
    -- per-day activity flags, most recent day first (index 1 = snapshot_date)
    activity_array ARRAY(INTEGER),
    snapshot_date DATE
)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
-- Daily table: one row per user that generated events on snapshot_date.
-- This is the table the cumulation query reads as "today".
CREATE TABLE active_users_daily (
    user_id INTEGER,
    -- 1 when the user had at least one event on snapshot_date
    is_active_today INTEGER,
    snapshot_date DATE
)