Skip to content

Commit

Permalink
initial repo commit
Browse files Browse the repository at this point in the history
  • Loading branch information
EcZachly committed Dec 31, 2021
0 parents commit d42d611
Show file tree
Hide file tree
Showing 10 changed files with 120 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions .idea/cumulative-table-design.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Cumulative Table Design Example

This repo shows how to build a robust cumulative table design with an example of monthly active users.

In this repo, we'll be using **2022-01-01** as 'today' and **2021-12-31** as 'yesterday'


The steps for this design are:

- The Daily Table step
  - In this step, we aggregate just today's events to see who is daily active
  - This query is simple and straightforward; see `queries/active_users_daily_populate.sql`
48 changes: 48 additions & 0 deletions queries/active_users_cumulated_populate.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
-- Builds the 2022-01-01 ('today') snapshot of active_users_cumulated by combining
-- the 2021-12-31 ('yesterday') cumulated snapshot with today's daily activity.
-- The column list is explicit so the insert does not depend on column position.
INSERT INTO active_users_cumulated (
    user_id,
    is_daily_active,
    is_weekly_active,
    is_monthly_active,
    activity_array,
    snapshot_date
)

-- First read in yesterday from the cumulated table
WITH yesterday AS (
    SELECT
        user_id,
        activity_array
    FROM active_users_cumulated
    WHERE snapshot_date = DATE '2021-12-31'
),

-- Read in the daily active user numbers for just today from the daily table
today AS (
    SELECT
        user_id,
        is_active_today
    FROM active_users_daily
    WHERE snapshot_date = DATE '2022-01-01'
),

-- We FULL OUTER JOIN today and yesterday so that we keep both users who were
-- only seen before today and users who are brand new today.
-- We need COALESCE because:
--   * activity_array may not exist yet for a given user (i.e. they are brand new)
--   * is_active_today is NULL on days when a user didn't generate an event
combined AS (
    SELECT
        -- Either side of the FULL OUTER JOIN may be NULL, so take whichever user_id exists
        COALESCE(y.user_id, t.user_id) AS user_id,
        COALESCE(
            -- Prepend today's flag (index 1 = most recent day) and cap the history at 30 days.
            -- SLICE is 1-based: (1, 29) keeps the 29 most recent days and drops the oldest.
            IF(
                CARDINALITY(y.activity_array) < 30,
                ARRAY[COALESCE(t.is_active_today, 0)] || y.activity_array,
                ARRAY[COALESCE(t.is_active_today, 0)] || SLICE(y.activity_array, 1, 29)
            ),
            -- Brand new user: y.activity_array is NULL, so the expression above is NULL
            ARRAY[COALESCE(t.is_active_today, 0)]
        ) AS activity_array,
        -- t.snapshot_date is NULL for users with no event today, so stamp the
        -- snapshot date explicitly instead of reading it from the daily row
        DATE '2022-01-01' AS snapshot_date
    FROM yesterday AS y
    FULL OUTER JOIN today AS t
        ON y.user_id = t.user_id
)

SELECT
    user_id,
    activity_array[1] AS is_daily_active,
    -- if any of the first 7 array values are non-zero, then the user was active in the last week
    CASE WHEN ARRAY_SUM(SLICE(activity_array, 1, 7)) > 0 THEN 1 ELSE 0 END AS is_weekly_active,
    -- if any of the array values are 1, then the user was active in the last month
    CASE WHEN ARRAY_SUM(activity_array) > 0 THEN 1 ELSE 0 END AS is_monthly_active,
    activity_array,
    snapshot_date
FROM combined





9 changes: 9 additions & 0 deletions queries/active_users_daily_populate.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Populates active_users_daily with one row per user who generated at least
-- one event on 2022-01-01 ('today').
-- The column list is explicit so the insert matches the table definition
-- (user_id, is_active_today, snapshot_date) regardless of column order.
INSERT INTO active_users_daily (
    user_id,
    is_active_today,
    snapshot_date
)

SELECT
    user_id,
    -- If the user_id has at least 1 event, they are daily active
    IF(COUNT(user_id) > 0, 1, 0) AS is_active_today,
    -- Stamp each row with the day being processed; the cumulated query filters on this column
    DATE '2022-01-01' AS snapshot_date
FROM events
-- NOTE(review): the events schema is not shown here; if event_date is a DATE
-- column, prefer the typed literal DATE '2022-01-01' -- confirm
WHERE event_date = '2022-01-01'
GROUP BY user_id
9 changes: 9 additions & 0 deletions tables/active_users_cumulated.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Cumulated activity table: one row per user per snapshot_date.
-- activity_array holds per-day activity flags with the most recent day at
-- index 1; the populate query caps it at 30 entries.
CREATE TABLE active_users_cumulated (
    user_id INTEGER,
    -- 1/0 flags derived from activity_array by the populate query
    is_daily_active INTEGER,
    is_weekly_active INTEGER,
    is_monthly_active INTEGER,
    activity_array ARRAY(INTEGER),
    snapshot_date DATE
)

5 changes: 5 additions & 0 deletions tables/active_users_daily.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Daily activity table: one row per user who generated an event on snapshot_date.
-- Users with no events on a given day have no row for that day.
CREATE TABLE active_users_daily (
    user_id INTEGER,
    -- 1 when the user generated at least one event on snapshot_date
    is_active_today INTEGER,
    snapshot_date DATE
)

0 comments on commit d42d611

Please sign in to comment.