From efce39f6fbad0393aaee394d148fa6867469e01e Mon Sep 17 00:00:00 2001 From: Bryce Codell Date: Tue, 30 May 2023 18:07:40 -0400 Subject: [PATCH 1/4] fix extra join parsing --- macros/aql/parse.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macros/aql/parse.sql b/macros/aql/parse.sql index a3c01ee..a582d4e 100644 --- a/macros/aql/parse.sql +++ b/macros/aql/parse.sql @@ -320,7 +320,7 @@ be wrapped in a valid aggregation function. {% macro _parse_filters(query) %} {%- set ws = dbt_aql.whitespace() -%} {%- set ws_join = ws~"join"~ws -%} -{%- set query_stripped = query.strip() -%} +{%- set query_stripped = ' '~query.strip() -%} {%- set join_ixs = modules.re.search(ws_join, query_stripped) -%} {%- if join_ixs is not none -%} From 20578f388ce7c4370eb1f7a935c6f4d3263a82fc Mon Sep 17 00:00:00 2001 From: Bryce Codell Date: Tue, 30 May 2023 18:08:43 -0400 Subject: [PATCH 2/4] optimize dataset query performance for filtered joined activities --- macros/activity_schema/dataset/dataset.sql | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/macros/activity_schema/dataset/dataset.sql b/macros/activity_schema/dataset/dataset.sql index 73d4782..5a4281f 100644 --- a/macros/activity_schema/dataset/dataset.sql +++ b/macros/activity_schema/dataset/dataset.sql @@ -108,7 +108,7 @@ with and {{ primary_activity.relationship_clause }} ){% if joined_activities|length > 0 %},{% endif %} {% for ja in joined_activities %} -{% if ja.filters is not none and (ja.verb, ja.join_condition) != (av.aggregate, jc.all) %} +{% if ja.filters is not none and ja.verb == av.append %} {{dbt_aql.alias_activity(ja, loop.index)}}{{fs}} as ( select {%- for column in columns.items() %} @@ -141,7 +141,7 @@ with {{ column.aggfunc(parsed_col) }} as {{ column.alias }}{% if not loop.last -%},{%- endif -%} {%- endfor %} from {{primary_activity_alias}} as {{primary}} - left join {% if ja.filters is none %}{{ stream_relation }}{% else %}{{dbt_aql.alias_activity(ja, loop.index)}}{{fs}}{% endif %} {{joined}} + left join {% if ja.filters is not none and ja.verb == av.append %}{{dbt_aql.alias_activity(ja, loop.index)}}{{fs}}{% else %}{{ stream_relation }}{% endif %} {{joined}} -- filter joined activity first to improve query performance on {{joined}}.{{columns.activity}} = {{dbt_aql.clean_activity_name(stream, ja.activity_name)}} {%- if ja.relationship_clause is not none %} @@ -157,6 +157,12 @@ with and {{ej_formatted}} {%- endfor %} {%- endif %} + {% if ja.filters is not none and ja.verb == av.aggregate %} + {% for f in ja.filters %} + {%- set f_formatted = f.format(primary=primary, joined=joined, **columns) %} + and {{f_formatted}} + {%- endfor %} + {% endif %} group by 1 {% else %} -- special join case for aggregate all to improve performance From 544ead8f62640ed16002dc57a1e1d1d55956ade6 Mon Sep 17 00:00:00 2001 From: Bryce Codell Date: Tue, 30 May 2023 18:22:40 -0400 Subject: [PATCH 3/4] add test for dataset with extra joins --- .../datasets/aql_syntax/dataset__joined.sql | 20 +++++++++++++++++++ .../models/datasets/aql_syntax/schema.yml | 7 +++++++ .../datasets/aql_syntax/output__joined.csv | 4 ++++ 3 files changed, 31 insertions(+) create mode 100644 integration_tests/models/datasets/aql_syntax/dataset__joined.sql create mode 100644 integration_tests/seeds/datasets/aql_syntax/output__joined.csv diff --git a/integration_tests/models/datasets/aql_syntax/dataset__joined.sql b/integration_tests/models/datasets/aql_syntax/dataset__joined.sql new file mode 100644 index 0000000..bd4932c --- /dev/null +++ b/integration_tests/models/datasets/aql_syntax/dataset__joined.sql @@ -0,0 +1,20 @@ +{% set aql %} +using customer_stream +select first visited_page ( + activity_id as activity_id, + entity_uuid as customer_id, + ts as first_visited_google_at + filter {{dbt_aql.json_extract('{feature_json}', 'referrer_url')}} = 'google.com' +) +aggregate after bought_something ( + count(activity_id) as total_large_purchases_after + join nullif({{dbt_aql.json_extract('{joined}.{feature_json}', 'total_sales')}}, '')::int > 100 +) +include ( + total_items_purchased_after +) +{% endset %} + +-- depends_on: {{ ref('output__joined') }} + +{{ dbt_aql.dataset(aql) }} diff --git a/integration_tests/models/datasets/aql_syntax/schema.yml b/integration_tests/models/datasets/aql_syntax/schema.yml index bec213d..ffede97 100644 --- a/integration_tests/models/datasets/aql_syntax/schema.yml +++ b/integration_tests/models/datasets/aql_syntax/schema.yml @@ -7,3 +7,10 @@ models: tests: - dbt_utils.equality: compare_model: ref("output__filtered") + + - name: dataset__joined + description: A test to validate the functionality of join clauses in aql. + tests: + - dbt_utils.equality: + compare_model: ref("output__joined") + diff --git a/integration_tests/seeds/datasets/aql_syntax/output__joined.csv b/integration_tests/seeds/datasets/aql_syntax/output__joined.csv new file mode 100644 index 0000000..488a60a --- /dev/null +++ b/integration_tests/seeds/datasets/aql_syntax/output__joined.csv @@ -0,0 +1,4 @@ +activity_id,customer_id,first_visited_google_at,total_large_purchases_after,total_items_purchased_after +e58cfb189af4fbf30f22821af7aa9316,1,2022-01-01 22:10:11,2,11 +fdf62f7ddcd69fc3c3dbd54bf7a34452,7,2022-04-13 22:10:11,0,0 +d5d41e942d4b0b5325741007e8814f00,10,2022-01-13 22:10:11,2,18 From 8bb9d35681c962ac4a8509500f67becc08e307bb Mon Sep 17 00:00:00 2001 From: Bryce Codell Date: Tue, 30 May 2023 18:31:21 -0400 Subject: [PATCH 4/4] bump version --- dbt_project.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt_project.yml b/dbt_project.yml index 3cc07b4..edf8917 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: "dbt_aql" -version: "0.1.0" +version: "0.1.3" config-version: 2 diff --git a/pyproject.toml b/pyproject.toml index a0def7a..7ceb588 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dbt-aql" -version = "0.1.0" +version = "0.1.3" description = "A dbt package to query Activity Streams using a SQL-esque interface." authors = ["Bryce Codell