Skip to content

Commit

Permalink
wrap up first pass, add ACLs to prod env
Browse files Browse the repository at this point in the history
  • Loading branch information
brabster committed Jan 4, 2024
1 parent d14d739 commit 60e0feb
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .envs/prod.env → .envs/prod/.env
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
export DBT_DATASET=pypi_vulnerabilities
export DBT_DATASET=pypi_vulnerabilities_us
export DBT_LOCATION=US
export DBT_PROJECT=pypi-vulnerabilities
20 changes: 20 additions & 0 deletions .envs/prod/dataset_acl.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"access": [
{
"role": "READER",
"specialGroup": "projectReaders"
},
{
"role": "WRITER",
"specialGroup": "projectWriters"
},
{
"role": "OWNER",
"specialGroup": "projectOwners"
},
{
"role": "roles/bigquery.dataViewer",
"specialGroup": "allAuthenticatedUsers"
}
]
}
2 changes: 1 addition & 1 deletion .github/actions/dbt_build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ runs:
shell: bash
run: |
source .venv/bin/activate
source .envs/${{ inputs.env }}.env
source .envs/${{ inputs.env }}/.env
rm -rf logs
dbt clean
dbt deps
Expand Down
11 changes: 9 additions & 2 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,16 @@ jobs:
- uses: google-github-actions/setup-gcloud@v2
with:
version: '>= 363.0.0'
- run: |
- name: ensure prod dataset exists
run: |
source .venv/bin/activate
source .envs/prod.env
source .envs/prod/.env
dbt run-operation ensure_target_dataset_exists
bq update --source .envs/prod/dataset_acl.json "${DBT_PROJECT}:${DBT_DATASET}"
- name: load prod safety db
run: |
source .venv/bin/activate
source .envs/prod/.env
python etl/safety_db_to_jsonl.py
python etl/safety_jsonl_to_bq.py
- uses: ./.github/actions/dbt_build
Expand Down
16 changes: 8 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
# Python virtualenv files
.venv/
/.venv/

# User's environment settings
.env
/.env

# DBT logs
logs/
/logs/

# DBT target dir
target/
/target/

# DBT packages
dbt_packages/
package-lock.yml
/dbt_packages/
/package-lock.yml

# files that we don't want committed
uncommitted/*
!uncommitted/README.md
/uncommitted/*
!/uncommitted/README.md
1 change: 1 addition & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ clean-targets: # directories to be removed by `dbt clean`
models:
+labels:
stability: stable
data_classification: public
+grant_access_to: []
+persist_docs:
# push any model/column descriptions to the target database
Expand Down
3 changes: 2 additions & 1 deletion macros/ensure_target_dataset_exists.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
CREATE SCHEMA IF NOT EXISTS `{{ project_id }}`.`{{ dataset_name }}`
OPTIONS (
description = 'Exploring vulnerable PyPI downloads. Managed by https://github.com/brabster/pypi_vulnerabilities',
location = '{{ dataset_location }}'
location = '{{ dataset_location }}',
labels = [('data_classification', 'public')]
)

{% endmacro %}
9 changes: 8 additions & 1 deletion models/overview.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
{% docs __overview__ %}
BigQuery-based investigation into PyPI package downloads and vulnerabilities.
{% enddocs %}

- Derived from public domain information
- Operating in the BigQuery sandbox - tables expire after 60 days
- More info in [README.md on GitHub](https://github.com/brabster/pypi_vulnerabilities)
- Star project {{ env_var('DBT_PROJECT') }} in BigQuery console

{% enddocs %}

0 comments on commit 60e0feb

Please sign in to comment.