Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/dev' into pytest-cov-testing
Browse files Browse the repository at this point in the history
  • Loading branch information
rousik committed Dec 5, 2023
2 parents 5a1faf9 + 3452bae commit 9a42c91
Show file tree
Hide file tree
Showing 45 changed files with 1,575 additions and 2,009 deletions.
5 changes: 4 additions & 1 deletion .codecov.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
---
coverage:
range: 70..100
round: down
round: nearest
precision: 1

ignore:
- "src/pudl/validate.py"

codecov:
token: 23a7ee04-6ac5-4d1b-9d36-86b0c50d40c5
require_ci_to_pass: true
Expand Down
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
*.ipynb linguist-detectable=false
*.html linguist-detectable=false
eia861-transform.ipynb merge=ours
environments/conda-*lock.yml merge=ours
environments/conda-*lock.yml merge=ours linguist-generated=true
*.csv text
*.py text
*.json text
Expand Down
20 changes: 14 additions & 6 deletions .github/workflows/build-deploy-pudl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ env:
GITHUB_REF: ${{ github.ref_name }} # This is changed to dev if running on a schedule
GCE_INSTANCE: pudl-deployment-tag # This is changed to pudl-deployment-dev if running on a schedule
GCE_INSTANCE_ZONE: ${{ secrets.GCE_INSTANCE_ZONE }}
GCS_OUTPUT_BUCKET: gs://nightly-build-outputs.catalyst.coop

jobs:
build_and_deploy_pudl:
Expand All @@ -27,13 +28,14 @@ jobs:
echo "This action was triggered by a schedule." && echo "GCE_INSTANCE=pudl-deployment-dev" >> $GITHUB_ENV && echo "GITHUB_REF=dev" >> $GITHUB_ENV

- name: Checkout Repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
ref: ${{ env.GITHUB_REF }}

- name: Get HEAD of the branch (main or dev)
run: |
echo "ACTION_SHA=$(git rev-parse HEAD)" >> $GITHUB_ENV
echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV

- name: Print action vars
run: |
Expand All @@ -53,17 +55,17 @@ jobs:
type=ref,event=tag

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2.5.0
uses: docker/setup-buildx-action@v3.0.0

- name: Login to DockerHub
if: github.event_name != 'pull_request'
uses: docker/login-action@v2.1.0
uses: docker/login-action@v3.0.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Build image and push to Docker Hub
uses: docker/build-push-action@v4.0.0
uses: docker/build-push-action@v5.1.0
with:
context: .
file: docker/Dockerfile
Expand All @@ -74,7 +76,7 @@ jobs:
cache-to: type=gha,mode=max

- id: "auth"
uses: "google-github-actions/auth@v1"
uses: "google-github-actions/auth@v2"
with:
workload_identity_provider: "projects/345950277072/locations/global/workloadIdentityPools/gh-actions-pool/providers/gh-actions-provider"
service_account: "deploy-pudl-github-action@catalyst-cooperative-pudl.iam.gserviceaccount.com"
Expand All @@ -83,6 +85,11 @@ jobs:
- name: Set up Cloud SDK
uses: google-github-actions/setup-gcloud@v1

# Export the checked-out branch name and the HEAD commit's committer date
# (formatted as YYYY-MM-DD-HHMM) to $GITHUB_ENV so that later steps can read
# them as env.COMMIT_BRANCH and env.COMMIT_TIME.
- name: Determine commit information
  run: |-
    echo "COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_ENV
    echo "COMMIT_TIME=$(git log -1 --format=%cd --date=format:%Y-%m-%d-%H%M)" >> $GITHUB_ENV

# Deploy PUDL image to GCE
- name: Deploy
env:
Expand Down Expand Up @@ -119,6 +126,7 @@ jobs:
--container-env DAGSTER_PG_DB="dagster-storage" \
--container-env FLY_ACCESS_TOKEN=${{ secrets.FLY_ACCESS_TOKEN }} \
--container-env PUDL_SETTINGS_YML="/home/mambauser/src/pudl/package_data/settings/etl_full.yml" \
--container-env PUDL_GCS_OUTPUT=${{ env.GCS_OUTPUT_BUCKET }}/${{ env.COMMIT_TIME }}-${{ env.SHORT_SHA }}-${{ env.COMMIT_BRANCH }}

# Start the VM
- name: Start the deploy-pudl-vm
Expand All @@ -129,6 +137,6 @@ jobs:
uses: slackapi/[email protected]
with:
channel-id: "C03FHB9N0PQ"
slack-message: "build-deploy-pudl status: ${{ job.status }}\n${{ env.ACTION_SHA }}-${{ env.GITHUB_REF }}"
slack-message: "build-deploy-pudl status: ${{ job.status }}\n${{ env.COMMIT_TIME}}-${{ env.SHORT_SHA }}-${{ env.COMMIT_BRANCH }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }}
6 changes: 3 additions & 3 deletions .github/workflows/docker-build-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
id-token: write
steps:
- name: Checkout Repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Docker Metadata
id: docker_metadata
Expand All @@ -24,10 +24,10 @@ jobs:
latest=auto

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2.5.0
uses: docker/setup-buildx-action@v3.0.0

- name: Build image but do not push to Docker Hub
uses: docker/build-push-action@v4.0.0
uses: docker/build-push-action@v5.1.0
with:
context: .
file: docker/Dockerfile
Expand Down
28 changes: 14 additions & 14 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ jobs:
- name: Set default GCP credentials
id: gcloud-auth
continue-on-error: true
uses: "google-github-actions/auth@v1"
uses: "google-github-actions/auth@v2"
with:
workload_identity_provider: "projects/345950277072/locations/global/workloadIdentityPools/gh-actions-pool/providers/gh-actions-provider"
service_account: "tox-pytest-github-action@catalyst-cooperative-pudl.iam.gserviceaccount.com"
Expand All @@ -180,19 +180,19 @@ jobs:
run: |
coverage run ${{ env.COVERAGE_OPTIONS }} \
src/pudl/ferc_to_sqlite/cli.py --clobber ${{ env.ETL_COMMANDLINE_OPTIONS }} ${{ env.ETL_CONFIG }}
# - name: Run pudl_etl
# env:
# COVERAGE_FILE: .coverage.pudl_etl
# run: |
# alembic upgrade head
# coverage run ${{ env.COVERAGE_OPTIONS }} \
# src/pudl/cli/etl.py ${{ env.ETL_COMMANDLINE_OPTIONS }} ${{ env.ETL_CONFIG }}
# - name: Run integration tests
# env:
# COVERAGE_FILE: .coverage.pytest
# run: |
# coverage run ${{ env.COVERAGE_OPTIONS }} \
# -m pytest -n auto --live-dbs test/integration
# - name: Run pudl_etl
# env:
# COVERAGE_FILE: .coverage.pudl_etl
# run: |
# alembic upgrade head
# coverage run ${{ env.COVERAGE_OPTIONS }} \
# src/pudl/cli/etl.py ${{ env.ETL_COMMANDLINE_OPTIONS }} ${{ env.ETL_CONFIG }}
# - name: Run integration tests
# env:
# COVERAGE_FILE: .coverage.pytest
# run: |
# coverage run ${{ env.COVERAGE_OPTIONS }} \
# -m pytest -n auto --live-dbs test/integration
- name: Checksum coverage files
run: ls .coverage* | xargs md5sum | sort
- name: Generate coverage
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/run-etl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
id-token: write
steps:
- name: Checkout Repository
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Docker Metadata
id: docker_metadata
uses: docker/[email protected]
Expand All @@ -24,15 +24,15 @@ jobs:
latest=auto
tags: type=sha
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2.5.0
uses: docker/setup-buildx-action@v3.0.0
- name: Login to DockerHub
if: github.event_name != 'pull_request'
uses: docker/login-action@v2.1.0
uses: docker/login-action@v3.0.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build image and push to Docker Hub
uses: docker/build-push-action@v4.0.0
uses: docker/build-push-action@v5.1.0
with:
context: .
file: docker/Dockerfile
Expand All @@ -48,7 +48,7 @@ jobs:
contents: read
steps:
- id: gcloud-auth
uses: google-github-actions/auth@v1
uses: google-github-actions/auth@v2
with:
workload_identity_provider: "projects/345950277072/locations/global/workloadIdentityPools/gh-actions-pool/providers/gh-actions-provider"

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/zenodo-cache-sync.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ jobs:

- name: Set default gcp credentials
id: gcloud-auth
uses: "google-github-actions/auth@v1"
uses: "google-github-actions/auth@v2"
with:
workload_identity_provider: "projects/345950277072/locations/global/workloadIdentityPools/gh-actions-pool/providers/gh-actions-provider"
service_account: "zenodo-cache-manager@catalyst-cooperative-pudl.iam.gserviceaccount.com"
Expand Down
23 changes: 9 additions & 14 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
covargs := --append --source=src/pudl
gcs_cache_path := --gcs-cache-path=gs://zenodo-cache.catalyst.coop
pytest_covargs := --cov-append --cov=src/pudl --cov-report=xml
coverage_report := coverage report --sort=cover
pytest_args := --durations 20 ${pytest_covargs} ${gcs_cache_path}
covargs := --append
pytest_args := --durations 20 ${gcs_cache_path}
etl_fast_yml := src/pudl/package_data/settings/etl_fast.yml
etl_full_yml := src/pudl/package_data/settings/etl_full.yml

Expand Down Expand Up @@ -96,10 +94,7 @@ ferc:
rm -f ${PUDL_OUTPUT}/ferc*.sqlite
rm -f ${PUDL_OUTPUT}/ferc*_xbrl_datapackage.json
rm -f ${PUDL_OUTPUT}/ferc*_xbrl_taxonomy_metadata.json
coverage run ${covargs} -- \
src/pudl/ferc_to_sqlite/cli.py \
${gcs_cache_path} \
${etl_full_yml}
coverage run ${covargs} -- src/pudl/ferc_to_sqlite/cli.py ${gcs_cache_path} ${etl_full_yml}

# Remove the existing PUDL DB if it exists.
# Create a new empty DB using alembic.
Expand All @@ -108,7 +103,7 @@ ferc:
pudl:
rm -f ${PUDL_OUTPUT}/pudl.sqlite
alembic upgrade head
coverage run ${covargs} -- src/pudl/cli/etl.py ${gcs_cache_path} ${etl_full_yml}
coverage run ${covargs} -- src/pudl/etl/cli.py ${gcs_cache_path} ${etl_full_yml}

########################################################################################
# Targets that are coordinated by pytest -- mostly they're actual tests.
Expand All @@ -125,13 +120,13 @@ pytest-integration:
coverage-erase:
coverage erase

.PHONY: pytest-coverage
pytest-coverage: coverage-erase docs-build pytest-ci
${coverage_report}

.PHONY: pytest-ci
pytest-ci: pytest-unit pytest-integration

.PHONY: pytest-coverage
pytest-coverage: coverage-erase docs-build pytest-ci
coverage report

.PHONY: pytest-integration-full
pytest-integration-full:
pytest ${pytest_args} -n auto --live-dbs --etl-settings ${etl_full_yml} test/integration
Expand All @@ -151,7 +146,7 @@ nuke: coverage-erase docs-build pytest-unit ferc pudl
pudl_check_fks
pytest ${pytest_args} -n auto --live-dbs --etl-settings ${etl_full_yml} test/integration
pytest ${pytest_args} -n auto --live-dbs test/validate
${coverage_report}
coverage report

# Check that designated Jupyter notebooks can be run against the current DB
.PHONY: pytest-jupyter
Expand Down
74 changes: 0 additions & 74 deletions devtools/data-release.sh

This file was deleted.

1 change: 1 addition & 0 deletions devtools/sqlite_to_duckdb.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#! /usr/bin/env python
"""A naive script for converting SQLite to DuckDB."""
import logging
from pathlib import Path
Expand Down
5 changes: 1 addition & 4 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,7 @@ ENV LD_LIBRARY_PATH=${CONDA_PREFIX}/lib
# We need information from .git to get version with setuptools_scm so we mount that
# directory without copying it into the image.
RUN --mount=type=bind,source=.git,target=${PUDL_REPO}/.git \
${CONDA_RUN} pip install --no-cache-dir --no-deps --editable . && \
# Run the PUDL setup script so we know where to read and write data
${CONDA_RUN} pudl_setup

${CONDA_RUN} pip install --no-cache-dir --no-deps --editable .

# Install awscli2
# Change back to root because the install script needs access to /usr/local/aws-cli
Expand Down
Loading

0 comments on commit 9a42c91

Please sign in to comment.