-
-
Notifications
You must be signed in to change notification settings - Fork 118
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve flexibility for publishing options #2964
Changes from all commits
d316743
c6ad306
794eef4
ada7664
1ef6491
f066781
6a47bd3
cc0bbcc
cd7ff93
3758dbc
d85da7a
cdecdde
df8347f
38d3837
a519e97
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ env: | |
GITHUB_REF: ${{ github.ref_name }} # This is changed to dev if running on a schedule | ||
GCE_INSTANCE: pudl-deployment-tag # This is changed to pudl-deployment-dev if running on a schedule | ||
GCE_INSTANCE_ZONE: ${{ secrets.GCE_INSTANCE_ZONE }} | ||
GCS_OUTPUT_BUCKET: gs://nightly-build-outputs.catalyst.coop | ||
|
||
jobs: | ||
build_and_deploy_pudl: | ||
|
@@ -34,6 +35,7 @@ jobs: | |
- name: Get HEAD of the branch (main or dev) | ||
run: | | ||
echo "ACTION_SHA=$(git rev-parse HEAD)" >> $GITHUB_ENV | ||
echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_ENV | ||
|
||
- name: Print action vars | ||
run: | | ||
|
@@ -83,6 +85,11 @@ jobs: | |
- name: Set up Cloud SDK | ||
uses: google-github-actions/setup-gcloud@v1 | ||
|
||
- name: Determine commit information | ||
run: |- | ||
echo "COMMIT_BRANCH=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_ENV | ||
echo "COMMIT_TIME=$(git log -1 --format=%cd --date=format:%Y-%m-%d-%H%M)" >> $GITHUB_ENV | ||
|
||
# Deploy PUDL image to GCE | ||
- name: Deploy | ||
env: | ||
|
@@ -119,6 +126,7 @@ jobs: | |
--container-env DAGSTER_PG_DB="dagster-storage" \ | ||
--container-env FLY_ACCESS_TOKEN=${{ secrets.FLY_ACCESS_TOKEN }} \ | ||
--container-env PUDL_SETTINGS_YML="/home/mambauser/src/pudl/package_data/settings/etl_full.yml" \ | ||
--container-env PUDL_GCS_OUTPUT=${{ env.GCS_OUTPUT_BUCKET }}/${{ env.COMMIT_TIME }}-${{ env.SHORT_SHA }}-${{ env.COMMIT_BRANCH }} | ||
|
||
# Start the VM | ||
- name: Start the deploy-pudl-vm | ||
|
@@ -129,6 +137,6 @@ jobs: | |
uses: slackapi/[email protected] | ||
with: | ||
channel-id: "C03FHB9N0PQ" | ||
slack-message: "build-deploy-pudl status: ${{ job.status }}\n${{ env.ACTION_SHA }}-${{ env.GITHUB_REF }}" | ||
slack-message: "build-deploy-pudl status: ${{ job.status }}\n${{ env.COMMIT_TIME}}-${{ env.SHORT_SHA }}-${{ env.COMMIT_BRANCH }}" | ||
env: | ||
SLACK_BOT_TOKEN: ${{ secrets.PUDL_DEPLOY_SLACK_TOKEN }} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,9 @@ | |
# This script runs the entire ETL and validation tests in a docker container on a Google Compute Engine instance. | ||
# This script won't work locally because it needs adequate GCP permissions. | ||
|
||
# Set PUDL_GCS_OUTPUT *only* if it is currently unset | ||
: "${PUDL_GCS_OUTPUT:=gs://nightly-build-outputs.catalyst.coop/$ACTION_SHA-$GITHUB_REF}" | ||
zaneselvans marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
set -x | ||
|
||
function send_slack_msg() { | ||
|
@@ -27,26 +30,26 @@ function run_pudl_etl() { | |
--loglevel DEBUG \ | ||
--gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \ | ||
--workers 8 \ | ||
$PUDL_SETTINGS_YML && \ | ||
pudl_etl \ | ||
$PUDL_SETTINGS_YML \ | ||
&& pudl_etl \ | ||
--loglevel DEBUG \ | ||
--gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \ | ||
$PUDL_SETTINGS_YML && \ | ||
pytest \ | ||
$PUDL_SETTINGS_YML \ | ||
&& pytest \ | ||
-n auto \ | ||
--gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \ | ||
--etl-settings $PUDL_SETTINGS_YML \ | ||
--live-dbs test/integration test/unit && \ | ||
pytest \ | ||
--live-dbs test/integration test/unit \ | ||
&& pytest \ | ||
-n auto \ | ||
--gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \ | ||
--etl-settings $PUDL_SETTINGS_YML \ | ||
--live-dbs test/validate \ | ||
&& touch ${PUDL_OUTPUT}/success | ||
zaneselvans marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
function shutdown_vm() { | ||
# Copy the outputs to the GCS bucket | ||
gsutil -m cp -r $PUDL_OUTPUT "gs://nightly-build-outputs.catalyst.coop/$ACTION_SHA-$GITHUB_REF" | ||
|
||
upload_file_to_slack $LOGFILE "pudl_etl logs for $ACTION_SHA-$GITHUB_REF:" | ||
|
||
|
@@ -59,6 +62,12 @@ function shutdown_vm() { | |
curl -X POST -H "Content-Length: 0" -H "Authorization: Bearer ${ACCESS_TOKEN}" https://compute.googleapis.com/compute/v1/projects/catalyst-cooperative-pudl/zones/$GCE_INSTANCE_ZONE/instances/$GCE_INSTANCE/stop | ||
} | ||
|
||
function copy_outputs_to_gcs() {
    # Upload the local output directory to the build's GCS location, then
    # delete the local "success" marker file.
    #
    # Globals (read):
    #   PUDL_OUTPUT      local directory that is copied recursively
    #   PUDL_GCS_OUTPUT  destination URL handed to gsutil
    # Side effects:
    #   Removes ${PUDL_OUTPUT}/success after the copy attempt, whether or not
    #   the copy worked.
    # Returns:
    #   The exit status of the gsutil copy (0 on success).
    local copy_status=0

    echo "Copying outputs to GCP bucket $PUDL_GCS_OUTPUT"

    # Both paths are quoted so that whitespace or glob characters in either
    # value reach gsutil as a single argument each.
    gsutil -m cp -r "$PUDL_OUTPUT" "$PUDL_GCS_OUTPUT" || copy_status=$?

    # The marker may not exist (for example when the run that creates it did
    # not finish); -f keeps that case silent instead of reporting an error.
    rm -f -- "${PUDL_OUTPUT}/success"

    # Report the upload result rather than the result of the cleanup.
    return "$copy_status"
}
|
||
function copy_outputs_to_distribution_bucket() { | ||
echo "Copying outputs to GCP distribution bucket" | ||
gsutil -m -u $GCP_BILLING_PROJECT cp -r "$PUDL_OUTPUT/*" "gs://pudl.catalyst.coop/$GITHUB_REF" | ||
|
@@ -93,6 +102,8 @@ run_pudl_etl 2>&1 | tee $LOGFILE | |
|
||
ETL_SUCCESS=${PIPESTATUS[0]} | ||
|
||
copy_outputs_to_gcs | ||
|
||
# if pipeline is successful, distribute + publish datasette | ||
if [[ $ETL_SUCCESS == 0 ]]; then | ||
# Deploy the updated data to datasette | ||
|
@@ -109,6 +120,9 @@ if [[ $ETL_SUCCESS == 0 ]]; then | |
ETL_SUCCESS=${PIPESTATUS[0]} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably want to remove the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we replace There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think a little build metadata file would be worth including. We're distributing the |
||
|
||
# Dump outputs to s3 bucket if branch is dev or build was triggered by a tag | ||
# TODO: this behavior should be controlled by on/off switch here and this logic | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agree - having the action just pass in a switch would be nice. I think at some point we should replace this whole nightly build harness with a Python script that's more robust, and that would be a nice time to fix this too. |
||
# should be moved to the triggering github action. Having it here feels | ||
# fragmented. | ||
if [ $GITHUB_ACTION_TRIGGER = "push" ] || [ $GITHUB_REF = "dev" ]; then | ||
copy_outputs_to_distribution_bucket | ||
ETL_SUCCESS=${PIPESTATUS[0]} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TIL about the block chomping operator. Does GHA complain when there's an extra newline at the end here?