Skip to content

Commit

Permalink
Minor bug Fixes (#149)
Browse files Browse the repository at this point in the history
* Minor bug fixes

* More bug fixes

* Update names

* update docs

* Updated docs

* PR Comments

* Update catalog name

* update test failure

* More PR Comments

* Update Docs
  • Loading branch information
aravind-segu authored Feb 28, 2024
1 parent e09dfa2 commit c9c6f6a
Show file tree
Hide file tree
Showing 16 changed files with 148 additions and 41 deletions.
83 changes: 74 additions & 9 deletions databricks_template_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -132,17 +132,82 @@
"type": "string",
"description": "\nWhether to use the Model Registry with Unity Catalog",
"default": "no",
"enum": ["yes", "no"],
"enum": ["yes", "no"]
},
"input_staging_catalog_name": {
"order": 12,
"type": "string",
"description": "\nName of the catalog in Unity Catalog that will host the staging UC resources. \nThis catalog must already exist and service principals must have access to it.\nDefault",
"default": "staging",
"skip_prompt_if": {
"properties": {
"input_setup_cicd_and_project": {
"const": "CICD_Only"
"anyOf": [
{
"properties": {
"input_include_models_in_unity_catalog": {
"const": "no"
}
}
},
{
"properties": {
"input_setup_cicd_and_project": {
"const": "Project_Only"
}
}
}
}
]
}
},
"input_prod_catalog_name": {
"order": 13,
"type": "string",
"description": "\nName of the catalog in Unity Catalog that will host the production UC resources.\nThis catalog must already exist and service principals must have access to it.\nDefault",
"default": "prod",
"skip_prompt_if": {
"anyOf": [
{
"properties": {
"input_include_models_in_unity_catalog": {
"const": "no"
}
}
},
{
"properties": {
"input_setup_cicd_and_project": {
"const": "Project_Only"
}
}
}
]
}
},
"input_test_catalog_name": {
"order": 14,
"type": "string",
"description": "\nName of the catalog in Unity Catalog that will be used for integration tests.\nThis catalog must already exist and service principals must have access to it.\nDefault",
"default": "test",
"skip_prompt_if": {
"anyOf": [
{
"properties": {
"input_include_models_in_unity_catalog": {
"const": "no"
}
}
},
{
"properties": {
"input_setup_cicd_and_project": {
"const": "Project_Only"
}
}
}
]
}
},
"input_schema_name": {
"order": 12,
"order": 15,
"type": "string",
"description": "\nName of schema to use when registering a model in Unity Catalog.\nThis schema must already exist and service principals must have access.\nWe recommend using the project name.\nDefault",
"default": "{{if (eq .input_include_models_in_unity_catalog `no`)}}schema{{else}}{{ .input_project_name }}{{end}}",
Expand All @@ -168,7 +233,7 @@
}
},
"input_unity_catalog_read_user_group": {
"order": 13,
"order": 16,
"type": "string",
"default": "account users",
"description": "\nUser group name to give EXECUTE privileges to models in Unity Catalog (UC).\nIt must exist in UC with access granted to the staging and prod workspaces.\nDefault",
Expand All @@ -192,7 +257,7 @@
}
},
"input_include_feature_store": {
"order": 14,
"order": 17,
"type": "string",
"description": "\nWhether to include Feature Store",
"default": "no",
Expand All @@ -206,7 +271,7 @@
}
},
"input_include_mlflow_recipes": {
"order": 15,
"order": 18,
"type": "string",
"description": "\nWhether to include MLflow Recipes",
"default": "no",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:

# Update databricks.yml
- script: |
echo -e " staging:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n prod:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n test:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$(PROJECT_NAME_ALPHA)/databricks.yml"
echo -e " {{ .input_staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ .input_prod_catalog_name }}:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n {{ .input_test_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$(PROJECT_NAME_ALPHA)/databricks.yml"
displayName: 'Update databricks.yml'

# Initialize CICD Bundle
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ trigger:
paths:
include:
- {{template `project_name_alphanumeric_underscore` .}}/*
- '.azure/devops-pipelines/{{ .input_project_name }}-run-tests.yml'

variables:
- name: workingDirectory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ jobs:
cicd:
runs-on: ubuntu-latest
steps:
- name: Get current timestamp
id: timestamp
run: |
echo "timestamp=$(date +'%s')" >> "$GITHUB_ENV"
- uses: actions/checkout@v3
with:
ref: {{`${{ github.event.pull_request.head.sha || github.sha }}`}}
Expand All @@ -49,7 +53,7 @@ jobs:
- name: Update databricks.yml
id: update
run: |
echo -e " staging:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n prod:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n test:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$PROJECT_NAME_ALPHA/databricks.yml"
echo -e " {{ .input_staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ .input_prod_catalog_name }}:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n {{ .input_test_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$PROJECT_NAME_ALPHA/databricks.yml"
- name: Initialize Bundle
id: initialize
run: |
Expand All @@ -61,13 +65,14 @@ jobs:
run: |
git config --global user.name "Deploy CICD Bot"
git config --global user.email "[email protected]"
git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}
git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}}
git add .github "$PROJECT_NAME_ALPHA/databricks.yml"
git commit -m "Add CICD for {{`${{ github.event.inputs.project_name }}`}}"
git push origin add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}
git push origin add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}}

- name: Create Pull Request
id: pr
env:
GITHUB_TOKEN: {{`${{ github.token }}`}}
run: |
gh pr create --base {{ .input_default_branch }} --head add-cicd-for-{{`${{ github.event.inputs.project_name }}`}} --title "Deploy CICD for {{`${{ github.event.inputs.project_name }}`}}" --body "This PR was generated by the Deploy CICD workflow."
gh pr create --base {{ .input_default_branch }} --head add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}} --title "Deploy CICD for {{`${{ github.event.inputs.project_name }}`}}" --body "This PR was generated by the Deploy CICD workflow." --reviewer {{`${{ github.actor }}`}}
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ jobs:
- name: Validate Bundle For Prod
id: validate
run: |
databricks bundle validate -t prod
databricks bundle validate -t {{ .input_prod_catalog_name }}
- name: Deploy Bundle to Prod
id: deploy
run: |
databricks bundle deploy -t prod
databricks bundle deploy -t {{ .input_prod_catalog_name }}
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ jobs:
- name: Validate Bundle For Staging
id: validate
run: |
databricks bundle validate -t staging
databricks bundle validate -t {{ .input_staging_catalog_name }}
- name: Deploy Bundle to Staging
id: deploy
run: |
databricks bundle deploy -t staging
databricks bundle deploy -t {{ .input_staging_catalog_name }}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
ARM_CLIENT_SECRET: {{`${{ env.STAGING_ARM_CLIENT_SECRET }}`}}
{{- end }}
run: |
databricks bundle validate -t staging > ../validate_output.txt
databricks bundle validate -t {{ .input_staging_catalog_name }} > ../validate_output.txt
- name: Create Comment with Bundle Configuration
uses: actions/github-script@v6
id: comment
Expand Down Expand Up @@ -90,7 +90,7 @@ jobs:
ARM_CLIENT_SECRET: {{`${{ env.PROD_ARM_CLIENT_SECRET }}`}}
{{- end }}
run: |
databricks bundle validate -t prod > ../validate_output.txt
databricks bundle validate -t {{ .input_prod_catalog_name }} > ../validate_output.txt
- name: Create Comment with Bundle Configuration
uses: actions/github-script@v6
id: comment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ on:
pull_request:
paths:
- '{{template `project_name_alphanumeric_underscore` .}}/**'
- '.github/workflows/{{ .input_project_name }}-run-tests.yml'

defaults:
run:
Expand Down Expand Up @@ -54,18 +55,18 @@ jobs:
- name: Validate Bundle For Test Deployment Target in Staging Workspace
id: validate
run: |
databricks bundle validate -t test
databricks bundle validate -t {{ .input_test_catalog_name }}
- name: Deploy Bundle to Test Deployment Target in Staging Workspace
id: deploy
run: |
databricks bundle deploy -t test
databricks bundle deploy -t {{ .input_test_catalog_name }}
{{- if (eq .input_include_feature_store `yes`) }}
- name: Run Feature Engineering Workflow for Test Deployment Target in Staging Workspace
id: feature_engineering
run: |
databricks bundle run write_feature_table_job -t test
databricks bundle run write_feature_table_job -t {{ .input_test_catalog_name }}
{{- end }}
- name: Run Training Workflow for Test Deployment Target in Staging Workspace
id: training
run: |
databricks bundle run model_training_job -t test
databricks bundle run model_training_job -t {{ .input_test_catalog_name }}
4 changes: 2 additions & 2 deletions template/{{.input_root_dir}}/README.md.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,8 @@ This stack comes with a workflow to set up CI/CD for projects that can be found
{{ end }}

To set up CI/CD for projects that were created through MLOps Stacks with the `Project_Only` parameter,
run the abovementioned workflow, specifying the `project_name` as a parameter. This workflow assumes that
all steps in the [MLOps Setup Guide](./docs/mlops-setup.md) have been completed. For example, for the monorepo case:
run the above-mentioned workflow, specifying the `project_name` as a parameter. For example, for the monorepo case:

1. Setup your repository by initializing MLOps Stacks via Databricks CLI with the `CICD_and_Project` or `CICD_Only` parameter.
2. Follow the [MLOps Setup Guide](./docs/mlops-setup.md) to setup authentication and get the repo ready for CI/CD.
3. Create a new project by initializing MLOps Stacks again but this time with the `Project_Only` parameter.
Expand Down
Binary file modified template/{{.input_root_dir}}/cicd.tar.gz
Binary file not shown.
22 changes: 20 additions & 2 deletions template/{{.input_root_dir}}/cicd/databricks_template_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,31 @@
"description": "CLI Version",
"type": "string"
},
"input_project_name": {
"input_test_catalog_name": {
"order": 8,
"type": "string",
"description": "\nName of the Test Unity Catalog",
"default": "test"
},
"input_staging_catalog_name": {
"order": 9,
"type": "string",
"description": "\nName of the Staging Unity Catalog",
"default": "staging"
},
"input_prod_catalog_name": {
"order": 10,
"type": "string",
"description": "\nName of the Prod Unity Catalog",
"default": "prod"
},
"input_project_name": {
"order": 11,
"description": "Project Name",
"type": "string"
},
"input_include_feature_store": {
"order": 9,
"order": 12,
"description": "Use Feature Store (yes) or not (no)",
"type": "string"
}
Expand Down
5 changes: 4 additions & 1 deletion template/{{.input_root_dir}}/cicd_params.json.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,8 @@
"input_default_branch": "{{ .input_default_branch }}",
"input_release_branch": "{{ .input_release_branch }}",
"cloud_specific_node_type_id": "{{template `cli_version` .}}",
"input_cli_version": "{{template `cli_version` .}}"
"input_cli_version": "{{template `cli_version` .}}",
"test_catalog_name": "{{ .input_test_catalog_name }}",
"staging_catalog_name": "{{ .input_staging_catalog_name }}",
"prod_catalog_name": "{{ .input_prod_catalog_name }}"
}
5 changes: 4 additions & 1 deletion template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,17 @@ After merging your pull request, subsequent runs of the model training and batch
jobs in staging and production will automatically use your updated ML code.
{{- end }}

You can track the state of the ML pipelines for the current project from the MLflow registered model UI. Links:
You can track the state of the ML pipelines for the current project from the MLflow registered model UI.
{{ if (eq .input_setup_cicd_and_project `CICD_and_Project`) }}
Links:
{{ if (eq .input_include_models_in_unity_catalog `no`) }}
* [Staging workspace registered model]({{template `databricks_staging_workspace_host` .}}/ml/models/staging-{{template `model_name` .}})
* [Prod workspace registered model]({{template `databricks_prod_workspace_host` .}}/ml/models/prod-{{template `model_name` .}})
{{- else -}}
* [Staging model in UC]({{template `databricks_staging_workspace_host` .}}/explore/data/models/staging/{{.input_project_name}}/{{template `model_name` .}})
* [Prod model in UC]({{template `databricks_prod_workspace_host` .}}/explore/data/models/prod/{{.input_project_name}}/{{template `model_name` .}})
{{end}}.
{{end}}

In both the staging and prod workspaces, the MLflow registered model contains links to:
* The model versions produced through automated retraining
Expand Down
11 changes: 8 additions & 3 deletions template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ If the created project uses **Unity Catalog**, we expect a catalog to exist with
For example, if the deployment target is dev, we expect a catalog named dev to exist in the workspace.
If you want to use different catalog names, please update the target names declared in the
{{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}}[{{ .input_project_name }}/databricks.yml](../{{template `project_name_alphanumeric_underscore` .}}/databricks.yml)
{{- else }} `databricks.yml` {{ end }} file.
{{- else }} `databricks.yml` {{ end }} file.
If changing the staging, prod, or test deployment targets, you'll also need to update the workflows located in the .github/workflows directory.

The SP must have proper permission in each respective environment and the catalog for the environments.
Expand Down Expand Up @@ -113,8 +113,10 @@ to add the secrets to GitHub:
- `STAGING_WORKSPACE_TOKEN` : service principal token for staging workspace
- `PROD_WORKSPACE_TOKEN` : service principal token for prod workspace
- `WORKFLOW_TOKEN` : [GitHub token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic) with workflow permissions. This secret is needed for the Deploy CI/CD Workflow.
Be sure to update the [Workflow Permissions](https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token) section under Repo Settings > Actions > General to allow `Read and write permissions`,
and to allow workflows to be able to open pull requests (PRs).

Next, be sure to update the [Workflow Permissions](https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token) section under Repo Settings > Actions > General:
- Allow `Read and write permissions`,
- Allow workflows to be able to open pull requests (PRs).
{{ end }}

{{ if and (eq .input_cicd_platform `github_actions`) (eq .input_cloud `azure`) }}
Expand All @@ -138,6 +140,9 @@ After setting up authentication for CI/CD, you can now set up CI/CD workflows. W
This workflow is manually triggered with `project_name` as parameter. This workflow will need to be triggered for each project to set up its set of CI/CD workflows that can be used to deploy ML resources and run ML jobs in the staging and prod workspaces.
These workflows will be defined under `.github/workflows`.

If you want to deploy CI/CD for an initialized project (`Project_Only` MLOps Stacks initialization), you can manually run the `deploy-cicd.yml` workflow from the [GitHub Actions UI](https://docs.github.com/en/actions/using-workflows/manually-running-a-workflow?tool=webui) once the project code has been added to your main repo.
The workflow will create a pull request with all the changes against your {{ .input_default_branch }} branch. Review and approve it to commit the files to deploy CI/CD for the project.

{{ else if (eq .input_cicd_platform `azure_devops`) -}}
## Configure CI/CD - Azure DevOps

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,16 @@ i.e. for each environment
You can run unit tests for your ML code via `pytest tests`.

{{ end }}

## Next Steps
When you're satisfied with initial ML experimentation (e.g. validated that a model with reasonable performance can be
trained on your dataset) and ready to deploy production training/inference
pipelines, ask your ops team to set up CI/CD for the current ML project if they haven't already. CI/CD can be set up as part of the
MLOps Stacks initialization even if it was skipped in this case, or this project can be added to a repo setup with CI/CD already,
following the directions under "Setting up CI/CD" in the repo root directory README.

When you're satisfied with initial ML experimentation (e.g. validated that a model with reasonable performance can be trained on your dataset) and ready to deploy production training/inference pipelines, ask your ops team to set up CI/CD for the current ML project if they haven't already. CI/CD can be set up as part of the MLOps Stacks initialization even if it was skipped in this case, or this project can be added to a repo setup with CI/CD already, following the directions under "Setting up CI/CD" in the repo root directory README.

To add CI/CD to this repo:
1. Run `databricks bundle init mlops-stacks` via the Databricks CLI
2. Select the option to only initialize `CICD_Only`
3. Provide the root directory of this project and answer the subsequent prompts

More details can be found on the homepage [MLOps Stacks README](https://github.com/databricks/mlops-stacks/blob/main/README.md).
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ targets:

{{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}}

staging:
{{ .input_staging_catalog_name }}:
workspace:
host: {{template `databricks_staging_workspace_host` .}}

prod:
{{ .input_prod_catalog_name }}:
workspace:
host: {{template `databricks_prod_workspace_host` .}}

test:
{{ .input_test_catalog_name }}:
workspace:
host: {{template `databricks_staging_workspace_host` .}}
{{ end }}

0 comments on commit c9c6f6a

Please sign in to comment.