From 663fed3907932b3a46deabd29059536b5978990b Mon Sep 17 00:00:00 2001 From: aravind-segu Date: Tue, 20 Feb 2024 18:42:32 -0800 Subject: [PATCH 01/10] Minor bug fixes --- .../.github/workflows/deploy-cicd.yml.tmpl | 1 + ...{{.input_project_name}}-run-tests.yml.tmpl | 2 -- .../docs/mlops-setup.md.tmpl | 10 ++++++--- .../README.md.tmpl | 21 ++++++++++++++----- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl index 5eedb6c0..0b2cabda 100644 --- a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl @@ -61,6 +61,7 @@ jobs: run: | git config --global user.name "Deploy CICD Bot" git config --global user.email "noreply-cicd-bot@databricks.com" + git pull git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}} git add .github "$PROJECT_NAME_ALPHA/databricks.yml" git commit -m "Add CICD for {{`${{ github.event.inputs.project_name }}`}}" diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl index ca807d72..647daa7d 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl @@ -2,8 +2,6 @@ name: {{ if (eq .input_include_feature_store `yes`) }}Feature and {{end}}Trainin on: workflow_dispatch: pull_request: - paths: - - '{{template `project_name_alphanumeric_underscore` .}}/**' defaults: run: diff --git a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl index 8d298801..7070d830 100644 --- a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl +++ 
b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl @@ -85,7 +85,9 @@ For example, if the deployment target is dev, we expect a catalog named dev to e If you want to use different catalog names, please update the target names declared in the {{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}}[{{ .input_project_name }}/databricks.yml](../{{template `project_name_alphanumeric_underscore` .}}/databricks.yml) {{- else }} `databricks.yml` {{ end }} file. -If changing the staging, prod, or test deployment targets, you'll also need to update the workflows located in the .github/workflows directory. +If changing the staging, prod, or test deployment targets, you'll also need to update the workflows located in the .github/workflows directory. In the +`/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl`, `/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl`, `/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl` update all the `databricks bundle` +to now reflect the new targets that you have changed to. The SP must have proper permission in each respective environment and the catalog for the environments. @@ -113,8 +115,10 @@ to add the secrets to GitHub: - `STAGING_WORKSPACE_TOKEN` : service principal token for staging workspace - `PROD_WORKSPACE_TOKEN` : service principal token for prod workspace - `WORKFLOW_TOKEN` : [Github token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic) with workflow permissions. This secret is needed for the Deploy CI/CD Workflow. -Be sure to update the [Workflow Permissions](https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token) section under Repo Settings > Actions > General to allow `Read and write permissions`, -and to allow workflows to be able to open pull requests (PRs). 
+ +Next, be sure to update the [Workflow Permissions](https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token) section under Repo Settings > Actions > General: +- Allow `Read and write permissions`, +- Allow workflows to be able to open pull requests (PRs). {{ end }} {{ if and (eq .input_cicd_platform `github_actions`) (eq .input_cloud `azure`) }} diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl index 84d3bc5c..babce5f0 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl @@ -237,10 +237,21 @@ i.e. for each environment You can run unit tests for your ML code via `pytest tests`. {{ end }} + ## Next Steps -When you're satisfied with initial ML experimentation (e.g. validated that a model with reasonable performance can be -trained on your dataset) and ready to deploy production training/inference -pipelines, ask your ops team to set up CI/CD for the current ML project if they haven't already. CI/CD can be set up as part of the -MLOps Stacks initialization even if it was skipped in this case, or this project can be added to a repo setup with CI/CD already, -following the directions under "Setting up CI/CD" in the repo root directory README. + +When you're satisfied with initial ML experimentation (e.g. validated that a model with reasonable performance can be trained on your dataset) and ready to deploy production training/inference +pipelines, ask your ops team to set up CI/CD for the current ML project if they haven't already. 
CI/CD can be set up as part of the + +MLOps Stacks initialization even if it was skipped in this case, or this project can be added to a repo setup with CI/CD already, following the directions under "Setting up CI/CD" in the repo root directory README. + +To add CI/CD to this repo: + 1. Run `databricks bundle init mlops-stacks` in your home directory + 2. Select the option to only initialize `CICD_Only` + 3. Provide the current root directory and answer the subsequent prompts + +If you already have CI/CD set up in a separate repo: +1. Create a new folder in your root CI/CD repo +1. Copy the contents of your current project folder into the new folder + More details can be found on the homepage [MLOps Stacks README](https://github.com/databricks/mlops-stacks/blob/main/README.md). From 057a1bc76a60b7943d22e62a8fcec0f8c21d3b0e Mon Sep 17 00:00:00 2001 From: aravind-segu Date: Wed, 21 Feb 2024 17:24:31 -0800 Subject: [PATCH 02/10] More bug fixes --- databricks_template_schema.json | 80 ++++++++++++++++++- .../.github/workflows/deploy-cicd.yml.tmpl | 11 ++- ...put_project_name}}-bundle-cd-prod.yml.tmpl | 4 +- ...{{.input_project_name}}-run-tests.yml.tmpl | 8 +- .../docs/mlops-setup.md.tmpl | 7 +- .../databricks.yml.tmpl | 6 +- 6 files changed, 96 insertions(+), 20 deletions(-) diff --git a/databricks_template_schema.json b/databricks_template_schema.json index db607bca..b04d8b1b 100644 --- a/databricks_template_schema.json +++ b/databricks_template_schema.json @@ -141,9 +141,81 @@ } } }, - "input_schema_name": { + "test_catalog_name": { "order": 12, "type": "string", "description": "\nName of the Test Unity Catalog", "default": "test", "skip_prompt_if": { "anyOf": [ { "properties": { "input_include_models_in_unity_catalog": { "const": "no" } } }, { "properties": { "input_setup_cicd_and_project": { "const": "Project_Only" } } } ] } }, "staging_catalog_name": { "order": 13, "type": "string", "description": "\nName of the Staging 
Unity Catalog", + "default": "staging", + "skip_prompt_if": { + "anyOf": [ + { + "properties": { + "input_include_models_in_unity_catalog": { + "const": "no" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + ] + } + }, + "prod_catalog_name": { + "order": 14, + "type": "string", + "description": "\nName of the Prod Unity Catalog", + "default": "prod", + "skip_prompt_if": { + "anyOf": [ + { + "properties": { + "input_include_models_in_unity_catalog": { + "const": "no" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + ] + } + }, + "input_schema_name": { + "order": 15, + "type": "string", "description": "\nName of schema to use when registering a model in Unity Catalog.\nThis schema must already exist and service principals must have access.\nWe recommend using the project name.\nDefault", "default": "{{if (eq .input_include_models_in_unity_catalog `no`)}}schema{{else}}{{ .input_project_name }}{{end}}", "pattern": "^[^ .\\-\\/]*$", @@ -168,7 +240,7 @@ } }, "input_unity_catalog_read_user_group": { - "order": 13, + "order": 16, "type": "string", "default": "account users", "description": "\nUser group name to give EXECUTE privileges to models in Unity Catalog (UC).\nIt must exist in UC with access granted to the staging and prod workspaces.\nDefault", @@ -192,7 +264,7 @@ } }, "input_include_feature_store": { - "order": 14, + "order": 17, "type": "string", "description": "\nWhether to include Feature Store", "default": "no", @@ -206,7 +278,7 @@ } }, "input_include_mlflow_recipes": { - "order": 15, + "order": 18, "type": "string", "description": "\nWhether to include MLflow Recipes", "default": "no", diff --git a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl index 0b2cabda..5838e30a 100644 --- a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl 
+++ b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl @@ -23,6 +23,10 @@ jobs: cicd: runs-on: ubuntu-latest steps: + - name: Get current timestamp + id: timestamp + run: | + echo "{timestamp}={$(date +'%s')}" >> $GITHUB_ENV - uses: actions/checkout@v3 with: ref: {{`${{ github.event.pull_request.head.sha || github.sha }}`}} @@ -62,13 +66,14 @@ jobs: git config --global user.name "Deploy CICD Bot" git config --global user.email "noreply-cicd-bot@databricks.com" git pull - git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}} + git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.date }}`}} git add .github "$PROJECT_NAME_ALPHA/databricks.yml" git commit -m "Add CICD for {{`${{ github.event.inputs.project_name }}`}}" - git push origin add-cicd-for-{{`${{ github.event.inputs.project_name }}`}} + git push origin add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.date }}`}} + - name: Create Pull Request id: pr env: GITHUB_TOKEN: {{`${{ github.token }}`}} run: | - gh pr create --base {{ .input_default_branch }} --head add-cicd-for-{{`${{ github.event.inputs.project_name }}`}} --title "Deploy CICD for {{`${{ github.event.inputs.project_name }}`}}" --body "This PR was generated by the Deploy CICD workflow." + gh pr create --base {{ .input_default_branch }} --head add-cicd-for-{{`${{ github.event.inputs.project_name }}`}} --title "Deploy CICD for {{`${{ github.event.inputs.project_name }}`}}" --body "This PR was generated by the Deploy CICD workflow." 
--reviewer {{`${{ github.actor }}`}} diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl index a5072d40..b9068905 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl @@ -33,8 +33,8 @@ jobs: - name: Validate Bundle For Prod id: validate run: | - databricks bundle validate -t prod + databricks bundle validate -t {{ .prod_catalog_name }} - name: Deploy Bundle to Prod id: deploy run: | - databricks bundle deploy -t prod + databricks bundle deploy -t {{ .prod_catalog_name }} diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl index 647daa7d..69396caf 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl @@ -52,18 +52,18 @@ jobs: - name: Validate Bundle For Test Deployment Target in Staging Workspace id: validate run: | - databricks bundle validate -t test + databricks bundle validate -t {{ .test_catalog_name }} - name: Deploy Bundle to Test Deployment Target in Staging Workspace id: deploy run: | - databricks bundle deploy -t test + databricks bundle deploy -t {{ .test_catalog_name }} {{- if (eq .input_include_feature_store `yes`) }} - name: Run Feature Engineering Workflow for Test Deployment Target in Staging Workspace id: feature_engineering run: | - databricks bundle run write_feature_table_job -t test + databricks bundle run write_feature_table_job -t {{ .test_catalog_name }} {{- end }} - name: Run Training Workflow for Test Deployment Target in Staging Workspace id: training 
run: | - databricks bundle run model_training_job -t test + databricks bundle run model_training_job -t {{ .test_catalog_name }} diff --git a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl index 7070d830..cc66746c 100644 --- a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl +++ b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl @@ -84,10 +84,7 @@ If the created project uses **Unity Catalog**, we expect a catalog to exist with For example, if the deployment target is dev, we expect a catalog named dev to exist in the workspace. If you want to use different catalog names, please update the target names declared in the {{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}}[{{ .input_project_name }}/databricks.yml](../{{template `project_name_alphanumeric_underscore` .}}/databricks.yml) -{{- else }} `databricks.yml` {{ end }} file. -If changing the staging, prod, or test deployment targets, you'll also need to update the workflows located in the .github/workflows directory. In the -`/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl`, `/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl`, `/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl` update all the `databricks bundle` -to now reflect the new targets that you have changed to. +{{- else }} `databricks.yml` {{ end }} file. The SP must have proper permission in each respective environment and the catalog for the environments. @@ -267,6 +264,8 @@ In the case of a monorepo, where there are multiple projects under a single repo After setting up authentication for CI/CD, you can now set up CI/CD workflows. We provide a [Deploy CICD workflow](../.azure/devops-pipelines/deploy-cicd.yml) that can be used to generate the other CICD workflows mentioned below for projects. This workflow is manually triggered with `project_name` as parameter. 
This workflow will need to be triggered for each project to set up its set of CI/CD workflows that can be used to deploy ML resources and run ML jobs in the staging and prod workspaces. These workflows will be defined under `.azure/devops-pipelines`. After generating these workflows, be sure to go through the above workflow-specific steps again to add the appropriate build branch policies and filters. + +Trigger the workflow manually by running the command `gh workflow run deploy-cicd.yml` in your root folder. The workflow will then create a PR request which you can review, and merge to kick off the CI/CD pipeline in Github. {{ end }} {{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl index b43035d2..3d8cb380 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl @@ -27,15 +27,15 @@ targets: {{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} - staging: + {{ .staging_catalog_name }}: workspace: host: {{template `databricks_staging_workspace_host` .}} - prod: + {{ .prod_catalog_name }}: workspace: host: {{template `databricks_prod_workspace_host` .}} - test: + {{ .test_catalog_name }}: workspace: host: {{template `databricks_staging_workspace_host` .}} {{ end }} From 31bb7f6c3146d096c907b9c888e8e89ae253689a Mon Sep 17 00:00:00 2001 From: aravind-segu Date: Thu, 22 Feb 2024 10:20:57 -0800 Subject: [PATCH 03/10] Update names --- databricks_template_schema.json | 9 +-------- .../devops-pipelines/deploy-cicd.yml.tmpl | 2 +- .../.github/workflows/deploy-cicd.yml.tmpl | 9 ++++----- ...t_project_name}}-bundle-cd-staging.yml.tmpl | 4 ++-- 
.../{{.input_project_name}}-bundle-ci.yml.tmpl | 4 ++-- .../cicd/databricks_template_schema.json | 18 ++++++++++++++++++ .../{{.input_root_dir}}/cicd_params.json.tmpl | 5 ++++- .../docs/mlops-setup.md.tmpl | 4 ++-- 8 files changed, 34 insertions(+), 21 deletions(-) diff --git a/databricks_template_schema.json b/databricks_template_schema.json index b04d8b1b..10b057d0 100644 --- a/databricks_template_schema.json +++ b/databricks_template_schema.json @@ -132,14 +132,7 @@ "type": "string", "description": "\nWhether to use the Model Registry with Unity Catalog", "default": "no", - "enum": ["yes", "no"], - "skip_prompt_if": { - "properties": { - "input_setup_cicd_and_project": { - "const": "CICD_Only" - } - } - } + "enum": ["yes", "no"] }, "test_catalog_name": { "order": 12, diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl index d38a068a..655c10eb 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl @@ -59,7 +59,7 @@ jobs: # Update databricks.yml - script: | - echo -e " staging:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n prod:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n test:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$(PROJECT_NAME_ALPHA)/databricks.yml" + echo -e " {{ .staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ .prod_catalog_name }}:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n {{ .test_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$(PROJECT_NAME_ALPHA)/databricks.yml" displayName: 'Update databricks.yml' # Initialize CICD Bundle diff --git a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl 
b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl index 5838e30a..5779a78f 100644 --- a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl @@ -26,7 +26,7 @@ jobs: - name: Get current timestamp id: timestamp run: | - echo "{timestamp}={$(date +'%s')}" >> $GITHUB_ENV + echo "timestamp=$(date +'%s')" >> $GITHUB_ENV - uses: actions/checkout@v3 with: ref: {{`${{ github.event.pull_request.head.sha || github.sha }}`}} @@ -53,7 +53,7 @@ jobs: - name: Update databricks.yml id: update run: | - echo -e " staging:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n prod:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n test:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$PROJECT_NAME_ALPHA/databricks.yml" + echo -e " {{ .staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ .prod_catalog_name }}:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n {{ .test_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$PROJECT_NAME_ALPHA/databricks.yml" - name: Initialize Bundle id: initialize run: | @@ -65,11 +65,10 @@ jobs: run: | git config --global user.name "Deploy CICD Bot" git config --global user.email "noreply-cicd-bot@databricks.com" - git pull - git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.date }}`}} + git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}} git add .github "$PROJECT_NAME_ALPHA/databricks.yml" git commit -m "Add CICD for {{`${{ github.event.inputs.project_name }}`}}" - git push origin add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.date }}`}} + git push origin add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}} - name: 
Create Pull Request id: pr diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl index e84a9384..9ddf34da 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl @@ -33,8 +33,8 @@ jobs: - name: Validate Bundle For Staging id: validate run: | - databricks bundle validate -t staging + databricks bundle validate -t {{ .staging_catalog_name }} - name: Deploy Bundle to Staging id: deploy run: | - databricks bundle deploy -t staging + databricks bundle deploy -t {{ .staging_catalog_name }} diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl index 67b78399..47ec8076 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl @@ -46,7 +46,7 @@ jobs: ARM_CLIENT_SECRET: {{`${{ env.STAGING_ARM_CLIENT_SECRET }}`}} {{- end }} run: | - databricks bundle validate -t staging > ../validate_output.txt + databricks bundle validate -t {{ .staging_catalog_name }} > ../validate_output.txt - name: Create Comment with Bundle Configuration uses: actions/github-script@v6 id: comment @@ -90,7 +90,7 @@ jobs: ARM_CLIENT_SECRET: {{`${{ env.PROD_ARM_CLIENT_SECRET }}`}} {{- end }} run: | - databricks bundle validate -t prod > ../validate_output.txt + databricks bundle validate -t {{ .prod_catalog_name }} > ../validate_output.txt - name: Create Comment with Bundle Configuration uses: actions/github-script@v6 id: comment diff --git a/template/{{.input_root_dir}}/cicd/databricks_template_schema.json 
b/template/{{.input_root_dir}}/cicd/databricks_template_schema.json index b0a066c2..9f856945 100644 --- a/template/{{.input_root_dir}}/cicd/databricks_template_schema.json +++ b/template/{{.input_root_dir}}/cicd/databricks_template_schema.json @@ -45,6 +45,24 @@ "order": 9, "description": "Use Feature Store (yes) or not (no)", "type": "string" + }, + "test_catalog_name": { + "order": 10, + "type": "string", + "description": "\nName of the Test Unity Catalog", + "default": "test" + }, + "staging_catalog_name": { + "order": 13, + "type": "string", + "description": "\nName of the Staging Unity Catalog", + "default": "staging" + }, + "prod_catalog_name": { + "order": 14, + "type": "string", + "description": "\nName of the Prod Unity Catalog", + "default": "prod" } }, "success_message" : "\n✨ CICD files for your MLOps Project '{{.input_project_name}}' have been initialized!" diff --git a/template/{{.input_root_dir}}/cicd_params.json.tmpl b/template/{{.input_root_dir}}/cicd_params.json.tmpl index 8358e6b1..1be267f5 100644 --- a/template/{{.input_root_dir}}/cicd_params.json.tmpl +++ b/template/{{.input_root_dir}}/cicd_params.json.tmpl @@ -5,5 +5,8 @@ "input_default_branch": "{{ .input_default_branch }}", "input_release_branch": "{{ .input_release_branch }}", "cloud_specific_node_type_id": "{{template `cli_version` .}}", - "input_cli_version": "{{template `cli_version` .}}" + "input_cli_version": "{{template `cli_version` .}}", + "test_catalog_name": "{{ .test_catalog_name }}", + "staging_catalog_name": "{{ .staging_catalog_name }}", + "prod_catalog_name": "{{ .prod_catalog_name }}" } diff --git a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl index cc66746c..5c0e35f8 100644 --- a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl +++ b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl @@ -139,6 +139,8 @@ After setting up authentication for CI/CD, you can now set up CI/CD workflows. 
W This workflow is manually triggered with `project_name` as parameter. This workflow will need to be triggered for each project to set up its set of CI/CD workflows that can be used to deploy ML resources and run ML jobs in the staging and prod workspaces. These workflows will be defined under `.github/workflows`. +Trigger the workflow manually by running the command `gh workflow run deploy-cicd.yml` in your root folder. The workflow will then create a PR request which you can review, and merge to kick off the CI/CD pipeline in Github. + {{ else if (eq .input_cicd_platform `azure_devops`) -}} ## Configure CI/CD - Azure DevOps @@ -264,8 +266,6 @@ In the case of a monorepo, where there are multiple projects under a single repo After setting up authentication for CI/CD, you can now set up CI/CD workflows. We provide a [Deploy CICD workflow](../.azure/devops-pipelines/deploy-cicd.yml) that can be used to generate the other CICD workflows mentioned below for projects. This workflow is manually triggered with `project_name` as parameter. This workflow will need to be triggered for each project to set up its set of CI/CD workflows that can be used to deploy ML resources and run ML jobs in the staging and prod workspaces. These workflows will be defined under `.azure/devops-pipelines`. After generating these workflows, be sure to go through the above workflow-specific steps again to add the appropriate build branch policies and filters. - -Trigger the workflow manually by running the command `gh workflow run deploy-cicd.yml` in your root folder. The workflow will then create a PR request which you can review, and merge to kick off the CI/CD pipeline in Github. 
{{ end }} {{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} From 8a328cc8ece61f9dc36ec3835558424d7bdb5c4c Mon Sep 17 00:00:00 2001 From: aravind-segu Date: Thu, 22 Feb 2024 10:55:26 -0800 Subject: [PATCH 04/10] update docs --- template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl index 5c0e35f8..b524fc15 100644 --- a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl +++ b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl @@ -139,8 +139,9 @@ After setting up authentication for CI/CD, you can now set up CI/CD workflows. W This workflow is manually triggered with `project_name` as parameter. This workflow will need to be triggered for each project to set up its set of CI/CD workflows that can be used to deploy ML resources and run ML jobs in the staging and prod workspaces. These workflows will be defined under `.github/workflows`. -Trigger the workflow manually by running the command `gh workflow run deploy-cicd.yml` in your root folder. The workflow will then create a PR request which you can review, and merge to kick off the CI/CD pipeline in Github. +If adding CI/CD to a project directory, commit the new changes to GitHub and manually run the `deploy-cicd.yml` workflow from the GitHub Actions UI to initialize all the files required for CI/CD. +The workflow will open a PR against your default branch with all the changes. Review and approve it to commit the CI/CD files. 
{{ else if (eq .input_cicd_platform `azure_devops`) -}} ## Configure CI/CD - Azure DevOps From 450b721ac2b936ec3a4c3a415e664937722a76e1 Mon Sep 17 00:00:00 2001 From: aravind-segu Date: Thu, 22 Feb 2024 11:20:38 -0800 Subject: [PATCH 05/10] Updated docs --- template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl | 2 ++ template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl | 1 + 2 files changed, 3 insertions(+) diff --git a/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl b/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl index 3345a25d..ec4159d0 100644 --- a/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl +++ b/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl @@ -78,6 +78,7 @@ After merging your pull request, subsequent runs of the model training and batch jobs in staging and production will automatically use your updated ML code. {{- end }} +{{ if not (eq .input_setup_cicd_and_project `CICD_Only`) }} You can track the state of the ML pipelines for the current project from the MLflow registered model UI. Links: {{ if (eq .input_include_models_in_unity_catalog `no`) }} * [Staging workspace registered model]({{template `databricks_staging_workspace_host` .}}/ml/models/staging-{{template `model_name` .}}) @@ -86,6 +87,7 @@ You can track the state of the ML pipelines for the current project from the MLf * [Staging model in UC]({{template `databricks_staging_workspace_host` .}}/explore/data/models/staging/{{.input_project_name}}/{{template `model_name` .}}) * [Prod model in UC]({{template `databricks_prod_workspace_host` .}}/explore/data/models/prod/{{.input_project_name}}/{{template `model_name` .}}) {{end}}. 
+{{end}} In both the staging and prod workspaces, the MLflow registered model contains links to: * The model versions produced through automated retraining diff --git a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl index b524fc15..ff2f657b 100644 --- a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl +++ b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl @@ -85,6 +85,7 @@ For example, if the deployment target is dev, we expect a catalog named dev to e If you want to use different catalog names, please update the target names declared in the {{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}}[{{ .input_project_name }}/databricks.yml](../{{template `project_name_alphanumeric_underscore` .}}/databricks.yml) {{- else }} `databricks.yml` {{ end }} file. +If changing the staging, prod, or test deployment targets, you'll also need to update the workflows located in the .github/workflows directory. The SP must have proper permission in each respective environment and the catalog for the environments. 
From 1cbcc10ed0913087006f64071a205f06b065104d Mon Sep 17 00:00:00 2001 From: aravind-segu Date: Mon, 26 Feb 2024 14:44:06 -0800 Subject: [PATCH 06/10] PR Comments --- databricks_template_schema.json | 18 +++++++++--------- .../.github/workflows/deploy-cicd.yml.tmpl | 9 +++------ ...{{.input_project_name}}-run-tests.yml.tmpl | 3 +++ template/{{.input_root_dir}}/README.md.tmpl | 2 +- template/{{.input_root_dir}}/cicd.tar.gz | Bin 4321 -> 4935 bytes .../docs/ml-pull-request.md.tmpl | 5 +++-- .../docs/mlops-setup.md.tmpl | 4 ++-- .../README.md.tmpl | 9 ++++----- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/databricks_template_schema.json b/databricks_template_schema.json index 10b057d0..0466b7a6 100644 --- a/databricks_template_schema.json +++ b/databricks_template_schema.json @@ -134,11 +134,11 @@ "default": "no", "enum": ["yes", "no"] }, - "test_catalog_name": { + "input_staging_catalog_name": { "order": 12, "type": "string", - "description": "\nName of the Test Unity Catalog", - "default": "test", + "description": "\nName of the catalog in Unity Catalog that will host the staging UC resources. 
\nThis catalog must already exist and service principals must have access to it.", + "default": "staging", "skip_prompt_if": { "anyOf": [ { @@ -158,11 +158,11 @@ ] } }, - "staging_catalog_name": { + "input_prod_catalog_name": { "order": 13, "type": "string", - "description": "\nName of the Staging Unity Catalog", - "default": "staging", + "description": "\nName of the Prod Unity Catalog", + "default": "prod", "skip_prompt_if": { "anyOf": [ { @@ -182,11 +182,11 @@ ] } }, - "prod_catalog_name": { + "input_test_catalog_name": { "order": 14, "type": "string", - "description": "\nName of the Prod Unity Catalog", - "default": "prod", + "description": "\nName of the catalog in Unity Catalog that will be used for integration tests", + "default": "test", "skip_prompt_if": { "anyOf": [ { diff --git a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl index 5779a78f..9f6485da 100644 --- a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl @@ -23,10 +23,6 @@ jobs: cicd: runs-on: ubuntu-latest steps: - - name: Get current timestamp - id: timestamp - run: | - echo "timestamp=$(date +'%s')" >> $GITHUB_ENV - uses: actions/checkout@v3 with: ref: {{`${{ github.event.pull_request.head.sha || github.sha }}`}} @@ -63,12 +59,13 @@ jobs: env: GITHUB_TOKEN: {{`${{ github.token }}`}} run: | + TIMESTAMP=$(date +'%s') git config --global user.name "Deploy CICD Bot" git config --global user.email "noreply-cicd-bot@databricks.com" - git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}} + git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`$TIMESTAMP`}} git add .github "$PROJECT_NAME_ALPHA/databricks.yml" git commit -m "Add CICD for {{`${{ github.event.inputs.project_name }}`}}" - git push origin add-cicd-for-{{`${{ 
github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}} + git push origin add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`$TIMESTAMP`}} - name: Create Pull Request id: pr diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl index 69396caf..5d9bfd9d 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl @@ -2,6 +2,9 @@ name: {{ if (eq .input_include_feature_store `yes`) }}Feature and {{end}}Trainin on: workflow_dispatch: pull_request: + paths: + - '{{template `project_name_alphanumeric_underscore` .}}/**' + - '.github/workflows/{{ .input_project_name }}-run-tests.yml' defaults: run: diff --git a/template/{{.input_root_dir}}/README.md.tmpl b/template/{{.input_root_dir}}/README.md.tmpl index a3857433..94459fab 100644 --- a/template/{{.input_root_dir}}/README.md.tmpl +++ b/template/{{.input_root_dir}}/README.md.tmpl @@ -178,7 +178,7 @@ This stack comes with a workflow to set up CI/CD for projects that can be found {{ end }} To set up CI/CD for projects that were created through MLOps Stacks with the `Project_Only` parameter, -run the abovementioned workflow, specifying the `project_name` as a parameter. This workflow assumes that +run the above mentioned workflow, specifying the `project_name` as a parameter. This workflow assumes that all steps in the [MLOps Setup Guide](./docs/mlops-setup.md#Steps) have been completed. For example, for the monorepo case: 1. Setup your repository by initializing MLOps Stacks via Databricks CLI with the `CICD_and_Project` or `CICD_Only` parameter. 2. Follow the [MLOps Setup Guide](./docs/mlops-setup.md) to setup authentication and get the repo ready for CI/CD. 
diff --git a/template/{{.input_root_dir}}/cicd.tar.gz b/template/{{.input_root_dir}}/cicd.tar.gz index b95fe6077ecd78fda46db300169dd2ca9de4d6f8..efb5823622a9a7b213375da5355c649b0e5efc24 100644 GIT binary patch literal 4935 zcmV-N6S(XjiwFS3lhtcRK8MtIfor_+3bYMbpA zF9Gp254xgx+Kp)&3c*)FRz0XT98RIbhl0Z2zm@X4zx}HxfBEFecbbmgoTEP?QRuTL ze+@qk`1u)r=->Y}AIYPOi!7Ry2u1K88nfBFF z_V{5p&Q7VAv1|DjG<*Ne^%<6R( z&sHAKW8M@!=S>%2>R~1!Q<%1M*N-<{h$&k+mAF_pho-LDj)7HLUo|t=vom!)(Yc_5 zUIQI;0d!FRcrigT)f?;)PIZEPH<$gZKKfp?TYw!X&^wTJ)z(Hhi{E}8zf;~2y`~Kp zU})O96~Gh^vE~OJcmvS1OwI>+oZrF%f5Tg&bJ`m1j4`Po$3r%#?~d(M=IfA;3CuTm zD9gFZoq&2Rk5*p){$$fRLC%obw+mo`-hvGS)5clF8!{cCjc8wzy#u#!1#D(_u(?_X z&pDqt`Y3vo>?R11F}shg)%pm+X3Wt++85EglIY=666iWOpU?v#M*M&NlZS^gy4G$m8enY4+>`P5PfHI4+>}M+e)H0{?BL6#1V=T~hg3svBLB0ElFI*d_B&mr-e|Vl z^?IYo|9tv|>A%$a4Q&L@yZ;yS|IJpj*#Fx`5%|wCA&&RPo8d7p{)?UeXg0b9{@Y3^ z@ZX~@Dg3t)J1{T)i}b%+Yqtyhw~dl>{%gN=aB$Er@ZTq2nEnf`-_$vXOj^$vog%?7&R@ zPvbv&{;O8tzipH}{CCi5>=*gJProGee-$r?c3_LV#jO5y* zaP3R?_wvKb_|CejS?;xF2P5p6x*FKvFNp4V_!3ng9@xJA{e2bfc=!t6x#+TbSy9d~ z(AM#hWu3DF1!yPy_{TpVRe#W`ztpM+YW1hgx-R_kQN|vB1zldkkIFeyuGlsZ{4?J5 z*ioH$4<)_W$0$kULodx%N;3bisDW!x17K-mC-B#z{mbD0db3gN|8J)h`2SItr2a<% zz&h=~68OK?F82SoQnK-Xy0%3l zC$gR$q1qznf0}jB|5~TfD)xW2QBv_A*OxVK1TM7x?EF`!TPx0gZKY(dzr5ME9s{t@ z`gfaD|JnV|V*NK?X03nD-I}Tw*j0KPiYVieH3KQmz<+JPdrAFo)|-vu{_j=_UCu)! 
zP+$bJGVlMP%Q5CCOJ3~L08riE1FU*@xbQU7mN(Sc2Yo<%SDr?ZAiH13FK|Vf*SJJc z=;hdlfZn9&sfI~h&DXE#LpQK2)x$prK<2Pi5|_yc{Rjl{kbyqh5cpROc`^79DImUK zUVF|Qw({_>S1MuqhEkoF7j_L5VCOBJ2cAqmKDs!1e%9}Od#+x*`S#>Boz2V7D8$6N zhe1KkFOFXHU%ycQ@aF8>^V6f=39ko3zGMVg$~gn+3RVmdOJ+xB->DZTua90|sQu$y zPDekyJv&j)Pot_KGfJTMs(&&kfuqyYSN-16MgPs~94hB0y|a^x*_0C1lVcaCTz70e z@H}kmV@hs`g{v+q$Kr8S$gnSDxN1hC%dA{gT&gUU-Z=v%b^%Da$_yZvJ`h7v1qnvM zAh7+QN_$Rx_7U;1D{3KBrB~|7ArcYNo_>w>_Yi!1ebdOPL9idVRd?)PJ9bnHq}^!9B#3G(XrrM_9ojpnJ#AoM2z<{R1U?&98RQ~>E|g<6D1~U?<$Apw8@UiK z*#mCy`ZlbnWzp9ez~>-xCMF?4YEsazG5T_HQFVkMHbSOoU9AQ#h>3ye(`5a4El*Cw z2!ddu_;>!ax{9*LL@uX|9Kl&&iv^+cMLQ_vEq@5K20V*NHxpq#**3-otW6}?3(7U` zR!_(?4B2%;{&_OrObJHxA`&4Rx@CSHHwB5!JY`~8zt=32M)c@8d;J9Z1{@B%+I`Gk zTS4a%U1;7F_F)pjJJo*x&jG$h$6;}G2lTKRJt^;W#@B5Z@IK67*vch@H+k@rk? z*msuhFMl(~BE1#w@|fM%WerGNR;4L9v0)4tdvYM`1tP;0)=72&M}!xd1gL^;J=2fI z;?v!46%75HGm~jx=#WE5w3cy7=d|eHMvj3kRkofvt`7}Ji?Y?SZ2ZRstg0JT`Z|Z{ zLEwz|uhH(N?*Bvd*D3PV;@0yl= z&E~+_3UIlu^4E)gebY%uo6d>m zs*z~yJMXbQVM1Q?FJ8WVo+CD#UwW?u3zoj|Mss5Dqk4b z*G!9i^#ut=Bh4FsCEI=b$j;7vqB@8Gt)v1z8DNhJ1N7idG*$T1-Bb(b){a-Dn^6jTmaa%Y zN!6qOOII5>2DlEsGKr5KDy5GvNdJxcnc4CGrku=FAQ^F6r-zw@VhgyJNf5T6Z^=eq z8*(gJ*^k1jd=y@Vy|Q~5m%mx zvF-l}X|n4@}EY&ag%tyZqBGD))iHjcEj$)$5lll_(OP(e5;`tTUdNEa$d5 z9|KMqDY3c=KTplSTaXd*@R01$)FyRj`B1tdSn%Jj<5*JEZbtnPSOb{$1WYQI(m<&i zCVl`PqhAqmJQ>|5m+!>BF+Im-fx;f@@oa&hS49$C9RgakhJt`gF%$87U|^w@sab>` z2@fOy(k-mni6=>p;k}H1K=GMAM4#=%|E`3epEKjar;*c;HPfF|!jAhIc@J_+@F}}4 zBfjYH>sn@y=obX?di1K2ys^VpheTO80Tt;NHu5V?sfmt2)*@GQQde%K-M5*jysR0| zq@J9TtXoOtsUxO(Uah{PN`E)9zGMTTe<<)*CJTBmA&8u7?Cua_Bur22>Smq=nz-F5 z4N*Uh7Jvzi$+4|5^F_=o#kwHA`fms-JO+DDh{V=>7RVbrZ3<&Xi8Tg|Fm_f_+ux|h zJqz!#9Y^s61d_QD1EC_w@Q|FpLf5_zG>T<=|1(Hd9=}1U8UIXp)`jvHTU`!mG zkkdi!zqImvYeU42?9c(@1|3QtI#>f!cJp}|juUr}Lyl`p^QY@fuzYM=0aQY0r#K$s z>)Pux!NDsh_N$!EVMw=>&V(#@_l>6hq8*4-Zyh_`#pDT^U8dKz8zVyR7Jq#|2IfyNX=#ZHbdY^4~TXhZ17pO}oE;gNHL z`QZx_G{e+QKiWMTGmqlo8`B90?Hltnzz6_qyp!xBkY{mH!i7-4vanUyyTaa0*gJ-z 
zS2cESbkS+OmW~qMfSJp>9<^2*mviZSih-MU$9#r$n{uT*+q7{Io`hh>0^MpuA7E-? z51U3()FmzuyhV^a#bpASr(m8j=YjJiLXWj_hG8-^j2fpNf1m5%&@`-`g}CEJY9YWZ zA^98uKmy9oi?ok39N3a){R^bMtmi3w4_CeO0&5pod&;?iRpcW&H?S`0$UD|hh{h&G zgJTsp7Go_5Rr0dF5mbqVP?I<%9_6hAq)eo?=L9La!`Ku^nS4{{20%(`>T?jvyz?%? zh60{&js-lCr3j6S(0C~%va~2fFk~%)klC?93f3$IANmYKQHoNOq7-HE^8Ze>BpU$0 F008%L_p$&0 literal 4321 zcmV<75FYOziwFP!000001MMBza@$7I&-{v6*|ii?0x60*5^a*QOeqdGj;)g9q$NiC7@z}LAAFD(k;jz|yM;v=u8kE6@`};-w-`n5Y zPw^k5W%mi%UBv`v!6(lD`x-)9x7gC1F;>Tz5bYXo9iy#t3u^?U0RH$Rj9kx|m0#q=6g{*|2@LZKp9~Ks7O_ zEfVNDXr@p-A!zxF{hxpSo2#LrX<>q3)1W&i@X+_K-nazGiP!J%o7pvdc&MY1Ho<6s zu^rQ0#-Fxo54FeE$(sab|1A@Cv3IvFKd`+2w|8`OQ2BqGfCT?T=Wn8UrZ#|W)%4-X zvNUG+f4lpgPR9T199I6{Cg3S}bVJhyuTAhCg}krb-^-W%xbjpr%N=QUFvbuvsNm;; zJ<~zK_mT4Oz*P7By@Iwqe2w3`sIT-J%?t1vH65Q=)+N*MXghrU^WRRCpETw7UF8c^ z`8Bt#3qO36qsAMk--kacr%b70+d%NY@u9~|b$o;pU+gtXB6)1qYy{Z;m%Lw-0Vt>c zp^!FKkU2!#_&4bl<;MT04-q^# zTn`U2u-Cv-y1xg1>EYqZ(;$upckL}i)9gjh_VKl+(S!iHqN2iL^vLsyOBD6D9Qzo2 zGbwtiVG>vK^$~sO29~9I__qMi95V+FUX94H$R8*)%nu#dIi1z!!$a${&+4_BNS`rm zAO##L9R|!sF=U$Y7&Tjq5vkR%J)u}LtIU#GsIT49RpqYf>B-f}i;G_OyG!-z&39+7 zDR27EpeMn)hhe*2UY)$`y?&|w_~zof%kz`&8E*#**k=fs18WgnK4?GXs`W;2L0OORhyFxrx#E$UAj!?{KBQTw*fXcas7q!|uXTZcRfIXu! 
zb;_j=#E>YkA5bs|Y(G%w$cfKh5g)rT!4C)~$4JvDa?;XASif-s|M_IEpasEx;41E& zKXUA-705d%{^RgB*k7!LA|j(>)M_($Gb!zXR^|XnBT7j@~5cq6bb&!hyhER{SpctZw z*W2xSZ016|?AgFk%c6%P@E%0Ygy>nKqbdb`h~bxuix1t0th>3ye z(`5U2D^D)O7~8Nl{P+H}w#{aXiCjS+IfIM977U^5MF%M5t#}Hw1w4yMHxpsL*fz!n z@Fo)Ml5)+5)fMs#Q+Az@ev!;~Qi2g3YYN$bSpRuk6(mvv%?w+A&@9uS4PUUsEc6YS zFKWopDLVwCYl*Hj?;88C2l!aj|Jz^?-RO{}he-1R?)1>SM%&-N0$WU+z|+HLG}aq~ zVsF&sVS<5}E4`T8-IDl8>odT}L(E0+M9PoK=nlFaVH=%a5Tt>DfiBlDPHj9g#zI)zFamSjw4z=FwViY zT!73LZ2Ba5lj0eMD2k_-L{TueG@gQqC2`FT`(;EeLTtVnQn`SF9W29gY1;tRih}V@ z=#r(QQ4bHpDRbM~#1rrVn{kcN9AV99l97f!e2}f^Z*u-|;_HZcgcmwJo)J1i!aTY{ zLSfDeaL!Z|5yAPlyM+9m5T}}Gk)fHt4buI?7SsBt#J>PXO7IhNkH;LP!%HFh=xdrV zkS|maURcANbV1j{v}4y9vw(%=41~aRnNtw(gg`;a&ShBP_r;!14il24bEJh^Ms)U_ z8*FFP#mnB+w{Kq*hzO^b&SZdcfbeTb&{s0CvRxmNdgCv7txOBU1bv!Cf%ZevgoQA^ zfv1;uw!U4@S-JWSst5k?^L&K79iZwz1(0hf!#=*2;hr?mfB*H5zbCMKX<%P7E%No3 zBp8o1@9t~a_lL-DdBcc&*^*6a2^7Ed**|vT;j{brR3CCWB$0gCk^@P-5bDaKYRn`A z)LBz!!Xsp7`##YeM1VF@!50}|j|u~f;JxVD<_ebs8gTtF$Irx2uFH$vRvm?MS|- zwFV>+iM~7!8%s9V;#2Ru~R*{ux$R%kWtf=Jm9SQuY<$X{g2M>{^4Gg z|Jwu<xbYWN&&wHqmis7Y`IFVw|1Xw* zW#@Ox9ciuLG;4haSsz)t@o{s43nZjwS@;amWz0Jx_}D7F6eR6`;IiD4rQJC+_q2lA zUkWqpfA`Y$--k!-gUbGI0)!ri-51xf@o$)*K6eULp%g&r&f8RlhpKo4W)XX?XMzzU z*rS@E-v}>(QayQNx~SjedYxC|=BocBJ=PrJ;)&zN?vzI8IPnl)R2|IUC*-~V$rSy@ zAm#tF^;~rs@B;OporCK8UmJm3{`38$xTcFO@aC7T0xi$~qr=Wo#s5vfeEcuU0Qc85jZu?invB-E^lZc;*Md6w^) z*VovivA+uq#~KYekp`hO(yldHlEfnDDcNih0#od)h?Ey3(J*`!Sp{B&#PbOhCw`Ch zfS!udS;4@PO8|7Q>75BSQ=D0XFhN4GvDIRih_i(ia^$AHbTWitS7gwy$QN|)Nq%I8 zNz3K}`h_btK6&_YE6YiCkVaOrxXDyYttNzl#+AvuhA0V&0?sM&P)l5s&gr?(mFZe7 z{K5^zB?Ci|BMCn%$jCr28Q=xWM;Y#G&k5Xkv@Sn*NX1=_gSU!8eg(n*9kV@f6_(D? 
z91r-Zui0ShY=5wqyf_i~&sUu9TE_b&x1r2A7$z^3lrbUBj+LsX$swej8h9V7=`MQHtM7^}P07PI;j&0pB zTg22-YzX4(jzieOccAYHk>_oE704SqtqNmGiFJk@VQeq8ZU3MDc^V;ZbA4u^n9up2B$!V)=l_br!V*EhNW31BbPy0)_t7E9h=Oha*8F=y`Mz#b+f6?WIfKp zF;_h=-6?QOMpv=aIxYd z&+eE-@$kfS0z${eEDaFSabl3{709!=DdB1!Y!GZ!`mWM<8GXmR=w+2%tS(`-nvNn~ zgPO~$(^;vGE2z+!qTr@g<;>7-Qz~)tRU3E0lP=h)@NTuC4{mDW44Xz$)FhS&-XfPg zMJ3~zr`$Ya&YH8NhaT(Y48vq-7&T6$&@^8?&*P3OsZtKJbjjxk01~eJyq@-H zjsaV+*1z(!7qvW(L06?H z>l?W$u@EZjr^KVYRX8b`)b^ZCO5rrt<)maQ_1EB}q?W$GLz#EoMc7ceC!AvCo=B)d z<0>>>$P-yu6jGm3SXK~n2UbbJilyMO?=VzQK?N05P(cM1R8T<$6;x0`1r=0KK?N05 PkcIyNq&st70H6Q>dq0#` diff --git a/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl b/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl index ec4159d0..1c8cbcb2 100644 --- a/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl +++ b/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl @@ -78,8 +78,9 @@ After merging your pull request, subsequent runs of the model training and batch jobs in staging and production will automatically use your updated ML code. {{- end }} -{{ if not (eq .input_setup_cicd_and_project `CICD_Only`) }} -You can track the state of the ML pipelines for the current project from the MLflow registered model UI. Links: +You can track the state of the ML pipelines for the current project from the MLflow registered model UI. 
+{{ if or (eq .input_setup_cicd_and_project `CICD_and_Project`) (eq .input_cicd_platform `Project_Only`) }} +Links: {{ if (eq .input_include_models_in_unity_catalog `no`) }} * [Staging workspace registered model]({{template `databricks_staging_workspace_host` .}}/ml/models/staging-{{template `model_name` .}}) * [Prod workspace registered model]({{template `databricks_prod_workspace_host` .}}/ml/models/prod-{{template `model_name` .}}) diff --git a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl index ff2f657b..0b98461b 100644 --- a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl +++ b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl @@ -140,9 +140,9 @@ After setting up authentication for CI/CD, you can now set up CI/CD workflows. W This workflow is manually triggered with `project_name` as parameter. This workflow will need to be triggered for each project to set up its set of CI/CD workflows that can be used to deploy ML resources and run ML jobs in the staging and prod workspaces. These workflows will be defined under `.github/workflows`. -If adding CI/CD to a project directory, commit the new changes to github and manually run the `deploy-cicd.yml` file from the Github Actions UI to initalize all the files required for CICD. +If you want to deploy CI/CD for an initialized project (`Project_Only` MLOps Stacks initialization), you can manually run the `deploy-cicd.yml` workflow from the [GitHub Actions UI](https://docs.github.com/en/actions/using-workflows/manually-running-a-workflow?tool=webui) once the project code has been added to your main repo. +The workflow will create a pull request with all the changes against your {{ .input_default_branch }} branch. Review and approve it to commit the files to deploy CI/CD for the project. -The workflow will create a PR with all the changes pushing it to your main branch. Review and approve it to commit the CICD files. 
{{ else if (eq .input_cicd_platform `azure_devops`) -}} ## Configure CI/CD - Azure DevOps diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl index babce5f0..b4da0c4b 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl @@ -240,17 +240,16 @@ You can run unit tests for your ML code via `pytest tests`. ## Next Steps -When you're satisfied with initial ML experimentation (e.g. validated that a model with reasonable performance can be trained on your dataset) and ready to deploy production training/inference -pipelines, ask your ops team to set up CI/CD for the current ML project if they haven't already. CI/CD can be set up as part of the +When you're satisfied with initial ML experimentation (e.g. validated that a model with reasonable performance can be trained on your dataset) and ready to deploy production training/inference pipelines, ask your ops team to set up CI/CD for the current ML project if they haven't already. CI/CD can be set up as part of the MLOps Stacks initialization even if it was skipped in this case, or this project can be added to a repo setup with CI/CD already, following the directions under "Setting up CI/CD" in the repo root directory README. To add CI/CD to this repo: - 1. Run `databricks bundle init mlops-stacks` in your home directory + 1. Run `databricks bundle init mlops-stacks` via the Databricks CLI 2. Select the option to only initialize `CICD_Only` - 3. Provide the current root directory and answer the subsequent prompts + 3. Provide the root directory of this project and answer the subsequent prompts -If you already have CI/CD setup in a separate: +If you already have CI/CD setup in a separate repo: 1. 
Create a new folder in your root CI/CD repo 1. Copy the contents of your current project folder into the new folder From 28399bca69485fe0c2cb32aebf94527335054b60 Mon Sep 17 00:00:00 2001 From: aravind-segu Date: Mon, 26 Feb 2024 14:53:10 -0800 Subject: [PATCH 07/10] Update catalog name --- .../.azure/devops-pipelines/deploy-cicd.yml.tmpl | 2 +- .../.github/workflows/deploy-cicd.yml.tmpl | 2 +- .../{{.input_project_name}}-bundle-cd-prod.yml.tmpl | 4 ++-- .../{{.input_project_name}}-bundle-cd-staging.yml.tmpl | 4 ++-- .../workflows/{{.input_project_name}}-bundle-ci.yml.tmpl | 4 ++-- .../workflows/{{.input_project_name}}-run-tests.yml.tmpl | 8 ++++---- .../cicd/databricks_template_schema.json | 6 +++--- template/{{.input_root_dir}}/cicd_params.json.tmpl | 6 +++--- .../databricks.yml.tmpl | 6 +++--- 9 files changed, 21 insertions(+), 21 deletions(-) diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl index 655c10eb..bfb689b9 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl @@ -59,7 +59,7 @@ jobs: # Update databricks.yml - script: | - echo -e " {{ .staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ .prod_catalog_name }}:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n {{ .test_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$(PROJECT_NAME_ALPHA)/databricks.yml" + echo -e " {{ .input_staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ .input_prod_catalog_name }}:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n {{ .input_test_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> 
"$(PROJECT_NAME_ALPHA)/databricks.yml" displayName: 'Update databricks.yml' # Initialize CICD Bundle diff --git a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl index 9f6485da..278d2f88 100644 --- a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl @@ -49,7 +49,7 @@ jobs: - name: Update databricks.yml id: update run: | - echo -e " {{ .staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ .prod_catalog_name }}:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n {{ .test_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$PROJECT_NAME_ALPHA/databricks.yml" + echo -e " {{ .input_staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ .input_prod_catalog_name }}:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n {{ .input_test_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$PROJECT_NAME_ALPHA/databricks.yml" - name: Initialize Bundle id: initialize run: | diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl index b9068905..10f007fd 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl @@ -33,8 +33,8 @@ jobs: - name: Validate Bundle For Prod id: validate run: | - databricks bundle validate -t {{ .prod_catalog_name }} + databricks bundle validate -t {{ .input_prod_catalog_name }} - name: Deploy Bundle to Prod id: deploy run: | - databricks bundle deploy -t {{ 
.prod_catalog_name }} + databricks bundle deploy -t {{ .input_prod_catalog_name }} diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl index 9ddf34da..442af3d1 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl @@ -33,8 +33,8 @@ jobs: - name: Validate Bundle For Staging id: validate run: | - databricks bundle validate -t {{ .staging_catalog_name }} + databricks bundle validate -t {{ .input_staging_catalog_name }} - name: Deploy Bundle to Staging id: deploy run: | - databricks bundle deploy -t {{ .staging_catalog_name }} + databricks bundle deploy -t {{ .input_staging_catalog_name }} diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl index 47ec8076..49d65a55 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl @@ -46,7 +46,7 @@ jobs: ARM_CLIENT_SECRET: {{`${{ env.STAGING_ARM_CLIENT_SECRET }}`}} {{- end }} run: | - databricks bundle validate -t {{ .staging_catalog_name }} > ../validate_output.txt + databricks bundle validate -t {{ .input_staging_catalog_name }} > ../validate_output.txt - name: Create Comment with Bundle Configuration uses: actions/github-script@v6 id: comment @@ -90,7 +90,7 @@ jobs: ARM_CLIENT_SECRET: {{`${{ env.PROD_ARM_CLIENT_SECRET }}`}} {{- end }} run: | - databricks bundle validate -t {{ .prod_catalog_name }} > ../validate_output.txt + databricks bundle validate -t {{ .input_prod_catalog_name }} > ../validate_output.txt - name: Create Comment 
with Bundle Configuration uses: actions/github-script@v6 id: comment diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl index 5d9bfd9d..df5c1bb6 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl @@ -55,18 +55,18 @@ jobs: - name: Validate Bundle For Test Deployment Target in Staging Workspace id: validate run: | - databricks bundle validate -t {{ .test_catalog_name }} + databricks bundle validate -t {{ .input_test_catalog_name }} - name: Deploy Bundle to Test Deployment Target in Staging Workspace id: deploy run: | - databricks bundle deploy -t {{ .test_catalog_name }} + databricks bundle deploy -t {{ .input_test_catalog_name }} {{- if (eq .input_include_feature_store `yes`) }} - name: Run Feature Engineering Workflow for Test Deployment Target in Staging Workspace id: feature_engineering run: | - databricks bundle run write_feature_table_job -t {{ .test_catalog_name }} + databricks bundle run write_feature_table_job -t {{ .input_test_catalog_name }} {{- end }} - name: Run Training Workflow for Test Deployment Target in Staging Workspace id: training run: | - databricks bundle run model_training_job -t {{ .test_catalog_name }} + databricks bundle run model_training_job -t {{ .input_test_catalog_name }} diff --git a/template/{{.input_root_dir}}/cicd/databricks_template_schema.json b/template/{{.input_root_dir}}/cicd/databricks_template_schema.json index 9f856945..2f92c993 100644 --- a/template/{{.input_root_dir}}/cicd/databricks_template_schema.json +++ b/template/{{.input_root_dir}}/cicd/databricks_template_schema.json @@ -46,19 +46,19 @@ "description": "Use Feature Store (yes) or not (no)", "type": "string" }, - "test_catalog_name": { + "input_test_catalog_name": { "order": 10, 
"type": "string", "description": "\nName of the Test Unity Catalog", "default": "test" }, - "staging_catalog_name": { + "input_staging_catalog_name": { "order": 13, "type": "string", "description": "\nName of the Staging Unity Catalog", "default": "staging" }, - "prod_catalog_name": { + "input_prod_catalog_name": { "order": 14, "type": "string", "description": "\nName of the Prod Unity Catalog", diff --git a/template/{{.input_root_dir}}/cicd_params.json.tmpl b/template/{{.input_root_dir}}/cicd_params.json.tmpl index 1be267f5..e87cf132 100644 --- a/template/{{.input_root_dir}}/cicd_params.json.tmpl +++ b/template/{{.input_root_dir}}/cicd_params.json.tmpl @@ -6,7 +6,7 @@ "input_release_branch": "{{ .input_release_branch }}", "cloud_specific_node_type_id": "{{template `cli_version` .}}", "input_cli_version": "{{template `cli_version` .}}", - "test_catalog_name": "{{ .test_catalog_name }}", - "staging_catalog_name": "{{ .staging_catalog_name }}", - "prod_catalog_name": "{{ .prod_catalog_name }}" + "test_catalog_name": "{{ .input_test_catalog_name }}", + "staging_catalog_name": "{{ .input_staging_catalog_name }}", + "prod_catalog_name": "{{ .input_prod_catalog_name }}" } diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl index 3d8cb380..d6a827ac 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl @@ -27,15 +27,15 @@ targets: {{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} - {{ .staging_catalog_name }}: + {{ .input_staging_catalog_name }}: workspace: host: {{template `databricks_staging_workspace_host` .}} - {{ .prod_catalog_name }}: + {{ .input_prod_catalog_name }}: workspace: host: {{template 
`databricks_prod_workspace_host` .}} - {{ .test_catalog_name }}: + {{ .input_test_catalog_name }}: workspace: host: {{template `databricks_staging_workspace_host` .}} {{ end }} From c99fcdade45bf24900e6a793e8eccd2bc73b5743 Mon Sep 17 00:00:00 2001 From: aravind-segu Date: Mon, 26 Feb 2024 16:27:27 -0800 Subject: [PATCH 08/10] update test failure --- .../.github/workflows/deploy-cicd.yml.tmpl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl index 278d2f88..448fb2f7 100644 --- a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl @@ -23,6 +23,10 @@ jobs: cicd: runs-on: ubuntu-latest steps: + - name: Get current timestamp + id: timestamp + run: | + echo "timestamp=$(date +'%s')" >> "$GITHUB_ENV" - uses: actions/checkout@v3 with: ref: {{`${{ github.event.pull_request.head.sha || github.sha }}`}} @@ -59,17 +63,16 @@ jobs: env: GITHUB_TOKEN: {{`${{ github.token }}`}} run: | - TIMESTAMP=$(date +'%s') git config --global user.name "Deploy CICD Bot" git config --global user.email "noreply-cicd-bot@databricks.com" - git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`$TIMESTAMP`}} + git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}} git add .github "$PROJECT_NAME_ALPHA/databricks.yml" git commit -m "Add CICD for {{`${{ github.event.inputs.project_name }}`}}" - git push origin add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`$TIMESTAMP`}} + git push origin add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}} - name: Create Pull Request id: pr env: GITHUB_TOKEN: {{`${{ github.token }}`}} run: | - gh pr create --base {{ .input_default_branch }} --head add-cicd-for-{{`${{ github.event.inputs.project_name 
}}`}} --title "Deploy CICD for {{`${{ github.event.inputs.project_name }}`}}" --body "This PR was generated by the Deploy CICD workflow." --reviewer {{`${{ github.actor }}`}} + gh pr create --base {{ .input_default_branch }} --head add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}} --title "Deploy CICD for {{`${{ github.event.inputs.project_name }}`}}" --body "This PR was generated by the Deploy CICD workflow." --reviewer {{`${{ github.actor }}`}} From 0bdb0f2b4a1b423b7bd580106399638e851d2ab7 Mon Sep 17 00:00:00 2001 From: aravind-segu Date: Tue, 27 Feb 2024 11:06:23 -0800 Subject: [PATCH 09/10] More PR Comments --- databricks_template_schema.json | 6 ++--- template/{{.input_root_dir}}/README.md.tmpl | 3 +-- .../cicd/databricks_template_schema.json | 26 +++++++++---------- .../docs/ml-pull-request.md.tmpl | 2 +- .../README.md.tmpl | 4 --- 5 files changed, 18 insertions(+), 23 deletions(-) diff --git a/databricks_template_schema.json b/databricks_template_schema.json index 0466b7a6..86c1e14e 100644 --- a/databricks_template_schema.json +++ b/databricks_template_schema.json @@ -137,7 +137,7 @@ "input_staging_catalog_name": { "order": 12, "type": "string", - "description": "\nName of the catalog in Unity Catalog that will host the staging UC resources. \nThis catalog must already exist and service principals must have access to it.", + "description": "\nName of the catalog in Unity Catalog that will host the staging UC resources. 
\nThis catalog must already exist and service principals must have access to it.\nDefault", "default": "staging", "skip_prompt_if": { "anyOf": [ @@ -161,7 +161,7 @@ "input_prod_catalog_name": { "order": 13, "type": "string", - "description": "\nName of the Prod Unity Catalog", + "description": "\nName of the Prod Unity Catalog.\nThis catalog must already exist and service principals must have access to it.\nDefault", "default": "prod", "skip_prompt_if": { "anyOf": [ @@ -185,7 +185,7 @@ "input_test_catalog_name": { "order": 14, "type": "string", - "description": "\nName of the catalog in Unity Catalog that will be used for integration tests", + "description": "\nName of the catalog in Unity Catalog that will be used for integration tests\nDefault", "default": "test", "skip_prompt_if": { "anyOf": [ diff --git a/template/{{.input_root_dir}}/README.md.tmpl b/template/{{.input_root_dir}}/README.md.tmpl index bc010b02..dbabde09 100644 --- a/template/{{.input_root_dir}}/README.md.tmpl +++ b/template/{{.input_root_dir}}/README.md.tmpl @@ -178,8 +178,7 @@ This stack comes with a workflow to set up CI/CD for projects that can be found {{ end }} To set up CI/CD for projects that were created through MLOps Stacks with the `Project_Only` parameter, -run the above mentioned workflow, specifying the `project_name` as a parameter. This workflow assumes that -all steps in the [MLOps Setup Guide](./docs/mlops-setup.md#Steps) have been completed. For example, for the monorepo case: +run the above mentioned workflow, specifying the `project_name` as a parameter. For example, for the monorepo case: 1. Setup your repository by initializing MLOps Stacks via Databricks CLI with the `CICD_and_Project` or `CICD_Only` parameter. 2. Follow the [MLOps Setup Guide](./docs/mlops-setup.md) to setup authentication and get the repo ready for CI/CD. 
diff --git a/template/{{.input_root_dir}}/cicd/databricks_template_schema.json b/template/{{.input_root_dir}}/cicd/databricks_template_schema.json index 2f92c993..13b5ea58 100644 --- a/template/{{.input_root_dir}}/cicd/databricks_template_schema.json +++ b/template/{{.input_root_dir}}/cicd/databricks_template_schema.json @@ -36,33 +36,33 @@ "description": "CLI Version", "type": "string" }, - "input_project_name": { - "order": 8, - "description": "Project Name", - "type": "string" - }, - "input_include_feature_store": { - "order": 9, - "description": "Use Feature Store (yes) or not (no)", - "type": "string" - }, "input_test_catalog_name": { - "order": 10, + "order": 8, "type": "string", "description": "\nName of the Test Unity Catalog", "default": "test" }, "input_staging_catalog_name": { - "order": 13, + "order": 9, "type": "string", "description": "\nName of the Staging Unity Catalog", "default": "staging" }, "input_prod_catalog_name": { - "order": 14, + "order": 10, "type": "string", "description": "\nName of the Prod Unity Catalog", "default": "prod" + }, + "input_project_name": { + "order": 11, + "description": "Project Name", + "type": "string" + }, + "input_include_feature_store": { + "order": 12, + "description": "Use Feature Store (yes) or not (no)", + "type": "string" } }, "success_message" : "\n✨ CICD files for your MLOps Project '{{.input_project_name}}' have been initialized!" diff --git a/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl b/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl index 1c8cbcb2..0c895058 100644 --- a/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl +++ b/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl @@ -79,7 +79,7 @@ jobs in staging and production will automatically use your updated ML code. {{- end }} You can track the state of the ML pipelines for the current project from the MLflow registered model UI. 
-{{ if or (eq .input_setup_cicd_and_project `CICD_and_Project`) (eq .input_cicd_platform `Project_Only`) }} +{{ if (eq .input_setup_cicd_and_project `CICD_and_Project`) }} Links: {{ if (eq .input_include_models_in_unity_catalog `no`) }} * [Staging workspace registered model]({{template `databricks_staging_workspace_host` .}}/ml/models/staging-{{template `model_name` .}}) diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl index b4da0c4b..a021af38 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl @@ -249,8 +249,4 @@ To add CI/CD to this repo: 2. Select the option to only initialize `CICD_Only` 3. Provide the root directory of this project and answer the subsequent prompts -If you already have CI/CD setup in a separate repo: -1. Create a new folder in your root CI/CD repo -1. Copy the contents of your current project folder into the new folder - More details can be found on the homepage [MLOps Stacks README](https://github.com/databricks/mlops-stacks/blob/main/README.md). 
From ff7b5e54c8a0930e4d2128f77008c98845b3bad7 Mon Sep 17 00:00:00 2001 From: aravind-segu Date: Tue, 27 Feb 2024 14:29:43 -0800 Subject: [PATCH 10/10] Update Docs --- databricks_template_schema.json | 4 ++-- .../{{.input_project_name}}-tests-ci.yml.tmpl | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/databricks_template_schema.json b/databricks_template_schema.json index 86c1e14e..674c9e36 100644 --- a/databricks_template_schema.json +++ b/databricks_template_schema.json @@ -161,7 +161,7 @@ "input_prod_catalog_name": { "order": 13, "type": "string", - "description": "\nName of the Prod Unity Catalog.\nThis catalog must already exist and service principals must have access to it.\nDefault", + "description": "\nName of the catalog in Unity Catalog that will host the production UC resources.\nThis catalog must already exist and service principals must have access to it.\nDefault", "default": "prod", "skip_prompt_if": { "anyOf": [ @@ -185,7 +185,7 @@ "input_test_catalog_name": { "order": 14, "type": "string", - "description": "\nName of the catalog in Unity Catalog that will be used for integration tests\nDefault", + "description": "\nName of the catalog in Unity Catalog that will be used for integration tests.\nThis catalog must already exist and service principals must have access to it.\nDefault", "default": "test", "skip_prompt_if": { "anyOf": [ diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl index 364ae74b..c4ddfd20 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl @@ -12,6 +12,7 @@ trigger: paths: include: - {{template `project_name_alphanumeric_underscore` .}}/* + - '.azure/devops-pipelines/{{ .input_project_name }}-tests-ci.yml' 
variables: - name: workingDirectory