diff --git a/databricks_template_schema.json b/databricks_template_schema.json index db607bca..674c9e36 100644 --- a/databricks_template_schema.json +++ b/databricks_template_schema.json @@ -132,17 +132,82 @@ "type": "string", "description": "\nWhether to use the Model Registry with Unity Catalog", "default": "no", - "enum": ["yes", "no"], + "enum": ["yes", "no"] + }, + "input_staging_catalog_name": { + "order": 12, + "type": "string", + "description": "\nName of the catalog in Unity Catalog that will host the staging UC resources. \nThis catalog must already exist and service principals must have access to it.\nDefault", + "default": "staging", "skip_prompt_if": { - "properties": { - "input_setup_cicd_and_project": { - "const": "CICD_Only" + "anyOf": [ + { + "properties": { + "input_include_models_in_unity_catalog": { + "const": "no" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } } - } + ] + } + }, + "input_prod_catalog_name": { + "order": 13, + "type": "string", + "description": "\nName of the catalog in Unity Catalog that will host the production UC resources.\nThis catalog must already exist and service principals must have access to it.\nDefault", + "default": "prod", + "skip_prompt_if": { + "anyOf": [ + { + "properties": { + "input_include_models_in_unity_catalog": { + "const": "no" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + ] + } + }, + "input_test_catalog_name": { + "order": 14, + "type": "string", + "description": "\nName of the catalog in Unity Catalog that will be used for integration tests.\nThis catalog must already exist and service principals must have access to it.\nDefault", + "default": "test", + "skip_prompt_if": { + "anyOf": [ + { + "properties": { + "input_include_models_in_unity_catalog": { + "const": "no" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + ] } 
}, "input_schema_name": { - "order": 12, + "order": 15, "type": "string", "description": "\nName of schema to use when registering a model in Unity Catalog.\nThis schema must already exist and service principals must have access.\nWe recommend using the project name.\nDefault", "default": "{{if (eq .input_include_models_in_unity_catalog `no`)}}schema{{else}}{{ .input_project_name }}{{end}}", @@ -168,7 +233,7 @@ } }, "input_unity_catalog_read_user_group": { - "order": 13, + "order": 16, "type": "string", "default": "account users", "description": "\nUser group name to give EXECUTE privileges to models in Unity Catalog (UC).\nIt must exist in UC with access granted to the staging and prod workspaces.\nDefault", @@ -192,7 +257,7 @@ } }, "input_include_feature_store": { - "order": 14, + "order": 17, "type": "string", "description": "\nWhether to include Feature Store", "default": "no", @@ -206,7 +271,7 @@ } }, "input_include_mlflow_recipes": { - "order": 15, + "order": 18, "type": "string", "description": "\nWhether to include MLflow Recipes", "default": "no", diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl index d38a068a..bfb689b9 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/deploy-cicd.yml.tmpl @@ -59,7 +59,7 @@ jobs: # Update databricks.yml - script: | - echo -e " staging:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n prod:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n test:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$(PROJECT_NAME_ALPHA)/databricks.yml" + echo -e " {{ .input_staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ .input_prod_catalog_name }}:\n workspace:\n host: {{template 
`databricks_prod_workspace_host` .}}\n\n {{ .input_test_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$(PROJECT_NAME_ALPHA)/databricks.yml" displayName: 'Update databricks.yml' # Initialize CICD Bundle diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl index 364ae74b..c4ddfd20 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl @@ -12,6 +12,7 @@ trigger: paths: include: - {{template `project_name_alphanumeric_underscore` .}}/* + - '.azure/devops-pipelines/{{ .input_project_name }}-run-tests.yml' variables: - name: workingDirectory diff --git a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl index 5eedb6c0..448fb2f7 100644 --- a/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/deploy-cicd.yml.tmpl @@ -23,6 +23,10 @@ jobs: cicd: runs-on: ubuntu-latest steps: + - name: Get current timestamp + id: timestamp + run: | + echo "timestamp=$(date +'%s')" >> "$GITHUB_ENV" - uses: actions/checkout@v3 with: ref: {{`${{ github.event.pull_request.head.sha || github.sha }}`}} @@ -49,7 +53,7 @@ jobs: - name: Update databricks.yml id: update run: | - echo -e " staging:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n prod:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n test:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$PROJECT_NAME_ALPHA/databricks.yml" + echo -e " {{ .input_staging_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}\n\n {{ 
.input_prod_catalog_name }}:\n workspace:\n host: {{template `databricks_prod_workspace_host` .}}\n\n {{ .input_test_catalog_name }}:\n workspace:\n host: {{template `databricks_staging_workspace_host` .}}" >> "$PROJECT_NAME_ALPHA/databricks.yml" - name: Initialize Bundle id: initialize run: | @@ -61,13 +65,14 @@ jobs: run: | git config --global user.name "Deploy CICD Bot" git config --global user.email "noreply-cicd-bot@databricks.com" - git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}} + git checkout -b add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}} git add .github "$PROJECT_NAME_ALPHA/databricks.yml" git commit -m "Add CICD for {{`${{ github.event.inputs.project_name }}`}}" - git push origin add-cicd-for-{{`${{ github.event.inputs.project_name }}`}} + git push origin add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}} + - name: Create Pull Request id: pr env: GITHUB_TOKEN: {{`${{ github.token }}`}} run: | - gh pr create --base {{ .input_default_branch }} --head add-cicd-for-{{`${{ github.event.inputs.project_name }}`}} --title "Deploy CICD for {{`${{ github.event.inputs.project_name }}`}}" --body "This PR was generated by the Deploy CICD workflow." + gh pr create --base {{ .input_default_branch }} --head add-cicd-for-{{`${{ github.event.inputs.project_name }}`}}-{{`${{ env.timestamp }}`}} --title "Deploy CICD for {{`${{ github.event.inputs.project_name }}`}}" --body "This PR was generated by the Deploy CICD workflow." 
--reviewer {{`${{ github.actor }}`}} diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl index a5072d40..10f007fd 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl @@ -33,8 +33,8 @@ jobs: - name: Validate Bundle For Prod id: validate run: | - databricks bundle validate -t prod + databricks bundle validate -t {{ .input_prod_catalog_name }} - name: Deploy Bundle to Prod id: deploy run: | - databricks bundle deploy -t prod + databricks bundle deploy -t {{ .input_prod_catalog_name }} diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl index e84a9384..442af3d1 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl @@ -33,8 +33,8 @@ jobs: - name: Validate Bundle For Staging id: validate run: | - databricks bundle validate -t staging + databricks bundle validate -t {{ .input_staging_catalog_name }} - name: Deploy Bundle to Staging id: deploy run: | - databricks bundle deploy -t staging + databricks bundle deploy -t {{ .input_staging_catalog_name }} diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl index 67b78399..49d65a55 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl +++ 
b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl @@ -46,7 +46,7 @@ jobs: ARM_CLIENT_SECRET: {{`${{ env.STAGING_ARM_CLIENT_SECRET }}`}} {{- end }} run: | - databricks bundle validate -t staging > ../validate_output.txt + databricks bundle validate -t {{ .input_staging_catalog_name }} > ../validate_output.txt - name: Create Comment with Bundle Configuration uses: actions/github-script@v6 id: comment @@ -90,7 +90,7 @@ jobs: ARM_CLIENT_SECRET: {{`${{ env.PROD_ARM_CLIENT_SECRET }}`}} {{- end }} run: | - databricks bundle validate -t prod > ../validate_output.txt + databricks bundle validate -t {{ .input_prod_catalog_name }} > ../validate_output.txt - name: Create Comment with Bundle Configuration uses: actions/github-script@v6 id: comment diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl index ca807d72..df5c1bb6 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-run-tests.yml.tmpl @@ -4,6 +4,7 @@ on: pull_request: paths: - '{{template `project_name_alphanumeric_underscore` .}}/**' + - '.github/workflows/{{ .input_project_name }}-run-tests.yml' defaults: run: @@ -54,18 +55,18 @@ jobs: - name: Validate Bundle For Test Deployment Target in Staging Workspace id: validate run: | - databricks bundle validate -t test + databricks bundle validate -t {{ .input_test_catalog_name }} - name: Deploy Bundle to Test Deployment Target in Staging Workspace id: deploy run: | - databricks bundle deploy -t test + databricks bundle deploy -t {{ .input_test_catalog_name }} {{- if (eq .input_include_feature_store `yes`) }} - name: Run Feature Engineering Workflow for Test Deployment Target in Staging Workspace id: feature_engineering run: | - databricks bundle run 
write_feature_table_job -t test + databricks bundle run write_feature_table_job -t {{ .input_test_catalog_name }} {{- end }} - name: Run Training Workflow for Test Deployment Target in Staging Workspace id: training run: | - databricks bundle run model_training_job -t test + databricks bundle run model_training_job -t {{ .input_test_catalog_name }} diff --git a/template/{{.input_root_dir}}/README.md.tmpl b/template/{{.input_root_dir}}/README.md.tmpl index bbc0de6d..dbabde09 100644 --- a/template/{{.input_root_dir}}/README.md.tmpl +++ b/template/{{.input_root_dir}}/README.md.tmpl @@ -178,8 +178,8 @@ This stack comes with a workflow to set up CI/CD for projects that can be found {{ end }} To set up CI/CD for projects that were created through MLOps Stacks with the `Project_Only` parameter, -run the abovementioned workflow, specifying the `project_name` as a parameter. This workflow assumes that -all steps in the [MLOps Setup Guide](./docs/mlops-setup.md) have been completed. For example, for the monorepo case: +run the above-mentioned workflow, specifying the `project_name` as a parameter. For example, for the monorepo case: + 1. Setup your repository by initializing MLOps Stacks via Databricks CLI with the `CICD_and_Project` or `CICD_Only` parameter. 2. Follow the [MLOps Setup Guide](./docs/mlops-setup.md) to setup authentication and get the repo ready for CI/CD. 3. Create a new project by initializing MLOps Stacks again but this time with the `Project_Only` parameter.
diff --git a/template/{{.input_root_dir}}/cicd.tar.gz b/template/{{.input_root_dir}}/cicd.tar.gz index b95fe607..efb58236 100644 Binary files a/template/{{.input_root_dir}}/cicd.tar.gz and b/template/{{.input_root_dir}}/cicd.tar.gz differ diff --git a/template/{{.input_root_dir}}/cicd/databricks_template_schema.json b/template/{{.input_root_dir}}/cicd/databricks_template_schema.json index b0a066c2..13b5ea58 100644 --- a/template/{{.input_root_dir}}/cicd/databricks_template_schema.json +++ b/template/{{.input_root_dir}}/cicd/databricks_template_schema.json @@ -36,13 +36,31 @@ "description": "CLI Version", "type": "string" }, - "input_project_name": { + "input_test_catalog_name": { "order": 8, + "type": "string", + "description": "\nName of the Test Unity Catalog", + "default": "test" + }, + "input_staging_catalog_name": { + "order": 9, + "type": "string", + "description": "\nName of the Staging Unity Catalog", + "default": "staging" + }, + "input_prod_catalog_name": { + "order": 10, + "type": "string", + "description": "\nName of the Prod Unity Catalog", + "default": "prod" + }, + "input_project_name": { + "order": 11, "description": "Project Name", "type": "string" }, "input_include_feature_store": { - "order": 9, + "order": 12, "description": "Use Feature Store (yes) or not (no)", "type": "string" } diff --git a/template/{{.input_root_dir}}/cicd_params.json.tmpl b/template/{{.input_root_dir}}/cicd_params.json.tmpl index 8358e6b1..e87cf132 100644 --- a/template/{{.input_root_dir}}/cicd_params.json.tmpl +++ b/template/{{.input_root_dir}}/cicd_params.json.tmpl @@ -5,5 +5,8 @@ "input_default_branch": "{{ .input_default_branch }}", "input_release_branch": "{{ .input_release_branch }}", "cloud_specific_node_type_id": "{{template `cli_version` .}}", - "input_cli_version": "{{template `cli_version` .}}" + "input_cli_version": "{{template `cli_version` .}}", + "test_catalog_name": "{{ .input_test_catalog_name }}", + "staging_catalog_name": "{{ .input_staging_catalog_name 
}}", + "prod_catalog_name": "{{ .input_prod_catalog_name }}" } diff --git a/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl b/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl index 3345a25d..0c895058 100644 --- a/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl +++ b/template/{{.input_root_dir}}/docs/ml-pull-request.md.tmpl @@ -78,7 +78,9 @@ After merging your pull request, subsequent runs of the model training and batch jobs in staging and production will automatically use your updated ML code. {{- end }} -You can track the state of the ML pipelines for the current project from the MLflow registered model UI. Links: +You can track the state of the ML pipelines for the current project from the MLflow registered model UI. +{{ if (eq .input_setup_cicd_and_project `CICD_and_Project`) }} +Links: {{ if (eq .input_include_models_in_unity_catalog `no`) }} * [Staging workspace registered model]({{template `databricks_staging_workspace_host` .}}/ml/models/staging-{{template `model_name` .}}) * [Prod workspace registered model]({{template `databricks_prod_workspace_host` .}}/ml/models/prod-{{template `model_name` .}}) @@ -86,6 +88,7 @@ You can track the state of the ML pipelines for the current project from the MLf * [Staging model in UC]({{template `databricks_staging_workspace_host` .}}/explore/data/models/staging/{{.input_project_name}}/{{template `model_name` .}}) * [Prod model in UC]({{template `databricks_prod_workspace_host` .}}/explore/data/models/prod/{{.input_project_name}}/{{template `model_name` .}}) {{end}}. 
+{{end}} In both the staging and prod workspaces, the MLflow registered model contains links to: * The model versions produced through automated retraining diff --git a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl index 8d298801..0b98461b 100644 --- a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl +++ b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl @@ -84,7 +84,7 @@ If the created project uses **Unity Catalog**, we expect a catalog to exist with For example, if the deployment target is dev, we expect a catalog named dev to exist in the workspace. If you want to use different catalog names, please update the target names declared in the {{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}}[{{ .input_project_name }}/databricks.yml](../{{template `project_name_alphanumeric_underscore` .}}/databricks.yml) -{{- else }} `databricks.yml` {{ end }} file. +{{- else }} `databricks.yml` {{ end }} file. If changing the staging, prod, or test deployment targets, you'll also need to update the workflows located in the .github/workflows directory. The SP must have proper permission in each respective environment and the catalog for the environments. @@ -113,8 +113,10 @@ to add the secrets to GitHub: - `STAGING_WORKSPACE_TOKEN` : service principal token for staging workspace - `PROD_WORKSPACE_TOKEN` : service principal token for prod workspace - `WORKFLOW_TOKEN` : [Github token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic) with workflow permissions. This secret is needed for the Deploy CI/CD Workflow. 
-Be sure to update the [Workflow Permissions](https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token) section under Repo Settings > Actions > General to allow `Read and write permissions`, -and to allow workflows to be able to open pull requests (PRs). + +Next, be sure to update the [Workflow Permissions](https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token) section under Repo Settings > Actions > General: +- Allow `Read and write permissions`, +- Allow workflows to be able to open pull requests (PRs). {{ end }} {{ if and (eq .input_cicd_platform `github_actions`) (eq .input_cloud `azure`) }} @@ -138,6 +140,9 @@ After setting up authentication for CI/CD, you can now set up CI/CD workflows. W This workflow is manually triggered with `project_name` as parameter. This workflow will need to be triggered for each project to set up its set of CI/CD workflows that can be used to deploy ML resources and run ML jobs in the staging and prod workspaces. These workflows will be defined under `.github/workflows`. +If you want to deploy CI/CD for an initialized project (`Project_Only` MLOps Stacks initialization), you can manually run the `deploy-cicd.yml` workflow from the [GitHub Actions UI](https://docs.github.com/en/actions/using-workflows/manually-running-a-workflow?tool=webui) once the project code has been added to your main repo. +The workflow will create a pull request with all the changes against your {{ .input_default_branch }} branch. Review and approve it to commit the files to deploy CI/CD for the project.
+ {{ else if (eq .input_cicd_platform `azure_devops`) -}} ## Configure CI/CD - Azure DevOps diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl index 84d3bc5c..a021af38 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl @@ -237,10 +237,16 @@ i.e. for each environment You can run unit tests for your ML code via `pytest tests`. {{ end }} + ## Next Steps -When you're satisfied with initial ML experimentation (e.g. validated that a model with reasonable performance can be -trained on your dataset) and ready to deploy production training/inference -pipelines, ask your ops team to set up CI/CD for the current ML project if they haven't already. CI/CD can be set up as part of the -MLOps Stacks initialization even if it was skipped in this case, or this project can be added to a repo setup with CI/CD already, -following the directions under "Setting up CI/CD" in the repo root directory README. + +When you're satisfied with initial ML experimentation (e.g. validated that a model with reasonable performance can be trained on your dataset) and ready to deploy production training/inference pipelines, ask your ops team to set up CI/CD for the current ML project if they haven't already. + +CI/CD can be set up as part of the MLOps Stacks initialization even if it was skipped in this case, or this project can be added to a repo set up with CI/CD already, following the directions under "Setting up CI/CD" in the repo root directory README. + +To add CI/CD to this repo: + 1. Run `databricks bundle init mlops-stacks` via the Databricks CLI + 2. Select the option to only initialize `CICD_Only` + 3.
Provide the root directory of this project and answer the subsequent prompts + More details can be found on the homepage [MLOps Stacks README](https://github.com/databricks/mlops-stacks/blob/main/README.md). diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl index b43035d2..d6a827ac 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl @@ -27,15 +27,15 @@ targets: {{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} - staging: + {{ .input_staging_catalog_name }}: workspace: host: {{template `databricks_staging_workspace_host` .}} - prod: + {{ .input_prod_catalog_name }}: workspace: host: {{template `databricks_prod_workspace_host` .}} - test: + {{ .input_test_catalog_name }}: workspace: host: {{template `databricks_staging_workspace_host` .}} {{ end }}