diff --git a/.gitignore b/.gitignore index ce1b8b47..9aa63598 100644 --- a/.gitignore +++ b/.gitignore @@ -11,4 +11,4 @@ __pycache__/ .cache *.pyc -mlops-stack.iml +mlops-stacks.iml diff --git a/Pipeline.md b/Pipeline.md index cb87a306..7c84360e 100644 --- a/Pipeline.md +++ b/Pipeline.md @@ -1,5 +1,5 @@ # ML Pipeline Structure and Devloop -The default stack contains an ML pipeline with CI/CD workflows to test and deploy +MLOps Stacks contains an ML pipeline with CI/CD workflows to test and deploy automated model training and batch inference jobs across your dev, staging, and prod Databricks workspaces. diff --git a/README.md b/README.md index 066ed792..7ea03351 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ -# Databricks MLOps Stack +# Databricks MLOps Stacks -> **_NOTE:_** This feature is in [private preview](https://docs.databricks.com/release-notes/release-types.html). The interface/APIs may change and no formal support is available during the preview. However, you can still create new production-grade ML projects using the stack. -If interested in trying it out, please fill out this [form](https://docs.google.com/forms/d/e/1FAIpQLSfHXCmkbsEURjQQvtUGObgh2D5q1eD4YRHnUxZ0M4Hu0W63WA/viewform), and you’ll be contacted by a Databricks representative. +> **_NOTE:_** This feature is in [public preview](https://docs.databricks.com/release-notes/release-types.html). This repo provides a customizable stack for starting new ML projects on Databricks that follow production best-practices out of the box. @@ -19,11 +18,11 @@ Your organization can use the default stack as is or customize it as needed, e.g adapt individual components to fit your organization's best practices. See the [stack customization guide](stack-customization.md) for more details. -Using Databricks MLOps stack, data scientists can quickly get started iterating on ML code for new projects while ops engineers set up CI/CD and ML service state -management, with an easy transition to production. You can also use MLOps stack as a building block +Using Databricks MLOps Stacks, data scientists can quickly get started iterating on ML code for new projects while ops engineers set up CI/CD and ML service state +management, with an easy transition to production. You can also use MLOps Stacks as a building block in automation for creating new data science projects with production-grade CI/CD pre-configured. -![MLOps Stack diagram](doc-images/mlops-stack.png) +![MLOps Stacks diagram](doc-images/mlops-stacks.png) See the [FAQ](#FAQ) for questions on common use cases. @@ -31,13 +30,13 @@ See the [FAQ](#FAQ) for questions on common use cases. [See this page](Pipeline.md) for detailed description and diagrams of the ML pipeline structure defined in the default stack. -## Using this stack +## Using MLOps Stacks ### Prerequisites - Python 3.8+ - - [Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/databricks-cli.html) >= v0.204.0 + - [Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/databricks-cli.html) >= v0.208.1 -[Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/databricks-cli.html) v0.204.0 contains [Databricks asset bundle templates](https://docs.databricks.com/en/dev-tools/bundles/templates.html) for the purpose of project creation. +[Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/databricks-cli.html) v0.208.1 contains [Databricks asset bundle templates](https://docs.databricks.com/en/dev-tools/bundles/templates.html) for the purpose of project creation. 
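If you want to sanity-check the installed CLI version programmatically before creating a project, something like the following works (a minimal sketch, assuming the unified `databricks` CLI is on your `PATH` and that `databricks --version` prints a string such as `Databricks CLI v0.208.1`):

```python
import re
import subprocess

MIN_CLI_VERSION = (0, 208, 1)  # version floor stated in the prerequisites above

def installed_cli_version():
    # Assumes `databricks --version` prints something like "Databricks CLI v0.208.1".
    out = subprocess.run(
        ["databricks", "--version"], capture_output=True, text=True, check=True
    ).stdout
    match = re.search(r"v?(\d+)\.(\d+)\.(\d+)", out)
    if not match:
        raise RuntimeError(f"Could not parse a CLI version from: {out!r}")
    return tuple(int(part) for part in match.groups())

if installed_cli_version() < MIN_CLI_VERSION:
    raise SystemExit("Please upgrade the Databricks CLI to v0.208.1 or newer.")
```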
Please follow [the instructions](https://docs.databricks.com/en/dev-tools/cli/databricks-cli-ref.html#install-the-cli) to install and set up the Databricks CLI. Releases of the Databricks CLI can be found in the [releases section](https://github.com/databricks/cli/releases) of the databricks/cli repository.
@@ -47,7 +46,7 @@ Please follow [the instruction](https://docs.databricks.com/en/dev-tools/cli/dat
To create a new project, run:
-    databricks bundle init https://github.com/databricks/mlops-stack
+    databricks bundle init mlops-stacks
This will prompt for parameters for project initialization. Some of these parameters are required to get started:
* ``input_project_name``: name of the current project
@@ -78,42 +77,41 @@ See the generated ``README.md`` for next steps!
## FAQ
-### Do I need separate dev/staging/prod workspaces to use this stack?
+### Do I need separate dev/staging/prod workspaces to use MLOps Stacks?
We recommend using separate dev/staging/prod Databricks workspaces for stronger isolation between environments. For example, Databricks REST API rate limits are applied per-workspace, so if using [Databricks Model Serving](https://docs.databricks.com/applications/mlflow/model-serving.html), using separate workspaces can help prevent high load in staging from DOSing your production model serving endpoints.
-However, you can run the stack against just a single workspace, against a dev and
-staging/prod workspace, etc. Just supply the same workspace URL for
+However, you can create a single-workspace stack by supplying the same workspace URL for
`input_databricks_staging_workspace_host` and `input_databricks_prod_workspace_host`. If you go this route, we recommend using different service principals to manage staging vs prod resources, to ensure that CI workloads running in staging cannot interfere with production resources.
-### I have an existing ML project. Can I productionize it using this stack?
-Yes. Currently, you can instantiate a new project from the stack and copy relevant components
-into your existing project to productionize it. The stack is modularized, so
+### I have an existing ML project. Can I productionize it using MLOps Stacks?
+Yes. Currently, you can instantiate a new project and copy relevant components
+into your existing project to productionize it. MLOps Stacks is modularized, so
you can e.g. copy just the GitHub Actions workflows under `.github` or ML resource configs under ``{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources``
-and ``{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/bundle.yml`` into your existing project.
+and ``{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml`` into your existing project.
-### Can I adopt individual components of the stack?
-For this use case, we recommend instantiating the full stack via [Databricks asset bundle templates](https://docs.databricks.com/en/dev-tools/bundles/templates.html)
-and copying the relevant stack subdirectories. For example, all ML resource configs
+### Can I adopt individual components of MLOps Stacks?
+For this use case, we recommend instantiating via [Databricks asset bundle templates](https://docs.databricks.com/en/dev-tools/bundles/templates.html)
+and copying the relevant subdirectories.
For example, all ML resource configs are defined under ``{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources``
-and ``{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/bundle.yml``, while CI/CD is defined e.g. under `.github`
+and ``{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml``, while CI/CD is defined e.g. under `.github`
if using GitHub Actions, or under `.azure` if using Azure DevOps.
-### Can I customize this stack?
+### Can I customize my MLOps Stack?
Yes. We provide the default stack in this repo as a production-friendly starting point for MLOps. However, in many cases you may need to customize the stack to match your organization's best practices. See [the stack customization guide](stack-customization.md) for details on how to do this.
-### Does the MLOps stack cover data (ETL) pipelines?
+### Does MLOps Stacks cover data (ETL) pipelines?
-Since MLOps Stack is based on [databricks CLI bundles](https://docs.databricks.com/dev-tools/cli/bundle-commands.html),
+Since MLOps Stacks is based on [databricks CLI bundles](https://docs.databricks.com/dev-tools/cli/bundle-commands.html),
it's not limited to ML workflows and assets - it works for assets across the Databricks Lakehouse. For instance, while the existing ML code samples contain feature engineering, training, model validation, deployment and batch inference workflows, you can use it for Delta Live Tables pipelines as well.
@@ -127,7 +125,7 @@ Please provide feedback (bug reports, feature requests, etc) via GitHub issues.
We welcome community contributions. For substantial changes, we ask that you first file a GitHub issue to facilitate discussion, before opening a pull request.
-This stack is implemented as a [Databricks asset bundle template](https://docs.databricks.com/en/dev-tools/bundles/templates.html)
+MLOps Stacks is implemented as a [Databricks asset bundle template](https://docs.databricks.com/en/dev-tools/bundles/templates.html)
that generates new projects given user-supplied parameters. Parametrized project code can be found under the `{{.input_root_dir}}` directory.
@@ -164,10 +162,10 @@ Run integration tests only:
pytest tests --large-only
```
-### Previewing stack changes
-When making changes to the stack, it can be convenient to see how those changes affect
-an actual new ML project created from the stack. To do this, you can create an example
-project from your local checkout of the stack, and inspect its contents/run tests within
+### Previewing changes
+When making changes to MLOps Stacks, it can be convenient to see how those changes affect
+a newly generated ML project. To do this, you can create an example
+project from your local checkout of the repo, and inspect its contents/run tests within
the project. We provide example project configs for Azure (using both GitHub and Azure DevOps) and AWS (using GitHub) under `tests/example-project-configs`.
@@ -175,14 +173,14 @@ To create an example Azure project, using Azure DevOps as the CI/CD platform, ru of the example project: ``` -# Note: update MLOPS_STACK_PATH to the path to your local checkout of the stack -MLOPS_STACK_PATH=~/mlops-stack -databricks bundle init "$MLOPS_STACK_PATH" --config-file "$MLOPS_STACK_PATH/tests/example-project-configs/azure/azure-devops.json" +# Note: update MLOPS_STACKS_PATH to the path to your local checkout of the MLOps Stacks repo +MLOPS_STACKS_PATH=~/mlops-stacks +databricks bundle init "$MLOPS_STACKS_PATH" --config-file "$MLOPS_STACKS_PATH/tests/example-project-configs/azure/azure-devops.json" ``` To create an example AWS project, using GitHub Actions for CI/CD, run: ``` -# Note: update MLOPS_STACK_PATH to the path to your local checkout of the stack -MLOPS_STACK_PATH=~/mlops-stack -databricks bundle init "$MLOPS_STACK_PATH" --config-file "$MLOPS_STACK_PATH/tests/example-project-configs/aws/aws-github.json" +# Note: update MLOPS_STACKS_PATH to the path to your local checkout of the MLOps Stacks repo +MLOPS_STACKS_PATH=~/mlops-stacks +databricks bundle init "$MLOPS_STACKS_PATH" --config-file "$MLOPS_STACKS_PATH/tests/example-project-configs/aws/aws-github.json" ``` diff --git a/databricks_template_schema.json b/databricks_template_schema.json index 3e5d7b0b..70d86ff6 100644 --- a/databricks_template_schema.json +++ b/databricks_template_schema.json @@ -4,7 +4,7 @@ "order": 1, "type": "string", "default": "my-mlops-project", - "description": "Welcome to MLOps Stack. For detailed information on project generation, see the README at https://github.com/databricks/mlops-stack/blob/main/README.md. \n\nProject Name" + "description": "Welcome to MLOps Stacks. For detailed information on project generation, see the README at https://github.com/databricks/mlops-stacks/blob/main/README.md. \n\nProject Name" }, "input_root_dir": { "order": 2, @@ -63,8 +63,8 @@ "input_schema_name": { "order": 11, "type": "string", - "description": "\nName of schema to use when registering a model in Unity Catalog. \nNote that this schema must already exist. Default", - "default": "schema_name" + "description": "\nName of schema to use when registering a model in Unity Catalog. \nNote that this schema must already exist, and we recommend keeping the name the same as the project name. Default", + "default": "my-mlops-project" }, "input_unity_catalog_read_user_group": { "order": 12, @@ -84,5 +84,6 @@ "description": "\nWhether to include MLflow Recipes. \nChoose from no, yes", "default": "no" } - } + }, + "success_message" : "\n✨ Your MLOps Stack has been created in the '{{.input_project_name}}' directory!\n\nPlease refer to the README.md of your project for further instructions on getting started." } \ No newline at end of file diff --git a/doc-images/mlops-stack.png b/doc-images/mlops-stack.png deleted file mode 100644 index cd296a86..00000000 Binary files a/doc-images/mlops-stack.png and /dev/null differ diff --git a/doc-images/mlops-stacks.png b/doc-images/mlops-stacks.png new file mode 100644 index 00000000..0659783e Binary files /dev/null and b/doc-images/mlops-stacks.png differ diff --git a/stack-customization.md b/stack-customization.md index be40571b..43131ba7 100644 --- a/stack-customization.md +++ b/stack-customization.md @@ -1,9 +1,9 @@ -# Stack Customization Guide -We provide the default stack in this repo as a production-friendly starting point for MLOps. 
+# MLOps Stacks Customization Guide
+We provide the default MLOps Stack in this repo as a production-friendly starting point for MLOps.
For generic enhancements not specific to your organization (e.g. adding support for a new CI/CD provider), we encourage you to consider contributing the
-change back to the default stack, so that the community can help maintain and enhance it.
+change back to the MLOps Stacks repo, so that the community can help maintain and enhance it.
However, in many cases you may need to customize the stack, for example if:
* You have different Databricks workspace environments (e.g. a "test" workspace for CI, in addition to dev/staging/prod)
@@ -19,20 +19,20 @@ default stack.
Before getting started, we encourage you to read the [contributor guide](README.md#contributing) to learn how to make, preview, and test changes to your custom stack.
-### Fork the default stack repo
-Fork the default stack repo. You may want to create a private fork if you're tailoring
+### Fork the MLOps Stacks repo
+Fork the MLOps Stacks repo. You may want to create a private fork if you're tailoring
the stack to the specific needs of your organization, or a public fork if you're creating a generic new stack.
-### (optional) Set up CI for your new stack
-Tests for the default stack are defined under the `tests/` directory and are
+### (optional) Set up CI
+Tests for MLOps Stacks are defined under the `tests/` directory and are
executed in CI by GitHub Actions workflows defined under `.github/`. We encourage you to configure
-CI in your own stack repo to ensure the stack continues to work as you make changes.
+CI in your own MLOps Stacks repo to ensure it continues to work as you make changes.
If you use GitHub Actions for CI, the provided workflows should work out of the box. Otherwise, you'll need to translate the workflows under `.github/` to the CI provider of your choice.
-### Update stack parameters
+### Update MLOps Stacks parameters
Update parameters in your fork as needed in `databricks_template_schema.json` and update the corresponding template variables in `library/template_variables.tmpl`. Pruning the set of parameters makes it easier for data scientists to start new projects, at the cost of reduced flexibility.
@@ -41,16 +41,15 @@ For example, you may have a fixed set of staging & prod Databricks workspaces (o
also run all of your ML pipelines on a single cloud, in which case the `input_cloud` parameter is unnecessary.
The easiest way to prune parameters and replace them with hardcoded values is to follow
-the [contributor guide](README.md#previewing-stack-changes) to generate an example project with
-parameters substituted-in, and then copy the generated project contents back into your stack.
+the [contributor guide](README.md#previewing-changes) to generate an example project with
+parameters substituted in, and then copy the generated project contents back into your MLOps Stacks repo.
## Customize individual components
### Example ML code
-The default stack provides example ML code using [MLflow recipes](https://mlflow.org/docs/latest/recipes.html#).
+MLOps Stacks provides example ML code.
You may want to customize the example code, e.g. further prune it down into a skeleton for data scientists
-to fill out, or remove and replace the use of MLflow Recipes if you expect data scientists to work on problem
-types that are currently unsupported by MLflow Recipes.
+to fill out.
If you customize this component, you can still use the CI/CD and ML resource components to build production ML pipelines, as long as you provide ML notebooks with the expected interface (a minimal sketch of this interface follows below). For example, model training under ``template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/`` and inference under
@@ -60,14 +59,13 @@
You may also want to update developer-facing docs under `template/{{.input_root_dir}}/docs/ml-developer-guide.md` or `template/{{.input_root_dir}}/docs/ml-developer-guide-fs.md`, which will be read by users of your stack.
### CI/CD workflows
-The default stack currently has the following sub-components for CI/CD:
+MLOps Stacks currently has the following sub-components for CI/CD:
* CI/CD workflow logic defined under `template/{{.input_root_dir}}/.github/` for testing and deploying ML code and models
-* Automated scripts and docs for setting up CI/CD under `template/{{.input_root_dir}}/.mlops-setup-scripts/`
* Logic to trigger model deployment through REST API calls to your CD system when model training completes.
-  This logic is currently captured in ``template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/notebooks/TriggerModelDeploy.py``
+  This logic is currently captured in ``template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/notebooks/ModelDeployment.py``
### ML resource configs
-Root ML resource config file can be found as ``{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/bundle.yml``.
+The root ML resource config file can be found at ``{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml``.
It defines the ML config resources to be included and the workspace host for each deployment target.
ML resource configs (databricks CLI bundle code definitions of ML jobs, experiments, models, etc.) can be found under
@@ -80,7 +78,7 @@
When updating this component, you may want to update developer-facing docs in ``template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/README.md``.
### Docs
-After making stack customizations, make any changes needed to
-the stack docs under `template/{{.input_root_dir}}/docs` and in the main README
-(`template/{{.input_root_dir}}/README.md`) to reflect any updates you've made to the stack.
-For example, you may want to include a link to your custom stack in `template/{{.input_root_dir}}/README.md`.
+After making customizations, make any changes needed to
+the docs under `template/{{.input_root_dir}}/docs` and in the main README
+(`template/{{.input_root_dir}}/README.md`) to reflect any updates you've made to the MLOps Stacks repo.
+For example, you may want to include a link to your custom MLOps Stacks repo in `template/{{.input_root_dir}}/README.md`.
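To make the expected notebook interface mentioned under "Example ML code" concrete, a pruned-down training notebook skeleton could look roughly like this (a hedged sketch: `load_training_data` and `train_model` are hypothetical placeholders for your own logic, and `dbutils` is only defined when running on Databricks):

```python
# Skeleton of a training notebook that keeps the interface the CI/CD and
# ML resource components expect: experiment/model-name widgets in, a
# registered model version out.
import mlflow

dbutils.widgets.text("experiment_name", "/dev-my-experiment", label="MLflow experiment name")
# With Models in Unity Catalog, the model name is three-level: <catalog>.<schema>.<model>.
dbutils.widgets.text("model_name", "dev.my_schema.my_model", label="Full (Three-Level) Model Name")

experiment_name = dbutils.widgets.get("experiment_name")
model_name = dbutils.widgets.get("model_name")

mlflow.set_experiment(experiment_name)
mlflow.set_registry_uri("databricks-uc")  # register the model in Unity Catalog

with mlflow.start_run():
    model = train_model(load_training_data())  # hypothetical helpers; replace with your ML code
    mlflow.sklearn.log_model(model, "model", registered_model_name=model_name)
```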
diff --git a/template/update_layout.tmpl b/template/update_layout.tmpl index 857539f2..4dcf5548 100644 --- a/template/update_layout.tmpl +++ b/template/update_layout.tmpl @@ -61,6 +61,11 @@ {{ skip (printf `%s/%s` $root_dir `docs/ml-developer-guide-fs.md`) }} {{ end }} +# Remove utils if using Models in Unity Catalog +{{ if (eq .input_include_models_in_unity_catalog `yes`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `utils.py`) }} +{{ end }} + # Remove template files {{ skip `update_layout` }} {{ skip `run_validations` }} diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl index 3b873cd0..407313a8 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-bundle-cicd.yml.tmpl @@ -1,6 +1,6 @@ # This Azure Pipeline validates and deploys bundle config (ML resource config and more) -# defined under {{template `project_name_alphanumeric_underscore` .}}/databricks-resource/* -# and {{template `project_name_alphanumeric_underscore` .}}/bundle.yml. +# defined under {{template `project_name_alphanumeric_underscore` .}}/resources/* +# and {{template `project_name_alphanumeric_underscore` .}}/databricks.yml. # The bundle is validated (CI) upon making a PR against the {{template `default_branch` .}} branch. # Bundle resources defined for staging are deployed when a PR is merged into the {{template `default_branch` .}} branch. # Bundle resources defined for prod are deployed when a PR is merged into the {{template `release_branch` .}} branch. diff --git a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl index e7e51c67..073ece3e 100644 --- a/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl +++ b/template/{{.input_root_dir}}/.azure/devops-pipelines/{{.input_project_name}}-tests-ci.yml.tmpl @@ -2,8 +2,8 @@ # This pipeline is triggered upon making a PR against the {{template `default_branch` .}} branch. # Unit tests are defined under {{template `project_name_alphanumeric_underscore` .}}/tests # and are executed on the Azure Pipelines agent. 
-# The integration test deploys and runs the model_training_job defined in {{template `project_name_alphanumeric_underscore` .}}/databricks-resource/model-workflow-resource.yml -# This integration test is run in the staging workspace, as defined under {{template `project_name_alphanumeric_underscore` .}}/bundle.yml +# The integration test deploys and runs the model_training_job defined in {{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml +# This integration test is run in the staging workspace, as defined under {{template `project_name_alphanumeric_underscore` .}}/databricks.yml trigger: branches: diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl index 9e56783c..db11d009 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-prod.yml.tmpl @@ -1,6 +1,6 @@ # This GitHub workflow deploys Bundle resources (ML resource config and more) # defined under {{template `project_name_alphanumeric_underscore` .}}/resources/* -# and {{template `project_name_alphanumeric_underscore` .}}/bundle.yml with prod deployment target configs, +# and {{template `project_name_alphanumeric_underscore` .}}/databricks.yml with prod deployment target configs, # when PRs are merged into the release branch name: Bundle Deployment for {{template `project_name` .}} Prod diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl index ee9f55dd..d9029393 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-cd-staging.yml.tmpl @@ -1,6 +1,6 @@ # This GitHub workflow deploys Bundle resources (ML resource config and more) # defined under {{template `project_name_alphanumeric_underscore` .}}/resources/* -# and {{template `project_name_alphanumeric_underscore` .}}/bundle.yml with staging deployment target configs, +# and {{template `project_name_alphanumeric_underscore` .}}/databricks.yml with staging deployment target configs, # when PRs are merged into the default branch name: Bundle Deployment for {{template `project_name` .}} Staging diff --git a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl index 247cb6ea..7cb70fd7 100644 --- a/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl +++ b/template/{{.input_root_dir}}/.github/workflows/{{.input_project_name}}-bundle-ci.yml.tmpl @@ -1,6 +1,6 @@ # This GitHub workflow validates Bundle config (ML resource config and more) # defined under {{template `project_name_alphanumeric_underscore` .}}/resources/* -# and {{template `project_name_alphanumeric_underscore` .}}/bundle.yml, when PRs are merged into the main branch +# and {{template `project_name_alphanumeric_underscore` .}}/databricks.yml, when PRs are merged into the main branch name: Bundle validation for {{template `project_name` .}} on: diff --git a/template/{{.input_root_dir}}/README.md.tmpl b/template/{{.input_root_dir}}/README.md.tmpl 
index 8dfdeef2..8973b753 100644
--- a/template/{{.input_root_dir}}/README.md.tmpl
+++ b/template/{{.input_root_dir}}/README.md.tmpl
@@ -1,7 +1,7 @@
# {{ template `root_dir` .}}
This directory contains an ML project based on the default
-[Databricks MLOps Stack](https://github.com/databricks/mlops-stack),
+[Databricks MLOps Stacks](https://github.com/databricks/mlops-stacks),
defining a production-grade ML pipeline for automated retraining and batch inference of an ML model on tabular data. See the [Project overview](docs/project-overview.md) for details on the ML pipeline and code structure
@@ -38,7 +38,7 @@ or pipeline resources (e.g. use a larger instance type for model training) via p
It's possible to use the repo as a monorepo that contains multiple projects. All projects share the same workspaces and service principals. For example, assuming there's an existing repo with root directory name `monorepo_root_dir` and project name `project1`:
-1. Create another project from cookiecutter with project name `project2` and root directory name `project2`.
+1. Create another project with `databricks bundle init`, using project name `project2` and root directory name `project2`.
2. Copy the internal directory `project2/project2` to the root directory of the existing repo, `monorepo_root_dir/project2`.
{{ if or (eq .input_cicd_platform `github_actions`) (eq .input_cicd_platform `github_actions_for_github_enterprise_servers`) -}}
3. Copy YAML files from `project2/.github/workflows/` to `monorepo_root_dir/.github/workflows/` and make sure there are no name conflicts.
diff --git a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl
index 76672f98..8153418a 100644
--- a/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl
+++ b/template/{{.input_root_dir}}/docs/mlops-setup.md.tmpl
@@ -105,7 +105,7 @@ Two Azure DevOps Pipelines are defined under `.azure/devops-pipelines`:
  - **[CI]** Performs unit and integration tests
  - Triggered on PR to main
- **`{{template `project_name` .}}-bundle-cicd.yml`**:
-  - **[CI]** Performs validation of Databricks resources defined under `{{template `project_name_alphanumeric_underscore` .}}/databricks-resource`
+  - **[CI]** Performs validation of Databricks resources defined under `{{template `project_name_alphanumeric_underscore` .}}/resources`
  - Triggered on PR to main
  - **[CD]** Deploys Databricks resources to the staging workspace
  - Triggered on merging into main
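The validation step these pipelines perform can also be reproduced locally or scripted in another CI system (a hedged sketch, not the pipelines' actual implementation: it assumes the `databricks` CLI is installed, `DATABRICKS_TOKEN` is set in the environment, `BUNDLE_ROOT` is a hypothetical path to the directory containing `databricks.yml`, and that your CLI version supports the `--target` flag for selecting a deployment target):

```python
import subprocess

BUNDLE_ROOT = "my_mlops_project"  # hypothetical path to the directory with databricks.yml

# Validate the bundle config for each declared deployment target before deploying.
for target in ("dev", "staging", "prod"):
    subprocess.run(
        ["databricks", "bundle", "validate", "--target", target],
        cwd=BUNDLE_ROOT,
        check=True,
    )
```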
diff --git a/template/{{.input_root_dir}}/docs/project-overview.md.tmpl b/template/{{.input_root_dir}}/docs/project-overview.md.tmpl index 8360d414..2cf9f105 100644 --- a/template/{{.input_root_dir}}/docs/project-overview.md.tmpl +++ b/template/{{.input_root_dir}}/docs/project-overview.md.tmpl @@ -6,10 +6,10 @@ This project defines an ML pipeline for automated retraining and batch inference of an ML model on tabular data. -See the full pipeline structure below. The [stack README](https://github.com/databricks/mlops-stack/blob/main/Pipeline.md) +See the full pipeline structure below. The [MLOps Stacks README](https://github.com/databricks/mlops-stacks/blob/main/Pipeline.md) contains additional details on how ML pipelines are tested and deployed across each of the dev, staging, prod environments below. -![MLOps Stack diagram](images/mlops-stack-summary.png) +![MLOps Stacks diagram](images/mlops-stack-summary.png) ## Code structure @@ -30,7 +30,7 @@ contained in the following files: │ │ │ ├── requirements.txt <- Specifies Python dependencies for ML code (for example: model training, batch inference). │ │ -│ ├── bundle.yml <- bundle.yml is the root ML resource config file for the ML project that can be loaded by databricks CLI bundles. It defines the bundle name, workspace URL and resource config component to be included. +│ ├── databricks.yml <- databricks.yml is the root ML resource config file for the ML project that can be loaded by databricks CLI bundles. It defines the bundle name, workspace URL and resource config component to be included. │ │ {{ if and (eq .input_include_feature_store `no`) (eq .input_include_mlflow_recipes `no`) -}} │ ├── training <- Training folder contains Notebook that trains and registers the model. diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/README.md.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/README.md.tmpl index 1ad43f18..ef973eed 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/README.md.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/README.md.tmpl @@ -47,7 +47,7 @@ df = spark.table( ).drop("fare_amount") df.write.mode("overwrite").saveAsTable( - {{ if (eq .input_include_models_in_unity_catalog `yes`) }}name="hive_metastore.default.taxi_scoring_sample" + {{ if (eq .input_include_models_in_unity_catalog `no`) }}name="hive_metastore.default.taxi_scoring_sample" {{- else -}}name=".{{template `schema_name` .}}.feature_store_inference_input"{{ end }} ) ``` diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/notebooks/BatchInference.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/notebooks/BatchInference.py.tmpl index 79e599db..522b9bfb 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/notebooks/BatchInference.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/notebooks/BatchInference.py.tmpl @@ -34,7 +34,7 @@ dbutils.widgets.text( {{else}} # Unity Catalog registered model name to use for the trained mode. 
dbutils.widgets.text( - "model_name", "dev.{{template `schema_name` .}}.{{template `model_name` .}}", label="Model Name" + "model_name", "dev.{{template `schema_name` .}}.{{template `model_name` .}}", label="Full (Three-Level) Model Name" ){{end}} # COMMAND ---------- @@ -65,8 +65,7 @@ sys.path.append("../..") # DBTITLE 1,Define input and output variables {{- if (eq .input_include_models_in_unity_catalog "no") }} -from utils import get_deployed_model_stage_for_env{{else}} -from utils import get_deployed_model_alias_for_env{{end}} +from utils import get_deployed_model_stage_for_env{{end}} env = dbutils.widgets.get("env") input_table_name = dbutils.widgets.get("input_table_name") @@ -78,7 +77,7 @@ assert model_name != "", "model_name notebook parameter must be specified" {{- if (eq .input_include_models_in_unity_catalog "no") }} stage = get_deployed_model_stage_for_env(env) model_uri = f"models:/{model_name}/{stage}"{{else}} -alias = get_deployed_model_alias_for_env(env) +alias = "Champion" model_uri = f"models:/{model_name}@{alias}"{{end}} # COMMAND ---------- diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/deploy.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/deploy.py.tmpl index 0806a094..d06c960a 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/deploy.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/deploy.py.tmpl @@ -2,8 +2,7 @@ import sys import pathlib sys.path.append(str(pathlib.Path(__file__).parent.parent.parent.resolve())) -{{if (eq .input_include_models_in_unity_catalog "no")}}from utils import get_deployed_model_stage_for_env{{else}} -from utils import get_deployed_model_alias_for_env{{end}} +{{if (eq .input_include_models_in_unity_catalog "no")}}from utils import get_deployed_model_stage_for_env{{end}} from mlflow.tracking import MlflowClient {{ if (eq .input_include_models_in_unity_catalog "no") }} @@ -42,7 +41,7 @@ def deploy(model_uri, env): _, model_name, version = model_uri.split("/") client = MlflowClient(registry_uri="databricks-uc") mv = client.get_model_version(model_name, version) - target_alias = get_deployed_model_alias_for_env(env) + target_alias = "Champion" if target_alias not in mv.aliases: client.set_registered_model_alias( name=model_name, @@ -50,8 +49,8 @@ def deploy(model_uri, env): version=version) print(f"Assigned alias '{target_alias}' to model version {model_uri}.") - # remove "challenger" alias if assigning "Champion" alias - if target_alias == "Champion": + # remove "Challenger" alias if assigning "Champion" alias + if target_alias == "Champion" and "Challenger" in mv.aliases: print(f"Removing 'Challenger' alias from model version {model_uri}.") client.delete_registered_model_alias( name=model_name, diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/README.md.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/README.md.tmpl index c08b3a10..f0a111a1 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/README.md.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/README.md.tmpl @@ -12,7 +12,7 @@ ## Intro ### databricks CLI bundles -MLOps-stack ML 
resources are configured and deployed through [databricks CLI bundles]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "dev-tools/cli/bundle-cli.html")) }}). +MLOps Stacks ML resources are configured and deployed through [databricks CLI bundles]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "dev-tools/cli/bundle-cli.html")) }}). The bundle setting file must be expressed in YAML format and must contain at minimum the top-level bundle mapping. The databricks CLI bundles top level is defined by file `{{template `project_name_alphanumeric_underscore` .}}/databricks.yml`. @@ -32,6 +32,16 @@ Deployment configs of different deployment targets share the general ML resource This project ships with CI/CD workflows for developing and deploying ML resource configurations based on deployment config. +{{- if (eq .input_include_models_in_unity_catalog "yes") }} + +NOTE: For Model Registry in Unity Catalog, we expect a catalog to exist with the name of the deployment target by default. For example, if the deployment target is `dev`, we expect a catalog named `dev` to exist in the workspace. +If you want to use different catalog names, please update the `targets` declared in the `{{template `project_name_alphanumeric_underscore` .}}/databricks.yml` and `{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml` files. +If changing the `staging`, `prod`, or `test` deployment targets, you'll need to update the +{{- if or (eq .input_cicd_platform `github_actions`) (eq .input_cicd_platform `github_actions_for_github_enterprise_servers`) }} workflows located in the `.github/workflows` directory. +{{- else if (eq .input_cicd_platform `azure_devops`) }} pipelines located in the `azure-pipelines` directory.{{- end }} +{{- end }} + + | Deployment Target | Description | Databricks Workspace | Model Name | Experiment Name | |-------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------|-------------------------------------|------------------------------------------------| | dev | The `dev` deployment target is used by ML engineers to deploy ML resources to development workspace with `dev` configs. The config is for ML project development purposes. | dev workspace | dev-{{template `model_name` .}} | /dev-{{template `experiment_base_name` .}} | @@ -69,7 +79,7 @@ Alternatively, you can use the other approaches described in the [databricks CLI ### Validate and provision ML resource configurations 1. After installing the databricks CLI and creating the `DATABRICKS_TOKEN` env variable, change to the `{{template `project_name_alphanumeric_underscore` .}}` directory. 2. Run `databricks bundle validate` to validate the Databricks resource configurations. -3. Run `databricks bundle deploy` to provision the Databricks resource configurations to the dev workspace. The resource configurations and your ML code will be copied together to the dev workspace. The defined resources such as Databricks Workflows, MLflow Model and MLflow Experiment will be provisioned according to the config files under `{{template `project_name_alphanumeric_underscore` .}}/databricks-resource`. +3. Run `databricks bundle deploy` to provision the Databricks resource configurations to the dev workspace. 
The resource configurations and your ML code will be copied together to the dev workspace. The defined resources such as Databricks Workflows, MLflow Model and MLflow Experiment will be provisioned according to the config files under `{{template `project_name_alphanumeric_underscore` .}}/resources`. 4. Go to the Databricks dev workspace, check the defined model, experiment and workflows status, and interact with the created workflows. ### Destroy ML resource configurations @@ -98,7 +108,7 @@ Follow the next section to configure the input and output data tables for the ba ### Setting up the batch inference job The batch inference job expects an input Delta table with a schema that your registered model accepts. To use the batch inference job, set up such a Delta table in both your staging and prod workspaces. -Following this, update the batch_inference_job base parameters in `{{template `project_name_alphanumeric_underscore` .}}/databricks-resource/batch-inference-workflow-resource.yml` to pass +Following this, update the batch_inference_job base parameters in `{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml` to pass the name of the input Delta table and the name of the output Delta table to which to write batch predictions. As the batch job will be run with the credentials of the service principal that provisioned it, make sure that the service @@ -109,7 +119,7 @@ principal corresponding to a particular environment has permissions to read the * `MODIFY` permission for the output table if it pre-dates your job. ### Setting up model validation -The model validation stack focuses on building a plug-and-play stack component for continuous deployment (CD) of models +The model validation workflow focuses on building a plug-and-play stack component for continuous deployment (CD) of models in staging and prod. Its central purpose is to evaluate a registered model and validate its quality before deploying the model to Production/Staging. 
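For reference, the "Champion"/"Challenger" alias handling that the updated `deploy.py` above implements boils down to roughly the following (a condensed sketch, assuming an MLflow version with registered-model alias APIs, i.e. MLflow >= 2.3, and a Unity Catalog model registry):

```python
from mlflow.tracking import MlflowClient

def promote_to_champion(model_name: str, version: str) -> None:
    """Assign the "Champion" alias to a validated model version and drop a stale "Challenger" alias."""
    client = MlflowClient(registry_uri="databricks-uc")
    mv = client.get_model_version(model_name, version)
    if "Champion" not in mv.aliases:
        client.set_registered_model_alias(name=model_name, alias="Champion", version=version)
    # Mirrors the guard added in the diff: only delete "Challenger" if it is actually set.
    if "Challenger" in mv.aliases:
        client.delete_registered_model_alias(name=model_name, alias="Challenger")
```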
@@ -133,7 +143,7 @@ new_cluster: &new_cluster spark_version: 13.3.x-cpu-ml-scala2.12 node_type_id: {{template `cloud_specific_node_type_id` .}} custom_tags: - clusterSource: mlops-stack/0.1 + clusterSource: mlops-stack/0.2 resources: jobs: @@ -188,7 +198,7 @@ new_cluster: &new_cluster spark_version: 13.3.x-cpu-ml-scala2.12 node_type_id: {{template `cloud_specific_node_type_id` .}} custom_tags: - clusterSource: mlops-stack/0.1 + clusterSource: mlops-stack/0.2 resources: jobs: diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl index f4e325a3..1c1ff737 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl @@ -4,7 +4,7 @@ new_cluster: &new_cluster spark_version: 13.3.x-cpu-ml-scala2.12 node_type_id: {{template `cloud_specific_node_type_id` .}} custom_tags: - clusterSource: mlops-stack/0.1 + clusterSource: mlops-stack/0.2 permissions: &permissions permissions: diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl index 63450ad3..e23ec9d3 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl @@ -4,7 +4,7 @@ new_cluster: &new_cluster spark_version: 13.3.x-cpu-ml-scala2.12 node_type_id: {{template `cloud_specific_node_type_id` .}} custom_tags: - clusterSource: mlops-stack/0.1 + clusterSource: mlops-stack/0.2 permissions: &permissions permissions: diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml.tmpl index 63c02878..470825f2 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml.tmpl @@ -7,8 +7,8 @@ targets: description: MLflow registered model for the "{{template `project_name` .}}" ML Project for ${bundle.target} deployment target. {{- else -}} registered_models: - model: - comment: Registered model in Unity Catalog for the "{{template `project_name` .}}" ML Project for ${bundle.target} deployment target.{{end}} + model: + comment: Registered model in Unity Catalog for the "{{template `project_name` .}}" ML Project for ${bundle.target} deployment target.{{end}} test: resources: @@ -17,8 +17,8 @@ targets: description: MLflow registered model for the "{{template `project_name` .}}" ML Project for ${bundle.target} deployment target. 
{{- else -}} registered_models: - model: - comment: Registered model in Unity Catalog for the "{{template `project_name` .}}" ML Project for ${bundle.target} deployment target.{{end}} + model: + comment: Registered model in Unity Catalog for the "{{template `project_name` .}}" ML Project for ${bundle.target} deployment target.{{end}} staging: resources: @@ -27,8 +27,8 @@ targets: description: MLflow registered model for the "{{template `project_name` .}}" ML Project for ${bundle.target} deployment target. {{- else -}} registered_models: - model: - comment: Registered model in Unity Catalog for the "{{template `project_name` .}}" ML Project for ${bundle.target} deployment target.{{end}} + model: + comment: Registered model in Unity Catalog for the "{{template `project_name` .}}" ML Project for ${bundle.target} deployment target.{{end}} prod: resources: @@ -38,19 +38,17 @@ targets: MLflow registered model for the "{{template `project_name` .}}" ML Project. See the corresponding [Git repo]($#{var.git_repo_url}) for details on the project. Links: - * [Git Repo]($#{var.git_repo_url}): contains ML code for the current project. * [Recurring model training job]({{template `databricks_prod_workspace_host` .}}#job/${resources.jobs.model_training_job.id}): trains fresh model versions using the latest ML code. * [Recurring batch inference job]({{template `databricks_prod_workspace_host` .}}#job/${resources.jobs.batch_inference_job.id}): applies the latest ${bundle.target} model version for batch inference. {{- else -}} registered_models: - model: - comment: | - Registered model in Unity Catalog for the "mlops-stack-models-uc" ML Project. See the corresponding [Git repo]($#{var.git_repo_url}) for details on the project. - - Links: - * [Git Repo]($#{var.git_repo_url}): contains ML code for the current project. - * [Recurring model training job]({{template `databricks_prod_workspace_host` .}}#job/${resources.jobs.model_training_job.id}): trains fresh model versions using the latest ML code. - * [Recurring batch inference job]({{template `databricks_prod_workspace_host` .}}#job/${resources.jobs.batch_inference_job.id}): applies the latest ${bundle.target} model version for batch inference. + model: + comment: | + Registered model in Unity Catalog for the "{{template `project_name` .}}" ML Project. See the corresponding [Git repo]($#{var.git_repo_url}) for details on the project. + + Links: + * [Recurring model training job]({{template `databricks_prod_workspace_host` .}}#job/${resources.jobs.model_training_job.id}): trains fresh model versions using the latest ML code. + * [Recurring batch inference job]({{template `databricks_prod_workspace_host` .}}#job/${resources.jobs.batch_inference_job.id}): applies the latest ${bundle.target} model version for batch inference. 
{{ end }} # Allow users to read the experiment {{ if (eq .input_include_models_in_unity_catalog `no`) }}and the model{{end}} diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl index f69e5afa..8f86f208 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl @@ -4,7 +4,7 @@ new_cluster: &new_cluster spark_version: 13.3.x-cpu-ml-scala2.12 node_type_id: {{template `cloud_specific_node_type_id` .}} custom_tags: - clusterSource: mlops-stack/0.1 + clusterSource: mlops-stack/0.2 permissions: &permissions permissions: diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/Train.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/Train.py.tmpl index 8933947a..16e7e6ab 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/Train.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/Train.py.tmpl @@ -71,7 +71,7 @@ dbutils.widgets.text( {{else}} # Unity Catalog registered model name to use for the trained model. dbutils.widgets.text( - "model_name", "dev.{{template `schema_name` .}}.{{template `model_name` .}}", label="Model Name" + "model_name", "dev.{{template `schema_name` .}}.{{template `model_name` .}}", label="Full (Three-Level) Model Name" ) {{end -}} @@ -89,6 +89,7 @@ model_name = dbutils.widgets.get("model_name") import mlflow mlflow.set_experiment(experiment_name) + {{- if (eq .input_include_models_in_unity_catalog "yes") }} mlflow.set_registry_uri('databricks-uc') diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/TrainWithFeatureStore.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/TrainWithFeatureStore.py.tmpl index 80685618..e066076b 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/TrainWithFeatureStore.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/TrainWithFeatureStore.py.tmpl @@ -71,7 +71,7 @@ dbutils.widgets.text( {{else}} # Unity Catalog registered model name to use for the trained mode. 
dbutils.widgets.text( - "model_name", "dev.{{template `schema_name` .}}.{{template `model_name` .}}", label="Model Name" + "model_name", "dev.{{template `schema_name` .}}.{{template `model_name` .}}", label="Full (Three-Level) Model Name" ) {{end -}} diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/utils.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/utils.py.tmpl index 6ce4e3b2..a261a525 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/utils.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/utils.py.tmpl @@ -1,9 +1,8 @@ """This module contains utils shared between different notebooks""" -{{- if (eq .input_include_models_in_unity_catalog "no") }} - def get_deployed_model_stage_for_env(env): - """Get the model version stage under which the latest deployed model version can be found + """ + Get the model version stage under which the latest deployed model version can be found for the current environment :param env: Current environment :return: Model version stage @@ -20,24 +19,3 @@ def get_deployed_model_stage_for_env(env): "test": "Production", } return _MODEL_STAGE_FOR_ENV[env] -{{else}} - - -def get_deployed_model_alias_for_env(env): - """Get the registered model alias under which the latest deployed model version can be found - for the current environment - :param env: Current environment - :return: Model alias - """ - # For a model registered in Unity Catalog to be served, it needs have either a "Champion" or "Challenger" alias. - # ({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "applications/machine-learning/manage-model-lifecycle/index.html#transition-a-model-stage")) }}). - # For models in dev and staging environments, we assign the model version the "Challenger" alias, and in prod we assign the model version - # the "Champion" alias. - _MODEL_STAGE_FOR_ENV = { - "dev": "Challenger", - "staging": "Challenger", - "prod": "Champion", - "test": "Challenger", - } - return _MODEL_STAGE_FOR_ENV[env] -{{end -}} diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/notebooks/ModelValidation.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/notebooks/ModelValidation.py.tmpl index b286b5a6..5d6e4f67 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/notebooks/ModelValidation.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/notebooks/ModelValidation.py.tmpl @@ -3,7 +3,7 @@ # Model Validation Notebook ## # This notebook uses mlflow model validation API to run mode validation after training and registering a model -# in model registry, before deploying it to Production stage. +# in model registry, before deploying it to the {{- if (eq .input_include_models_in_unity_catalog "no") }}"Production" stage{{else}} "Champion" alias{{end -}}. # # It runs as part of CD and by an automated model training job -> validation -> deployment job defined under ``{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml`` # @@ -14,8 +14,8 @@ # * `run_mode` - The `run_mode` defines whether model validation is enabled or not. It can be one of the three values: # * `disabled` : Do not run the model validation notebook. 
# * `dry_run` : Run the model validation notebook. Ignore failed model validation rules and proceed to move
-# model to Production stage.
-# * `enabled` : Run the model validation notebook. Move model to Production stage only if all model validation
+# model to the {{- if (eq .input_include_models_in_unity_catalog "no") }}"Production" stage{{else}} "Champion" alias{{end -}}.
+# * `enabled` : Run the model validation notebook. Move model to the {{- if (eq .input_include_models_in_unity_catalog "no") }} "Production" stage {{else}} "Champion" alias {{end -}} only if all model validation
# rules are passing.
{{- if (eq .input_include_models_in_unity_catalog "no") }}
# * enable_baseline_comparison - Whether to load the current registered "Production" stage model as baseline.
@@ -84,7 +84,7 @@ dbutils.widgets.text("evaluator_config_loader_function", "evaluator_config", "Ev
{{- if (eq .input_include_models_in_unity_catalog "no") }}
dbutils.widgets.text("model_name", "dev-{{template `model_name` .}}", "Model Name")
{{else}}
-dbutils.widgets.text("model_name", "dev.{{template `schema_name` .}}.{{template `model_name` .}}", "Model Name")
+dbutils.widgets.text("model_name", "dev.{{template `schema_name` .}}.{{template `model_name` .}}", "Full (Three-Level) Model Name")
{{end -}}
dbutils.widgets.text("model_version", "", "Candidate Model Version")
@@ -92,7 +92,7 @@ dbutils.widgets.text("model_version", "", "Candidate Model Version")
{{ if (eq .input_include_feature_store `yes`) }}
print(
    "Currently model validation is not supported for models registered with feature store. Please refer to "
-    "issue https://github.com/databricks/mlops-stack/issues/70 for more details."
+    "issue https://github.com/databricks/mlops-stacks/issues/70 for more details."
)
dbutils.notebook.exit(0){{ end }}
run_mode = dbutils.widgets.get("run_mode").lower()
diff --git a/tests/install.sh b/tests/install.sh
index 9bc145fa..2d4201df 100755
--- a/tests/install.sh
+++ b/tests/install.sh
@@ -4,7 +4,7 @@
# Usage in the wild uses the "curl | sh" approach and we need that to continue working.
set -e
-VERSION="0.204.0"
+VERSION="0.208.1"
FILE="databricks_cli_$VERSION"
# Include operating system in file name.
diff --git a/tests/test_create_project.py b/tests/test_create_project.py
index d9e4667e..72a45398 100644
--- a/tests/test_create_project.py
+++ b/tests/test_create_project.py
@@ -89,7 +89,7 @@ def test_no_template_strings_after_param_substitution(generated_project_dir):
def test_no_databricks_workspace_urls():
-    # Test that there are no accidental hardcoded Databricks workspace URLs included in stack source files
+    # Test that there are no accidental hardcoded Databricks workspace URLs included in source files
    template_dir = pathlib.Path(__file__).parent.parent / "template"
    test_paths = [os.path.join(template_dir, path) for path in paths(template_dir)]
    assert_no_disallowed_strings_in_files(
@@ -165,7 +165,7 @@ def test_generate_project_with_default_values(
    include_models_in_unity_catalog,
):
    """
-    Asserts the default parameter values for the stack. The project name and experiment
+    Asserts the default parameter values. The project name and experiment
    parent directory are excluded from this test as they are covered in other tests.
    If this test fails due to an update of the default values, please do the following checks:
    - The default param value constants in this test are up to date.
@@ -204,7 +204,7 @@ def test_generate_project_check_delta_output( include_models_in_unity_catalog, ): """ - Asserts the behavior of Delta Table-related artifacts when generating Stack. + Asserts the behavior of Delta Table-related artifacts when generating MLOps Stacks. """ context = { "input_project_name": TEST_PROJECT_NAME, @@ -241,7 +241,7 @@ def test_generate_project_check_feature_store_output( include_models_in_unity_catalog, ): """ - Asserts the behavior of feature store-related artifacts when generating Stack. + Asserts the behavior of feature store-related artifacts when generating MLOps Stacks. """ context = { "input_project_name": TEST_PROJECT_NAME, @@ -278,7 +278,7 @@ def test_generate_project_check_recipe_output( include_models_in_unity_catalog, ): """ - Asserts the behavior of MLflow Recipes-related artifacts when generating Stack. + Asserts the behavior of MLflow Recipes-related artifacts when generating MLOps Stacks. """ context = { "input_project_name": TEST_PROJECT_NAME,
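For contributors adding new example-project configs, a generation test generally follows the same shape as the ones updated above: initialize the template against a config file, then assert on the generated output. A hedged sketch (it assumes pytest's built-in `tmp_path` fixture, an installed `databricks` CLI, and a hypothetical `MLOPS_STACKS_PATH` environment variable pointing at a local checkout of the repo):

```python
import os
import pathlib
import subprocess

def test_generate_aws_github_project(tmp_path):
    repo = os.environ.get("MLOPS_STACKS_PATH", str(pathlib.Path.home() / "mlops-stacks"))
    config = f"{repo}/tests/example-project-configs/aws/aws-github.json"
    # Generate a project into the pytest-provided temp directory.
    subprocess.run(
        ["databricks", "bundle", "init", repo, "--config-file", config],
        cwd=tmp_path,
        check=True,
    )
    # The generated project should contain a root bundle config (databricks.yml).
    assert any(tmp_path.rglob("databricks.yml"))
```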