diff --git a/website/docs/docs/build/jinja-macros.md b/website/docs/docs/build/jinja-macros.md index 538a3a5e4c6..44bc85872f5 100644 --- a/website/docs/docs/build/jinja-macros.md +++ b/website/docs/docs/build/jinja-macros.md @@ -126,7 +126,7 @@ from app_data.payments ### Using a macro from a package -A number of useful macros have also been grouped together into [packages](docs/build/packages) — our most popular package is [dbt-utils](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/). +A number of useful macros have also been grouped together into [packages](/docs/build/packages) — our most popular package is [dbt-utils](https://hub.getdbt.com/dbt-labs/dbt_utils/latest/). After installing a package into your project, you can use any of the macros in your own project — make sure you qualify the macro by prefixing it with the [package name](/reference/dbt-jinja-functions/project_name): diff --git a/website/docs/docs/build/metrics-overview.md b/website/docs/docs/build/metrics-overview.md index fd84c96e0ec..4a926589de2 100644 --- a/website/docs/docs/build/metrics-overview.md +++ b/website/docs/docs/build/metrics-overview.md @@ -98,7 +98,7 @@ metrics: ### Ratio metrics -[Ratio metrics](/docs/build/ratio) involve a numerator measure and a denominator measure. A `constraint` string can be applied, to both numerator and denominator, or applied separately to the numerator or denominator. +[Ratio metrics](/docs/build/ratio) involve a numerator metric and a denominator metric. A `constraint` string can be applied, to both numerator and denominator, or applied separately to the numerator or denominator. ```yaml # Ratio Metric @@ -106,25 +106,25 @@ metrics: - name: cancellation_rate owners: - support@getdbt.com -# Ratio metrics create a ratio out of two measures. -# Define the measures from the semantic model as numerator or denominator +# Ratio metrics create a ratio out of two metrics. +# Define the metrics from the semantic manifest as numerator or denominator type: ratio type_params: - numerator: cancellations_usd - denominator: transaction_amount_usd + numerator: cancellations + denominator: transaction_amount filter: | # add optional constraint string. This applies to both the numerator and denominator {{ Dimension('customer__country') }} = 'MX' - name: enterprise_cancellation_rate owners: - support@getdbt.com # Ratio metrics create a ratio out of two measures. - # Define the measures from the semantic model as numerator or denominator + # Define the metrics from the semantic model as numerator or denominator type: ratio type_params: numerator: - name: cancellations_usd + name: cancellations filter: {{ Dimension('company__tier' )}} = 'enterprise' # constraint only applies to the numerator - denominator: transaction_amount_usd + denominator: transaction_amount filter: | # add optional constraint string. This applies to both the numerator and denominator {{ Dimension('customer__country') }} = 'MX' ``` @@ -142,9 +142,9 @@ metrics: - name: cancellations type: simple type_params: - measure: cancellations_usd # Specify the measure you are creating a proxy for. - filter: | - {{ Dimension('order__value')}} > 100 and {{Dimension('user__acquisition')}} + measure: cancellations_usd # Specify the measure you are creating a proxy for. 
+ filter: | + {{ Dimension('order__value')}} > 100 and {{Dimension('user__acquisition')}} ``` ## Filters diff --git a/website/docs/docs/build/python-models.md b/website/docs/docs/build/python-models.md index 5b9222ad1c5..12825648501 100644 --- a/website/docs/docs/build/python-models.md +++ b/website/docs/docs/build/python-models.md @@ -146,7 +146,7 @@ with upstream_python_model as ( :::caution -Referencing [ephemeral](docs/build/materializations#ephemeral) models is currently not supported (see [feature request](https://github.com/dbt-labs/dbt-core/issues/7288)) +Referencing [ephemeral](/docs/build/materializations#ephemeral) models is currently not supported (see [feature request](https://github.com/dbt-labs/dbt-core/issues/7288)) ::: ## Configuring Python models diff --git a/website/docs/docs/build/ratio-metrics.md b/website/docs/docs/build/ratio-metrics.md index e206671d423..db108bd37fb 100644 --- a/website/docs/docs/build/ratio-metrics.md +++ b/website/docs/docs/build/ratio-metrics.md @@ -6,7 +6,7 @@ sidebar_label: Ratio tags: [Metrics, Semantic Layer] --- -Ratio allows you to create a ratio between two measures. You simply specify a numerator and a denominator measure. Additionally, you can apply a dimensional filter to both the numerator and denominator using a constraint string when computing the metric. +Ratio allows you to create a ratio between two metrics. You simply specify a numerator and a denominator metric. Additionally, you can apply a dimensional filter to both the numerator and denominator using a constraint string when computing the metric. The following displays the full spec for ratio metrics, along with an example: @@ -17,12 +17,14 @@ metrics: type: ratio # Required label: The value that will be displayed in downstream tools #Required type_params: # Required - numerator: the measure used for the numerator # Required - filter: filter for the numerator# Optional - alias: alias for the numerator # Optional - denominator: the measure used for the denominator # Required - filter: filter for the denominator # Optional - alias: alias for the denominator # Optional + numerator: the name of the metric used for the numerator, or a struct of properties as below # Required + name: name of metric used for the numerator # Required + filter: filter for the numerator# Optional + alias: alias for the numerator # Optional + denominator: the name of the metric used for the denominator, or a struct of properties as below # Required + name: name of metric used for the denominator # Required + filter: filter for the denominator # Optional + alias: alias for the denominator # Optional ``` ## Ratio metrics example @@ -30,13 +32,13 @@ metrics: ```yaml # Ratio Metric metrics: - - name: food_order_total_pct - description: "The food order total as the % of the total order" - label: Food Order Total % + - name: food_order_pct + description: "The food order count as a ratio of the total order count" + label: Food Order Ratio type: ratio type_params: - numerator: food_order_total - denominator: order_total + numerator: food_orders + denominator: orders ``` ## Ratio metrics using different semantic models @@ -90,7 +92,7 @@ on ## Add filter -Users can define constraints on input measures for a metric by applying a filter directly to the measure, like so: +Users can define constraints on input metrics for a ratio metric by applying a filter directly to the input metric, like so: ```yaml metrics: @@ -108,4 +110,4 @@ metrics: name: distinct_purchasers ``` -Note the `filter` and `alias` parameters for 
the measure referenced in the numerator. Use the `filter` parameter to apply a filter to the measure it's attached to. The `alias` parameter is used to avoid naming conflicts in the rendered SQL queries when the same measure is used with different filters. If there are no naming conflicts, the `alias` parameter can be left out. +Note the `filter` and `alias` parameters for the metric referenced in the numerator. Use the `filter` parameter to apply a filter to the metric it's attached to. The `alias` parameter is used to avoid naming conflicts in the rendered SQL queries when the same metric is used with different filters. If there are no naming conflicts, the `alias` parameter can be left out. diff --git a/website/docs/docs/cloud/git/connect-github.md b/website/docs/docs/cloud/git/connect-github.md index d5ead96d940..5d27012195d 100644 --- a/website/docs/docs/cloud/git/connect-github.md +++ b/website/docs/docs/cloud/git/connect-github.md @@ -56,7 +56,7 @@ If you are your GitHub organization owner, you can also configure the dbt Cloud ## Personally authenticate with GitHub -Once the dbt Cloud admin has [set up a connection](docs/cloud/git/connect-github#installing-dbt-cloud-in-your-github-account) to your organization GitHub account, you need to personally authenticate, which improves the security of dbt Cloud by enabling you to log in using OAuth through GitHub. +Once the dbt Cloud admin has [set up a connection](/docs/cloud/git/connect-github#installing-dbt-cloud-in-your-github-account) to your organization GitHub account, you need to personally authenticate, which improves the security of dbt Cloud by enabling you to log in using OAuth through GitHub. :::infoGitHub profile connection - dbt Cloud developers on the [Enterprise plan](https://www.getdbt.com/pricing/) must each connect their GitHub profiles to dbt Cloud. This is because the dbt Cloud IDE verifies every developer's read / write access for the dbt repo. diff --git a/website/docs/docs/collaborate/govern/model-contracts.md b/website/docs/docs/collaborate/govern/model-contracts.md index 97667996194..339098adbdc 100644 --- a/website/docs/docs/collaborate/govern/model-contracts.md +++ b/website/docs/docs/collaborate/govern/model-contracts.md @@ -98,7 +98,7 @@ Any model meeting the criteria described above _can_ define a contract. We recom A model's contract defines the **shape** of the returned dataset. If the model's logic or input data doesn't conform to that shape, the model does not build. -[Tests](docs/build/tests) are a more flexible mechanism for validating the content of your model _after_ it's built. So long as you can write the query, you can run the test. Tests are more configurable, such as with [custom severity thresholds](/reference/resource-configs/severity). They are easier to debug after finding failures, because you can query the already-built model, or [store the failing records in the data warehouse](/reference/resource-configs/store_failures). +[Tests](/docs/build/tests) are a more flexible mechanism for validating the content of your model _after_ it's built. So long as you can write the query, you can run the test. Tests are more configurable, such as with [custom severity thresholds](/reference/resource-configs/severity). They are easier to debug after finding failures, because you can query the already-built model, or [store the failing records in the data warehouse](/reference/resource-configs/store_failures). In some cases, you can replace a test with its equivalent constraint. 
This has the advantage of guaranteeing the validation at build time, and it probably requires less compute (cost) in your data platform. The prerequisites for replacing a test with a constraint are: - Making sure that your data platform can support and enforce the constraint that you need. Most platforms only enforce `not_null`. diff --git a/website/docs/docs/dbt-cloud-apis/sl-jdbc.md b/website/docs/docs/dbt-cloud-apis/sl-jdbc.md index 46a696e695e..21b5f4f921a 100644 --- a/website/docs/docs/dbt-cloud-apis/sl-jdbc.md +++ b/website/docs/docs/dbt-cloud-apis/sl-jdbc.md @@ -278,3 +278,7 @@ semantic_layer.query(metrics=['food_order_amount', 'order_gross_profit'], - **What is the default output when adding granularity?**
The default output follows the format `{time_dimension_name}__{granularity_level}`. So for example, if the time dimension name is `ds` and the granularity level is yearly, the output is `ds__year`. +## Related docs + +- [dbt Semantic Layer integration best practices](/guides/dbt-ecosystem/sl-partner-integration-guide) + diff --git a/website/docs/docs/deploy/ci-jobs.md b/website/docs/docs/deploy/ci-jobs.md index 08fbd5f4225..a1f12d19f84 100644 --- a/website/docs/docs/deploy/ci-jobs.md +++ b/website/docs/docs/deploy/ci-jobs.md @@ -24,7 +24,7 @@ If you're interested in joining our beta, please fill out our Google Form to [si ## Set up CI jobs {#set-up-ci-jobs} -dbt Labs recommends that you create your CI job in a dedicated dbt Cloud [deployment environment](/docs/deploy/deploy-environments#create-a-deployment-environment) that's connected to a staging database. Having a separate environment dedicated for CI will provide better isolation between your temporary CI schema builds and your production data builds. Additionally, sometimes teams need their CI jobs to be triggered when a PR is made to a branch other than main. If your team maintains a staging branch as part of your release process, having a separate environment will allow you to set a [custom branch](/faqs/environments/custom-branch-settings) and, accordingly, the CI job in that dedicated environment will be triggered only when PRs are made to the specified custom branch. +dbt Labs recommends that you create your CI job in a dedicated dbt Cloud [deployment environment](/docs/deploy/deploy-environments#create-a-deployment-environment) that's connected to a staging database. Having a separate environment dedicated for CI will provide better isolation between your temporary CI schema builds and your production data builds. Additionally, sometimes teams need their CI jobs to be triggered when a PR is made to a branch other than main. If your team maintains a staging branch as part of your release process, having a separate environment will allow you to set a [custom branch](/faqs/environments/custom-branch-settings) and, accordingly, the CI job in that dedicated environment will be triggered only when PRs are made to the specified custom branch. To learn more, refer to [Get started with CI tests](/guides/orchestration/set-up-ci/overview). diff --git a/website/docs/guides/best-practices/environment-setup/1-env-guide-overview.md b/website/docs/guides/best-practices/environment-setup/1-env-guide-overview.md deleted file mode 100644 index 8c1d79877ae..00000000000 --- a/website/docs/guides/best-practices/environment-setup/1-env-guide-overview.md +++ /dev/null @@ -1,67 +0,0 @@ ---- -title: "dbt Cloud environment best practices" -id: 1-env-guide-overview -description: Learn how to configure environments in dbt Cloud. -displayText: "dbt Cloud environment best practices" -hoverSnippet: Learn how to configure environments in dbt Cloud. ---- - -> *How do I manage environments in my dbt Cloud project? How many do I need?* -> -> *How does my structure map to environments in dbt Cloud?* -> -> *What do git branches have to do with my dbt Cloud environments?* -> - -If these questions keep you up at night, you’ve come to the right place! When it comes to managing your dbt Cloud environments, there is not a one-size-fits-all solution for all teams. In this guide we’ll walk you through a few environment architecture options for dbt Cloud that we’d recommend, and hopefully you find an option that works for you. 
- -## Learning goals - -This guide has three main goals: - -- Provide our recommendations on managing dbt Cloud environments -- Illustrate these recommendations with comprehensive examples -- At each stage, explain *why* we recommend the approach that we do, so that you're equipped to decide when and where to deviate from these recommendations to better fit your organization’s unique needs - -:::info -☁️ This guide focuses on architecture for **dbt Cloud**. However, similar principles apply for developers using dbt Core. Before diving into this guide we recommend taking a look at our **[dbt Cloud environments](/docs/dbt-cloud-environments)** page for more context. - -::: - -### How many environments do I really need? - -Environments define the way that dbt will execute your code, including: - -- The **version of dbt** that will run. -- The **version of your code** to be executed. -- The **connection information** for your warehouse. -- In dbt Cloud, there are **two types of environments:** - - **Development** — the environment settings in which you work in the IDE on a development branch. - - **Deployment** — the environment settings in which a dbt Cloud job runs. - -In this guide, we’re going to focus on **deployment environments**, which determine how your project is executed when a **dbt Cloud job executes**. When using both approaches, make sure to designate one environment as "Production." This will allow you to use features such as dbt Explorer and cross-project references. Refer to [Set product environment](/docs/deploy/deploy-environments#set-as-production-environment-beta) for details. - -Depending on your git workflow and testing strategy, you'll be choosing between one deployment environment or many deployment environments. We provide a high-level overview of how these two deployment strategies work here, but use each section of this guide to get a deep-dive into how these setups differ. - -| Setup option | Works well if you | Relative complexity level | -| --- | --- | --- | -| One deployment environment | - only scheduled runs for one set of data objects
- development branches are merged directly to main | Low | -| Many deployment environments | - feature branches move through several promotion stages | High | - -### TL;DR — One deployment environment - -We usually recommended folks start with the basics; having one deployment environment is usually the simplest and most maintainable approach to start. This approach works well if: - -- You only need to have **scheduled jobs running in a single environment** within your data warehouse. -- You use a **single primary branch** and follow a direct promotion (**Dev —> Prod**) strategy - -With this option, your production jobs and your [CI jobs](/docs/deploy/continuous-integration) that ensure code integrity are managed within one single deployment environment. - -### TL;DR — Many deployment environments -This approach adds a bit more complexity and may slow down the development process, but adds a layer of security that can be worth the tradeoff. This approach works well if: - -- Your organization maintains **several long-lived git branches** to control how and when changes are tested and promoted to production. - - Some orgs follow a **Dev —> QA —> Prod release cycle** — if that sounds like your org, this approach is probably right for you. -- The **output of your dbt project is an input to other systems** and you need to test and validate many changes on a stable, long-lived staging dataset in a pre-production environment. - -The two options are explored in more detail in the following sections, including the benefits, trade-offs, the steps required to implement the setup in dbt Cloud. diff --git a/website/docs/guides/best-practices/environment-setup/2-one-deployment-environment.md b/website/docs/guides/best-practices/environment-setup/2-one-deployment-environment.md deleted file mode 100644 index 5b6c3b742e3..00000000000 --- a/website/docs/guides/best-practices/environment-setup/2-one-deployment-environment.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: "One deployment environment" -id: 2-one-deployment-environment -description: Learn how to configure a single deployment environment setup in dbt Cloud. -displayText: "dbt Cloud environment best practices" -hoverSnippet: Learn how to configure a single deployment environment setup in dbt Cloud. ---- -import ExpNote from '/snippets/_explorer-beta-note.md'; - - -## What this looks like - -1. You have a **single *development* environment** where dbt users can access the dbt Cloud IDE and make changes to their code on feature branches created off of your default branch in your repository (most often the `main` branch). -2. You have a **single *deployment* environment** (let’s call it “Production”) where your scheduled jobs run referencing the `main` branch.
- - - -3. You also have a [**CI job**](/docs/deploy/continuous-integration) that kicks off anytime you open a PR to merge a feature branch into `main`. This CI job can run in your dbt “Production” environment. - -:::info - -☁️ CI jobs run in a dedicated custom schema for each PR, so there will no collision with your production schemas. - -::: - - - -### Git workflow - - - - -1. In the dbt Cloud IDE, developers work on feature branches, created from the `main` branch (`feature_a`, `feature_b`, `feature_c` above) -2. When code is ready, developer opens a PR to merge feature branch into `main` -3. [**CI Job**](/docs/deploy/continuous-integration) automatically kicks off, and tests the changes made in the PR -4. When CI Job is successful and team is ready to deploy changes to Production, the PR is merged directly into the `main` branch. The next time a production job runs, these changes will be incorporated and executed. - -### dbt Cloud setup - -1. Create your [**development environment**](/docs/dbt-cloud-environments) to power the dbt Cloud IDE. No extra customization needed! -2. Create your **[production deployment environment](/docs/deploy/deploy-environments)**. -3. Define your **dbt Cloud jobs** in the production deployment environment from step 2. - 1. **Production job(s)**: You will need to set up **at least one scheduled job** that deploys your project to your production databases/schemas. You may create multiple jobs based on your business SLAs. - 2. **CI Job**: Unlike the production jobs, which are triggered via the scheduler, this job will be triggered when PRs are opened in your repository. Refer to [CI jobs](/docs/deploy/ci-jobs) for details. - - -### When this works well - -This approach is recommended for most use cases because it enables you to quickly and safely implement code changes in the production environment. It also gives developers the confidence to trust and rely on these changes. With this option, multiple developers can easily contribute to and collaborate on the same codebase with confidence. - -:::info -💡 Check out [Sunrun's Coalesce 2022 talk](https://www.youtube.com/watch?v=vmBAO2XN-fM) on Automating CI/CD in dbt Cloud, where they simplified their CI/CD process from several long-lived branches to a single long-lived main branch with feature branches. - -::: - -### When this doesn’t work so well - -- You have a **formal QA process** before merging code into production. -- You want to **control when features are released** to production. -- You need to have scheduled **jobs running in many environments** due to dependencies on outside systems. - - e.g. Your organization has many applications that consume and test data changes in a lower non-Production environment before changes should be promoted to Production. diff --git a/website/docs/guides/best-practices/environment-setup/3-many-deployment-environments.md b/website/docs/guides/best-practices/environment-setup/3-many-deployment-environments.md deleted file mode 100644 index 2ac46f811b1..00000000000 --- a/website/docs/guides/best-practices/environment-setup/3-many-deployment-environments.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: "Many deployment environments" -id: 3-many-deployment-environments -description: Learn how to configure a many deployment environment setup in dbt Cloud. -displayText: "dbt Cloud environment best practices" -hoverSnippet: Learn how to configure a many deployment environment setup in dbt Cloud. 
---- -import ExpNote from '/snippets/_explorer-beta-note.md'; - -## What this looks like - -1. You have a **single *development* environment** where dbt users can access the dbt Cloud IDE and make changes to their code. However, you’ll want to update the **[custom branch settings](faqs/Environments/custom-branch-settings)** to ensure that developers create feature branches off of the a non-production branch. For this example, we’ll refer to this as the `qa` branch. -2. You have a **QA deployment environment**, running scheduled jobs from the `qa` branch that deploys your dbt project to a pre-production warehouse location. -3. You have a **Production deployment environment,** running scheduled jobs from the `main` branch that deploys your dbt project to your production warehouse location.
- - - -4. You have **multiple CI jobs** (one in each deployment environment) to ensure changes to each branch are tested. - - - -### Git workflow - - - -1. In the dbt Cloud IDE, developers work on feature branches, **created from the `qa` branch** (`feature_a`, `feature_b`, `feature_c` above). -2. When code is ready, developer opens a PR to merge feature branch into `qa`. -3. The **first CI Job** automatically kicks off to test the changes introduced in the PR. This job will *defer to a regularly-scheduled job in the QA environment* and run in the QA deployment environment. -4. When **CI Job is successful** and team is ready to deploy changes, the **PR is merged into `qa`.** -5. Scheduled jobs run in the QA deployment environment, running on `qa` branch to ensure the new changes work as intended. -6. When **all feature branches** for a given release (e.g. sprint) have been **successfully merged** to `qa` and are **running without error** in the QA deployment environment, a team member opens a **PR to merge `qa` → `main`.** -7. The **second CI Job** automatically kicks off to test changes in PR. This job will *defer to a regularly-scheduled job in the Production environment* and run in the Production deployment environment. -8. When **second CI Job** is successful and team is ready to deploy changes, the **PR is merged into `main`**. -9. Monitor scheduled jobs in the Production deployment environment that are running on `main` branch. Voila! All changes are released and ready for your stakeholders. - -:::info -💡 Considering a different branching strategy that involves cherry picking? [Maybe reconsider!](https://docs.getdbt.com/blog/the-case-against-git-cherry-picking) - -::: - -### dbt Cloud setup - -1. Create your [**development environment**](/docs/dbt-cloud-environments) to power the dbt Cloud IDE. - - Here, we’ll set a **custom branch** so that users in the IDE create their feature branches from `qa` instead of `main`. Click **Only run on a custom branch** in **General settings**, enter `qa` into **Custom Branch.** - -2. Set up your **QA [deployment environment](/docs/deploy/deploy-environments)** - - Here, we’ll apply the same custom branch settings as the development environment in Step 1. All scheduled jobs in the QA deployment environment will use the code from the `qa` branch during execution. - -3. **Define QA jobs** - 1. **QA job(s)**: You’ll want to create at least one scheduled job, running on a roughly daily cadence. This will allow us to make sure all the code executes without error before you release it to production, and will also power the first CI job. - 2. **CI Job**: As above, this job will be triggered when PRs are opened in your repository. Enable this option by selecting **Run on Pull Requests?** under the **Continuous Integration(CI)** tab under the **Triggers** section. Since we’re using the custom branch setting in the QA environment, you'll also want to be sure to select the second option **Run only on Custom Branch** (selected by default) — this means that only PRs created against the `qa` branch will trigger this job, rather than any PR at all. - - This job will also need to defer to one of the QA jobs created in step 3a. This enables the use of the `state` modifier in your selection syntax to only run changes introduced by your PR. - -4. Set up your **Production [deployment environment](/docs/deploy/deploy-environments)** - - Here, we’ll *also* use the same custom branch settings as the other environments, but set the custom branch as `main`. 
Even thought the `main` branch is the default, setting this value enables us to properly set up the CI Job in the next step. - -5. **Define production jobs** - 1. **Production job(s)**: You will need to set up at least one scheduled job that deploys your project to your production databases/schemas. You may create multiple jobs based on your business SLAs. - 2. **Production CI Job**: As above, this job will be triggered when PRs are opened in your repository. Enable this option by selecting **Run on Pull Requests?** under the **Continuous Integration(CI)** tab under the **Triggers** section. Since we’re using the custom branch setting in the QA environment, we’ll also want to select the second option **Run only on Custom Branch** — this means that only PRs created against the `main` branch will trigger this job, rather than any PR at all. - - This job will also need to defer to one of the QA jobs created in step 5a. This enables the use of the `state` modifier in your selection syntax to only run changes introduced by your PR. - -### When this works well - -This approach works well when it’s critical to **apply user acceptance and integration testing to your project in a pre-production environment**. This approach allows you to have scheduled jobs running in **many environments** on your data warehouse. - -### When this doesn’t work so well - -This approach may slow down the time it takes to get new feature into production, since it requires additional steps in the deployment process and additional branches to maintain. Keep in mind that adding complexity to your deployment process might cause some slowdown in your release cycle. - -## Conclusion - -While there’s no single correct answer to how to setup your dbt Cloud environments, they are flexible enough to enable just about any code promotion workflow your organization uses. We would love to hear how you’ve set up your deployment infrastructure in dbt Cloud! diff --git a/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md b/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md index 549dbccf8dd..ba4d0cab726 100644 --- a/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md +++ b/website/docs/guides/best-practices/how-we-build-our-metrics/semantic-layer-3-build-semantic-models.md @@ -43,7 +43,7 @@ semantic_models: semantic_models: - name: orders description: | - Model containting order data. The grain of the table is the order id. + Model containing order data. The grain of the table is the order id. model: ref('stg_orders') entities: ... @@ -117,13 +117,13 @@ semantic_models: - 🧮 Dimensions are the columns that we want to **filter and group by**, **the adjectives of our project**. They come in three types: - **categorical** - **time** - - slowly changing dimensions — [these are covered in the documentation](https://docs.getdbt.com/docs/build/dimensions#scd-type-ii), and a little more complex. To focus on building your mental models of MetricFlow's fundamentals, we won't be using SCDs this guide. + - slowly changing dimensions — [these are covered in the documentation](https://docs.getdbt.com/docs/build/dimensions#scd-type-ii), and a little more complex. To focus on building your mental models of MetricFlow's fundamentals, we won't be using SCDs in this guide. - ➕ We're **not limited to existing columns**, we can use the `expr` property to add simple computations in our dimensions. 
- 📛 Categorical dimensions are the simplest, they simply require a `name` and `type` (type being categorical). **If the `name` property matches the name of the dimension column**, that's it, you're done. If you want or need to use a `name` other than the column name, or do some filtering or computation, **you can supply an optional `expr` property** to evaluate for the dimension.
### Dimensions in action
-- 👀 Lets look at our staging model again and see what fields we have available.
+- 👀 Let's look at our staging model again and see what fields we have available.
```SQL
select
@@ -145,7 +145,7 @@ from source
```
- ⏰ For now the only dimension to add is a **time dimension**.
- 🕰️ At least one **primary time dimension** is **required** for any semantic models that **have measures**.
-- 1️⃣ We denote this with the `is_primary` property, or if there is only one time dimension supplied it is primary by default. Below we only have `ordered_at` as a timestamp so we don't need to specify anything except the maximum granularity we're bucketing to (in this case, day).
+- 1️⃣ We denote this with the `is_primary` property, or if there is only one time dimension supplied, it is primary by default. Below we only have `ordered_at` as a timestamp so we don't need to specify anything except the maximum granularity we're bucketing to (in this case, day).
```YAML
dimensions:
@@ -161,7 +161,7 @@ dimensions:
We'll discuss an alternate situation, dimensional tables that have static numeric values like supply costs or tax rates but no time dimensions, later in the Guide.
:::
-- 🔢 We can also **make a dimension out of numeric column** that would be typically be a measures.
+- 🔢 We can also **make a dimension out of a numeric column** that would typically be a measure.
- 🪣 Using `expr` we can **create buckets of values that we label** for our dimension. We'll add one of these in for labeling 'large orders' as any order totals over $50.
```YAML
@@ -185,7 +185,7 @@ dimensions:
### Measures in action
-- 👀 Lets look at **our staging model** one last time and see what **fields we want to measure**.
+- 👀 Let's look at **our staging model** one last time and see what **fields we want to measure**.
```SQL
select
@@ -213,7 +213,7 @@ from source
```YAML
measures:
  - name: order_total
-    description: The total amount for each order inlcuding taxes.
+    description: The total amount for each order including taxes.
    agg: sum
  - name: tax_paid
    description: The total tax paid on each order.
@@ -275,17 +275,17 @@ semantic_models:
      agg: sum
```
-- 🦺 We can check that it's valid configuration and works with the real data our dbt project is generating by using the `mf validate-configs` command. This will:
+- 🦺 We can check that it's a valid configuration and works with the real data our dbt project is generating by using the `mf validate-configs` command. This will:
  1. **Parse the semantic manifest** our configuration describes out of the dbt project.
  2. Validate the **internal semantics** of the manifest as described by our code.
  3. Validate the **external semantics** of the manifest against your data warehouse (e.g. making sure that a column specified as a dimension exists on the proper table)
## Review and next steps
-Let's review what the basics of semantic models, they:
+Let's review the basics of semantic models:
- 🧱 Consist off **entities, dimensions, and measures**.
- 🫂 Describe the **semantics and relationships of objects** in the warehouse.
- 1️⃣ Correspond to a **single logical model** in your dbt project.
-Next up, lets use our new semantic model to **build a metric**! +Next up, let's use our new semantic model to **build a metric**! diff --git a/website/docs/guides/best-practices/materializations/materializations-guide-3-configuring-materializations.md b/website/docs/guides/best-practices/materializations/materializations-guide-3-configuring-materializations.md index 2f6c04bd35d..54f4443b600 100644 --- a/website/docs/guides/best-practices/materializations/materializations-guide-3-configuring-materializations.md +++ b/website/docs/guides/best-practices/materializations/materializations-guide-3-configuring-materializations.md @@ -53,7 +53,7 @@ def model(dbt, session):
:::info -🐍 **Not all adapters support python yet**, check the [docs here to be sure](docs/build/python-models#specific-data-platforms) before spending time writing python models. +🐍 **Not all adapters support python yet**, check the [docs here to be sure](/docs/build/python-models#specific-data-platforms) before spending time writing python models. ::: - Configuring a model to materialize as a `table` is simple, and the same as a `view` for both SQL and python models. diff --git a/website/docs/guides/best-practices/materializations/materializations-guide-4-incremental-models.md b/website/docs/guides/best-practices/materializations/materializations-guide-4-incremental-models.md index c1a4cb3eb0e..603cbc8cda1 100644 --- a/website/docs/guides/best-practices/materializations/materializations-guide-4-incremental-models.md +++ b/website/docs/guides/best-practices/materializations/materializations-guide-4-incremental-models.md @@ -115,7 +115,7 @@ So we’re going to use an **if statement** to apply our cutoff filter **only wh Thankfully, we don’t have to dig into the guts of dbt to sort out each of these conditions individually. -- ⚙️  dbt provides us with a **macro [`is_incremental`](docs/build/incremental-models#understanding-the-is_incremental-macro)** that checks all of these conditions for this exact use case. +- ⚙️  dbt provides us with a **macro [`is_incremental`](/docs/build/incremental-models#understanding-the-is_incremental-macro)** that checks all of these conditions for this exact use case. - 🔀  By **wrapping our cutoff logic** in this macro, it will only get applied when the macro returns true for all of the above conditions. Let’s take a look at all these pieces together: diff --git a/website/docs/guides/migration/tools/refactoring-legacy-sql.md b/website/docs/guides/migration/tools/refactoring-legacy-sql.md index 9dd66abb495..d9acfea6dab 100644 --- a/website/docs/guides/migration/tools/refactoring-legacy-sql.md +++ b/website/docs/guides/migration/tools/refactoring-legacy-sql.md @@ -59,7 +59,7 @@ This allows you to call the same table in multiple places with `{{ src('my_sourc We start here for several reasons: #### Source freshness reporting -Using sources unlocks the ability to run [source freshness reporting](docs/build/sources#snapshotting-source-data-freshness) to make sure your raw data isn't stale. +Using sources unlocks the ability to run [source freshness reporting](/docs/build/sources#snapshotting-source-data-freshness) to make sure your raw data isn't stale. #### Easy dependency tracing If you're migrating multiple stored procedures into dbt, with sources you can see which queries depend on the same raw tables. diff --git a/website/docs/guides/migration/versions/01-upgrading-to-v1.6.md b/website/docs/guides/migration/versions/01-upgrading-to-v1.6.md index 22111828e24..ea231ce7f9a 100644 --- a/website/docs/guides/migration/versions/01-upgrading-to-v1.6.md +++ b/website/docs/guides/migration/versions/01-upgrading-to-v1.6.md @@ -47,8 +47,9 @@ Supported on: - [Postgres](/reference/resource-configs/postgres-configs#materialized-view) - [Redshift](/reference/resource-configs/redshift-configs#materialized-view) - Snowflake (docs forthcoming) +- Databricks (docs forthcoming) -Support for BigQuery and Databricks forthcoming. +Support for BigQuery coming soon. 
### New commands for mature deployment diff --git a/website/docs/guides/orchestration/custom-cicd-pipelines/1-cicd-background.md b/website/docs/guides/orchestration/custom-cicd-pipelines/1-cicd-background.md index 048fe637de0..a66259c6c49 100644 --- a/website/docs/guides/orchestration/custom-cicd-pipelines/1-cicd-background.md +++ b/website/docs/guides/orchestration/custom-cicd-pipelines/1-cicd-background.md @@ -1,10 +1,8 @@ --- -title: Customizing CI/CD +title: Customizing CI/CD with Custom Pipelines id: 1-cicd-background --- -# Creating Custom CI/CD Pipelines - One of the core tenets of dbt is that analytic code should be version controlled. This provides a ton of benefit to your organization in terms of collaboration, code consistency, stability, and the ability to roll back to a prior version. There’s an additional benefit that is provided with your code hosting platform that is often overlooked or underutilized. Some of you may have experience using dbt Cloud’s [webhook functionality](https://docs.getdbt.com/docs/dbt-cloud/using-dbt-cloud/cloud-enabling-continuous-integration) to run a job when a PR is created. This is a fantastic capability, and meets most use cases for testing your code before merging to production. However, there are circumstances when an organization needs additional functionality, like running workflows on every commit (linting), or running workflows after a merge is complete. In this article, we will show you how to setup custom pipelines to lint your project and trigger a dbt Cloud job via the API. A note on parlance in this article since each code hosting platform uses different terms for similar concepts. The terms `pull request` (PR) and `merge request` (MR) are used interchangeably to mean the process of merging one branch into another branch. diff --git a/website/docs/guides/orchestration/set-up-ci/1-introduction.md b/website/docs/guides/orchestration/set-up-ci/1-introduction.md new file mode 100644 index 00000000000..97df16b4ce1 --- /dev/null +++ b/website/docs/guides/orchestration/set-up-ci/1-introduction.md @@ -0,0 +1,10 @@ +--- +title: "Get started with Continuous Integration tests" +slug: overview +--- + +By validating your code _before_ it goes into production, you don't need to spend your afternoon fielding messages from people whose reports are suddenly broken. + +A solid CI setup is critical to preventing avoidable downtime and broken trust. dbt Cloud uses **sensible defaults** to get you up and running in a performant and cost-effective way in minimal time. + +After that, there's time to get fancy, but let's walk before we run. diff --git a/website/docs/guides/orchestration/set-up-ci/2-quick-setup.md b/website/docs/guides/orchestration/set-up-ci/2-quick-setup.md new file mode 100644 index 00000000000..89e04daa8df --- /dev/null +++ b/website/docs/guides/orchestration/set-up-ci/2-quick-setup.md @@ -0,0 +1,58 @@ +--- +title: "Baseline: Enable CI in 15 minutes" +slug: in-15-minutes +description: Find issues before they are deployed to production with dbt Cloud's Slim CI. +--- + +:::tip Join the beta + +dbt Labs is currently running a beta that provides improved UI updates for setting up CI jobs. For docs, refer to [Set up CI jobs (Beta version)](/docs/deploy/ci-jobs?version=beta#set-up-ci-jobs). This guide assumes you are using the improvements available in the beta. + +If you're interested in joining our beta, please fill out our Google Form to [sign up](https://forms.gle/VxwBD1xjzouE84EQ6). 
+ +::: + +## Prerequisites + +As part of your initial dbt Cloud setup, you should already have Development and Production environments configured. Let's recap what each does: + +- Your **Development environment** powers the IDE. Each user has individual credentials, and builds into an individual dev schema. Nothing you do here impacts any of your colleagues. +- Your **Production environment** brings the canonical version of your project to life for downstream consumers. There is a single set of deployment credentials, and everything is built into your production schema(s). + +In this guide, we're going to add a **CI environment**, where proposed changes can be validated in the context of the entire project without impacting production systems. We will use a single set of deployment credentials (like the Prod environment), but models are built in a separate location to avoid impacting others (like the Dev environment). + +Your git flow will look like this: + + +## Step 1: Create a new CI environment + +See [Create a new environment](/docs/dbt-cloud-environments#create-a-deployment-environment). The environment should be called **CI**. Just like your existing Production environment, it will be a Deployment-type environment. + +When setting a Schema in the **Deployment Credentials** area, remember that dbt Cloud will automatically generate a custom schema name for each PR to ensure that they don't interfere with your deployed models. This means you can safely set the same Schema name as your Production job. + +## Step 2: Double-check your Production environment is identified + +Go into your existing Production environment, and ensure that the **Set as Production environment** checkbox is set. It'll make things easier later. + +## Step 3: Create a new job in the CI environment + +Use the **Continuous Integration Job** template, and call the job **CI Check**. + +In the Execution Settings, your command will be preset to `dbt build --select state:modified+`. Let's break this down: + +- [`dbt build`](/reference/commands/build) runs all nodes (seeds, models, snapshots, tests) at once in DAG order. If something fails, nodes that depend on it will be skipped. +- The [`state:modified+` selector](/reference/node-selection/methods#the-state-method) means that only modified nodes and their children will be run ("Slim CI"). In addition to [not wasting time](https://discourse.getdbt.com/t/how-we-sped-up-our-ci-runs-by-10x-using-slim-ci/2603) building and testing nodes that weren't changed in the first place, this significantly reduces compute costs. + +To be able to find modified nodes, dbt needs to have something to compare against. dbt Cloud uses the last successful run of any job in your Production environment as its [comparison state](/reference/node-selection/syntax#about-node-selection). As long as you identified your Production environment in Step 2, you won't need to touch this. If you didn't, pick the right environment from the dropdown. + +## Step 4: Test your process + +That's it! There are other steps you can take to be even more confident in your work, such as [validating your structure follows best practices](/guides/orchestration/set-up-ci/run-dbt-project-evaluator) and [linting your code](/guides/orchestration/set-up-ci/lint-on-push), but this covers the most critical checks. + +To test your new flow, create a new branch in the dbt Cloud IDE then add a new file or modify an existing one. Commit it, then create a new Pull Request (not a draft). 
Within a few seconds, you’ll see a new check appear in your git provider.
+
+## Things to keep in mind
+
+- If you make a new commit while a CI run based on older code is in progress, it will be automatically canceled and replaced with the fresh code.
+- An unlimited number of CI jobs can run at once. If 10 developers all commit code to different PRs at the same time, each person will get their own schema containing their changes. Once each PR is merged, dbt Cloud will drop that schema.
+- CI jobs will never block a production run.
diff --git a/website/docs/guides/orchestration/set-up-ci/3-run-dbt-project-evaluator.md b/website/docs/guides/orchestration/set-up-ci/3-run-dbt-project-evaluator.md
new file mode 100644
index 00000000000..646a9cb42b7
--- /dev/null
+++ b/website/docs/guides/orchestration/set-up-ci/3-run-dbt-project-evaluator.md
@@ -0,0 +1,46 @@
+---
+title: "Enforce best practices with dbt project evaluator"
+slug: run-dbt-project-evaluator
+description: dbt Project Evaluator can be run from inside of your existing dbt Cloud CI job to identify common flaws in projects.
+---
+
+dbt Project Evaluator is a package designed to identify deviations from best practices common to many dbt projects, including modeling, testing, documentation, structure and performance problems. For an introduction to the package, read its [launch blog post](/blog/align-with-dbt-project-evaluator).
+
+## Step 1: Install the package
+
+As with all packages, add a reference to `dbt-labs/dbt_project_evaluator` to your `packages.yml` file. See the [dbt Package Hub](https://hub.getdbt.com/dbt-labs/dbt_project_evaluator/latest/) for full installation instructions.
+
+## Step 2: Define test severity with an environment variable
+
+As noted in the [documentation](https://dbt-labs.github.io/dbt-project-evaluator/latest/ci-check/), tests in the package are set to `warn` severity by default.
+
+To have these tests fail in CI, create a new environment variable called `DBT_PROJECT_EVALUATOR_SEVERITY`. Set the project-wide default to `warn`, and set it to `error` in the CI environment.
+
+In your `dbt_project.yml` file, override the severity configuration:
+
+```yaml
+tests:
+  dbt_project_evaluator:
+    +severity: "{{ env_var('DBT_PROJECT_EVALUATOR_SEVERITY', 'warn') }}"
+```
+
+## Step 3: Update your CI commands
+
+Because these tests should only run after the rest of your project has been built, your existing CI command will need to be updated to exclude the dbt_project_evaluator package. You will then add a second step which builds _only_ the package's models and tests.
+
+Update your steps to:
+
+```bash
+dbt build --select state:modified+ --exclude package:dbt_project_evaluator
+dbt build --select package:dbt_project_evaluator
+```
+
+## Step 4: Apply any customizations
+
+Depending on the state of your project when you roll out the evaluator, you may need to skip some tests or allow exceptions for some areas. To do this, refer to the documentation on:
+
+- [disabling tests](https://dbt-labs.github.io/dbt-project-evaluator/latest/customization/customization/)
+- [excluding groups of models from a specific test](https://dbt-labs.github.io/dbt-project-evaluator/latest/customization/exceptions/)
+- [excluding packages or sources/models based on path](https://dbt-labs.github.io/dbt-project-evaluator/latest/customization/excluding-packages-and-paths/)
+
+If you create a seed to exclude groups of models from a specific test, remember to disable the default seed and include `dbt_project_evaluator_exceptions` in your second `dbt build` command above.
diff --git a/website/docs/guides/orchestration/custom-cicd-pipelines/2-lint-on-push.md b/website/docs/guides/orchestration/set-up-ci/4-lint-on-push.md
similarity index 55%
rename from website/docs/guides/orchestration/custom-cicd-pipelines/2-lint-on-push.md
rename to website/docs/guides/orchestration/set-up-ci/4-lint-on-push.md
index 465994e4442..1932ffe1019 100644
--- a/website/docs/guides/orchestration/custom-cicd-pipelines/2-lint-on-push.md
+++ b/website/docs/guides/orchestration/set-up-ci/4-lint-on-push.md
@@ -1,11 +1,12 @@
---
-title: Lint code on push
-id: 2-lint-on-push
+title: "Run linting checks with SQLFluff"
+slug: lint-on-push
+description: Enforce your organization's SQL style guide by running SQLFluff in your git workflow whenever new code is pushed.
---
-This section shows a very basic example of linting a project every time a commit is pushed to the repo. While it is simple, it shows the power of CI and can be expanded on to meet the needs of your organization.
+By [linting](/docs/cloud/dbt-cloud-ide/lint-format#lint) your project during CI, you can ensure that code styling standards are consistently enforced, without spending human time nitpicking comma placement.
-The steps below use [SQLFluff](https://docs.sqlfluff.com/en/stable/) to scan your code and look for linting errors. In the example, it's set to use the `snowflake` dialect, and specifically runs the rules L019, L020, L021, and L022. This is purely for demonstration purposes. You should update this to reflect your code base's [dialect](https://docs.sqlfluff.com/en/stable/dialects.html) and the [rules](https://docs.sqlfluff.com/en/stable/rules.html) you've established for your repo.
+The steps below create an action/pipeline which uses [SQLFluff](https://docs.sqlfluff.com/en/stable/) to scan your code and look for linting errors. If you don't already have SQLFluff rules defined, check out [our recommended config file](/guides/best-practices/how-we-style/2-how-we-style-our-sql).
### 1. Create a YAML file to define your pipeline
@@ -21,8 +22,8 @@ The YAML files defined below are what tell your code hosting platform the steps
}>
-In order for GitHub to know that you want to run an action, you need to have a few specific folders in your project. Add a new folder named `.github`, and within that folder add a new one named `workflows`. Your final folder structure will look like this:
-
+GitHub Actions are defined in the `.github/workflows` directory. To define the job for your action, add a new file named `lint_on_push.yml` under the `workflows` folder. Your final folder structure will look like this:
+
```sql
my_awesome_project
├── .github
│   ├── workflows
│   │   └── lint_on_push.yml
```
-To define the job for our action, let’s add a new file named `lint_on_push.yml` under the `workflows` folder.
This file is how we tell the GitHub runner what to execute when the job is triggered. - -Below I touch on the important pieces for running a dbt Cloud job, but if you want a full run-down of all the components of this YAML file checkout [this GitHub article](https://docs.github.com/en/actions/learn-github-actions/understanding-github-actions#understanding-the-workflow-file) on actions. - **Key pieces:** -- `on:` - this is used to filter when the pipeline is run. In this example we’re running it on every push except for pushes to branches named `main`. For more filters, checkout [GitHub’s docs](https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows). +- `on:` defines when the pipeline is run. This workflow will run whenever code is pushed to any branch except `main`. For other trigger options, check out [GitHub’s docs](https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows). - `runs-on: ubuntu-latest` - this defines the operating system we’re using to run the job -- `uses:` - remember the virtual servers we covered in the background section? They’re just empty operating systems, so there are two pieces of setup that are needed in order to access the code in your repo, and setup Python correctly on the virtual server. These two actions are called from other repos in GitHub to provide those services. For more information on them, checkout their repos: [actions/checkout](https://github.com/actions/checkout#checkout-v3) and [actions/setup-python](https://github.com/actions/setup-python#setup-python-v3). -- `run:` - this is how we’re telling the GitHub runner to execute the Python script we defined above. +- `uses:` - When the Ubuntu server is created, it is completely empty. [`checkout`](https://github.com/actions/checkout#checkout-v3) and [`setup-python`](https://github.com/actions/setup-python#setup-python-v3) are public GitHub Actions which enable the server to access the code in your repo, and set up Python correctly. +- `run:` - these steps are run at the command line, as though you typed them at a prompt yourself. This will install sqlfluff and lint the project. Be sure to set the correct `--dialect` for your project. + +For a full breakdown of the properties in a workflow file, see [Understanding the workflow file](https://docs.github.com/en/actions/learn-github-actions/understanding-github-actions#understanding-the-workflow-file) on GitHub's website. ```yaml name: lint dbt project on push @@ -50,7 +49,7 @@ on: - 'main' jobs: -# this job runs SQLFluff with a specific set of rules + # this job runs SQLFluff with a specific set of rules # note the dialect is set to Snowflake, so make that specific to your setup # details on linter rules: https://docs.sqlfluff.com/en/stable/rules.html lint_project: @@ -63,9 +62,9 @@ jobs: with: python-version: "3.9" - name: Install SQLFluff - run: "pip install sqlfluff==0.13.1" + run: "pip install sqlfluff" - name: Lint project - run: "sqlfluff lint models --dialect snowflake --rules L019,L020,L021,L022" + run: "sqlfluff lint models --dialect snowflake" ``` @@ -83,7 +82,7 @@ my_awesome_project **Key pieces:** - `image: python:3.9` - this defines the virtual image we’re using to run the job -- `rules:` - this is used to filter when the pipeline runs. In this case we’re telling it to run on every push event except when the branch is named `main`. Filters are very powerful to run commands on specific events, and you can find a full list in [GitLab’s documentation](https://docs.gitlab.com/ee/ci/yaml/#rules). 
+- `rules:` - defines when the pipeline is run. This workflow will run whenever code is pushed to any branch except `main`. For other rules, refer to [GitLab’s documentation](https://docs.gitlab.com/ee/ci/yaml/#rules). - `script:` - this is how we’re telling the GitLab runner to execute the Python script we defined above. ```yaml @@ -100,8 +99,8 @@ lint-project: rules: - if: $CI_PIPELINE_SOURCE == "push" && $CI_COMMIT_BRANCH != 'main' script: - - pip install sqlfluff==0.13.1 - - sqlfluff lint models --dialect snowflake --rules L019,L020,L021,L022 + - pip install sqlfluff + - sqlfluff lint models --dialect snowflake ``` @@ -118,7 +117,7 @@ my_awesome_project **Key pieces:** - `image: python:3.11.1` - this defines the virtual image we’re using to run the job -- `'**':` - this is used to filter when the pipeline runs. In this case we’re telling it to run on every push event, and you can see at line 12 we're creating a dummy pipeline for `master`. More information on filtering when a pipeline is run can be found in [Bitbucket's documentation](https://support.atlassian.com/bitbucket-cloud/docs/pipeline-triggers/) +- `'**':` - this is used to filter when the pipeline runs. In this case we’re telling it to run on every push event, and you can see at line 12 we're creating a dummy pipeline for `main`. More information on filtering when a pipeline is run can be found in [Bitbucket's documentation](https://support.atlassian.com/bitbucket-cloud/docs/pipeline-triggers/) - `script:` - this is how we’re telling the Bitbucket runner to execute the Python script we defined above. ```yaml @@ -134,7 +133,7 @@ pipelines: - pip install sqlfluff==0.13.1 - sqlfluff lint models --dialect snowflake --rules L019,L020,L021,L022 - 'master': # override if your default branch doesn't run on a branch named "master" + 'main': # override if your default branch doesn't run on a branch named "main" - step: script: - python --version @@ -145,7 +144,7 @@ pipelines: ### 2. Commit and push your changes to make sure everything works -After you finish creating the YAML files, commit and push your code. Doing this will trigger your pipeline for the first time! If everything goes well, you should see the pipeline in your code platform. When you click into the job you’ll get a log showing that SQLFluff was run. If your code failed linting you’ll get an error in the job with a description of what needs to be fixed. If everything passed the lint check, you’ll see a successful job run. +After you finish creating the YAML files, commit and push your code to trigger your pipeline for the first time. If everything goes well, you should see the pipeline in your code platform. When you click into the job you’ll get a log showing that SQLFluff was run. If your code failed linting you’ll get an error in the job with a description of what needs to be fixed. If everything passed the lint check, you’ll see a successful job run. + +## Step 1: Create a `release` branch in your git repo + +As noted above, this branch will outlive any individual feature, and will be the base of all feature development for a period of time. Your team might choose to create a new branch for each sprint (`qa/sprint-01`, `qa/sprint-02`, etc), tie it to a version of your data product (`qa/1.0`, `qa/1.1`), or just have a single `qa` branch which remains active indefinitely. + +## Step 2: Update your Development environment to use the `qa` branch + +See [Custom branch behavior](/docs/dbt-cloud-environments#custom-branch-behavior). 
Setting `qa` as your custom branch ensures that the IDE creates new branches and PRs with the correct target, instead of using `main`.
+
+## Step 3: Create a new QA environment
+
+See [Create a new environment](/docs/dbt-cloud-environments#create-a-deployment-environment). The environment should be called **QA**. Just like your existing Production and CI environments, it will be a Deployment-type environment.
+
+Set its branch to `qa` as well.
+
+## Step 4: Create a new job
+
+Use the **Continuous Integration Job** template, and call the job **QA Check**.
+
+In the Execution Settings, your command will be preset to `dbt build --select state:modified+`. Let's break this down:
+
+- [`dbt build`](/reference/commands/build) runs all nodes (seeds, models, snapshots, tests) at once in DAG order. If something fails, nodes that depend on it will be skipped.
+- The [`state:modified+` selector](/reference/node-selection/methods#the-state-method) means that only modified nodes and their children will be run ("Slim CI"). In addition to [not wasting time](https://discourse.getdbt.com/t/how-we-sped-up-our-ci-runs-by-10x-using-slim-ci/2603) building and testing nodes that weren't changed in the first place, this significantly reduces compute costs.
+
+To be able to find modified nodes, dbt needs to have something to compare against. Normally, we use the Production environment as the source of truth, but in this case new code will be merged into `qa` long before it hits the `main` branch and the Production environment. Because of this, we'll want to defer the QA environment to itself.
+
+### Optional: also add a compile-only job
+
+dbt Cloud uses the last successful run of any job in that environment as its [comparison state](/reference/node-selection/syntax#about-node-selection). If you have a lot of PRs in flight, the comparison state could switch around regularly.
+
+Adding a regularly scheduled job inside the QA environment whose only command is `dbt compile` can regenerate a more stable manifest for comparison purposes.
+
+## Step 5: Test your process
+
+When the Release Manager is ready to cut a new release, they will manually open a PR from `qa` into `main` from their git provider (e.g. GitHub, GitLab, or Azure DevOps). dbt Cloud will detect the new PR, at which point the existing check in the CI environment will trigger and run. When using the [baseline configuration](/guides/orchestration/set-up-ci/in-15-minutes), it's possible to kick off the PR creation from inside the dbt Cloud IDE. Under this paradigm, that button will create PRs targeting your `qa` branch instead.
+
+To test your new flow, create a new branch in the dbt Cloud IDE, then add a new file or modify an existing one. Commit it, then create a new Pull Request (not a draft) against your `qa` branch. You'll see the integration tests begin to run. Once they complete, manually create a PR against `main`, and within a few seconds you'll see the tests run again, this time incorporating every change on `qa` that hasn't been merged to `main` yet.
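+
+If you orchestrate dbt Core yourself rather than (or alongside) dbt Cloud, you can approximate the same Slim CI behavior with dbt's `--defer` and `--state` flags. The workflow below is only a minimal sketch under stated assumptions, not a dbt Cloud feature: the `prod-run-artifacts/` folder, the `dbt-snowflake` adapter, and the `SNOWFLAKE_PASSWORD` secret are placeholders, and downloading the comparison `manifest.json` into that folder is left to an earlier step you would define yourself.
+
+```yaml
+name: slim ci check on pull request
+
+on:
+  pull_request:
+    branches:
+      - 'qa'
+
+jobs:
+  slim_ci_check:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: "actions/checkout@v3"
+      - uses: "actions/setup-python@v3"
+        with:
+          python-version: "3.9"
+      # Assumption: dbt Core plus the Snowflake adapter; swap in the adapter for your warehouse.
+      - name: Install dbt
+        run: "pip install dbt-snowflake"
+      # Assumption: an earlier step has already placed the comparison manifest.json
+      # (for example, from your production or QA artifact storage) into ./prod-run-artifacts.
+      - name: Build modified nodes and their children
+        run: "dbt build --select state:modified+ --defer --state prod-run-artifacts"
+        env:
+          # Assumption: profiles.yml is committed at the repo root and reads its
+          # credentials from environment variables such as this one.
+          DBT_PROFILES_DIR: .
+          SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
+```
+
+Deferral and state comparison are exactly what dbt Cloud manages for you in the QA environment above, which is why a stable, regularly refreshed manifest (for example, from the optional compile-only job) is worth having in either setup.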
diff --git a/website/docs/guides/orchestration/webhooks/zapier-ms-teams.md b/website/docs/guides/orchestration/webhooks/zapier-ms-teams.md
index aa95b999d4c..bb3f03ef0c0 100644
--- a/website/docs/guides/orchestration/webhooks/zapier-ms-teams.md
+++ b/website/docs/guides/orchestration/webhooks/zapier-ms-teams.md
@@ -27,7 +27,7 @@ In order to set up the integration, you should have familiarity with:
**Note**: To receive the message, add the Zapier app to the team's channel during installation.

### 2. Create a new Zap in Zapier
-Use **Webhooks by Zapier** as the Trigger, and **Catch Raw Hook** as the Event. If you don't intend to [validate the authenticity of your webhook](docs/deploy/webhooks#validate-a-webhook) (not recommended!) then you can choose **Catch Hook** instead.
+Use **Webhooks by Zapier** as the Trigger, and **Catch Raw Hook** as the Event. If you don't intend to [validate the authenticity of your webhook](/docs/deploy/webhooks#validate-a-webhook) (not recommended!) then you can choose **Catch Hook** instead.

Press **Continue**, then copy the webhook URL.

diff --git a/website/docs/guides/orchestration/webhooks/zapier-new-cloud-job.md b/website/docs/guides/orchestration/webhooks/zapier-new-cloud-job.md
index 49b01d0db7e..0764c6c7911 100644
--- a/website/docs/guides/orchestration/webhooks/zapier-new-cloud-job.md
+++ b/website/docs/guides/orchestration/webhooks/zapier-new-cloud-job.md
@@ -16,7 +16,7 @@ In order to set up the integration, you should have familiarity with:
## Integration steps

### 1. Create a new Zap in Zapier
-Use **Webhooks by Zapier** as the Trigger, and **Catch Raw Hook** as the Event. If you don't intend to [validate the authenticity of your webhook](docs/deploy/webhooks#validate-a-webhook) (not recommended!) then you can choose **Catch Hook** instead.
+Use **Webhooks by Zapier** as the Trigger, and **Catch Raw Hook** as the Event. If you don't intend to [validate the authenticity of your webhook](/docs/deploy/webhooks#validate-a-webhook) (not recommended!) then you can choose **Catch Hook** instead.

Press **Continue**, then copy the webhook URL.

diff --git a/website/docs/guides/orchestration/webhooks/zapier-refresh-mode-report.md b/website/docs/guides/orchestration/webhooks/zapier-refresh-mode-report.md
index 99680c432b3..f682baae8e2 100644
--- a/website/docs/guides/orchestration/webhooks/zapier-refresh-mode-report.md
+++ b/website/docs/guides/orchestration/webhooks/zapier-refresh-mode-report.md
@@ -22,7 +22,7 @@ In order to set up the integration, you should have familiarity with:
## Integration steps

### 1. Create a new Zap in Zapier
-Use **Webhooks by Zapier** as the Trigger, and **Catch Raw Hook** as the Event. If you don't intend to [validate the authenticity of your webhook](docs/deploy/webhooks#validate-a-webhook) (not recommended!) then you can choose **Catch Hook** instead.
+Use **Webhooks by Zapier** as the Trigger, and **Catch Raw Hook** as the Event. If you don't intend to [validate the authenticity of your webhook](/docs/deploy/webhooks#validate-a-webhook) (not recommended!) then you can choose **Catch Hook** instead.

Press **Continue**, then copy the webhook URL.
diff --git a/website/docs/guides/orchestration/webhooks/zapier-refresh-tableau-workbook.md b/website/docs/guides/orchestration/webhooks/zapier-refresh-tableau-workbook.md index 8751528565c..52a9ae63523 100644 --- a/website/docs/guides/orchestration/webhooks/zapier-refresh-tableau-workbook.md +++ b/website/docs/guides/orchestration/webhooks/zapier-refresh-tableau-workbook.md @@ -25,7 +25,7 @@ To set up the integration, you need to be familiar with: To authenticate with the Tableau API, obtain a [Personal Access Token](https://help.tableau.com/current/server/en-us/security_personal_access_tokens.htm) from your Tableau Server/Cloud instance. In addition, make sure your Tableau workbook uses data sources that allow refresh access, which is usually set when publishing. ### 2. Create a new Zap in Zapier -To trigger an action with the delivery of a webhook in Zapier, you'll want to create a new Zap with **Webhooks by Zapier** as the Trigger and **Catch Raw Hook** as the Event. However, if you choose not to [validate the authenticity of your webhook](docs/deploy/webhooks#validate-a-webhook), which isn't recommended, you can choose **Catch Hook** instead. +To trigger an action with the delivery of a webhook in Zapier, you'll want to create a new Zap with **Webhooks by Zapier** as the Trigger and **Catch Raw Hook** as the Event. However, if you choose not to [validate the authenticity of your webhook](/docs/deploy/webhooks#validate-a-webhook), which isn't recommended, you can choose **Catch Hook** instead. Press **Continue**, then copy the webhook URL. diff --git a/website/docs/guides/orchestration/webhooks/zapier-slack.md b/website/docs/guides/orchestration/webhooks/zapier-slack.md index d3b0473502b..c9046ee9943 100644 --- a/website/docs/guides/orchestration/webhooks/zapier-slack.md +++ b/website/docs/guides/orchestration/webhooks/zapier-slack.md @@ -25,7 +25,7 @@ In order to set up the integration, you should have familiarity with: ## Integration steps ### 1. Create a new Zap in Zapier -Use **Webhooks by Zapier** as the Trigger, and **Catch Raw Hook** as the Event. If you don't intend to [validate the authenticity of your webhook](docs/deploy/webhooks#validate-a-webhook) (not recommended!) then you can choose **Catch Hook** instead. +Use **Webhooks by Zapier** as the Trigger, and **Catch Raw Hook** as the Event. If you don't intend to [validate the authenticity of your webhook](/docs/deploy/webhooks#validate-a-webhook) (not recommended!) then you can choose **Catch Hook** instead. Click **Continue**, then copy the webhook URL. diff --git a/website/docs/quickstarts/snowflake-qs.md b/website/docs/quickstarts/snowflake-qs.md index 6d03586e611..0561ea13410 100644 --- a/website/docs/quickstarts/snowflake-qs.md +++ b/website/docs/quickstarts/snowflake-qs.md @@ -138,7 +138,7 @@ There are two ways to connect dbt Cloud to Snowflake. The first option is Partne -Using Partner Connect allows you to create a complete dbt account with your [Snowflake connection](docs/cloud/connect-data-platform/connect-snowflake), [a managed repository](/docs/collaborate/git/managed-repository), [environments](/docs/build/custom-schemas#managing-environments), and credentials. +Using Partner Connect allows you to create a complete dbt account with your [Snowflake connection](/docs/cloud/connect-data-platform/connect-snowflake), [a managed repository](/docs/collaborate/git/managed-repository), [environments](/docs/build/custom-schemas#managing-environments), and credentials. 1. 
In the Snowflake UI, click on the home icon in the upper left corner. In the left sidebar, select **Admin**. Then, select **Partner Connect**. Find the dbt tile by scrolling or by searching for dbt in the search bar. Click the tile to connect to dbt. diff --git a/website/sidebars.js b/website/sidebars.js index e9cab8815a9..0faa37afff0 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -246,7 +246,7 @@ const sidebarSettings = { { type: "category", label: "Build your metrics", - link: { type: "doc", id: "docs/build/build-metrics-intro"}, + link: { type: "doc", id: "docs/build/build-metrics-intro" }, collapsed: true, items: [ { @@ -274,7 +274,7 @@ const sidebarSettings = { { type: "category", label: "Metrics", - link: { type: "doc", id: "docs/build/metrics-overview"}, + link: { type: "doc", id: "docs/build/metrics-overview" }, items: [ "docs/build/cumulative", "docs/build/derived", @@ -909,18 +909,6 @@ const sidebarSettings = { "guides/best-practices/materializations/materializations-guide-6-examining-builds", "guides/best-practices/materializations/materializations-guide-7-conclusion", ], - }, - { - type: "category", - label: "dbt Cloud Environment best practices", - link: { - type: "doc", - id: "guides/best-practices/environment-setup/1-env-guide-overview", - }, - items: [ - "guides/best-practices/environment-setup/2-one-deployment-environment", - "guides/best-practices/environment-setup/3-many-deployment-environments", - ], }, "guides/best-practices/debugging-errors", "guides/best-practices/writing-custom-generic-tests", @@ -949,16 +937,29 @@ const sidebarSettings = { "guides/orchestration/airflow-and-dbt-cloud/3-running-airflow-and-dbt-cloud", "guides/orchestration/airflow-and-dbt-cloud/4-airflow-and-dbt-cloud-faqs", ], + }, + { + type: "category", + label: "Set up Continuous Integration", + link: { + type: "doc", + id: "guides/orchestration/set-up-ci/introduction", + }, + items: [ + "guides/orchestration/set-up-ci/quick-setup", + "guides/orchestration/set-up-ci/run-dbt-project-evaluator", + "guides/orchestration/set-up-ci/lint-on-push", + "guides/orchestration/set-up-ci/multiple-checks", + ], }, { type: "category", - label: "Customizing CI/CD", + label: "Custom Continuous Deployment Workflows", link: { type: "doc", id: "guides/orchestration/custom-cicd-pipelines/1-cicd-background", }, items: [ - "guides/orchestration/custom-cicd-pipelines/2-lint-on-push", "guides/orchestration/custom-cicd-pipelines/3-dbt-cloud-job-on-merge", "guides/orchestration/custom-cicd-pipelines/4-dbt-cloud-job-on-pr", "guides/orchestration/custom-cicd-pipelines/5-something-to-consider", diff --git a/website/static/_redirects b/website/static/_redirects index 4172475e431..ba9d00d37f6 100644 --- a/website/static/_redirects +++ b/website/static/_redirects @@ -762,6 +762,10 @@ https://tutorial.getdbt.com/* https://docs.getdbt.com/:splat 301! /docs/guides/migration-guide/_ /guides/migration/versions/:splat 301! /docs/guides/_ /guides/legacy/:splat 301! +/guides/best-practices/environment-setup/1-env-guide-overview /guides/orchestration/set-up-ci/overview 301 +/guides/best-practices/environment-setup/2-one-deployment-environment /guides/orchestration/set-up-ci/in-15-minutes 301 +/guides/best-practices/environment-setup/3-many-deployment-environments /guides/orchestration/set-up-ci/multiple-environments 301 + # adapter development docs /docs/contributing/what-are-adapters /guides/advanced/adapter-development/1-what-are-adapters 301