diff --git a/.github/workflows/autoupdate.yml b/.github/workflows/autoupdate.yml index 4105ec6b902..f26abcb6802 100644 --- a/.github/workflows/autoupdate.yml +++ b/.github/workflows/autoupdate.yml @@ -2,11 +2,11 @@ name: Auto Update on: # This will trigger on all pushes to all branches. - push: {} +# push: {} # Alternatively, you can only trigger if commits are pushed to certain branches, e.g.: - # push: - # branches: - # - current + push: + branches: + - current # - unstable jobs: autoupdate: diff --git a/website/dbt-versions.js b/website/dbt-versions.js index d80521b19d5..01d1bf5d128 100644 --- a/website/dbt-versions.js +++ b/website/dbt-versions.js @@ -1,7 +1,7 @@ exports.versions = [ { version: "1.6", - EOLDate: "2024-07-20", // placeholder - need to confirm the final date + EOLDate: "2024-07-31", isPrerelease: true }, { @@ -31,6 +31,10 @@ exports.versions = [ ] exports.versionedPages = [ + { + "page": "reference/commands/clone", + "firstVersion": "1.6", + }, { "page": "docs/collaborate/govern/project-dependencies", "firstVersion": "1.6", @@ -55,7 +59,7 @@ exports.versionedPages = [ "page": "docs/collaborate/govern/model-contracts", "firstVersion": "1.5", }, - { + { "page": "reference/commands/show", "firstVersion": "1.5", }, diff --git a/website/docs/docs/build/about-metricflow.md b/website/docs/docs/build/about-metricflow.md index 6ec7ecfe4b5..5a42fcd7b3e 100644 --- a/website/docs/docs/build/about-metricflow.md +++ b/website/docs/docs/build/about-metricflow.md @@ -60,6 +60,7 @@ Metrics, which is a key concept, are functions that combine measures, constraint MetricFlow supports different metric types: +- [Cumulative](/docs/build/cumulative) — Aggregates a measure over a given window. - [Derived](/docs/build/derived) — An expression of other metrics, which allows you to do calculations on top of metrics. - [Ratio](/docs/build/ratio) — Create a ratio out of two measures, like revenue per customer. - [Simple](/docs/build/simple) — Metrics that refer directly to one measure. diff --git a/website/docs/docs/build/cumulative-metrics.md b/website/docs/docs/build/cumulative-metrics.md index 77d23d32dce..efdde600635 100644 --- a/website/docs/docs/build/cumulative-metrics.md +++ b/website/docs/docs/build/cumulative-metrics.md @@ -8,6 +8,12 @@ tags: [Metrics, Semantic Layer] Cumulative metrics aggregate a measure over a given window. If no window is specified, the window is considered infinite and accumulates values over all time. +:::info MetricFlow time spine required + +You will need to create the [time spine model](/docs/build/metricflow-time-spine) before you add cumulative metrics. + +::: + ```yaml # Cumulative metrics aggregate a measure over a given window. The window is considered infinite if no window parameter is passed (accumulate the measure over all time) metrics: @@ -24,7 +30,7 @@ metrics: ### Window options -This section details examples for when you specify and don't specify window options. +This section details examples of when you specify and don't specify window options. @@ -56,7 +62,7 @@ metrics: window: 7 days ``` -From the sample yaml above, note the following: +From the sample YAML above, note the following: * `type`: Specify cumulative to indicate the type of metric. * `type_params`: Specify the measure you want to aggregate as a cumulative metric. You have the option of specifying a `window`, or a `grain to date`. 
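As the MetricFlow time spine callout above notes, cumulative metrics depend on a time spine model existing in your project. The sketch below shows one minimal way to build it, assuming the `dbt_utils` package is installed and a daily grain is sufficient; see the linked time spine docs for the canonical example, and adjust the date bounds to your data.

```sql
-- metricflow_time_spine.sql (illustrative sketch only; the date bounds are placeholders)
{{ config(materialized='table') }}

with days as (
    -- dbt_utils.date_spine generates one row per day between the two bounds
    {{ dbt_utils.date_spine('day', "cast('2020-01-01' as date)", "cast('2030-01-01' as date)") }}
)

select cast(date_day as date) as date_day
from days
```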
@@ -142,7 +148,7 @@ metrics: ```yaml metrics: name: revenue_monthly_grain_to_date #For this metric, we use a monthly grain to date - description: Monthly revenue using a grain to date of 1 month (think of this as a monthly resetting point) + description: Monthly revenue using grain to date of 1 month (think of this as a monthly resetting point) type: cumulative type_params: measures: diff --git a/website/docs/docs/build/incremental-models.md b/website/docs/docs/build/incremental-models.md index 28345ba1873..29c7c8c585f 100644 --- a/website/docs/docs/build/incremental-models.md +++ b/website/docs/docs/build/incremental-models.md @@ -57,6 +57,7 @@ from raw_app_data.events {% if is_incremental() %} -- this filter will only be applied on an incremental run + -- (uses > to include records whose timestamp occurred since the last run of this model) where event_time > (select max(event_time) from {{ this }}) {% endif %} @@ -137,6 +138,7 @@ from raw_app_data.events {% if is_incremental() %} -- this filter will only be applied on an incremental run + -- (uses >= to include records arriving later on the same day as the last run of this model) where date_day >= (select max(date_day) from {{ this }}) {% endif %} diff --git a/website/docs/docs/build/metrics-overview.md b/website/docs/docs/build/metrics-overview.md index e7271ecf417..351c674ca8a 100644 --- a/website/docs/docs/build/metrics-overview.md +++ b/website/docs/docs/build/metrics-overview.md @@ -25,10 +25,10 @@ This page explains the different supported metric types you can add to your dbt - [Ratio](#ratio-metrics) — Create a ratio out of two measures. --> - ### Derived metrics [Derived metrics](/docs/build/derived) are defined as an expression of other metrics. Derived metrics allow you to do calculations on top of metrics. @@ -145,7 +144,9 @@ You can set more metadata for your metrics, which can be used by other tools lat ## Related docs - [Semantic models](/docs/build/semantic-models) +- [Cumulative](/docs/build/cumulative) - [Derived](/docs/build/derived) + diff --git a/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md b/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md index 1cfa9a49286..a206d359270 100644 --- a/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md +++ b/website/docs/docs/cloud/manage-access/set-up-sso-google-workspace.md @@ -49,7 +49,7 @@ Client Secret for use in dbt Cloud. | **Application type** | internal | required | | **Application name** | dbt Cloud | required | | **Application logo** | Download the logo here | optional | -| **Authorized domains** | `getdbt.com` | If deploying into a VPC, use the domain for your deployment | +| **Authorized domains** | `getdbt.com` (US) `dbt.com` (EMEA or AU) | If deploying into a VPC, use the domain for your deployment | | **Scopes** | `email, profile, openid` | The default scopes are sufficient | diff --git a/website/docs/docs/collaborate/govern/model-versions.md b/website/docs/docs/collaborate/govern/model-versions.md index b38ed13289d..12599d0b65f 100644 --- a/website/docs/docs/collaborate/govern/model-versions.md +++ b/website/docs/docs/collaborate/govern/model-versions.md @@ -37,7 +37,7 @@ Instead, for mature models at larger organizations, powering queries inside & ou During that migration window, anywhere that model is being used downstream, it can continue to be referenced at a specific version. 
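For example, a downstream model can keep working through the migration window by pinning its reference to the older version. A sketch, assuming a versioned model named `dim_customers` whose latest version is 2:

```sql
-- Pinned reference: keeps resolving to version 1 while the consumer migrates.
-- An unpinned ref('dim_customers') would resolve to the latest version instead.
select * from {{ ref('dim_customers', v=1) }}
```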
-In the future, dbt will also offer first-class support for **deprecating models** ([dbt-core#7433](https://github.com/dbt-labs/dbt-core/issues/7433)). Taken together, model versions and deprecation offer a pathway for model producers to _sunset_ old models, and consumers the time to _migrate_ across breaking changes. It's a way of managing change across an organization: develop a new version, bump the latest, slate the old version for deprecation, update downstream references, and then remove the old version. +dbt Core 1.6 introduced first-class support for **deprecating models** by specifying a [`deprecation_date`](/reference/resource-properties/deprecation_date). Taken together, model versions and deprecation offer a pathway for model producers to _sunset_ old models, and consumers the time to _migrate_ across breaking changes. It's a way of managing change across an organization: develop a new version, bump the latest, slate the old version for deprecation, update downstream references, and then remove the old version. There is a real trade-off that exists here—the cost to frequently migrate downstream code, and the cost (and clutter) of materializing multiple versions of a model in the data warehouse. Model versions do not make that problem go away, but by setting a deprecation date, and communicating a clear window for consumers to gracefully migrate off old versions, they put a known boundary on the cost of that migration. @@ -73,7 +73,7 @@ As the **producer** of a versioned model: - You keep track of all live versions in one place, rather than scattering them throughout the codebase - You can reuse the model's configuration, and highlight just the diffs between versions - You can select models to build (or not) based on whether they're a `latest`, `prerelease`, or `old` version -- dbt will notify consumers of your versioned model when new versions become available, or (in the future) when they are slated for deprecation +- dbt will notify consumers of your versioned model when new versions become available, or when they are slated for deprecation As the **consumer** of a versioned model: - You use a consistent `ref`, with the option of pinning to a specific live version @@ -109,7 +109,7 @@ selectors: -Because dbt knows that these models are _actually the same model_, it can notify downstream consumers as new versions become available, and (in the future) as older versions are slated for deprecation. +Because dbt knows that these models are _actually the same model_, it can notify downstream consumers as new versions become available, and as older versions are slated for deprecation. ```bash Found an unpinned reference to versioned model 'dim_customers'. diff --git a/website/docs/docs/collaborate/govern/project-dependencies.md b/website/docs/docs/collaborate/govern/project-dependencies.md index 3469ec1a7b6..158c405e4a7 100644 --- a/website/docs/docs/collaborate/govern/project-dependencies.md +++ b/website/docs/docs/collaborate/govern/project-dependencies.md @@ -5,8 +5,8 @@ sidebar_label: "Project dependencies" description: "Reference public models across dbt projects" --- -:::info -"Project" dependencies and cross-project `ref` is currently in closed beta and are features of dbt Cloud Enterprise. To access these features, please contact your account team. +:::caution Closed Beta - dbt Cloud Enterprise +"Project" dependencies and cross-project `ref` are features of dbt Cloud Enterprise, currently in Closed Beta. 
To access these features while they are in beta, please contact your account team at dbt Labs. ::: For a long time, dbt has supported code reuse and extension by installing other projects as [packages](/docs/build/packages). When you install another project as a package, you are pulling in its full source code, and adding it to your own. This enables you to call macros and run models defined in that other project. diff --git a/website/docs/docs/dbt-cloud-apis/admin-cloud-api.md b/website/docs/docs/dbt-cloud-apis/admin-cloud-api.md index 62b13f7aeb5..8a5712f40df 100644 --- a/website/docs/docs/dbt-cloud-apis/admin-cloud-api.md +++ b/website/docs/docs/dbt-cloud-apis/admin-cloud-api.md @@ -10,7 +10,7 @@ The dbt Cloud Administrative API is enabled by default for [Team and Enterprise - Manage your dbt Cloud account - and more -Check out our dbt Cloud Admin API docs to help you access the API: +dbt Cloud currently supports two versions of the Administrative API: v2 and v3. In general, v3 is the recommended version to use, but we don't yet have all our v2 routes upgraded to v3. We're currently working on this. If you can't find something in our v3 docs, check out the shorter list of v2 endpoints because you might find it there.
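For example, listing the accounts a token can access through the v2 API looks roughly like the sketch below, assuming a US multi-tenant host and a service token exported as `DBT_CLOUD_API_TOKEN` (adjust the host, version, and endpoint for your deployment):

```bash
# List accounts via the Administrative API (v2)
curl --request GET \
  --url "https://cloud.getdbt.com/api/v2/accounts/" \
  --header "Authorization: Token $DBT_CLOUD_API_TOKEN" \
  --header "Content-Type: application/json"
```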
diff --git a/website/docs/docs/dbt-cloud-apis/service-tokens.md b/website/docs/docs/dbt-cloud-apis/service-tokens.md index 139eff8fd07..811bfaea29d 100644 --- a/website/docs/docs/dbt-cloud-apis/service-tokens.md +++ b/website/docs/docs/dbt-cloud-apis/service-tokens.md @@ -3,6 +3,11 @@ title: "Service account tokens" id: "service-tokens" description: "Service account tokens help you define permissions for securing access to your dbt Cloud account and its projects." --- +:::info Important service account token update + +If you have service tokens created on or before July 18, 2023, please read [this important update](/docs/dbt-cloud-apis/service-tokens#service-token-update). + +::: ## About service tokens @@ -92,3 +97,17 @@ Analyst admin service tokens have all the permissions listed in [Analyst](/docs/ **Stakeholder**
Stakeholder service tokens have all the permissions listed in [Stakeholder](/docs/cloud/manage-access/enterprise-permissions#stakeholder) on the Enterprise Permissions page. + + +## Service token update + +On July 18, 2023, dbt Labs made critical infrastructure changes to service account tokens. These enhancements improve the security and performance of all tokens created after July 18, 2023. To ensure security best practices are in place, we recommend you rotate your service tokens created before this date. + +To rotate your token: +1. Navigate to **Account settings** and click **Service tokens** on the left side pane. +2. Verify the **Created** date for the token is _on or before_ July 18, 2023. +3. Click **+ New Token** on the top right side of the screen. Ensure the new token has the same permissions as the old one. +4. Copy the new token and replace the old one in your systems. Store it in a safe place, as it will not be available again once the creation screen is closed. +5. Delete the old token in dbt Cloud by clicking the **trash can icon**. _Only take this action after the new token is in place to avoid service disruptions_. + diff --git a/website/docs/guides/migration/versions/01-upgrading-to-v1.6.md b/website/docs/guides/migration/versions/01-upgrading-to-v1.6.md index 52850239a53..cb1e9af603d 100644 --- a/website/docs/guides/migration/versions/01-upgrading-to-v1.6.md +++ b/website/docs/guides/migration/versions/01-upgrading-to-v1.6.md @@ -1,17 +1,19 @@ --- -title: "Upgrading to v1.6 (beta)" +title: "Upgrading to v1.6 (prerelease)" description: New features and changes in dbt Core v1.6 --- -:::warning Beta Functionality +:::warning Prerelease -dbt Core v1.6 is in beta, and the features and functionality on this page are subject to change. +dbt Core v1.6 is available as a release candidate. [Final release is planned for July 31.](https://github.com/dbt-labs/dbt-core/issues/7990) + +Test it out, and [let us know](https://github.com/dbt-labs/dbt-core/issues/new/choose) if you run into any issues! ::: ## Resources -- [Changelog](https://github.com/dbt-labs/dbt-core/blob/main/CHANGELOG.md) +- [Changelog](https://github.com/dbt-labs/dbt-core/blob/1.6.latest/CHANGELOG.md) - [CLI Installation guide](/docs/core/installation) - [Cloud upgrade guide](/docs/dbt-versions/upgrade-core-in-cloud) - [Release schedule](https://github.com/dbt-labs/dbt-core/issues/7481) @@ -22,24 +24,58 @@ dbt Labs is committed to providing backward compatibility for all versions 1.x, ### Behavior changes -**Coming soon** +- dbt Core v1.6 does not support Python 3.7, which reached end of life on June 23. Supported Python versions are 3.8, 3.9, 3.10, and 3.11. +- As part of the Semantic Layer re-launch (in beta), the spec for `metrics` has changed significantly. Migration guide coming soon: https://github.com/dbt-labs/docs.getdbt.com/pull/3705 +- Manifest schema version is now v10, reflecting [TODO] changes + +### For consumers of dbt artifacts (metadata) + +The [manifest](/reference/artifacts/manifest-json) schema version has updated to `v10`.
Specific changes: +- Addition of `semantic_models` and changes to `metrics` attributes +- Addition of `deprecation_date` as a model property +- Addition of `on_configuration_change` as default node configuration (to support materialized views) +- Small type changes to `contracts` and `constraints` +- Manifest `metadata` includes `project_name` + +### For maintainers of adapter plugins +For more detailed information and to ask questions, please read and comment on the GH discussion: [dbt-labs/dbt-core#7958](https://github.com/dbt-labs/dbt-core/discussions/7958). ## New and changed documentation -[`dbt retry`](/reference/commands/retry) is a new command that executes the previously run command from the point of failure. This convenient command enables you to continue a failed command without rebuilding all upstream dependencies. +### Materialized views -**Materialized view** support (for model and project configs) has been added for three data warehouses: - - [Bigquery](/reference/resource-configs/bigquery-configs#materialized-view) - - [Postgres](/reference/resource-configs/postgres-configs#materialized-view) - - [Redshift](/reference/resource-configs/redshift-configs#materialized-view) +Supported on: +- [Postgres](/reference/resource-configs/postgres-configs#materialized-view) +- [Redshift](/reference/resource-configs/redshift-configs#materialized-view) +- Snowflake (docs forthcoming) -[**Namespacing:**](/faqs/Models/unique-model-names) Model names can be duplicated across different namespaces (packages/projects), so long as they are unique within each package/project. We strongly encourage using [two-argument `ref`](/reference/dbt-jinja-functions/ref#two-argument-variant) when referencing a model from a different package/project. +Support for BigQuery and Databricks forthcoming. -[**Project dependencies**](/docs/collaborate/govern/project-dependencies): Introduces `dependencies.yml` and dependent `projects` as a feature of dbt Cloud Enterprise. Allows enforcing model access (public vs. protected/private) across project/package boundaries. Enables cross-project `ref` of public models, without requiring the installation of upstream source code. +### New commands for mature deployment -### Quick hits +[`dbt retry`](/reference/commands/retry) executes the previously run command from the point of failure. Rebuild just the nodes that errored or skipped in a previous run/build/test, rather than starting over from scratch. + +[`dbt clone`](/reference/commands/clone) leverages each data platform's functionality for creating lightweight copies of dbt models from one environment into another. Useful when quickly spinning up a new development environment, or promoting specific models from a staging environment into production. + +### Multi-project collaboration -More consistency and flexibility around packages! Resources defined in a package will respect variable and global macro definitions within the scope of that package. +[**Deprecation date**](/reference/resource-properties/deprecation_date): Models can declare a deprecation date that will warn model producers and downstream consumers. This enables clear migration windows for versioned models, and provides a mechanism to facilitate removal of immature or little-used models, helping to avoid project bloat. + +[Model names](/faqs/Models/unique-model-names) can be duplicated across different namespaces (projects/packages), so long as they are unique within each project/package. 
We strongly encourage using [two-argument `ref`](/reference/dbt-jinja-functions/ref#two-argument-variant) when referencing a model from a different package/project. + +More consistency and flexibility around packages. Resources defined in a package will respect variable and global macro definitions within the scope of that package. - `vars` defined in a package's `dbt_project.yml` are now available in the resolution order when compiling nodes in that package, though CLI `--vars` and the root project's `vars` will still take precedence. See ["Variable Precedence"](/docs/build/project-variables#variable-precedence) for details. - `generate_x_name` macros (defining custom rules for database, schema, alias naming) follow the same pattern as other "global" macros for package-scoped overrides. See [macro dispatch](/reference/dbt-jinja-functions/dispatch) for an overview of the patterns that are possible. + +:::caution Closed Beta - dbt Cloud Enterprise +[**Project dependencies**](/docs/collaborate/govern/project-dependencies): Introduces `dependencies.yml` and dependent `projects` as a feature of dbt Cloud Enterprise. Allows enforcing model access (public vs. protected/private) across project/package boundaries. Enables cross-project `ref` of public models, without requiring the installation of upstream source code. +::: + +### Quick hits + +- [`state:unmodified` and `state:old`](/reference/node-selection/methods#the-state-method) for [MECE](https://en.wikipedia.org/wiki/MECE_principle) stateful selection +- [`invocation_args_dict`](/reference/dbt-jinja-functions/flags#invocation_args_dict) includes full `invocation_command` as string +- [`dbt debug --connection`](/reference/commands/debug) to test just the data platform connection specified in a profile +- [`dbt docs generate --empty-catalog`](/reference/commands/cmd-docs) to skip catalog population while generating docs +- [`--defer-state`](/reference/node-selection/defer) enables more-granular control diff --git a/website/docs/reference/artifacts/dbt-artifacts.md b/website/docs/reference/artifacts/dbt-artifacts.md index 2fbcc329484..b20c1548d99 100644 --- a/website/docs/reference/artifacts/dbt-artifacts.md +++ b/website/docs/reference/artifacts/dbt-artifacts.md @@ -39,6 +39,7 @@ All artifacts produced by dbt include a `metadata` dictionary with these propert In the manifest, the `metadata` may also include: - `send_anonymous_usage_stats`: Whether this invocation sent [anonymous usage statistics](/reference/global-configs/usage-stats) while executing. +- `project_name`: The `name` defined in the root project's `dbt_project.yml`. (Added in manifest v10 / dbt Core v1.6) - `project_id`: Project identifier, hashed from `project_name`, sent with anonymous usage stats if enabled. - `user_id`: User identifier, stored by default in `~/dbt/.user.yml`, sent with anonymous usage stats if enabled. 
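As an illustration, artifact consumers can read the new field straight out of a compiled manifest. A small sketch, assuming `jq` is installed and the project has been compiled so `target/manifest.json` exists:

```bash
# Print the root project name and the manifest schema version from the metadata block
jq '.metadata | {project_name, dbt_schema_version}' target/manifest.json
```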
diff --git a/website/docs/reference/artifacts/manifest-json.md b/website/docs/reference/artifacts/manifest-json.md index c71c073d842..3a916ed6d4c 100644 --- a/website/docs/reference/artifacts/manifest-json.md +++ b/website/docs/reference/artifacts/manifest-json.md @@ -2,53 +2,18 @@ title: "Manifest JSON file" sidebar_label: "Manifest" --- - -**dbt Core v1.5 produces schema**: [`v9`](https://schemas.getdbt.com/dbt/manifest/v9/index.html) - - - - - -**dbt Core v1.4 produces schema**: [`v8`](https://schemas.getdbt.com/dbt/manifest/v8/index.html) - - - - - -**dbt Core v1.3 produces schema**: [`v7`](https://schemas.getdbt.com/dbt/manifest/v7/index.html) - - - - - -**dbt Core v1.2 produces schema**: [`v6`](https://schemas.getdbt.com/dbt/manifest/v6/index.html) - - - - - -**dbt Core v1.1 produces schema**: [`v5`](https://schemas.getdbt.com/dbt/manifest/v5/index.html) - - - - - -**dbt Core v1.0 produces schema**: [`v4`](https://schemas.getdbt.com/dbt/manifest/v4/index.html) - - - - - -**Produced by:** [`build`](/reference/commands/build) [`compile`](/reference/commands/compile) [`docs generate`](/reference/commands/cmd-docs) [`list`](/reference/commands/list) [`seed`](/reference/commands/seed) [`snapshot`](/reference/commands/snapshot) [`source freshness`](/reference/commands/source) [`test`](/reference/commands/test) [`run`](/reference/commands/run) [`run-operation`](/reference/commands/run-operation) - - - - - -**Produced by:** [`build`](commands/build) [`compile`](commands/compile) [`docs generate`](commands/cmd-docs) [`list`](commands/list) [`parse`](commands/parse) [`run`](commands/run) [`run-operation`](commands/run-operation) [`seed`](commands/seed) [`show`](commands/show) [`snapshot`](commands/snapshot) [`source freshness`](commands/source) [`test`](commands/test) - - +| dbt Core version | Manifest version | +|------------------|---------------------------------------------------------------| +| v1.6 | [v10](https://schemas.getdbt.com/dbt/manifest/v10/index.html) | +| v1.5 | [v9](https://schemas.getdbt.com/dbt/manifest/v9/index.html) | +| v1.4 | [v8](https://schemas.getdbt.com/dbt/manifest/v8/index.html) | +| v1.3 | [v7](https://schemas.getdbt.com/dbt/manifest/v7/index.html) | +| v1.2 | [v6](https://schemas.getdbt.com/dbt/manifest/v6/index.html) | +| v1.1 | [v5](https://schemas.getdbt.com/dbt/manifest/v5/index.html) | +| v1.0 | [v4](https://schemas.getdbt.com/dbt/manifest/v4/index.html) | + +**Produced by:** Any command that parses your project. This includes all commands **except** [`deps`](/reference/commands/deps), [`clean`](/reference/commands/clean), [`debug`](/reference/commands/debug), [`init`](/reference/commands/init) This single file contains a full representation of your dbt project's resources (models, tests, macros, etc), including all node configurations and resource properties. Even if you're only running some models or tests, all resources will appear in the manifest (unless they are disabled) with most of their properties. (A few node properties, such as `compiled_sql`, only appear for executed nodes.) diff --git a/website/docs/reference/commands/clone.md b/website/docs/reference/commands/clone.md new file mode 100644 index 00000000000..32c8a89be04 --- /dev/null +++ b/website/docs/reference/commands/clone.md @@ -0,0 +1,39 @@ +--- +title: "About dbt clone command" +sidebar_label: "clone" +id: "clone" +--- + +The `dbt clone` command clones selected nodes from the [specified state](/reference/node-selection/syntax#establishing-state) to the target schema(s). 
This command makes use of the `clone` materialization: +- If your data platform supports zero-copy cloning of tables, and this model exists as a table in the source environment, dbt will create it in your target environment as a clone +- Otherwise, dbt will create a simple pointer view (`select * from` the source object) +- By default, `dbt clone` will not recreate pre-existing relations in the current target. To override this, use the `--full-refresh` flag. +- You may want to specify a higher number of [threads](/docs/running-a-dbt-project/using-threads) to decrease execution time since individual clone statements are independent of one another. + +The `clone` command is useful for: +- blue/green continuous deployment (on data warehouses that support zero-copy cloning tables) +- cloning current production state into development schema(s) +- handling incremental models in Slim CI dbt Cloud jobs (on data warehouses that support zero-copy cloning tables) +- testing code changes on downstream dependencies in your BI tool + +```bash +# clone all of my models from specified state to my target schema(s) +dbt clone --state path/to/artifacts + +# clone one_specific_model of my models from specified state to my target schema(s) +dbt clone --select one_specific_model --state path/to/artifacts + +# clone all of my models from specified state to my target schema(s) and recreate all pre-existing relations in the current target +dbt clone --state path/to/artifacts --full-refresh + +# clone all of my models from specified state to my target schema(s), running up to 50 clone statements in parallel +dbt clone --state path/to/artifacts --threads 50 +``` + +### When to use `dbt clone` instead of [deferral](/reference/node-selection/defer)? + +Unlike deferral, `dbt clone` requires some compute and creation of additional objects in your data warehouse. In many cases, deferral is a cheaper and simpler alternative to `dbt clone`. However, `dbt clone` covers additional use cases where deferral may not be possible. + +For example, by creating actual data warehouse objects, `dbt clone` allows you to test out your code changes on downstream dependencies _outside of dbt_ (such as a BI tool). + +As another example, you could `clone` your modified incremental models as the first step of your dbt Cloud CI job to prevent costly `full-refresh` builds for warehouses that support zero-copy cloning. diff --git a/website/docs/reference/commands/cmd-docs.md b/website/docs/reference/commands/cmd-docs.md index e96f825d2d9..754c5e93baf 100644 --- a/website/docs/reference/commands/cmd-docs.md +++ b/website/docs/reference/commands/cmd-docs.md @@ -10,22 +10,35 @@ id: "cmd-docs" The command is responsible for generating your project's documentation website by -1. copying the website `index.html` file into the `target/` directory -2. compiling the project to `target/manifest.json` -3. producing the `target/catalog.json` file, which contains metadata about the tables and views produced by the models in your project. +1. Copying the website `index.html` file into the `target/` directory +2. Compiling the resources in your project, so that their `compiled_code` will be included in [`manifest.json`](/reference/artifacts/manifest-json) +3. Running queries against database metadata to produce the [`catalog.json`](/reference/artifacts/catalog-json) file, which contains metadata about the tables and views produced by the models in your project. **Example**: ``` dbt docs generate ``` -Use the `--no-compile` argument to skip re-compilation. 
When this flag is provided, `dbt docs generate` will only execute steps (1) and (3), as described above. +Use the `--no-compile` argument to skip re-compilation. When this flag is provided, `dbt docs generate` will skip step (2) described above. **Example**: ``` dbt docs generate --no-compile ``` + + +Use the `--empty-catalog` argument to skip running the database queries to populate `catalog.json`. When this flag is provided, `dbt docs generate` will skip step (3) described above. + +This is not recommended for production environments, as it means that your documentation will be missing information gleaned from database metadata (the full set of columns in each table, and statistics about those tables). It can speed up `docs generate` in development, when you just want to visualize lineage and other information defined within your project. + +**Example**: +``` +dbt docs generate --empty-catalog +``` + + + ### dbt docs serve This command starts a webserver on port 8080 to serve your documentation locally and opens the documentation site in your default browser. The webserver is rooted in your `target/` directory. Be sure to run `dbt docs generate` before `dbt docs serve` because the `generate` command produces a [catalog metadata artifact](/reference/artifacts/catalog-json) that the `serve` command depends upon. You will see an error message if the catalog is missing. diff --git a/website/docs/reference/dbt-commands.md b/website/docs/reference/dbt-commands.md index 0ecac561766..5b37f13a3fb 100644 --- a/website/docs/reference/dbt-commands.md +++ b/website/docs/reference/dbt-commands.md @@ -20,6 +20,7 @@ Select the tabs that are relevant to the your development workflow. For example, Use the following dbt commands in the [dbt Cloud IDE](/docs/cloud/dbt-cloud-ide/develop-in-the-cloud) and use the `dbt` prefix. For example, to run the `test` command, type `dbt test`. - [build](/reference/commands/build): build and test all selected resources (models, seeds, snapshots, tests) +- [clone](/reference/commands/clone): clone selected nodes from specified state (requires dbt 1.6 or higher) - [compile](/reference/commands/compile): compiles (but does not run) the models in a project - [deps](/reference/commands/deps): downloads dependencies for a project - [docs](/reference/commands/cmd-docs) : generates documentation for a project @@ -40,6 +41,7 @@ Use the following dbt commands in the [CLI](/docs/core/about-the-cli) and use th - [build](/reference/commands/build): build and test all selected resources (models, seeds, snapshots, tests) - [clean](/reference/commands/clean): deletes artifacts present in the dbt project +- [clone](/reference/commands/clone): clone selected models from specified state (requires dbt 1.6 or higher) - [compile](/reference/commands/compile): compiles (but does not run) the models in a project - [debug](/reference/commands/debug): debugs dbt connections and projects - [deps](/reference/commands/deps): downloads dependencies for a project diff --git a/website/docs/reference/node-selection/defer.md b/website/docs/reference/node-selection/defer.md index a6ef6261cf1..6079e53793a 100644 --- a/website/docs/reference/node-selection/defer.md +++ b/website/docs/reference/node-selection/defer.md @@ -9,11 +9,18 @@ title: "Defer" -Deferral is a powerful, complex feature that enables compelling workflows. As the use cases for `--defer` evolve, dbt Labs might make enhancements to the feature, but commit to providing backward compatibility for supported versions of dbt Core. 
For details, see [dbt#5095](https://github.com/dbt-labs/dbt-core/discussions/5095). - Defer is a powerful feature that makes it possible to run a subset of models or tests in a [sandbox environment](/docs/environments-in-dbt) without having to first build their upstream parents. This can save time and computational resources when you want to test a small number of models in a large project. Defer requires that a manifest from a previous dbt invocation be passed to the `--state` flag or env var. Together with the `state:` selection method, these features enable "Slim CI". Read more about [state](/reference/node-selection/syntax#about-node-selection). + +An alternative command that accomplishes similar functionality for different use cases is `dbt clone`; see the [clone](/reference/commands/clone#when-to-use-dbt-clone-instead-of-deferral) docs for more information. + + + +It is possible to use separate state for `state:modified` and `--defer`, by passing paths to different manifests to each of the `--state`/`DBT_STATE` and `--defer-state`/`DBT_DEFER_STATE` flags. This enables more granular control in cases where you want to compare against logical state from one environment or past point in time, and defer to applied state from a different environment or point in time. If `--defer-state` is not specified, deferral will use the manifest supplied to `--state`. In most cases, you will want to use the same state for both: compare logical changes against production, and also "fail over" to the production environment for unbuilt upstream resources. + + + ### Usage ```shell diff --git a/website/docs/reference/node-selection/syntax.md b/website/docs/reference/node-selection/syntax.md index 87772262514..1a43a32e2bc 100644 --- a/website/docs/reference/node-selection/syntax.md +++ b/website/docs/reference/node-selection/syntax.md @@ -81,7 +81,7 @@ Note that when you're using `--selector`, most other flags (namely `--select` an -## About node selection +## Stateful selection One of the greatest underlying assumptions about dbt is that its operations should be **stateless** and **idempotent**. That is, it doesn't matter how many times a model has been run before, or if it has ever been run before. It doesn't matter if you run it once or a thousand times. Given the same raw data, you can expect the same transformed result. A given run of dbt doesn't need to "know" about _any other_ run; it just needs to know about the code in the project and the objects in your database as they exist _right now_. @@ -91,8 +91,9 @@ dbt can leverage artifacts from a prior invocation as long as their file path is - [The `state:` selector](/reference/node-selection/methods#the-state-method), whereby dbt can identify resources that are new or modified by comparing code in the current project against the state manifest. - [Deferring](/reference/node-selection/defer) to another environment, whereby dbt can identify upstream, unselected resources that don't exist in your current environment and instead "defer" their references to the environment provided by the state manifest. +- The [`dbt clone` command](/reference/commands/clone), whereby dbt can clone nodes based on their location in the manifest provided to the `--state` flag. -Together, these two features enable ["slim CI"](/guides/legacy/best-practices#run-only-modified-models-to-test-changes-slim-ci). We expect to add more features in future releases that can leverage artifacts passed to the `--state` flag.
+Together, the `state:` selector and deferral enable ["slim CI"](/guides/legacy/best-practices#run-only-modified-models-to-test-changes-slim-ci). We expect to add more features in future releases that can leverage artifacts passed to the `--state` flag. ### Establishing state @@ -105,7 +106,7 @@ State and defer can be set by environment variables as well as CLI flags: - + - `--state` or `DBT_STATE`: file path - `--defer` or `DBT_DEFER`: boolean @@ -118,6 +119,16 @@ In dbt v1.5, we deprecated the original syntax for state (`DBT_ARTIFACT_STATE_PA + + +- `--state` or `DBT_STATE`: file path +- `--defer` or `DBT_DEFER`: boolean +- `--defer-state` or `DBT_DEFER_STATE`: file path to use for deferral only (optional) + +If `--defer-state` is not specified, deferral will use the artifacts supplied by `--state`. This enables more granular control in cases where you want to compare against logical state from one environment or past point in time, and defer to applied state from a different environment or point in time. + + + If both the flag and env var are provided, the flag takes precedence. #### Notes: diff --git a/website/docs/reference/resource-properties/deprecation_date.md b/website/docs/reference/resource-properties/deprecation_date.md index 9fe9e2e1098..830412d2af6 100644 --- a/website/docs/reference/resource-properties/deprecation_date.md +++ b/website/docs/reference/resource-properties/deprecation_date.md @@ -28,5 +28,59 @@ models: ## Definition -The deprecation date of the model in YAML DateTime format. +The deprecation date of the model is formatted as a date, optionally with a timezone offset. Supported RFC 3339 formats include: +- `YYYY-MM-DD hh:mm:ss.sss±hh:mm` +- `YYYY-MM-DD hh:mm:ss.sss` +- `YYYY-MM-DD` +When `deprecation_date` does not include an offset from UTC, then it is interpreted as being in the system time zone of the dbt execution environment. + +## Explanation + +### Purpose + +Declaring a `deprecation_date` for a dbt model provides a mechanism to communicate plans and timelines for long-term support and maintenance and to facilitate change management. + +Setting a `deprecation_date` works well in conjunction with other [model governance](/docs/collaborate/govern/about-model-governance) features like [model versions](/docs/collaborate/govern/model-versions), but can also be used independently from them. + +### Warning messages + +When a project references a model that's slated for deprecation or the deprecation date has passed, a warning is generated. If it's a versioned model, with a newer version available, then the warning says so. This added bit of cross-team communication, from producers to consumers, is an advantage of using dbt's built-in functionality around model versions to facilitate migrations. 
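For instance, a producer of a versioned model might declare the deprecation date on the older version once a newer one is available. A sketch, assuming a hypothetical versioned model named `dim_customers`:

```yaml
models:
  - name: dim_customers
    latest_version: 2
    versions:
      - v: 2
      - v: 1
        # a future date: consumers still referencing v1 see an upcoming-deprecation warning
        deprecation_date: 2024-06-30
```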
+ +Additionally, [`WARN_ERROR_OPTIONS`](/reference/global-configs/warnings) gives users a mechanism to promote these warnings to actual runtime errors: + +| Warning | Scenario | Affected projects | +|--------------------------------|----------------------------------------------------|------------------------| +| `DeprecatedModel` | Parsing a project that defines a deprecated model | Producer | +| `DeprecatedReference` | Referencing a model with a past deprecation date | Producer and consumers | +| `UpcomingDeprecationReference` | Referencing a model with a future deprecation date | Producer and consumers | + +**Example** + +Example output for an `UpcomingDeprecationReference` warning: +``` +$ dbt parse +15:48:14 Running with dbt=1.6.0 +15:48:14 Registered adapter: postgres=1.6.0 +15:48:14 [WARNING]: While compiling 'my_model_ref': Found a reference to my_model, which is slated for deprecation on '2038-01-19T03:14:07-00:00'. +``` + +### Selection syntax + +There is no specific [node selection syntax](/reference/node-selection/syntax) for `deprecation_date`. [Programmatic invocations](/reference/programmatic-invocations) are one way to identify deprecated models (potentially in conjunction with [dbt list](/reference/commands/list)). For example: `dbt -q ls --output json --output-keys database schema alias deprecation_date`. + +### Deprecation process + +Additional steps are necessary to save on build-related compute and storage costs for a deprecated model. + +Deprecated models can continue to be built by producers and be selected by consumers until they are [disabled](/reference/resource-configs/enabled) or removed. + +Just as dbt does not automatically [drop relations when models are deleted](/faqs/models/removing-deleted-models), it does not drop relations for deprecated models. + +Strategies like [this one](https://discourse.getdbt.com/t/faq-cleaning-up-removed-models-from-your-production-schema/113) or [this one](https://discourse.getdbt.com/t/clean-your-warehouse-of-old-and-deprecated-models/1547) can be used to drop relations that have been deprecated and are no longer in use. + +### Table expiration on BigQuery + +dbt-bigquery can set an [`hours_to_expiration`](/reference/resource-configs/bigquery-configs#controlling-table-expiration) that translates to `expiration_timestamp` within BigQuery. + +dbt does not automatically synchronize `deprecation_date` and `hours_to_expiration`, but users may want to coordinate them in some fashion (such as setting a model to expire 48 hours after its `deprecation_date`). Expired tables in BigQuery will be deleted and their storage reclaimed.
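One way to coordinate the two, sketched here for a hypothetical `legacy_orders` model on dbt-bigquery, is to pair the deprecation date with a short expiration window so the table lapses soon after it stops being rebuilt:

```yaml
models:
  - name: legacy_orders
    deprecation_date: 2023-12-01
    config:
      # BigQuery expires the table this many hours after it was last built as a table;
      # once scheduled builds stop around the deprecation date, storage is reclaimed automatically
      hours_to_expiration: 48
```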
diff --git a/website/sidebars.js b/website/sidebars.js index e10ebd513c2..1a1b33ab1db 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -275,6 +275,7 @@ const sidebarSettings = { label: "Metrics", link: { type: "doc", id: "docs/build/metrics-overview"}, items: [ + "docs/build/cumulative", "docs/build/derived", "docs/build/ratio", "docs/build/simple", @@ -735,6 +736,7 @@ const sidebarSettings = { items: [ "reference/commands/build", "reference/commands/clean", + "reference/commands/clone", "reference/commands/cmd-docs", "reference/commands/compile", "reference/commands/debug", diff --git a/website/static/img/docs/dbt-cloud/access-control/google-enable.png b/website/static/img/docs/dbt-cloud/access-control/google-enable.png index 9de449b7944..f87858ff751 100644 Binary files a/website/static/img/docs/dbt-cloud/access-control/google-enable.png and b/website/static/img/docs/dbt-cloud/access-control/google-enable.png differ diff --git a/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/service-token-date.png b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/service-token-date.png new file mode 100644 index 00000000000..8f35eba639b Binary files /dev/null and b/website/static/img/docs/dbt-cloud/cloud-configuring-dbt-cloud/service-token-date.png differ