From b590045b9fdc1abb63cdf5b6fd55d39f5d2c9c36 Mon Sep 17 00:00:00 2001 From: Michelle Ark Date: Thu, 26 Sep 2024 16:03:40 +0100 Subject: [PATCH] [state:modified] persist unrendered_config from schema.yml, and more reliably compute unrendered_config from .sql files (#10487) --- .../unreleased/Fixes-20240925-131028.yaml | 7 + core/dbt/artifacts/resources/v1/components.py | 3 + core/dbt/clients/jinja_static.py | 54 +++++++ core/dbt/context/context_config.py | 20 ++- core/dbt/context/providers.py | 10 ++ core/dbt/contracts/files.py | 36 +++++ core/dbt/contracts/project.py | 4 +- core/dbt/parser/base.py | 18 +++ core/dbt/parser/common.py | 20 +++ core/dbt/parser/partial.py | 1 + core/dbt/parser/read_files.py | 3 +- core/dbt/parser/schemas.py | 49 +++--- .../configs/test_configs_in_schema_files.py | 70 ++++++++- tests/functional/defer_state/fixtures.py | 63 ++++++++ .../test_modified_state_environment_vars.py | 108 +++++++++++++ .../defer_state/test_modified_state_jinja.py | 145 ++++++++++++++++++ .../defer_state/test_modified_state_vars.py | 103 +++++++++++++ tests/unit/clients/test_jinja_static.py | 41 +++++ tests/unit/contracts/graph/test_manifest.py | 1 + tests/unit/contracts/graph/test_nodes.py | 4 + .../unit/contracts/graph/test_nodes_parsed.py | 11 ++ tests/unit/parser/test_parser.py | 16 +- 22 files changed, 756 insertions(+), 31 deletions(-) create mode 100644 .changes/unreleased/Fixes-20240925-131028.yaml create mode 100644 tests/functional/defer_state/test_modified_state_environment_vars.py create mode 100644 tests/functional/defer_state/test_modified_state_jinja.py create mode 100644 tests/functional/defer_state/test_modified_state_vars.py diff --git a/.changes/unreleased/Fixes-20240925-131028.yaml b/.changes/unreleased/Fixes-20240925-131028.yaml new file mode 100644 index 00000000000..2e36f340f1c --- /dev/null +++ b/.changes/unreleased/Fixes-20240925-131028.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: Ignore rendered jinja in configs for state:modified, behind state_modified_compare_more_unrendered_values + behaviour flag +time: 2024-09-25T13:10:28.490042+01:00 +custom: + Author: michelleark + Issue: "9564" diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index fc6f44a38f0..8eb43f35d8e 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -194,6 +194,7 @@ class ParsedResource(ParsedResourceMandatory): unrendered_config: Dict[str, Any] = field(default_factory=dict) created_at: float = field(default_factory=lambda: time.time()) config_call_dict: Dict[str, Any] = field(default_factory=dict) + unrendered_config_call_dict: Dict[str, Any] = field(default_factory=dict) relation_name: Optional[str] = None raw_code: str = "" @@ -201,6 +202,8 @@ def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None): dct = super().__post_serialize__(dct, context) if context and context.get("artifact") and "config_call_dict" in dct: del dct["config_call_dict"] + if context and context.get("artifact") and "unrendered_config_call_dict" in dct: + del dct["unrendered_config_call_dict"] return dct diff --git a/core/dbt/clients/jinja_static.py b/core/dbt/clients/jinja_static.py index d8746a7607d..3d32b3603c9 100644 --- a/core/dbt/clients/jinja_static.py +++ b/core/dbt/clients/jinja_static.py @@ -191,3 +191,57 @@ def statically_parse_ref_or_source(expression: str) -> Union[RefArgs, List[str]] raise ParsingError(f"Invalid ref or source expression: {expression}") return ref_or_source + + +def statically_parse_unrendered_config(string: str) -> Optional[Dict[str, Any]]: + """ + Given a string with jinja, extract an unrendered config call. + If no config call is present, returns None. + + For example, given: + "{{ config(materialized=env_var('DBT_TEST_STATE_MODIFIED')) }}\nselect 1 as id" + returns: {'materialized': "Keyword(key='materialized', value=Call(node=Name(name='env_var', ctx='load'), args=[Const(value='DBT_TEST_STATE_MODIFIED')], kwargs=[], dyn_args=None, dyn_kwargs=None))"} + + No config call: + "select 1 as id" + returns: None + """ + # Return early to avoid creating jinja environemt if no config call in input string + if "config(" not in string: + return None + + # set 'capture_macros' to capture undefined + env = get_environment(None, capture_macros=True) + + global _TESTING_MACRO_CACHE + if test_caching_enabled() and _TESTING_MACRO_CACHE and string in _TESTING_MACRO_CACHE: + parsed = _TESTING_MACRO_CACHE.get(string, None) + func_calls = getattr(parsed, "_dbt_cached_calls") + else: + parsed = env.parse(string) + func_calls = tuple(parsed.find_all(jinja2.nodes.Call)) + + config_func_calls = list( + filter( + lambda f: hasattr(f, "node") and hasattr(f.node, "name") and f.node.name == "config", + func_calls, + ) + ) + # There should only be one {{ config(...) }} call per input + config_func_call = config_func_calls[0] if config_func_calls else None + + if not config_func_call: + return None + + unrendered_config = {} + for kwarg in config_func_call.kwargs: + unrendered_config[kwarg.key] = construct_static_kwarg_value(kwarg) + + return unrendered_config + + +def construct_static_kwarg_value(kwarg): + # Instead of trying to re-assemble complex kwarg value, simply stringify the value + # This is still useful to be able to detect changes in unrendered configs, even if it is + # not an exact representation of the user input. + return str(kwarg) diff --git a/core/dbt/context/context_config.py b/core/dbt/context/context_config.py index caf83d425fe..a5b38bb415e 100644 --- a/core/dbt/context/context_config.py +++ b/core/dbt/context/context_config.py @@ -6,6 +6,7 @@ from dbt.adapters.factory import get_config_class_by_name from dbt.config import IsFQNResource, Project, RuntimeConfig from dbt.contracts.graph.model_config import get_config_for +from dbt.flags import get_flags from dbt.node_types import NodeType from dbt.utils import fqn_search from dbt_common.contracts.config.base import BaseConfig, merge_config_dicts @@ -286,6 +287,7 @@ def __init__( project_name: str, ) -> None: self._config_call_dict: Dict[str, Any] = {} + self._unrendered_config_call_dict: Dict[str, Any] = {} self._active_project = active_project self._fqn = fqn self._resource_type = resource_type @@ -295,6 +297,10 @@ def add_config_call(self, opts: Dict[str, Any]) -> None: dct = self._config_call_dict merge_config_dicts(dct, opts) + def add_unrendered_config_call(self, opts: Dict[str, Any]) -> None: + # Cannot perform complex merge behaviours on unrendered configs as they may not be appropriate types. + self._unrendered_config_call_dict.update(opts) + def build_config_dict( self, base: bool = False, @@ -305,12 +311,24 @@ def build_config_dict( if rendered: # TODO CT-211 src = ContextConfigGenerator(self._active_project) # type: ignore[var-annotated] + config_call_dict = self._config_call_dict else: # TODO CT-211 src = UnrenderedConfigGenerator(self._active_project) # type: ignore[assignment] + # preserve legacy behaviour - using unreliable (potentially rendered) _config_call_dict + if get_flags().state_modified_compare_more_unrendered_values is False: + config_call_dict = self._config_call_dict + else: + # Prefer _config_call_dict if it is available and _unrendered_config_call_dict is not, + # as _unrendered_config_call_dict is unreliable for non-sql nodes (e.g. no jinja config block rendered for python models, etc) + if self._config_call_dict and not self._unrendered_config_call_dict: + config_call_dict = self._config_call_dict + else: + config_call_dict = self._unrendered_config_call_dict + return src.calculate_node_config_dict( - config_call_dict=self._config_call_dict, + config_call_dict=config_call_dict, fqn=self._fqn, resource_type=self._resource_type, project_name=self._project_name, diff --git a/core/dbt/context/providers.py b/core/dbt/context/providers.py index e90a1f13c0c..dfc8c9bb40b 100644 --- a/core/dbt/context/providers.py +++ b/core/dbt/context/providers.py @@ -35,6 +35,7 @@ UnitTestMacroGenerator, get_rendered, ) +from dbt.clients.jinja_static import statically_parse_unrendered_config from dbt.config import IsFQNResource, Project, RuntimeConfig from dbt.constants import DEFAULT_ENV_PLACEHOLDER from dbt.context.base import Var, contextmember, contextproperty @@ -78,6 +79,7 @@ SecretEnvVarLocationError, TargetNotFoundError, ) +from dbt.flags import get_flags from dbt.materializations.incremental.microbatch import MicrobatchBuilder from dbt.node_types import ModelLanguage, NodeType from dbt.utils import MultiDict, args_to_dict @@ -395,6 +397,14 @@ def __call__(self, *args, **kwargs): # not call it! if self.context_config is None: raise DbtRuntimeError("At parse time, did not receive a context config") + + # Track unrendered opts to build parsed node unrendered_config later on + if get_flags().state_modified_compare_more_unrendered_values: + unrendered_config = statically_parse_unrendered_config(self.model.raw_code) + if unrendered_config: + self.context_config.add_unrendered_config_call(unrendered_config) + + # Use rendered opts to populate context_config self.context_config.add_config_call(opts) return "" diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py index 793451c0b00..04d7909133a 100644 --- a/core/dbt/contracts/files.py +++ b/core/dbt/contracts/files.py @@ -212,6 +212,7 @@ class SchemaSourceFile(BaseSourceFile): # created too, but those are in 'sources' sop: List[SourceKey] = field(default_factory=list) env_vars: Dict[str, Any] = field(default_factory=dict) + unrendered_configs: Dict[str, Any] = field(default_factory=dict) pp_dict: Optional[Dict[str, Any]] = None pp_test_index: Optional[Dict[str, Any]] = None @@ -317,6 +318,41 @@ def get_all_test_ids(self): test_ids.extend(self.data_tests[key][name]) return test_ids + def add_unrendered_config(self, unrendered_config, yaml_key, name, version=None): + versioned_name = f"{name}_v{version}" if version is not None else name + + if yaml_key not in self.unrendered_configs: + self.unrendered_configs[yaml_key] = {} + + if versioned_name not in self.unrendered_configs[yaml_key]: + self.unrendered_configs[yaml_key][versioned_name] = unrendered_config + + def get_unrendered_config(self, yaml_key, name, version=None) -> Optional[Dict[str, Any]]: + versioned_name = f"{name}_v{version}" if version is not None else name + + if yaml_key not in self.unrendered_configs: + return None + if versioned_name not in self.unrendered_configs[yaml_key]: + return None + + return self.unrendered_configs[yaml_key][versioned_name] + + def delete_from_unrendered_configs(self, yaml_key, name): + # We delete all unrendered_configs for this yaml_key/name because the + # entry has been scheduled for reparsing. + if self.get_unrendered_config(yaml_key, name): + del self.unrendered_configs[yaml_key][name] + # Delete all versioned keys associated with name + version_names_to_delete = [] + for potential_version_name in self.unrendered_configs[yaml_key]: + if potential_version_name.startswith(f"{name}_v"): + version_names_to_delete.append(potential_version_name) + for version_name in version_names_to_delete: + del self.unrendered_configs[yaml_key][version_name] + + if not self.unrendered_configs[yaml_key]: + del self.unrendered_configs[yaml_key] + def add_env_var(self, var, yaml_key, name): if yaml_key not in self.env_vars: self.env_vars[yaml_key] = {} diff --git a/core/dbt/contracts/project.py b/core/dbt/contracts/project.py index 041d310cc5e..e08131ecd8f 100644 --- a/core/dbt/contracts/project.py +++ b/core/dbt/contracts/project.py @@ -337,10 +337,11 @@ class ProjectFlags(ExtensibleDbtClassMixin): warn_error_options: Optional[Dict[str, Union[str, List[str]]]] = None write_json: Optional[bool] = None - # legacy behaviors + # legacy behaviors - https://github.com/dbt-labs/dbt-core/blob/main/docs/guides/behavior-change-flags.md require_explicit_package_overrides_for_builtin_materializations: bool = True require_resource_names_without_spaces: bool = False source_freshness_run_project_hooks: bool = False + state_modified_compare_more_unrendered_values: bool = False @property def project_only_flags(self) -> Dict[str, Any]: @@ -348,6 +349,7 @@ def project_only_flags(self) -> Dict[str, Any]: "require_explicit_package_overrides_for_builtin_materializations": self.require_explicit_package_overrides_for_builtin_materializations, "require_resource_names_without_spaces": self.require_resource_names_without_spaces, "source_freshness_run_project_hooks": self.source_freshness_run_project_hooks, + "state_modified_compare_more_unrendered_values": self.state_modified_compare_more_unrendered_values, } diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index a68f7384c0b..32dbe6dedab 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -13,6 +13,7 @@ generate_generate_name_macro_context, generate_parser_model_context, ) +from dbt.contracts.files import SchemaSourceFile from dbt.contracts.graph.manifest import Manifest from dbt.contracts.graph.nodes import BaseNode, ManifestNode from dbt.contracts.graph.unparsed import Docs, UnparsedNode @@ -22,9 +23,12 @@ DictParseError, InvalidAccessTypeError, ) +from dbt.flags import get_flags from dbt.node_types import AccessType, ModelLanguage, NodeType +from dbt.parser.common import resource_types_to_schema_file_keys from dbt.parser.search import FileBlock from dbt_common.dataclass_schema import ValidationError +from dbt_common.utils import deep_merge # internally, the parser may store a less-restrictive type that will be # transformed into the final type. But it will have to be derived from @@ -308,6 +312,7 @@ def update_parsed_node_config( config: ContextConfig, context=None, patch_config_dict=None, + patch_file_id=None, ) -> None: """Given the ContextConfig used for parsing and the parsed node, generate and set the true values to use, overriding the temporary parse @@ -369,6 +374,18 @@ def update_parsed_node_config( if hasattr(parsed_node, "contract"): parsed_node.contract = Contract.from_dict(contract_dct) + if get_flags().state_modified_compare_more_unrendered_values: + # Use the patch_file.unrendered_configs if available to update patch_dict_config, + # as provided patch_config_dict may actuallly already be rendered and thus sensitive to jinja evaluations + if patch_file_id: + patch_file = self.manifest.files.get(patch_file_id, None) + if patch_file and isinstance(patch_file, SchemaSourceFile): + schema_key = resource_types_to_schema_file_keys[parsed_node.resource_type] + if unrendered_patch_config := patch_file.get_unrendered_config( + schema_key, parsed_node.name, getattr(parsed_node, "version", None) + ): + patch_config_dict = deep_merge(patch_config_dict, unrendered_patch_config) + # unrendered_config is used to compare the original database/schema/alias # values and to handle 'same_config' and 'same_contents' calls parsed_node.unrendered_config = config.build_config_dict( @@ -376,6 +393,7 @@ def update_parsed_node_config( ) parsed_node.config_call_dict = config._config_call_dict + parsed_node.unrendered_config_call_dict = config._unrendered_config_call_dict # do this once before we parse the node database/schema/alias, so # parsed_node.config is what it would be if they did nothing diff --git a/core/dbt/parser/common.py b/core/dbt/parser/common.py index d3bb640a78f..5cc4385ea1c 100644 --- a/core/dbt/parser/common.py +++ b/core/dbt/parser/common.py @@ -16,11 +16,31 @@ UnparsedSingularTestUpdate, ) from dbt.exceptions import ParsingError +from dbt.node_types import NodeType from dbt.parser.search import FileBlock from dbt_common.contracts.constraints import ColumnLevelConstraint, ConstraintType from dbt_common.exceptions import DbtInternalError from dbt_semantic_interfaces.type_enums import TimeGranularity +schema_file_keys_to_resource_types = { + "models": NodeType.Model, + "seeds": NodeType.Seed, + "snapshots": NodeType.Snapshot, + "sources": NodeType.Source, + "macros": NodeType.Macro, + "analyses": NodeType.Analysis, + "exposures": NodeType.Exposure, + "metrics": NodeType.Metric, + "semantic_models": NodeType.SemanticModel, + "saved_queries": NodeType.SavedQuery, +} + +resource_types_to_schema_file_keys = { + v: k for (k, v) in schema_file_keys_to_resource_types.items() +} + +schema_file_keys = list(schema_file_keys_to_resource_types.keys()) + def trimmed(inp: str) -> str: if len(inp) < 50: diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py index db3400dbc58..774edf8ce6d 100644 --- a/core/dbt/parser/partial.py +++ b/core/dbt/parser/partial.py @@ -808,6 +808,7 @@ def merge_patch(self, schema_file, key, patch, new_patch=False): pp_dict[key].append(patch) schema_file.delete_from_env_vars(key, patch["name"]) + schema_file.delete_from_unrendered_configs(key, patch["name"]) self.add_to_pp_files(schema_file) # For model, seed, snapshot, analysis schema dictionary keys, diff --git a/core/dbt/parser/read_files.py b/core/dbt/parser/read_files.py index e5e25841f06..d0ce1a551e3 100644 --- a/core/dbt/parser/read_files.py +++ b/core/dbt/parser/read_files.py @@ -17,7 +17,8 @@ ) from dbt.events.types import InputFileDiffError from dbt.exceptions import ParsingError -from dbt.parser.schemas import schema_file_keys, yaml_from_file +from dbt.parser.common import schema_file_keys +from dbt.parser.schemas import yaml_from_file from dbt.parser.search import filesystem_search from dbt_common.clients.system import load_file_contents from dbt_common.dataclass_schema import dbtClassMixin diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 81c0744f852..6fa73cc3ee9 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -58,6 +58,7 @@ TestBlock, VersionedTestBlock, YamlBlock, + schema_file_keys_to_resource_types, trimmed, ) from dbt.parser.schema_generic_tests import SchemaGenericTestParser @@ -70,22 +71,6 @@ from dbt_common.exceptions import DbtValidationError from dbt_common.utils import deep_merge -schema_file_keys_to_resource_types = { - "models": NodeType.Model, - "seeds": NodeType.Seed, - "snapshots": NodeType.Snapshot, - "sources": NodeType.Source, - "macros": NodeType.Macro, - "analyses": NodeType.Analysis, - "exposures": NodeType.Exposure, - "metrics": NodeType.Metric, - "semantic_models": NodeType.SemanticModel, - "saved_queries": NodeType.SavedQuery, -} - -schema_file_keys = list(schema_file_keys_to_resource_types.keys()) - - # =============================================================================== # Schema Parser classes # @@ -400,13 +385,33 @@ def get_key_dicts(self) -> Iterable[Dict[str, Any]]: if "name" not in entry and "model" not in entry: raise ParsingError("Entry did not contain a name") + unrendered_config = {} + if "config" in entry: + unrendered_config = entry["config"] + + unrendered_version_configs = {} + if "versions" in entry: + for version in entry["versions"]: + if "v" in version: + unrendered_version_configs[version["v"]] = version.get("config", {}) + # Render the data (except for tests, data_tests and descriptions). # See the SchemaYamlRenderer entry = self.render_entry(entry) + + schema_file = self.yaml.file + assert isinstance(schema_file, SchemaSourceFile) + + if unrendered_config: + schema_file.add_unrendered_config(unrendered_config, self.key, entry["name"]) + + for version, unrendered_version_config in unrendered_version_configs.items(): + schema_file.add_unrendered_config( + unrendered_version_config, self.key, entry["name"], version + ) + if self.schema_yaml_vars.env_vars: self.schema_parser.manifest.env_vars.update(self.schema_yaml_vars.env_vars) - schema_file = self.yaml.file - assert isinstance(schema_file, SchemaSourceFile) for var in self.schema_yaml_vars.env_vars.keys(): schema_file.add_env_var(var, self.key, entry["name"]) self.schema_yaml_vars.env_vars = {} @@ -663,7 +668,13 @@ def patch_node_config(self, node, patch) -> None: ) # We need to re-apply the config_call_dict after the patch config config._config_call_dict = node.config_call_dict - self.schema_parser.update_parsed_node_config(node, config, patch_config_dict=patch.config) + config._unrendered_config_call_dict = node.unrendered_config_call_dict + self.schema_parser.update_parsed_node_config( + node, + config, + patch_config_dict=patch.config, + patch_file_id=patch.file_id, + ) # Subclasses of NodePatchParser: TestablePatchParser, ModelPatchParser, AnalysisPatchParser, diff --git a/tests/functional/configs/test_configs_in_schema_files.py b/tests/functional/configs/test_configs_in_schema_files.py index e0b85686895..72d41722a53 100644 --- a/tests/functional/configs/test_configs_in_schema_files.py +++ b/tests/functional/configs/test_configs_in_schema_files.py @@ -110,6 +110,15 @@ class TestSchemaFileConfigs: + @pytest.fixture(scope="class") + def expected_unrendered_config(self): + # my_attr is unrendered when state_modified_compare_more_unrendered_values: True + return { + "materialized": "view", + "meta": {"my_attr": "{{ var('my_var') }}", "owner": "Julie Smith"}, + "tags": ["tag_1_in_model", "tag_2_in_model"], + } + @pytest.fixture(scope="class") def models(self): return { @@ -125,6 +134,9 @@ def seeds(self): @pytest.fixture(scope="class") def project_config_update(self): return { + "flags": { + "state_modified_compare_more_unrendered_values": True, + }, "models": { "+meta": { "company": "NuMade", @@ -160,6 +172,7 @@ def project_config_update(self): def test_config_layering( self, project, + expected_unrendered_config, ): # run seed @@ -219,11 +232,7 @@ def test_config_layering( model_node = manifest.nodes[model_id] assert model_node.config.materialized == "view" - model_unrendered_config = { - "materialized": "view", - "meta": {"my_attr": "TESTING", "owner": "Julie Smith"}, - "tags": ["tag_1_in_model", "tag_2_in_model"], - } + model_unrendered_config = expected_unrendered_config assert model_node.unrendered_config == model_unrendered_config # look for test meta @@ -251,6 +260,57 @@ def test_config_layering( run_dbt(["run"]) +class TestLegacySchemaFileConfigs(TestSchemaFileConfigs): + @pytest.fixture(scope="class") + def expected_unrendered_config(self): + # my_attr is rendered ("TESTING") when state_modified_compare_more_unrendered_values: False + return { + "materialized": "view", + "meta": {"my_attr": "TESTING", "owner": "Julie Smith"}, + "tags": ["tag_1_in_model", "tag_2_in_model"], + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + # The uncommented below lines can be removed once the default behaviour is flipped. + # state_modified_compare_more_unrendered_values defaults to false currently + # "flags": { + # "state_modified_compare_more_unrendered_values": False, + # }, + "models": { + "+meta": { + "company": "NuMade", + }, + "test": { + "+meta": { + "project": "test", + }, + "tagged": { + "+meta": { + "team": "Core Team", + }, + "tags": ["tag_in_project"], + "model": { + "materialized": "table", + "+meta": { + "owner": "Julie Dent", + }, + }, + }, + }, + }, + "vars": { + "test": { + "my_var": "TESTING", + } + }, + "seeds": { + "quote_columns": False, + }, + } + + list_schema_yml = """ - name: my_name - name: alt_name diff --git a/tests/functional/defer_state/fixtures.py b/tests/functional/defer_state/fixtures.py index 832bf258f56..4b409ea4a78 100644 --- a/tests/functional/defer_state/fixtures.py +++ b/tests/functional/defer_state/fixtures.py @@ -540,3 +540,66 @@ metricflow_time_spine_sql = """ SELECT to_date('02/20/2023', 'mm/dd/yyyy') as date_day """ + + +model_with_env_var_in_config_sql = """ +{{ config(materialized=env_var('DBT_TEST_STATE_MODIFIED')) }} +select 1 as id +""" + +model_with_no_in_config_sql = """ +select 1 as id +""" + + +schema_model_with_env_var_in_config_yml = """ +models: + - name: model + config: + materialized: "{{ env_var('DBT_TEST_STATE_MODIFIED') }}" + +""" + +schema_source_with_env_var_as_property_yml = """ +sources: + - name: jaffle_shop + database: "{{ env_var('DBT_TEST_STATE_MODIFIED') }}" + tables: + - name: customers +""" + + +model_with_var_in_config_sql = """ +{{ config(materialized=var('DBT_TEST_STATE_MODIFIED')) }} +select 1 as id +""" + +model_with_jinja_in_config_sql = """ +{{ config( + materialized = ('table' if execute else 'view') +) }} + +select 1 as id +""" + +model_with_updated_jinja_in_config_sql = """ +{{ config( + materialized = ('view' if execute else 'table') +) }} + +select 1 as id +""" + +schema_model_with_jinja_in_config_yml = """ +models: + - name: model + config: + materialized: "{{ ('table' if execute else 'view') }}" +""" + +schema_model_with_updated_jinja_in_config_yml = """ +models: + - name: model + config: + materialized: "{{ ('view' if execute else 'table') }}" +""" diff --git a/tests/functional/defer_state/test_modified_state_environment_vars.py b/tests/functional/defer_state/test_modified_state_environment_vars.py new file mode 100644 index 00000000000..569a00e5b10 --- /dev/null +++ b/tests/functional/defer_state/test_modified_state_environment_vars.py @@ -0,0 +1,108 @@ +import os + +import pytest + +from dbt.tests.util import run_dbt +from tests.functional.defer_state.fixtures import ( + model_with_env_var_in_config_sql, + model_with_no_in_config_sql, + schema_model_with_env_var_in_config_yml, +) +from tests.functional.defer_state.test_modified_state import BaseModifiedState + + +class BaseTestStateSelectionEnvVarConfig(BaseModifiedState): + @pytest.fixture(scope="class", autouse=True) + def setup(self): + os.environ["DBT_TEST_STATE_MODIFIED"] = "table" + yield + del os.environ["DBT_TEST_STATE_MODIFIED"] + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "flags": { + "state_modified_compare_more_unrendered_values": True, + } + } + + def test_change_env_var(self, project): + # Generate ./state without changing environment variable value + run_dbt(["run"]) + self.copy_state() + + # Assert no false positive + results = run_dbt(["list", "-s", "state:modified", "--state", "./state"]) + assert len(results) == 0 + + # Change environment variable and assert no false positive + # Environment variables do not have an effect on state:modified + os.environ["DBT_TEST_STATE_MODIFIED"] = "view" + results = run_dbt(["list", "-s", "state:modified", "--state", "./state"]) + assert len(results) == 0 + + +class TestModelNodeWithEnvVarConfigInSqlFile(BaseTestStateSelectionEnvVarConfig): + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_env_var_in_config_sql, + } + + +class TestModelNodeWithEnvVarConfigInSchemaYml(BaseTestStateSelectionEnvVarConfig): + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_no_in_config_sql, + "schema.yml": schema_model_with_env_var_in_config_yml, + } + + +class TestModelNodeWithEnvVarConfigInProjectYml(BaseTestStateSelectionEnvVarConfig): + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_no_in_config_sql, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "models": { + "test": { + "+materialized": "{{ env_var('DBT_TEST_STATE_MODIFIED') }}", + } + } + } + + +class TestModelNodeWithEnvVarConfigInProjectYmlAndSchemaYml(BaseTestStateSelectionEnvVarConfig): + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_no_in_config_sql, + "schema.yml": schema_model_with_env_var_in_config_yml, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "flags": { + "state_modified_compare_more_unrendered_values": True, + }, + "models": { + "test": { + "+materialized": "{{ env_var('DBT_TEST_STATE_MODIFIED') }}", + } + }, + } + + +class TestModelNodeWithEnvVarConfigInSqlAndSchemaYml(BaseTestStateSelectionEnvVarConfig): + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_env_var_in_config_sql, + "schema.yml": schema_model_with_env_var_in_config_yml, + } diff --git a/tests/functional/defer_state/test_modified_state_jinja.py b/tests/functional/defer_state/test_modified_state_jinja.py new file mode 100644 index 00000000000..b0c96c42ee1 --- /dev/null +++ b/tests/functional/defer_state/test_modified_state_jinja.py @@ -0,0 +1,145 @@ +import pytest + +from dbt.tests.util import ( + get_artifact, + get_project_config, + run_dbt, + update_config_file, + write_file, +) +from tests.functional.defer_state.fixtures import ( + model_with_jinja_in_config_sql, + model_with_no_in_config_sql, + model_with_updated_jinja_in_config_sql, + schema_model_with_jinja_in_config_yml, + schema_model_with_updated_jinja_in_config_yml, +) +from tests.functional.defer_state.test_modified_state import BaseModifiedState + + +class BaseTestStateSelectionJinjaInConfig(BaseModifiedState): + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "flags": { + "state_modified_compare_more_unrendered_values": True, + } + } + + def update_jinja_expression_in_config(self, project): + pass + + def test_change_target_jinja_if(self, project, dbt_profile_data, profiles_root): + # Generate ./state without changing target.name + run_dbt(["run"]) + self.copy_state() + # Model is table when execute = True + manifest_json = get_artifact(project.project_root, "target", "manifest.json") + assert manifest_json["nodes"]["model.test.model"]["config"]["materialized"] == "view" + + # Assert no false positive (execute = False) + results = run_dbt(["list", "-s", "state:modified", "--state", "./state"]) + assert len(results) == 0 + + # Update unrendered config (change jinja expression) + self.update_jinja_expression_in_config(project) + # Assert no false negatives (jinja expression has changed) + results = run_dbt(["list", "-s", "state:modified", "--state", "./state"]) + assert len(results) == 1 + + +class TestModelNodeWithJinjaConfigInSqlFile(BaseTestStateSelectionJinjaInConfig): + def update_jinja_expression_in_config(self, project): + write_file( + model_with_updated_jinja_in_config_sql, project.project_root, "models", "model.sql" + ) + + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_jinja_in_config_sql, + } + + +class TestModelNodeWithEnvVarConfigInSchemaYml(BaseTestStateSelectionJinjaInConfig): + def update_jinja_expression_in_config(self, project): + write_file( + schema_model_with_updated_jinja_in_config_yml, + project.project_root, + "models", + "schema.yml", + ) + + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_no_in_config_sql, + "schema.yml": schema_model_with_jinja_in_config_yml, + } + + +class TestModelNodeWithJinjaConfigInProjectYml(BaseTestStateSelectionJinjaInConfig): + def update_jinja_expression_in_config(self, project): + config = get_project_config(project) + config["models"]["test"]["+materialized"] = "{{ ('view' if execute else 'table') }}" + update_config_file(config, "dbt_project.yml") + + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_no_in_config_sql, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "models": { + "test": { + "+materialized": "{{ ('table' if execute else 'view') }}", + } + } + } + + +class TestModelNodeWithJinjaConfigInProjectYmlAndSchemaYml(BaseTestStateSelectionJinjaInConfig): + def update_jinja_expression_in_config(self, project): + write_file( + schema_model_with_updated_jinja_in_config_yml, + project.project_root, + "models", + "schema.yml", + ) + + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_no_in_config_sql, + "schema.yml": schema_model_with_jinja_in_config_yml, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "flags": { + "state_modified_compare_more_unrendered_values": True, + }, + "models": { + "test": { + "+materialized": "{{ ('view' if execute else 'table') }}", + } + }, + } + + +class TestModelNodeWithJinjaConfigInSqlAndSchemaYml(BaseTestStateSelectionJinjaInConfig): + def update_jinja_expression_in_config(self, project): + write_file( + model_with_updated_jinja_in_config_sql, project.project_root, "models", "model.sql" + ) + + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_jinja_in_config_sql, + "schema.yml": schema_model_with_jinja_in_config_yml, + } diff --git a/tests/functional/defer_state/test_modified_state_vars.py b/tests/functional/defer_state/test_modified_state_vars.py new file mode 100644 index 00000000000..01068f0130f --- /dev/null +++ b/tests/functional/defer_state/test_modified_state_vars.py @@ -0,0 +1,103 @@ +import pytest + +from dbt.tests.util import run_dbt +from tests.functional.defer_state.fixtures import model_with_var_in_config_sql +from tests.functional.defer_state.test_modified_state import BaseModifiedState + + +class TestStateSelectionVarConfigLegacy(BaseModifiedState): + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_var_in_config_sql, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "flags": { + "state_modified_compare_more_unrendered_values": False, + } + } + + def test_change_var(self, project): + # Generate ./state without changing variable value + run_dbt(["run", "--vars", "DBT_TEST_STATE_MODIFIED: view"]) + self.copy_state() + + # Assert no false positive + results = run_dbt( + [ + "list", + "-s", + "state:modified", + "--state", + "./state", + "--vars", + "DBT_TEST_STATE_MODIFIED: view", + ] + ) + assert len(results) == 0 + + # Change var and assert no false negative - legacy behaviour + results = run_dbt( + [ + "list", + "-s", + "state:modified", + "--state", + "./state", + "--vars", + "DBT_TEST_STATE_MODIFIED: table", + ] + ) + assert len(results) == 1 + + +class TestStateSelectionVarConfig(BaseModifiedState): + @pytest.fixture(scope="class") + def models(self): + return { + "model.sql": model_with_var_in_config_sql, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "flags": { + "state_modified_compare_more_unrendered_values": True, + } + } + + def test_change_var(self, project): + # Generate ./state without changing variable value + run_dbt(["run", "--vars", "DBT_TEST_STATE_MODIFIED: view"]) + self.copy_state() + + # Assert no false positive + results = run_dbt( + [ + "list", + "-s", + "state:modified", + "--state", + "./state", + "--vars", + "DBT_TEST_STATE_MODIFIED: view", + ] + ) + assert len(results) == 0 + + # Change var and assert no sensitivity to var changes -- new behaviour until state:modified.vars included in state:modified by default + results = run_dbt( + [ + "list", + "-s", + "state:modified", + "--state", + "./state", + "--vars", + "DBT_TEST_STATE_MODIFIED: table", + ] + ) + assert len(results) == 0 diff --git a/tests/unit/clients/test_jinja_static.py b/tests/unit/clients/test_jinja_static.py index 171976a6b50..42264f24331 100644 --- a/tests/unit/clients/test_jinja_static.py +++ b/tests/unit/clients/test_jinja_static.py @@ -4,6 +4,7 @@ from dbt.clients.jinja_static import ( statically_extract_macro_calls, statically_parse_ref_or_source, + statically_parse_unrendered_config, ) from dbt.context.base import generate_base_context from dbt.exceptions import ParsingError @@ -77,3 +78,43 @@ def test_invalid_expression(self): def test_valid_ref_expression(self, expression, expected_ref_or_source): ref_or_source = statically_parse_ref_or_source(expression) assert ref_or_source == expected_ref_or_source + + +class TestStaticallyParseUnrenderedConfig: + @pytest.mark.parametrize( + "expression,expected_unrendered_config", + [ + ( + "{{ config(materialized='view') }}", + {"materialized": "Keyword(key='materialized', value=Const(value='view'))"}, + ), + ( + "{{ config(materialized='view', enabled=True) }}", + { + "materialized": "Keyword(key='materialized', value=Const(value='view'))", + "enabled": "Keyword(key='enabled', value=Const(value=True))", + }, + ), + ( + "{{ config(materialized=env_var('test')) }}", + { + "materialized": "Keyword(key='materialized', value=Call(node=Name(name='env_var', ctx='load'), args=[Const(value='test')], kwargs=[], dyn_args=None, dyn_kwargs=None))" + }, + ), + ( + "{{ config(materialized=env_var('test', default='default')) }}", + { + "materialized": "Keyword(key='materialized', value=Call(node=Name(name='env_var', ctx='load'), args=[Const(value='test')], kwargs=[Keyword(key='default', value=Const(value='default'))], dyn_args=None, dyn_kwargs=None))" + }, + ), + ( + "{{ config(materialized=env_var('test', default=env_var('default'))) }}", + { + "materialized": "Keyword(key='materialized', value=Call(node=Name(name='env_var', ctx='load'), args=[Const(value='test')], kwargs=[Keyword(key='default', value=Call(node=Name(name='env_var', ctx='load'), args=[Const(value='default')], kwargs=[], dyn_args=None, dyn_kwargs=None))], dyn_args=None, dyn_kwargs=None))" + }, + ), + ], + ) + def test_statically_parse_unrendered_config(self, expression, expected_unrendered_config): + unrendered_config = statically_parse_unrendered_config(expression) + assert unrendered_config == expected_unrendered_config diff --git a/tests/unit/contracts/graph/test_manifest.py b/tests/unit/contracts/graph/test_manifest.py index 38b8c1de250..36f14537b09 100644 --- a/tests/unit/contracts/graph/test_manifest.py +++ b/tests/unit/contracts/graph/test_manifest.py @@ -84,6 +84,7 @@ "docs", "checksum", "unrendered_config", + "unrendered_config_call_dict", "created_at", "config_call_dict", "relation_name", diff --git a/tests/unit/contracts/graph/test_nodes.py b/tests/unit/contracts/graph/test_nodes.py index a67ca1f5efc..4e3a2353105 100644 --- a/tests/unit/contracts/graph/test_nodes.py +++ b/tests/unit/contracts/graph/test_nodes.py @@ -139,6 +139,7 @@ def basic_uncompiled_dict(): "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", }, "unrendered_config": {}, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } @@ -198,6 +199,7 @@ def basic_compiled_dict(): "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", }, "unrendered_config": {}, + "unrendered_config_call_dict": {}, "config_call_dict": {}, "access": "protected", "constraints": [], @@ -459,6 +461,7 @@ def basic_uncompiled_schema_test_dict(): "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", }, "unrendered_config": {}, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } @@ -516,6 +519,7 @@ def basic_compiled_schema_test_dict(): "unrendered_config": { "severity": "warn", }, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } diff --git a/tests/unit/contracts/graph/test_nodes_parsed.py b/tests/unit/contracts/graph/test_nodes_parsed.py index 7acd4c1f02a..eeec787dd54 100644 --- a/tests/unit/contracts/graph/test_nodes_parsed.py +++ b/tests/unit/contracts/graph/test_nodes_parsed.py @@ -199,6 +199,7 @@ def base_parsed_model_dict(): "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", }, "unrendered_config": {}, + "unrendered_config_call_dict": {}, "config_call_dict": {}, "access": AccessType.Protected.value, "constraints": [], @@ -321,6 +322,7 @@ def complex_parsed_model_dict(): "materialized": "ephemeral", "post_hook": ['insert into blah(a, b) select "1", 1'], }, + "unrendered_config_call_dict": {}, "config_call_dict": {}, "access": AccessType.Protected.value, "constraints": [], @@ -530,6 +532,7 @@ def basic_parsed_seed_dict(): "meta": {}, "checksum": {"name": "path", "checksum": "seeds/seed.csv"}, "unrendered_config": {}, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } @@ -635,6 +638,7 @@ def complex_parsed_seed_dict(): "unrendered_config": { "persist_docs": {"relation": True, "columns": True}, }, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } @@ -834,6 +838,7 @@ def base_parsed_hook_dict(): "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", }, "unrendered_config": {}, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } @@ -927,6 +932,7 @@ def complex_parsed_hook_dict(): "column_types": {"a": "text"}, "materialized": "table", }, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } @@ -1020,6 +1026,7 @@ def minimal_parsed_schema_test_dict(): "name": "sha256", "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", }, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } @@ -1070,6 +1077,7 @@ def basic_parsed_schema_test_dict(): "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", }, "unrendered_config": {}, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } @@ -1158,6 +1166,7 @@ def complex_parsed_schema_test_dict(): "checksum": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", }, "unrendered_config": {"materialized": "table", "severity": "WARN"}, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } @@ -1552,6 +1561,7 @@ def basic_timestamp_snapshot_dict(): "target_database": "some_snapshot_db", "target_schema": "some_snapshot_schema", }, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } @@ -1656,6 +1666,7 @@ def basic_check_snapshot_dict(): "strategy": "check", "check_cols": "all", }, + "unrendered_config_call_dict": {}, "config_call_dict": {}, } diff --git a/tests/unit/parser/test_parser.py b/tests/unit/parser/test_parser.py index 20a2c9e8c83..b9b414ebac2 100644 --- a/tests/unit/parser/test_parser.py +++ b/tests/unit/parser/test_parser.py @@ -6,8 +6,6 @@ import yaml -import dbt.flags -import dbt.parser from dbt import tracking from dbt.artifacts.resources import ModelConfig, RefArgs from dbt.context.context_config import ContextConfig @@ -60,7 +58,9 @@ normalize, ) -set_from_args(Namespace(WARN_ERROR=False), None) +set_from_args( + Namespace(warn_error=False, state_modified_compare_more_unrendered_values=False), None +) def get_abs_os_path(unix_path): @@ -94,7 +94,10 @@ def _generate_macros(self): yield pm def setUp(self): - dbt.flags.WARN_ERROR = True + set_from_args( + Namespace(warn_error=True, state_modified_compare_more_unrendered_values=False), + None, + ) # HACK: this is needed since tracking events can # be sent when using the model parser tracking.do_not_track() @@ -1471,6 +1474,7 @@ def test_single_block(self): "unique_key": "id", "updated_at": "last_update", }, + unrendered_config_call_dict={}, ) assertEqualNodes(expected, node) file_id = "snowplow://" + normalize("snapshots/nested/snap_1.sql") @@ -1541,6 +1545,8 @@ def test_multi_block(self): "unique_key": "id", "updated_at": "last_update", }, + # Empty until state_modified_compare_more_unrendered_values=True + unrendered_config_call_dict={}, ) expect_bar = SnapshotNode( alias="bar", @@ -1578,6 +1584,8 @@ def test_multi_block(self): "unique_key": "id", "updated_at": "last_update", }, + # Empty until state_modified_compare_more_unrendered_values=True + unrendered_config_call_dict={}, ) assertEqualNodes(nodes[0], expect_bar) assertEqualNodes(nodes[1], expect_foo)