Skip to content

Commit

Permalink
Merge pull request #206 from nicholasyager/feature/better_split_projects
Browse files Browse the repository at this point in the history
Feature: Use dbt-core starter project as baseline for split projects
  • Loading branch information
nicholasyager authored May 5, 2024
2 parents 8bc23ab + 8385cc0 commit 31e1430
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 79 deletions.
11 changes: 10 additions & 1 deletion dbt_meshify/change.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
from enum import Enum
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Protocol
from typing import Callable, Dict, Iterable, List, Optional, Protocol


class Operation(str, Enum):
Expand Down Expand Up @@ -46,6 +46,7 @@ class EntityType(str, Enum):
SemanticModel = "semantic_model"
Project = "project"
Code = "code"
Directory = "directory"

def pluralize(self) -> str:
if self is self.Analysis:
Expand Down Expand Up @@ -97,6 +98,14 @@ def __str__(self):
)


@dataclasses.dataclass
class DirectoryChange(BaseChange):
    """A unit of work to be carried out against a whole directory of a dbt project."""

    # Directory the operation reads from; stays None when the change carries no source.
    source: Optional[Path] = None

    # Optional callable carried with the change. Presumably used to decide which
    # directory entries to skip during a copy -- confirm against the consumer.
    ignore_function: Optional[Callable] = None


@dataclasses.dataclass
class FileChange(BaseChange):
"""A FileChange represents a unit of work that should be performed on a File in a dbt project."""
Expand Down
13 changes: 9 additions & 4 deletions dbt_meshify/change_set_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@
from loguru import logger

from dbt_meshify.change import Change, ChangeSet, EntityType
from dbt_meshify.storage.file_content_editors import RawFileEditor, ResourceFileEditor
from dbt_meshify.storage.file_content_editors import (
DirectoryEditor,
RawFileEditor,
ResourceFileEditor,
)

# Enumeration of valid File Editors
FILE_EDITORS = {EntityType.Code: RawFileEditor, EntityType.Directory: DirectoryEditor}


class ChangeSetProcessorException(BaseException):
Expand All @@ -24,9 +31,7 @@ def __init__(self, dry_run: bool = False) -> None:

def write(self, change: Change) -> None:
"""Commit a Change to the file system."""
file_editor = (
RawFileEditor() if change.entity_type == EntityType.Code else ResourceFileEditor()
)
file_editor = FILE_EDITORS.get(change.entity_type, ResourceFileEditor)()

file_editor.__getattribute__(change.operation)(change)

Expand Down
53 changes: 49 additions & 4 deletions dbt_meshify/storage/dbt_project_editors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import shutil
from pathlib import Path
from tracemalloc import start
from typing import Dict, Optional, Set

from dbt.contracts.graph.nodes import (
Expand All @@ -12,9 +14,11 @@
)
from dbt.node_types import AccessType
from loguru import logger
from regex import D

from dbt_meshify.change import (
ChangeSet,
DirectoryChange,
EntityType,
FileChange,
Operation,
Expand All @@ -30,6 +34,20 @@
from dbt_meshify.utilities.references import ReferenceUpdater


def get_starter_project_path() -> Path:
    """Obtain the path of the dbt starter project on the local filesystem.

    The starter project is located through ``importlib.resources`` using the
    ``dbt.include.starter_project`` package.

    Returns:
        The directory containing the starter project's files.

    Raises:
        FileNotFoundError: If the located directory has no ``dbt_project.yml``.
    """

    # Imported inside the function so that importing this module does not itself
    # import the starter project package.
    from importlib import resources

    import dbt.include.starter_project

    starter_path = Path(str(resources.files(dbt.include.starter_project)))

    # Raise explicitly instead of using `assert`: assertions are removed when
    # Python runs with -O, which would let a broken path slip through silently.
    if not (starter_path / "dbt_project.yml").exists():
        raise FileNotFoundError(
            f"No dbt_project.yml found in the dbt starter project at {starter_path}"
        )

    return starter_path


class DbtSubprojectCreator:
"""
Takes a `DbtSubProject` and creates the directory structure and files for it.
Expand Down Expand Up @@ -85,14 +103,29 @@ def _get_subproject_boundary_models(self) -> Set[str]:
)
return boundary_models

def create_starter_project(self) -> DirectoryChange:
    """Describe copying dbt-core's default starter project into the subproject.

    Returns:
        A DirectoryChange with a Copy operation whose source is the starter
        project path and whose target path is the subproject's path.
    """

    # `shutil.ignore_patterns` compares each glob against bare entry names, one
    # directory at a time. "*.pyc" therefore applies at every depth, whereas a
    # pattern containing a path separator (such as "**/*.pyc") never matches.
    skip_python_artifacts = shutil.ignore_patterns("__init__.py", "__pycache__", "*.pyc")

    return DirectoryChange(
        operation=Operation.Copy,
        entity_type=EntityType.Directory,
        identifier=str(self.subproject.path),
        path=self.subproject.path,
        source=get_starter_project_path(),
        ignore_function=skip_python_artifacts,
    )

def write_project_file(self) -> FileChange:
"""
Writes the dbt_project.yml file for the subproject in the specified subdirectory
"""

# Read a starter `dbt_project.yml` file as a baseline
starter_path: Path = get_starter_project_path() / "dbt_project.yml"
starter_dbt_project = YAMLFileManager.read_file(starter_path)

contents = self.subproject.project.to_dict()
# was getting a weird serialization error from ruamel on this value
# it's been deprecated, so no reason to keep it
contents.pop("version")

# this one appears in the project yml, but i don't think it should be written
contents.pop("query-comment")
contents = filter_empty_dict_items(contents)
Expand All @@ -105,12 +138,22 @@ def write_project_file(self) -> FileChange:
if max([len(version) for version in contents["require-dbt-version"]]) == 1:
contents["require-dbt-version"] = "".join(contents["require-dbt-version"])

for key, value in contents.items():
if value is None:
continue

if isinstance(value, (list, dict, tuple)):
if len(value) == 0:
continue

starter_dbt_project[key] = value

return FileChange(
operation=Operation.Add,
entity_type=EntityType.Code,
identifier="dbt_project.yml",
path=self.subproject.path / Path("dbt_project.yml"),
data=yaml.dump(contents),
data=yaml.dump(starter_dbt_project),
)

def copy_packages_yml_file(self) -> FileChange:
Expand Down Expand Up @@ -143,6 +186,8 @@ def initialize(self) -> ChangeSet:
f"Identifying operations required to split {subproject.name} from {subproject.parent_project.name}."
)

change_set.add(self.create_starter_project())

for unique_id in (
subproject.resources
| subproject.custom_macros
Expand Down
20 changes: 18 additions & 2 deletions dbt_meshify/storage/file_content_editors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@

from loguru import logger

from dbt_meshify.change import EntityType, FileChange, ResourceChange
from dbt_meshify.change import DirectoryChange, EntityType, FileChange, ResourceChange
from dbt_meshify.exceptions import FileEditorException
from dbt_meshify.storage.file_manager import RawFileManager, YAMLFileManager
from dbt_meshify.storage.file_manager import (
DirectoryManager,
RawFileManager,
YAMLFileManager,
)


class NamedList(dict):
Expand Down Expand Up @@ -89,6 +93,18 @@ def safe_update(original: Dict[Any, Any], update: Dict[Any, Any]) -> Dict[Any, A
return original


class DirectoryEditor:
    """Filesystem operations that act on whole directories."""

    @staticmethod
    def copy(change: DirectoryChange):
        """Copy the directory at `change.source` to `change.path`.

        The change's `ignore_function` is forwarded to the directory manager.

        Raises:
            FileEditorException: If the change has no source.
        """
        origin = change.source
        if origin is not None:
            DirectoryManager.copy_directory(origin, change.path, change.ignore_function)
            return

        raise FileEditorException("None source value provided in Copy operation.")


class RawFileEditor:
"""A class used to perform Raw operations on Files"""

Expand Down
18 changes: 17 additions & 1 deletion dbt_meshify/storage/file_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import shutil
from pathlib import Path
from typing import Any, Dict, List, Protocol
from typing import Any, Callable, Dict, List, Optional, Protocol

from dbt.contracts.util import Identifier
from ruamel.yaml import YAML
Expand All @@ -18,6 +18,7 @@ def __init__(self):
self.preserve_quotes = True
self.width = 4096
self.indent(mapping=2, sequence=4, offset=2)
self.default_flow_style = False

def dump(self, data, stream=None, **kw):
inefficient = False
Expand All @@ -43,6 +44,21 @@ def write_file(self, path: Path, content: Any) -> None:
pass


class DirectoryManager:
    """A FileManager-style helper for operating on directories in the filesystem."""

    @staticmethod
    def copy_directory(
        source_path: Path, target_path: Path, ignore_function: Optional[Callable] = None
    ) -> None:
        """Recursively copy `source_path` to `target_path`.

        Missing ancestors of the target are created first. Symlinks are copied as
        symlinks, and `ignore_function` is passed to `shutil.copytree` as `ignore`.
        """

        destination_parent = target_path.parent
        parent_missing = not destination_parent.exists()
        if parent_missing:
            destination_parent.mkdir(parents=True, exist_ok=True)

        shutil.copytree(
            source_path,
            target_path,
            symlinks=True,
            ignore=ignore_function,
        )


class RawFileManager:
"""RawFileManager is a FileManager for operating on raw files in the filesystem."""

Expand Down
Loading

0 comments on commit 31e1430

Please sign in to comment.