diff --git a/.gitignore b/.gitignore index 9b71fe7..47f0945 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .vscode __pycache__ *.rock +.venv/ +yaml_checker/yaml_checker.egg-info diff --git a/yaml_checker/README.md b/yaml_checker/README.md new file mode 100644 index 0000000..b84e1d4 --- /dev/null +++ b/yaml_checker/README.md @@ -0,0 +1,111 @@ +# YAML Checker + +An internal CLI util for formatting and validating YAML files. This project +relies on Pydantic and Ruamel libraries. + +**Installation** +```bash +pip install -e yaml_checker +``` + +**Usage** +``` +usage: yaml_checker [-h] [-v] [-w] [--config CONFIG] [files ...] + +positional arguments: + files Additional files to process (optional). + +options: + -h, --help show this help message and exit + -v, --verbose Enable verbose output. + -w, --write Write yaml output to disk. + --config CONFIG CheckYAML subclass to load +``` + +**Example** + +```bash +# Lets cat a demonstration file for comparison. +$ cat yaml_checker/demo/slice.yaml +# yaml_checker --config=Chisel demo/slice.yaml + +package: grep + +essential: + - grep_copyright + +# hello: world + +slices: + bins: + essential: + - libpcre2-8-0_libs # tests + + # another test + - libc6_libs + contents: + /usr/bin/grep: + + deprecated: + # These are shell scripts requiring a symlink from /usr/bin/dash to + # /usr/bin/sh. + # See: https://manpages.ubuntu.com/manpages/noble/en/man1/grep.1.html + essential: + - dash_bins + - grep_bins + contents: + # we ned this leading comment + /usr/bin/rgrep: # this should be last + + /usr/bin/fgrep: + + # careful with this path ... + /usr/bin/egrep: # it is my favorite + copyright: + contents: + /usr/share/doc/grep/copyright: +# Note: Missing new line at EOF + +# Now we can run the yaml_checker to format the same file. +# Note how comments are preserved during sorting of lists and +# dict type objects. If you want to test the validator, +# uncomment the hello field. 
+$ yaml_checker --config=Chisel yaml_checker/demo/slice.yaml +# yaml_checker --config=Chisel demo/slice.yaml + +package: grep + +essential: + - grep_copyright + +# hello: world + +slices: + bins: + essential: + - libc6_libs + - libpcre2-8-0_libs # tests + + # another test + contents: + /usr/bin/grep: + + deprecated: + # These are shell scripts requiring a symlink from /usr/bin/dash to + # /usr/bin/sh. + # See: https://manpages.ubuntu.com/manpages/noble/en/man1/grep.1.html + essential: + - dash_bins + - grep_bins + contents: + # we ned this leading comment + + # careful with this path ... + /usr/bin/egrep: # it is my favorite + /usr/bin/fgrep: + /usr/bin/rgrep: # this should be last + copyright: + contents: + /usr/share/doc/grep/copyright: + +``` diff --git a/yaml_checker/demo/slice.yaml b/yaml_checker/demo/slice.yaml new file mode 100644 index 0000000..db26dce --- /dev/null +++ b/yaml_checker/demo/slice.yaml @@ -0,0 +1,37 @@ +# yaml_checker --config=Chisel demo/slice.yaml + +package: grep + +essential: + - grep_copyright + +# hello: world + +slices: + bins: + essential: + - libpcre2-8-0_libs # tests + + # another test + - libc6_libs + contents: + /usr/bin/grep: + + deprecated: + # These are shell scripts requiring a symlink from /usr/bin/dash to + # /usr/bin/sh. + # See: https://manpages.ubuntu.com/manpages/noble/en/man1/grep.1.html + essential: + - dash_bins + - grep_bins + contents: + # we ned this leading comment + /usr/bin/rgrep: # this should be last + + /usr/bin/fgrep: + + # careful with this path ... 
# ---------------------------------------------------------------------------
# yaml_checker/setup.py
# ---------------------------------------------------------------------------
from pathlib import Path

from setuptools import find_packages, setup


def read_text(filename):
    """Return the text of ``filename``, resolved next to this file.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the locale of the machine running the build.
    """
    filepath = Path(__file__).parent / filename
    return filepath.read_text(encoding="utf-8")


setup(
    name="yaml_checker",
    version="0.1.0",
    long_description=read_text("README.md"),
    # README.md is Markdown; declare it so indexes do not try to render reST.
    long_description_content_type="text/markdown",
    packages=find_packages(),
    install_requires=read_text("requirements.txt"),
    entry_points={
        "console_scripts": [
            "yaml_checker=yaml_checker.__main__:main",
            "clayaml=yaml_checker.__main__:main",
        ],
    },
)


# ---------------------------------------------------------------------------
# yaml_checker/yaml_checker/__main__.py
# ---------------------------------------------------------------------------
import argparse
import logging
from pathlib import Path

from .config.base import YAMLCheckConfigBase

# TODO: display all available configs in help
parser = argparse.ArgumentParser()

parser.add_argument(
    "-v", "--verbose", action="store_true", help="Enable verbose output."
)

parser.add_argument(
    "-w", "--write", action="store_true", help="Write yaml output to disk."
)

parser.add_argument(
    "--config",
    type=str,
    default="YAMLCheckConfigBase",
    help="CheckYAML subclass to load",
)

parser.add_argument(
    "files", type=Path, nargs="*", help="Additional files to process (optional)."
)


def main():
    """Format and validate every file named on the command line.

    Each file is loaded, passed through the selected config's rules,
    validated against that config's model and then either written back
    in place (``--write``) or printed to stdout.
    """
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level)

    try:
        check_yaml_config = YAMLCheckConfigBase.configs[args.config]
    except KeyError:
        available = ", ".join(sorted(YAMLCheckConfigBase.configs))
        # parser.error() prints the usage line plus this message and exits,
        # which is friendlier than a bare KeyError traceback.
        parser.error(f"unknown config {args.config!r} (available: {available})")

    yaml = check_yaml_config()

    for file in args.files:
        data = yaml.load(file.read_text(encoding="utf-8"))
        data = yaml.apply_rules(data)
        yaml.validate_model(data)

        output = yaml.dump(data)

        if args.write:
            file.write_text(output, encoding="utf-8")
        else:
            print(output)


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# yaml_checker/yaml_checker/config/__init__.py
# ---------------------------------------------------------------------------
from importlib import import_module
from pathlib import Path

submodule_root = Path(__file__).parent
package_name = __name__

# import all submodules so our configs registry is populated
# (sorted so the registration order does not depend on directory order)
for submodule in sorted(submodule_root.glob("*.py")):
    submodule_name = submodule.stem

    # skip private modules such as this __init__.py
    if submodule_name.startswith("_"):
        continue

    import_module(f"{package_name}.{submodule_name}")


# ---------------------------------------------------------------------------
# yaml_checker/yaml_checker/config/base.py
# ---------------------------------------------------------------------------
import fnmatch
import logging
from io import StringIO
from pathlib import Path
from typing import Any

from pydantic import BaseModel
from ruamel.yaml import YAML


class YAMLCheckConfigReg(type):
    """Metaclass that records each class it creates in ``cls.configs``."""

    def __init__(cls, *args, **kwargs):
        """Track all subclass configurations of YAMLCheckConfigBase for CLI"""
        super().__init__(*args, **kwargs)
        # The first class registered under a given name wins.
        cls.configs.setdefault(cls.__name__, cls)


class YAMLCheckConfigBase(metaclass=YAMLCheckConfigReg):
    """Base configuration: ruamel.yaml settings, rewrite rules and a model.

    Subclasses override ``rules``, ``Config`` and/or ``Model``; every
    subclass is registered by class name in ``configs`` via the metaclass.
    """

    configs = {}  # Store configs for access from CLI
    rules = {}  # map glob strings to class method names

    class Model(BaseModel):
        """Pydantic BaseModel to provide validation"""

        # Pydantic v2 style configuration (the inner ``class Config`` form
        # is deprecated); unknown fields are accepted by the base model.
        model_config = {"extra": "allow"}

    class Config:
        """ruamel.yaml configuration set before loading."""

        preserve_quotes = True
        width = 80
        map_indent = 2
        sequence_indent = 4
        sequence_dash_offset = 2

    def __init__(self):
        """Create the ruamel ``YAML`` instance and copy ``Config`` onto it.

        Raises:
            AttributeError: if ``Config`` names an attribute the ``YAML``
                instance does not have.
        """
        self.yaml = YAML()

        # load Config into yaml
        for attr in dir(self.Config):
            if attr.startswith("__"):
                continue

            if not hasattr(self.yaml, attr):
                raise AttributeError(f"Invalid ruamel.yaml attribute: {attr}")

            setattr(self.yaml, attr, getattr(self.Config, attr))

    def load(self, yaml_str: str):
        """Load YAML data from string"""
        return self.yaml.load(yaml_str)

    def dump(self, data: Any):
        """Dump data to YAML string"""
        with StringIO() as sio:
            self.yaml.dump(data, sio)
            return sio.getvalue()

    def validate_model(self, data: Any):
        """Validate data against ``Model`` (raises on invalid data)."""
        if issubclass(self.Model, BaseModel):
            _ = self.Model(**data)

    @staticmethod
    def _child_path(path: Path, segment: Any) -> Path:
        """Return ``path`` extended by ``segment``.

        Leading slashes are stripped from the segment: joining an absolute
        segment (e.g. the key "/usr/bin/grep") onto a ``Path`` would
        otherwise discard the parent path entirely.
        """
        return path / str(segment).lstrip("/")

    def _apply_rules(self, path: Path, data: Any):
        """Recursively apply rules starting from the outermost elements.

        Children are processed before the rules matching ``path`` itself
        are applied to ``data``. List items are addressed by their string
        value, not by their index.
        """
        logging.debug(f"Walking path {path}.")

        # recurse over dicts and lists
        if isinstance(data, dict):
            # snapshot the items so reassignment cannot disturb iteration
            for key, value in list(data.items()):
                data[key] = self._apply_rules(self._child_path(path, key), value)

        elif isinstance(data, list):
            for index, item in enumerate(data):
                data[index] = self._apply_rules(self._child_path(path, item), item)

        # scan for applicable rules at each directory
        # TODO: selection of rules here does not scale well and should be improved
        # Match case-sensitively on the "/"-separated form so rule patterns
        # behave the same on every operating system.
        posix_path = path.as_posix()
        for pattern, rule_name in self.rules.items():
            if fnmatch.fnmatchcase(posix_path, pattern):
                logging.debug(f'Applying rule "{rule_name}" at {path}')
                rule = getattr(self, rule_name)
                data = rule(path, data)

        return data

    def apply_rules(self, data: Any):
        """Walk all objects in data and apply rules where applicable."""
        return self._apply_rules(Path("/"), data)
# ---------------------------------------------------------------------------
# yaml_checker/yaml_checker/config/chisel.py
# ---------------------------------------------------------------------------
from typing import Any, Dict, List

from pydantic import BaseModel, Field, RootModel
from ruamel.yaml.comments import CommentedMap
from ruamel.yaml.scalarstring import PlainScalarString

from .base import YAMLCheckConfigBase


class Slices(RootModel):
    """Mapping of slice name to an (as yet unvalidated) slice body."""

    # TODO: expand slices model to validate individual slices
    root: Dict[str, Any]


class SDF(BaseModel):
    """Top-level document model: package, essential and slices only."""

    model_config = {"extra": "forbid"}

    package: str = Field()
    essential: List[str] = Field()
    slices: Slices = Field()


class Chisel(YAMLCheckConfigBase):
    """Config that sorts slice contents/essentials and validates with SDF."""

    # validate documents with basic SDF model
    Model = SDF

    rules = {
        "/slices/*/essential": "sort_content",
        "/slices/*/essential/*": "no_quotes",
        "/slices/*/contents": "sort_content",
    }

    def sort_content(self, path, data):
        """Sort dict and list objects.

        Lists are sorted in place and returned. Dicts are rebuilt as a new
        ``CommentedMap`` in sorted key order, with each key's comments
        copied across. Any other value is returned untouched.
        """

        def prep_comment_content(value):
            # drop surrounding whitespace, then pound signs at either end
            return value.strip().strip("#")

        if isinstance(data, list):
            data.sort()
            return data

        if not isinstance(data, dict):
            return data

        ordered = CommentedMap()
        for key in sorted(data.keys()):
            ordered[key] = data[key]

            if key not in data.ca.items:
                continue

            _, before_tokens, eol_token, _ = data.ca.items[key]

            # Migrate comments to new sorted dictionary. This works for most
            # but not all cases
            if before_tokens is not None:
                if isinstance(before_tokens, list):
                    tokens = before_tokens
                else:
                    tokens = [before_tokens]

                for token in tokens:
                    ordered.yaml_set_comment_before_after_key(
                        key,
                        before=prep_comment_content(token.value),
                        indent=token.column,
                    )

            if eol_token is not None:
                # These should be sorted ok, no need for warning
                ordered.yaml_add_eol_comment(
                    prep_comment_content(eol_token.value), key
                )

        return ordered

    def no_quotes(self, path, data):
        """Remove quotes from strings"""
        return PlainScalarString(data) if isinstance(data, str) else data


# ---------------------------------------------------------------------------
# yaml_checker/yaml_checker/config/oci_factory.py
# ---------------------------------------------------------------------------
import logging

from ruamel.yaml.scalarstring import (
    DoubleQuotedScalarString,
    SingleQuotedScalarString,
)

from .base import YAMLCheckConfigBase


class OCIFactory(YAMLCheckConfigBase):
    """Config that rewrites double-quoted strings as single-quoted ones."""

    rules = {"/**": "convert_to_single_quotes"}

    def convert_to_single_quotes(self, path, data):
        """Return ``data`` single-quoted when it is a double-quoted string.

        Values of any other type, and double-quoted strings containing a
        "'" character (a warning is logged for those), are returned as is.
        """
        # only DoubleQuotedScalarString values are candidates
        if not isinstance(data, DoubleQuotedScalarString):
            return data

        # skip strings containing "'" character
        if "'" in data:
            logging.warning(f'Cannot convert {path}, contains "\'" character.')
            return data

        return SingleQuotedScalarString(data)