From 963f36ffc32a7e3598673e1dc96cd83349afa662 Mon Sep 17 00:00:00 2001 From: Sergey Motornyuk Date: Mon, 7 Oct 2024 15:56:44 +0300 Subject: [PATCH] feat: add weight to fields --- README.md | 2 +- ckanext/transmute/logic/action/get.py | 76 +++++++------------------ ckanext/transmute/schema.py | 81 ++++++++++----------------- ckanext/transmute/types.py | 6 +- ckanext/transmute/utils.py | 2 +- setup.cfg | 15 +++-- 6 files changed, 64 insertions(+), 118 deletions(-) diff --git a/README.md b/README.md index a19b04e..0989aed 100644 --- a/README.md +++ b/README.md @@ -250,7 +250,7 @@ schema = ... ``` ### Keywords -1. `map_to` (`str`) - changes the `field.name` in result dict. +1. `map` (`str`) - changes the `field.name` in result dict. 2. `validators` (`list[str]`) - a list of transmutators that will be applied to a `field.value`. A transmutator could be a `string` or a `list` where the first item must be transmutator name and others are arbitrary values. Example: ``` ... diff --git a/ckanext/transmute/logic/action/get.py b/ckanext/transmute/logic/action/get.py index 67f1d87..400e45d 100644 --- a/ckanext/transmute/logic/action/get.py +++ b/ckanext/transmute/logic/action/get.py @@ -60,38 +60,32 @@ def _transmute_data(data, definition, root): if not schema: return - mutate_old_fields(data, definition, root) - create_new_fields(data, definition, root) + mutate_fields(data, definition, root) -def mutate_old_fields(data, definition, root): - """Checks all of the data fields and mutate them - according to the provided schema +def _weighten_fields(pair: tuple[str, SchemaField]): + return pair[1].weight - New fields won't be created here, because we are - only traversing the data dictionary - We can't traverse only Data or only Schema, because - otherwise, the user will have to define all of the fields - that could exist in data +def mutate_fields(data: dict[str, Any], definition: SchemaParser, root: str): + """Checks all of the schema fields and mutate/create them according to the + provided schema. Args: data (dict: [str, Any]): a data to mutate definition (SchemaParser): SchemaParser object root (str): a root schema type + """ schema = definition.types[root] - for field_name, value in data.copy().items(): - field: SchemaField = schema["fields"].get(field_name) - - if not field: - continue - + for field_name, field in sorted(schema["fields"].items(), key=_weighten_fields): if field.remove: - data.pop(field_name) + data.pop(field_name, None) continue + value = data.get(field_name) + if field.default is not SENTINEL and not value: data[field.name] = value = field.default @@ -120,51 +114,19 @@ def mutate_old_fields(data, definition, root): data[field.name] = value = field.value if field.is_multiple(): - for nested_field in value: + for nested_field in value or []: _transmute_data(nested_field, definition, field.type) + else: + if field_name not in data and not field.validate_missing: + continue + data[field.name] = _apply_validators( Field(field.name, value, root, data_ctx.get()), field.validators ) - if field.map_to: - data[field.map_to] = data.pop(field.name) - - -def create_new_fields(data, definition, root): - """New fields are going to be created according - to the provided schema - - If the defined field is not exist in the data dict - we are going to create it - - The newly created field's value could be inherited from - an existing field. This field must be defined in the - schema. - """ - schema = definition.types[root] - - for field_name, field in schema["fields"].items(): - if field_name in data: - continue - - if field.value is not SENTINEL: - data[field_name] = field.value - elif field.default is not SENTINEL: - data[field_name] = field.default - - if field.default_from: - data[field_name] = _default_from(data, field) - - if field.replace_from: - data[field_name] = _replace_from(data, field) - - if field_name not in data: - continue - - data[field_name] = _apply_validators( - Field(field_name, data[field_name], root, data_ctx.get()), field.validators - ) + if field.map: + data[field.map] = data.pop(field.name, None) def _default_from(data, field: SchemaField): @@ -210,7 +172,7 @@ def _get_first_filled(data, external_fields: list[str]): return field_value -def _apply_validators(field: Field, validators: list[Callable[[Field], Any]]): +def _apply_validators(field: Field, validators: list[str | list[str]]): """Applies validators sequentially to the field value Args: diff --git a/ckanext/transmute/schema.py b/ckanext/transmute/schema.py index 37b6edd..1ed38fa 100644 --- a/ckanext/transmute/schema.py +++ b/ckanext/transmute/schema.py @@ -3,48 +3,35 @@ from typing import Any, Optional, Union import copy +import dataclasses from ckan.logic.schema import validator_args - +from ckan import types from ckanext.transmute.exception import SchemaParsingError, SchemaFieldError from ckanext.transmute.utils import SENTINEL +@dataclasses.dataclass class SchemaField: - def __init__( - self, - *, - name: str, - type_: str, - definition: dict, - map_to: Optional[str] = None, - validators: Optional[list] = None, - multiple: bool = False, - remove: bool = False, - default: Any = SENTINEL, - default_from: Optional[str] = None, - value: Any = SENTINEL, - replace_from: Optional[str] = None, - inherit_mode: Optional[str] = None, - update: bool = False, - ): - self.name = name - self.type = type_ - self.definition = definition - self.map_to = map_to - self.validators = validators or [] - self.multiple = multiple - self.remove = remove - self.default = default - self.default_from = default_from - self.replace_from = replace_from - self.inherit_mode = inherit_mode - self.value = value - self.update = update + name: str + type: str + definition: dict[str, Any] + map: Optional[str] = None + validators: list[Any] = dataclasses.field(default_factory=list) + multiple: bool = False + remove: bool = False + default: Any = SENTINEL + default_from: Optional[str] = None + value: Any = SENTINEL + replace_from: Optional[str] = None + inherit_mode: Optional[str] = "combine" + update: bool = False + validate_missing: bool = False + weight: int = 0 def __repr__(self): return ( - f"" ) @@ -86,14 +73,14 @@ def _get_sibling_field_name(self, field_name: str) -> Optional[Any]: class SchemaParser: - def __init__(self, schema): + def __init__(self, schema: dict[str, Any]): self.schema = copy.deepcopy(schema) self.root_type = self.get_root_type() self.types = self.parse_types() self.parse_fields() def get_root_type(self): - root_type: str = self.schema.get("root") + root_type: str = self.schema.get("root", "") if not root_type: raise SchemaParsingError("Schema: root type is missing") @@ -117,7 +104,7 @@ def parse_fields(self): ) def _parse_field( - self, field_name: str, field_meta: dict, _type: str + self, field_name: str, field_meta: dict[str, Any], _type: str ) -> SchemaField: """Create a SchemaField combining all the information about field @@ -131,25 +118,15 @@ def _parse_field( SchemaField: SchemaField object """ - return SchemaField( - name=field_name, - type_=field_meta.get("type", _type), - definition=self.types[_type], - map_to=field_meta.get("map", None), - validators=field_meta.get("validators"), - multiple=field_meta.get("multiple", False), - remove=field_meta.get("remove", False), - default=field_meta.get("default", SENTINEL), - default_from=field_meta.get("default_from", None), - value=field_meta.get("value", SENTINEL), - replace_from=field_meta.get("replace_from", None), - inherit_mode=field_meta.get("inherit_mode", "combine"), - update=field_meta.get("update", False), - ) + params: dict[str, Any] = dict({"type": _type}, **field_meta) + return SchemaField(name=field_name, definition=self.types[_type], **params) @validator_args -def transmute_schema(not_missing, default): +def transmute_schema( + not_missing: types.Validator, + default: types.ValidatorFactory, +) -> types.Schema: return { "data": [not_missing], "schema": [not_missing], @@ -158,7 +135,7 @@ def transmute_schema(not_missing, default): @validator_args -def validate_schema(not_missing): +def validate_schema(not_missing: types.Validator) -> types.Schema: return { "data": [not_missing], } diff --git a/ckanext/transmute/types.py b/ckanext/transmute/types.py index 1ffac52..f2c4af5 100644 --- a/ckanext/transmute/types.py +++ b/ckanext/transmute/types.py @@ -1,10 +1,9 @@ from __future__ import annotations +import dataclasses from typing import Any from typing_extensions import TypedDict -from recordclass import RecordClass - class TransmuteData(TypedDict): data: dict[str, Any] @@ -12,7 +11,8 @@ class TransmuteData(TypedDict): root: str -class Field(RecordClass): +@dataclasses.dataclass +class Field: field_name: str value: Any type: str diff --git a/ckanext/transmute/utils.py b/ckanext/transmute/utils.py index 8ab256f..809417e 100644 --- a/ckanext/transmute/utils.py +++ b/ckanext/transmute/utils.py @@ -9,7 +9,7 @@ from ckanext.transmute.interfaces import ITransmute from ckanext.transmute.types import MODE_COMBINE, MODE_FIRST_FILLED -SENTINEL = {} +SENTINEL = object() _transmutator_cache = {} _schema_cache = {} diff --git a/setup.cfg b/setup.cfg index f60a0ee..923d924 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = ckanext-transmute -version = 1.6.0 +version = 2.0.0a0 description = Converts a dataset based on a specific schema long_description = file: README.md long_description_content_type = text/markdown @@ -11,24 +11,31 @@ license = AGPL classifiers = Development Status :: 4 - Beta License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+) - Programming Language :: Python :: 2.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 + Programming Language :: Python :: 3.13 + Programming Language :: Python :: 3.14 keywords = CKAN scheming schema [options] +python_requires = >= 3.8 packages = find: namespace_packages = ckanext install_requires = ckanext-scheming typing-extensions - recordclass -include_package_data = True, +include_package_data = True [options.entry_points] ckan.plugins = transmute = ckanext.transmute.plugin:TransmutePlugin + babel.extractors = ckan = ckan.lib.extract:extract_ckan