diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py index 6c4d32c..6126b7f 100644 --- a/src/linkml_map/transformer/object_transformer.py +++ b/src/linkml_map/transformer/object_transformer.py @@ -1,7 +1,8 @@ import json import logging +from collections.abc import Mapping from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Type, Union +from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union import yaml from asteval import Interpreter @@ -17,8 +18,8 @@ ) from linkml_map.functions.unit_conversion import UnitSystem, convert_units from linkml_map.transformer.transformer import OBJECT_TYPE, Transformer -from linkml_map.utils.dynamic_object import dynamic_object -from linkml_map.utils.eval_utils import eval_expr +from linkml_map.utils.dynamic_object import DynObj, dynamic_object +from linkml_map.utils.eval_utils import eval_expr, eval_expr_with_mapping DICT_OBJ = Dict[str, Any] @@ -26,6 +27,88 @@ logger = logging.getLogger(__name__) +class Bindings(Mapping): + """ + Efficiently access source object attributes. + """ + + def __init__( + self, + object_transformer: "ObjectTransformer", + source_obj: OBJECT_TYPE, + source_obj_typed: OBJECT_TYPE, + source_type: str, + sv: SchemaView, + bindings: Dict, + ): + self.object_transformer: "ObjectTransformer" = object_transformer + self.source_obj: OBJECT_TYPE = source_obj + self.source_obj_typed: OBJECT_TYPE = source_obj_typed + self.source_type: str = source_type + self.sv: SchemaView = sv + self.bindings: Dict = {} + if bindings: + self.bindings.update(bindings) + + def get_ctxt_obj_and_dict(self, source_obj: OBJECT_TYPE = None) -> Tuple[DynObj, OBJECT_TYPE]: + """ + Transform a source object into a typed context object and dictionary, and cache results. + + :param source_obj: Source data. Should be a subset of source_obj provided in the constructor. + If None the full source_obj from constructor is used. + :return: Tuple of typed context object and context dictionary. The object is the dictionary + with keys converted to member variables. + """ + if source_obj is None: + source_obj = self.source_obj + + if self.object_transformer.object_index: + if not self.source_obj_typed: + source_obj_dyn = dynamic_object(source_obj, self.sv, self.source_type) + else: + source_obj_dyn = self.source_obj_typed + # Clear cache: Cache doesn't work since the cache key is the same when source_obj has only a subset + # of its keys. eg. {"age": self.source_obj["age"]} and {"name": self.source_obj["name"]} + # (with optionally the identifier key included) will have the same cache key, and so `bless` will + # incorrectly return the same cached values. + self.object_transformer.object_index.clear_proxy_object_cache() + + ctxt_obj = self.object_transformer.object_index.bless(source_obj_dyn) + ctxt_dict = { + k: getattr(ctxt_obj, k) for k in ctxt_obj._attributes() if not k.startswith("_") + } + else: + do = dynamic_object(source_obj, self.sv, self.source_type) + ctxt_obj = do + ctxt_dict = vars(do) + + self.bindings.update(ctxt_dict) + + return ctxt_obj, ctxt_dict + + def _all_keys(self) -> List[Any]: + keys = list(self.source_obj.keys()) + list(self.bindings.keys()) + # Remove duplicate keys (ie. found in both source_obj and bindings), and retain original order + keys = list(dict.fromkeys(keys).keys()) + return keys + + def __len__(self) -> int: + return len(self._all_keys()) + + def __iter__(self) -> Iterator: + return iter(self._all_keys()) + + def __getitem__(self, name: Any) -> Any: + if name not in self.bindings: + _ = self.get_ctxt_obj_and_dict({name: self.source_obj[name]}) + + return self.bindings.get(name) + + def __setitem__(self, name: Any, value: Any): + del name, value + raise RuntimeError(f"__setitem__ not allowed on class {self.__class__.__name__}") + + @dataclass class ObjectTransformer(Transformer): """ @@ -112,7 +195,7 @@ def map_object( return source_obj class_deriv = self._get_class_derivation(source_type) tgt_attrs = {} - ctxt_obj = ctxt_dict = None + bindings = None # map each slot assignment in source_obj, if there is a slot_derivation for slot_derivation in class_deriv.slot_derivations.values(): v = None @@ -120,28 +203,22 @@ def map_object( if slot_derivation.unit_conversion: v = self._perform_unit_conversion(slot_derivation, source_obj, sv, source_type) elif slot_derivation.expr: - if ctxt_obj is None: - if self.object_index: - if not source_obj_typed: - source_obj_dyn = dynamic_object(source_obj, sv, source_type) - else: - source_obj_dyn = source_obj_typed - ctxt_obj = self.object_index.bless(source_obj_dyn) - ctxt_dict = { - k: getattr(ctxt_obj, k) - for k in ctxt_obj._attributes() - if not k.startswith("_") - } - else: - do = dynamic_object(source_obj, sv, source_type) - ctxt_obj = do - ctxt_dict = vars(do) + if bindings is None: + bindings = Bindings( + self, + source_obj=source_obj, + source_obj_typed=source_obj_typed, + source_type=source_type, + sv=sv, + bindings={"NULL": None}, + ) try: - v = eval_expr(slot_derivation.expr, **ctxt_dict, NULL=None) + v = eval_expr_with_mapping(slot_derivation.expr, bindings) except Exception: if not self.unrestricted_eval: raise RuntimeError(f"Expression not in safe subset: {slot_derivation.expr}") + ctxt_obj, _ = bindings.get_ctxt_obj_and_dict() aeval = Interpreter(usersyms={"src": ctxt_obj, "target": None}) aeval(slot_derivation.expr) v = aeval.symtable["target"] diff --git a/src/linkml_map/utils/eval_utils.py b/src/linkml_map/utils/eval_utils.py index 9d4ffc2..41a4f46 100644 --- a/src/linkml_map/utils/eval_utils.py +++ b/src/linkml_map/utils/eval_utils.py @@ -8,6 +8,7 @@ import ast import operator as op +from collections.abc import Mapping # supported operators from typing import Any, List, Tuple @@ -52,6 +53,21 @@ class UnsetValueError(Exception): pass +def eval_expr_with_mapping(expr: str, mapping: Mapping) -> Any: + """ + Evaluates a given expression, with restricted syntax. + This function is equivalent to eval_expr where **kwargs has been switched to a Mapping (eg. a dictionary). + See eval_expr for details. + """ + if expr == "None": + # TODO: do this as part of parsing + return None + try: + return eval_(ast.parse(expr, mode="eval").body, mapping) + except UnsetValueError: + return None + + def eval_expr(expr: str, **kwargs) -> Any: """ Evaluates a given expression, with restricted syntax @@ -94,15 +110,7 @@ def eval_expr(expr: str, **kwargs) -> Any: :param expr: expression to evaluate """ - # if kwargs: - # expr = expr.format(**kwargs) - if expr == "None": - # TODO: do this as part of parsing - return None - try: - return eval_(ast.parse(expr, mode="eval").body, kwargs) - except UnsetValueError: - return None + return eval_expr_with_mapping(expr, kwargs) def eval_(node, bindings=None):