From 0115ca4804d4c84affdaef713c6d194185c49ec3 Mon Sep 17 00:00:00 2001
From: Martin Wellman
Date: Fri, 3 May 2024 10:25:08 -0700
Subject: [PATCH 1/3] Bindings class for optimizing expr code

---
 .../transformer/object_transformer.py | 117 +++++++++++++++---
 src/linkml_map/utils/eval_utils.py    |  26 ++--
 2 files changed, 115 insertions(+), 28 deletions(-)

diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py
index 0d80e9b..6126b7f 100644
--- a/src/linkml_map/transformer/object_transformer.py
+++ b/src/linkml_map/transformer/object_transformer.py
@@ -1,7 +1,8 @@
 import json
 import logging
+from collections.abc import Mapping
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union
 
 import yaml
 from asteval import Interpreter
@@ -17,8 +18,8 @@
 )
 from linkml_map.functions.unit_conversion import UnitSystem, convert_units
 from linkml_map.transformer.transformer import OBJECT_TYPE, Transformer
-from linkml_map.utils.dynamic_object import dynamic_object
-from linkml_map.utils.eval_utils import eval_expr
+from linkml_map.utils.dynamic_object import DynObj, dynamic_object
+from linkml_map.utils.eval_utils import eval_expr, eval_expr_with_mapping
 
 DICT_OBJ = Dict[str, Any]
 
@@ -26,6 +27,88 @@
 logger = logging.getLogger(__name__)
 
 
+class Bindings(Mapping):
+    """
+    Efficiently access source object attributes.
+    """
+
+    def __init__(
+        self,
+        object_transformer: "ObjectTransformer",
+        source_obj: OBJECT_TYPE,
+        source_obj_typed: OBJECT_TYPE,
+        source_type: str,
+        sv: SchemaView,
+        bindings: Dict,
+    ):
+        self.object_transformer: "ObjectTransformer" = object_transformer
+        self.source_obj: OBJECT_TYPE = source_obj
+        self.source_obj_typed: OBJECT_TYPE = source_obj_typed
+        self.source_type: str = source_type
+        self.sv: SchemaView = sv
+        self.bindings: Dict = {}
+        if bindings:
+            self.bindings.update(bindings)
+
+    def get_ctxt_obj_and_dict(self, source_obj: OBJECT_TYPE = None) -> Tuple[DynObj, OBJECT_TYPE]:
+        """
+        Transform a source object into a typed context object and dictionary, and cache results.
+
+        :param source_obj: Source data. Should be a subset of source_obj provided in the constructor.
+            If None the full source_obj from constructor is used.
+        :return: Tuple of typed context object and context dictionary. The object is the dictionary
+            with keys converted to member variables.
+        """
+        if source_obj is None:
+            source_obj = self.source_obj
+
+        if self.object_transformer.object_index:
+            if not self.source_obj_typed:
+                source_obj_dyn = dynamic_object(source_obj, self.sv, self.source_type)
+            else:
+                source_obj_dyn = self.source_obj_typed
+            # Clear cache: Cache doesn't work since the cache key is the same when source_obj has only a subset
+            # of its keys. eg. {"age": self.source_obj["age"]} and {"name": self.source_obj["name"]}
+            # (with optionally the identifier key included) will have the same cache key, and so `bless` will
+            # incorrectly return the same cached values.
+            self.object_transformer.object_index.clear_proxy_object_cache()
+
+            ctxt_obj = self.object_transformer.object_index.bless(source_obj_dyn)
+            ctxt_dict = {
+                k: getattr(ctxt_obj, k) for k in ctxt_obj._attributes() if not k.startswith("_")
+            }
+        else:
+            do = dynamic_object(source_obj, self.sv, self.source_type)
+            ctxt_obj = do
+            ctxt_dict = vars(do)
+
+        self.bindings.update(ctxt_dict)
+
+        return ctxt_obj, ctxt_dict
+
+    def _all_keys(self) -> List[Any]:
+        keys = list(self.source_obj.keys()) + list(self.bindings.keys())
+        # Remove duplicate keys (ie. found in both source_obj and bindings), and retain original order
+        keys = list(dict.fromkeys(keys).keys())
+        return keys
+
+    def __len__(self) -> int:
+        return len(self._all_keys())
+
+    def __iter__(self) -> Iterator:
+        return iter(self._all_keys())
+
+    def __getitem__(self, name: Any) -> Any:
+        if name not in self.bindings:
+            _ = self.get_ctxt_obj_and_dict({name: self.source_obj[name]})
+
+        return self.bindings.get(name)
+
+    def __setitem__(self, name: Any, value: Any):
+        del name, value
+        raise RuntimeError(f"__setitem__ not allowed on class {self.__class__.__name__}")
+
+
 @dataclass
 class ObjectTransformer(Transformer):
     """
@@ -112,6 +195,7 @@ def map_object(
             return source_obj
         class_deriv = self._get_class_derivation(source_type)
         tgt_attrs = {}
+        bindings = None
         # map each slot assignment in source_obj, if there is a slot_derivation
         for slot_derivation in class_deriv.slot_derivations.values():
             v = None
@@ -119,27 +203,22 @@
             if slot_derivation.unit_conversion:
                 v = self._perform_unit_conversion(slot_derivation, source_obj, sv, source_type)
             elif slot_derivation.expr:
-                if self.object_index:
-                    if not source_obj_typed:
-                        source_obj_dyn = dynamic_object(source_obj, sv, source_type)
-                    else:
-                        source_obj_dyn = source_obj_typed
-                    ctxt_obj = self.object_index.bless(source_obj_dyn)
-                    ctxt_dict = {
-                        k: getattr(ctxt_obj, k)
-                        for k in ctxt_obj._attributes()
-                        if not k.startswith("_")
-                    }
-                else:
-                    do = dynamic_object(source_obj, sv, source_type)
-                    ctxt_obj = do
-                    ctxt_dict = vars(do)
+                if bindings is None:
+                    bindings = Bindings(
+                        self,
+                        source_obj=source_obj,
+                        source_obj_typed=source_obj_typed,
+                        source_type=source_type,
+                        sv=sv,
+                        bindings={"NULL": None},
+                    )
 
                 try:
-                    v = eval_expr(slot_derivation.expr, **ctxt_dict, NULL=None)
+                    v = eval_expr_with_mapping(slot_derivation.expr, bindings)
                 except Exception:
                     if not self.unrestricted_eval:
                         raise RuntimeError(f"Expression not in safe subset: {slot_derivation.expr}")
+                    ctxt_obj, _ = bindings.get_ctxt_obj_and_dict()
                     aeval = Interpreter(usersyms={"src": ctxt_obj, "target": None})
                     aeval(slot_derivation.expr)
                     v = aeval.symtable["target"]
diff --git a/src/linkml_map/utils/eval_utils.py b/src/linkml_map/utils/eval_utils.py
index 9d4ffc2..41a4f46 100644
--- a/src/linkml_map/utils/eval_utils.py
+++ b/src/linkml_map/utils/eval_utils.py
@@ -8,6 +8,7 @@
 import ast
 import operator as op
+from collections.abc import Mapping
 
 # supported operators
 from typing import Any, List, Tuple
 
@@ -52,6 +53,21 @@ class UnsetValueError(Exception):
     pass
 
 
+def eval_expr_with_mapping(expr: str, mapping: Mapping) -> Any:
+    """
+    Evaluates a given expression, with restricted syntax.
+    This function is equivalent to eval_expr where **kwargs has been switched to a Mapping (eg. a dictionary).
+    See eval_expr for details.
+    """
+    if expr == "None":
+        # TODO: do this as part of parsing
+        return None
+    try:
+        return eval_(ast.parse(expr, mode="eval").body, mapping)
+    except UnsetValueError:
+        return None
+
+
 def eval_expr(expr: str, **kwargs) -> Any:
     """
     Evaluates a given expression, with restricted syntax
@@ -94,15 +110,7 @@
     :param expr: expression to evaluate
     """
-    # if kwargs:
-    #     expr = expr.format(**kwargs)
-    if expr == "None":
-        # TODO: do this as part of parsing
-        return None
-    try:
-        return eval_(ast.parse(expr, mode="eval").body, kwargs)
-    except UnsetValueError:
-        return None
+    return eval_expr_with_mapping(expr, kwargs)
 
 
 def eval_(node, bindings=None):

From 5acf6279796188e7afb7597fe6627ba66c25f9c8 Mon Sep 17 00:00:00 2001
From: Martin Wellman
Date: Tue, 7 May 2024 14:44:07 -0700
Subject: [PATCH 2/3] Added class Bindings for optimized expr handling

---
 .../transformer/object_transformer.py | 122 ++++++++++++++----
 1 file changed, 100 insertions(+), 22 deletions(-)

diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py
index 6c4d32c..f859918 100644
--- a/src/linkml_map/transformer/object_transformer.py
+++ b/src/linkml_map/transformer/object_transformer.py
@@ -1,7 +1,8 @@
 import json
 import logging
 from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, List, Iterator, Optional, Tuple, Type, Union
+from collections.abc import Mapping
 
 import yaml
 from asteval import Interpreter
@@ -9,6 +10,7 @@
 from linkml_runtime.index.object_index import ObjectIndex
 from linkml_runtime.utils.yamlutils import YAMLRoot
 from pydantic import BaseModel
+from collections.abc import Mapping
 
 from linkml_map.datamodel.transformer_model import (
     CollectionType,
@@ -17,8 +19,8 @@
 )
 from linkml_map.functions.unit_conversion import UnitSystem, convert_units
 from linkml_map.transformer.transformer import OBJECT_TYPE, Transformer
-from linkml_map.utils.dynamic_object import dynamic_object
-from linkml_map.utils.eval_utils import eval_expr
+from linkml_map.utils.dynamic_object import DynObj, dynamic_object
+from linkml_map.utils.eval_utils import eval_expr, eval_expr_with_mapping
 
 DICT_OBJ = Dict[str, Any]
 
@@ -26,6 +28,88 @@
 logger = logging.getLogger(__name__)
 
 
+class Bindings(Mapping):
+    """
+    Efficiently access source object attributes.
+    """
+
+    def __init__(
+        self,
+        object_transformer: "ObjectTransformer",
+        source_obj: OBJECT_TYPE,
+        source_obj_typed: OBJECT_TYPE,
+        source_type: str,
+        sv: SchemaView,
+        bindings: Dict,
+    ):
+        self.object_transformer: "ObjectTransformer" = object_transformer
+        self.source_obj: OBJECT_TYPE = source_obj
+        self.source_obj_typed: OBJECT_TYPE = source_obj_typed
+        self.source_type: str = source_type
+        self.sv: SchemaView = sv
+        self.bindings: Dict = {}
+        if bindings:
+            self.bindings.update(bindings)
+
+    def get_ctxt_obj_and_dict(self, source_obj: OBJECT_TYPE = None) -> Tuple[DynObj, OBJECT_TYPE]:
+        """
+        Transform a source object into a typed context object and dictionary, and cache results.
+
+        :param source_obj: Source data. Should be a subset of source_obj provided in the constructor.
+            If None the full source_obj from constructor is used.
+        :return: Tuple of typed context object and dictionary. The object is the dictionary
+            with keys converted to member variables.
+        """
+        if source_obj is None:
+            source_obj = self.source_obj
+
+        if self.object_transformer.object_index:
+            if not self.source_obj_typed:
+                source_obj_dyn = dynamic_object(source_obj, self.sv, self.source_type)
+            else:
+                source_obj_dyn = self.source_obj_typed
+            # Clear cache: Cache doesn't work since the cache key is the same when source_obj has only a subset
+            # of its keys. eg. {"age": self.source_obj["age"]} and {"name": self.source_obj["name"]}
+            # (with optionally the identifier key included) will have the same cache key, and so `bless` will
+            # incorrectly return the same cached values.
+            self.object_transformer.object_index.clear_proxy_object_cache()
+
+            ctxt_obj = self.object_transformer.object_index.bless(source_obj_dyn)
+            ctxt_dict = {
+                k: getattr(ctxt_obj, k) for k in ctxt_obj._attributes() if not k.startswith("_")
+            }
+        else:
+            do = dynamic_object(source_obj, self.sv, self.source_type)
+            ctxt_obj = do
+            ctxt_dict = vars(do)
+
+        self.bindings.update(ctxt_dict)
+
+        return ctxt_obj, ctxt_dict
+
+    def _all_keys(self) -> List[Any]:
+        keys = list(self.source_obj.keys()) + list(self.bindings.keys())
+        # Remove duplicate keys (ie. found in both source_obj and bindings), and retain original order
+        keys = list(dict.fromkeys(keys).keys())
+        return keys
+
+    def __len__(self) -> int:
+        return len(self._all_keys())
+
+    def __iter__(self) -> Iterator:
+        return iter(self._all_keys())
+
+    def __getitem__(self, name: Any) -> Any:
+        if name not in self.bindings:
+            _ = self.get_ctxt_obj_and_dict({name: self.source_obj[name]})
+
+        return self.bindings.get(name)
+
+    def __setitem__(self, name: Any, value: Any):
+        del name, value
+        raise RuntimeError(f"__setitem__ not allowed on class {self.__class__.__name__}")
+
+
 @dataclass
 class ObjectTransformer(Transformer):
     """
@@ -112,7 +196,7 @@ def map_object(
             return source_obj
         class_deriv = self._get_class_derivation(source_type)
         tgt_attrs = {}
-        ctxt_obj = ctxt_dict = None
+        bindings = None
         # map each slot assignment in source_obj, if there is a slot_derivation
         for slot_derivation in class_deriv.slot_derivations.values():
             v = None
@@ -120,28 +204,22 @@
             if slot_derivation.unit_conversion:
                 v = self._perform_unit_conversion(slot_derivation, source_obj, sv, source_type)
            elif slot_derivation.expr:
-                if ctxt_obj is None:
-                    if self.object_index:
-                        if not source_obj_typed:
-                            source_obj_dyn = dynamic_object(source_obj, sv, source_type)
-                        else:
-                            source_obj_dyn = source_obj_typed
-                        ctxt_obj = self.object_index.bless(source_obj_dyn)
-                        ctxt_dict = {
-                            k: getattr(ctxt_obj, k)
-                            for k in ctxt_obj._attributes()
-                            if not k.startswith("_")
-                        }
-                    else:
-                        do = dynamic_object(source_obj, sv, source_type)
-                        ctxt_obj = do
-                        ctxt_dict = vars(do)
-
+                if bindings is None:
+                    bindings = Bindings(
+                        self,
+                        source_obj=source_obj,
+                        source_obj_typed=source_obj_typed,
+                        source_type=source_type,
+                        sv=sv,
+                        bindings={"NULL": None},
+                    )
+
                 try:
-                    v = eval_expr(slot_derivation.expr, **ctxt_dict, NULL=None)
+                    v = eval_expr_with_mapping(slot_derivation.expr, bindings)
                 except Exception:
                     if not self.unrestricted_eval:
                         raise RuntimeError(f"Expression not in safe subset: {slot_derivation.expr}")
+                    ctxt_obj, _ = bindings.get_ctxt_obj_and_dict()
                     aeval = Interpreter(usersyms={"src": ctxt_obj, "target": None})
                     aeval(slot_derivation.expr)
                     v = aeval.symtable["target"]

From 303db85fd4037d9a022e685dee47b00465ed700f Mon Sep 17 00:00:00 2001
From: Martin Wellman
Date: Tue, 7 May 2024 15:13:24 -0700
Subject: [PATCH 3/3] Ran linter

---
 .../transformer/object_transformer.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/linkml_map/transformer/object_transformer.py b/src/linkml_map/transformer/object_transformer.py
index f859918..6126b7f 100644
--- a/src/linkml_map/transformer/object_transformer.py
+++ b/src/linkml_map/transformer/object_transformer.py
@@ -1,8 +1,8 @@
 import json
 import logging
-from dataclasses import dataclass
-from typing import Any, Dict, List, Iterator, Optional, Tuple, Type, Union
 from collections.abc import Mapping
+from dataclasses import dataclass
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union
 
 import yaml
 from asteval import Interpreter
@@ -10,7 +10,6 @@
 from linkml_runtime.index.object_index import ObjectIndex
 from linkml_runtime.utils.yamlutils import YAMLRoot
 from pydantic import BaseModel
-from collections.abc import Mapping
 
 from linkml_map.datamodel.transformer_model import (
     CollectionType,
@@ -62,7 +61,7 @@ def get_ctxt_obj_and_dict(self, source_obj: OBJECT_TYPE = None) -> Tuple[DynObj,
         """
         if source_obj is None:
             source_obj = self.source_obj
-
+
         if self.object_transformer.object_index:
             if not self.source_obj_typed:
                 source_obj_dyn = dynamic_object(source_obj, self.sv, self.source_type)
@@ -73,7 +72,7 @@ def get_ctxt_obj_and_dict(self, source_obj: OBJECT_TYPE = None) -> Tuple[DynObj,
             # (with optionally the identifier key included) will have the same cache key, and so `bless` will
             # incorrectly return the same cached values.
             self.object_transformer.object_index.clear_proxy_object_cache()
-
+
             ctxt_obj = self.object_transformer.object_index.bless(source_obj_dyn)
             ctxt_dict = {
                 k: getattr(ctxt_obj, k) for k in ctxt_obj._attributes() if not k.startswith("_")
@@ -86,7 +85,7 @@ def get_ctxt_obj_and_dict(self, source_obj: OBJECT_TYPE = None) -> Tuple[DynObj,
         self.bindings.update(ctxt_dict)
 
         return ctxt_obj, ctxt_dict
-
+
     def _all_keys(self) -> List[Any]:
         keys = list(self.source_obj.keys()) + list(self.bindings.keys())
         # Remove duplicate keys (ie. found in both source_obj and bindings), and retain original order
@@ -95,14 +94,14 @@ def _all_keys(self) -> List[Any]:
 
     def __len__(self) -> int:
         return len(self._all_keys())
-
+
     def __iter__(self) -> Iterator:
         return iter(self._all_keys())
-
+
     def __getitem__(self, name: Any) -> Any:
         if name not in self.bindings:
             _ = self.get_ctxt_obj_and_dict({name: self.source_obj[name]})
-
+
         return self.bindings.get(name)
 
     def __setitem__(self, name: Any, value: Any):
@@ -213,7 +212,7 @@ def map_object(
                        sv=sv,
                        bindings={"NULL": None},
                    )
-
+
                 try:
                     v = eval_expr_with_mapping(slot_derivation.expr, bindings)
                 except Exception:
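
Note for reviewers (not part of the patches above): the optimization has two halves. eval_expr_with_mapping() lets the restricted evaluator pull names from any Mapping rather than requiring a fully materialized **kwargs dict, and Bindings is a read-only Mapping that resolves a source attribute only the first time an expression references it (__getitem__ calls get_ctxt_obj_and_dict() for just that key and caches the result in self.bindings). The standalone Python sketch below illustrates that lazy-Mapping pattern in isolation; LazyBindings and its members are hypothetical stand-ins, and _resolve() is only a placeholder for the dynamic_object()/bless() work the real class performs.

    from collections.abc import Mapping
    from typing import Any, Dict, Iterator

    class LazyBindings(Mapping):
        """Resolve values on first access, mirroring what Bindings.__getitem__ does above."""

        def __init__(self, source: Dict[str, Any]):
            self.source = source                            # raw source object (a plain dict here)
            self.resolved: Dict[str, Any] = {"NULL": None}  # pre-seeded bindings

        def _resolve(self, key: str) -> None:
            # Stand-in for the expensive per-attribute conversion done in the patch.
            self.resolved[key] = self.source[key]

        def __getitem__(self, key: str) -> Any:
            if key not in self.resolved:
                self._resolve(key)
            return self.resolved.get(key)

        def __iter__(self) -> Iterator[str]:
            # Keys from the source plus anything already bound, original order kept.
            return iter(dict.fromkeys(list(self.source) + list(self.resolved)))

        def __len__(self) -> int:
            return len(dict.fromkeys(list(self.source) + list(self.resolved)))

    bindings = LazyBindings({"age": 42, "name": "Ada"})
    print(bindings["age"])              # resolves only "age"
    print("name" in bindings.resolved)  # False: untouched keys stay lazy

Passing such a mapping to eval_expr_with_mapping("age + 1", bindings) should therefore only materialize the attributes the expression actually names, which is the point of the change.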