Skip to content

Commit

Permalink
Merge pull request #28 from linkml/expr-opt-a
Browse files Browse the repository at this point in the history
Performance improvement for expr slots using Mapping-derived class for bindings
  • Loading branch information
martinwellman authored May 8, 2024
2 parents bd6cea2 + 303db85 commit 2fe7d8e
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 30 deletions.
119 changes: 98 additions & 21 deletions src/linkml_map/transformer/object_transformer.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import json
import logging
from collections.abc import Mapping
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Type, Union
from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union

import yaml
from asteval import Interpreter
Expand All @@ -17,15 +18,97 @@
)
from linkml_map.functions.unit_conversion import UnitSystem, convert_units
from linkml_map.transformer.transformer import OBJECT_TYPE, Transformer
from linkml_map.utils.dynamic_object import dynamic_object
from linkml_map.utils.eval_utils import eval_expr
from linkml_map.utils.dynamic_object import DynObj, dynamic_object
from linkml_map.utils.eval_utils import eval_expr, eval_expr_with_mapping

DICT_OBJ = Dict[str, Any]


logger = logging.getLogger(__name__)


class Bindings(Mapping):
"""
Efficiently access source object attributes.
"""

def __init__(
self,
object_transformer: "ObjectTransformer",
source_obj: OBJECT_TYPE,
source_obj_typed: OBJECT_TYPE,
source_type: str,
sv: SchemaView,
bindings: Dict,
):
self.object_transformer: "ObjectTransformer" = object_transformer
self.source_obj: OBJECT_TYPE = source_obj
self.source_obj_typed: OBJECT_TYPE = source_obj_typed
self.source_type: str = source_type
self.sv: SchemaView = sv
self.bindings: Dict = {}
if bindings:
self.bindings.update(bindings)

def get_ctxt_obj_and_dict(self, source_obj: OBJECT_TYPE = None) -> Tuple[DynObj, OBJECT_TYPE]:
"""
Transform a source object into a typed context object and dictionary, and cache results.
:param source_obj: Source data. Should be a subset of source_obj provided in the constructor.
If None the full source_obj from constructor is used.
:return: Tuple of typed context object and context dictionary. The object is the dictionary
with keys converted to member variables.
"""
if source_obj is None:
source_obj = self.source_obj

if self.object_transformer.object_index:
if not self.source_obj_typed:
source_obj_dyn = dynamic_object(source_obj, self.sv, self.source_type)
else:
source_obj_dyn = self.source_obj_typed
# Clear cache: Cache doesn't work since the cache key is the same when source_obj has only a subset
# of its keys. eg. {"age": self.source_obj["age"]} and {"name": self.source_obj["name"]}
# (with optionally the identifier key included) will have the same cache key, and so `bless` will
# incorrectly return the same cached values.
self.object_transformer.object_index.clear_proxy_object_cache()

ctxt_obj = self.object_transformer.object_index.bless(source_obj_dyn)
ctxt_dict = {
k: getattr(ctxt_obj, k) for k in ctxt_obj._attributes() if not k.startswith("_")
}
else:
do = dynamic_object(source_obj, self.sv, self.source_type)
ctxt_obj = do
ctxt_dict = vars(do)

self.bindings.update(ctxt_dict)

return ctxt_obj, ctxt_dict

def _all_keys(self) -> List[Any]:
keys = list(self.source_obj.keys()) + list(self.bindings.keys())
# Remove duplicate keys (ie. found in both source_obj and bindings), and retain original order
keys = list(dict.fromkeys(keys).keys())
return keys

def __len__(self) -> int:
return len(self._all_keys())

def __iter__(self) -> Iterator:
return iter(self._all_keys())

def __getitem__(self, name: Any) -> Any:
if name not in self.bindings:
_ = self.get_ctxt_obj_and_dict({name: self.source_obj[name]})

return self.bindings.get(name)

def __setitem__(self, name: Any, value: Any):
del name, value
raise RuntimeError(f"__setitem__ not allowed on class {self.__class__.__name__}")


@dataclass
class ObjectTransformer(Transformer):
"""
Expand Down Expand Up @@ -112,36 +195,30 @@ def map_object(
return source_obj
class_deriv = self._get_class_derivation(source_type)
tgt_attrs = {}
ctxt_obj = ctxt_dict = None
bindings = None
# map each slot assignment in source_obj, if there is a slot_derivation
for slot_derivation in class_deriv.slot_derivations.values():
v = None
source_class_slot = None
if slot_derivation.unit_conversion:
v = self._perform_unit_conversion(slot_derivation, source_obj, sv, source_type)
elif slot_derivation.expr:
if ctxt_obj is None:
if self.object_index:
if not source_obj_typed:
source_obj_dyn = dynamic_object(source_obj, sv, source_type)
else:
source_obj_dyn = source_obj_typed
ctxt_obj = self.object_index.bless(source_obj_dyn)
ctxt_dict = {
k: getattr(ctxt_obj, k)
for k in ctxt_obj._attributes()
if not k.startswith("_")
}
else:
do = dynamic_object(source_obj, sv, source_type)
ctxt_obj = do
ctxt_dict = vars(do)
if bindings is None:
bindings = Bindings(
self,
source_obj=source_obj,
source_obj_typed=source_obj_typed,
source_type=source_type,
sv=sv,
bindings={"NULL": None},
)

try:
v = eval_expr(slot_derivation.expr, **ctxt_dict, NULL=None)
v = eval_expr_with_mapping(slot_derivation.expr, bindings)
except Exception:
if not self.unrestricted_eval:
raise RuntimeError(f"Expression not in safe subset: {slot_derivation.expr}")
ctxt_obj, _ = bindings.get_ctxt_obj_and_dict()
aeval = Interpreter(usersyms={"src": ctxt_obj, "target": None})
aeval(slot_derivation.expr)
v = aeval.symtable["target"]
Expand Down
26 changes: 17 additions & 9 deletions src/linkml_map/utils/eval_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import ast
import operator as op
from collections.abc import Mapping

# supported operators
from typing import Any, List, Tuple
Expand Down Expand Up @@ -52,6 +53,21 @@ class UnsetValueError(Exception):
pass


def eval_expr_with_mapping(expr: str, mapping: Mapping) -> Any:
"""
Evaluates a given expression, with restricted syntax.
This function is equivalent to eval_expr where **kwargs has been switched to a Mapping (eg. a dictionary).
See eval_expr for details.
"""
if expr == "None":
# TODO: do this as part of parsing
return None
try:
return eval_(ast.parse(expr, mode="eval").body, mapping)
except UnsetValueError:
return None


def eval_expr(expr: str, **kwargs) -> Any:
"""
Evaluates a given expression, with restricted syntax
Expand Down Expand Up @@ -94,15 +110,7 @@ def eval_expr(expr: str, **kwargs) -> Any:
:param expr: expression to evaluate
"""
# if kwargs:
# expr = expr.format(**kwargs)
if expr == "None":
# TODO: do this as part of parsing
return None
try:
return eval_(ast.parse(expr, mode="eval").body, kwargs)
except UnsetValueError:
return None
return eval_expr_with_mapping(expr, kwargs)


def eval_(node, bindings=None):
Expand Down

0 comments on commit 2fe7d8e

Please sign in to comment.