Skip to content

Commit

Permalink
data transform with mutability
Browse files Browse the repository at this point in the history
  • Loading branch information
sbilge committed Nov 26, 2024
1 parent f8bf192 commit 1174277
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from metldata.builtin_transformations.add_content_properties.path import (
resolve_data_object_path,
)
from metldata.builtin_transformations.common.utils import thaw_frozen_dict
from metldata.builtin_transformations.common.utils import data_to_dict
from metldata.transform.exceptions import EvitableTransformationError


Expand All @@ -45,23 +45,17 @@ def add_properties(
Returns:
The data with the specified content properties being added.
"""
updated_classes: dict = {}

modified_data = data_to_dict(data)
for class_name, instructions in instructions_by_class.items():
class_resources = data.resources.get(class_name)
class_resources = modified_data["resources"].get(class_name)

if not class_resources:
raise EvitableTransformationError()

# convert to a mutable dict to modify it
# note that, it does not apply mutability to inner layer Resource objects
mutable_class_resources = thaw_frozen_dict(class_resources)

for resource_id, resource in class_resources.items():
resource_content = thaw_frozen_dict(resource.content)
for resource in class_resources.values():
for instruction in instructions:
object = resolve_data_object_path(
data=resource_content,
data=resource.get("content", {}),
path=instruction.target_content.object_path,
)

Expand All @@ -74,12 +68,4 @@ def add_properties(
object[instruction.target_content.property_name] = deepcopy(
instruction.value
)
# resource_content changed
mutable_class_resources[resource_id] = resource.model_copy(
update={"content": resource_content})
#class resources changed
updated_classes[class_name]= class_resources.update(mutable_class_resources)
# resources changed
updated_resources = data.resources.update(updated_classes)
modified_data = data.model_copy(update={"resources": updated_resources})
return modified_data
return DataPack.model_validate(modified_data)
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@
)
from metldata.builtin_transformations.common.model_transform import update_model
from metldata.builtin_transformations.common.utils import (
model_to_dict,
thaw_frozen_dict,
thaw_content,
)
from metldata.transform.exceptions import EvitableTransformationError

Expand All @@ -47,7 +46,7 @@ def add_content_properties(
if not class_def:
raise EvitableTransformationError()

content_schema = thaw_frozen_dict(class_def.content)
content_schema = thaw_content(class_def.content)
for cls_instruction in cls_instructions:
object_path = cls_instruction.target_content.object_path
property_name = cls_instruction.target_content.property_name
Expand Down
19 changes: 15 additions & 4 deletions src/metldata/builtin_transformations/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
from copy import deepcopy
from typing import Any

from schemapack import dumps_schemapack
from schemapack import dumps_datapack, dumps_schemapack
from schemapack._internals.spec.datapack import Resource
from schemapack.spec.datapack import DataPack
from schemapack.spec.schemapack import SchemaPack


Expand All @@ -34,13 +36,22 @@ def model_to_dict(
return json.loads(dumps_schemapack(deepcopy(model), yaml_format=False))


def thaw_frozen_dict(frozen_dict: Mapping ) -> dict:
def data_to_dict(data: DataPack):
    """Serialize a DataPack into a plain, JSON-compatible dictionary.

    The datapack is deep-copied, dumped to a JSON string (not YAML) and the
    resulting string is parsed back with the standard ``json`` module.

    Args:
        data: The DataPack to convert.

    Returns:
        The dictionary obtained by parsing the JSON dump of the datapack.
    """
    data_copy = deepcopy(data)
    serialized = dumps_datapack(data_copy, yaml_format=False)
    return json.loads(serialized)


def thaw_content(frozen_dict: Mapping) -> dict:
    """Recursively convert nested immutable containers into mutable ones.

    Every ``Mapping`` (e.g. a FrozenDict) becomes a plain ``dict`` and every
    ``tuple`` becomes a ``list``, at any nesting depth. All other values,
    including frozensets, are returned unchanged.

    This will be removed after we implement a FrozenDict validation in the
    Schemapack lib.

    Args:
        frozen_dict: The (possibly nested) mapping to convert.

    Returns:
        A new mutable structure with the same content. The input is not
        modified.
    """
    return _thaw_value(frozen_dict)


def _thaw_value(value: Any) -> Any:
    """Return a mutable copy of a mapping or tuple; pass anything else through."""
    if isinstance(value, Mapping):
        return {key: _thaw_value(item) for key, item in value.items()}
    if isinstance(value, tuple):
        return [_thaw_value(item) for item in value]
    # Leaf value (str, int, None, frozenset, ...): nothing to thaw.
    return value

Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from schemapack.spec.datapack import DataPack

from metldata.builtin_transformations.common.utils import thaw_frozen_dict
from metldata.builtin_transformations.common.utils import data_to_dict
from metldata.transform.exceptions import EvitableTransformationError


Expand All @@ -36,27 +36,16 @@ def delete_properties(
Returns:
The data with the specified content properties being deleted.
"""
updated_classes: dict = {}
modified_data = data_to_dict(data)

for class_name, properties in properties_by_class.items():
class_resources = data.resources.get(class_name)
class_resources = modified_data["resources"].get(class_name)

if not class_resources:
raise EvitableTransformationError()

# convert to a mutable dict to modify it
# note that, it does not apply mutability to inner layer Resource objects
mutable_class_resources = thaw_frozen_dict(class_resources)

for resource_id, resource in class_resources.items():
# convert to a mutable dict to modify it in place
resource_content = thaw_frozen_dict(resource.content)

for resource in class_resources.values():
for property in properties:
resource_content.pop(property, None)
mutable_class_resources[resource_id] = resource.model_copy(
update={"content": resource_content}
)
updated_classes[class_name]= class_resources.update(mutable_class_resources)
updated_resources = data.resources.update(updated_classes)
modified_data = data.model_copy(update={"resources": updated_resources})
return modified_data
resource["content"].pop(property, None)

return DataPack.model_validate(modified_data)
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
SchemaPack,
)

from metldata.builtin_transformations.common.model_transform import update_model
from metldata.builtin_transformations.common.utils import (
model_to_dict,
thaw_frozen_dict,
thaw_content,
)
from metldata.transform.exceptions import EvitableTransformationError

Expand All @@ -51,7 +51,7 @@ def delete_properties(
if not class_def:
raise EvitableTransformationError()

content_schema = thaw_frozen_dict(class_def.content)
content_schema = thaw_content(class_def.content)

for property in properties:
if "properties" not in content_schema:
Expand All @@ -67,6 +67,4 @@ def delete_properties(
{**class_def.model_dump(), "content": content_schema}
)

model_dict = model_to_dict(model)
model_dict["classes"].update(updated_class_defs)
return SchemaPack.model_validate(model_dict)
return update_model(model=model, updated_class_defs=updated_class_defs)
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
)
from metldata.builtin_transformations.common.utils import (
model_to_dict,
thaw_frozen_dict,
thaw_content,
)
from metldata.builtin_transformations.infer_relations.relations import (
InferenceInstruction,
Expand Down Expand Up @@ -155,7 +155,7 @@ def add_inferred_relations(
updated_class_defs[instruction.source] = ClassDefinition.model_validate(
{
"id": class_def.id,
"content": thaw_frozen_dict(class_def.content),
"content": thaw_content(class_def.content),
"relations": {
**class_def.relations,
instruction.new_property: new_relation,
Expand Down

0 comments on commit 1174277

Please sign in to comment.