Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Suggestions for the count content values transformation #83

Merged
merged 2 commits into from
Nov 8, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,106 +36,102 @@
)


class TransformationContext:
    """Working state for one count-content-values transformation run.

    Holds a deep copy of the DataPack handed in (obtained via
    ``model_copy(deep=True)``) together with the transformation
    instructions, keyed by the name of the class they apply to.
    """

    def __init__(
        self,
        data: DataPack,
        instructions_by_class: dict[str, list[CountContentValueInstruction]],
    ):
        # Keep a deep copy rather than the object that was passed in.
        self.data = data.model_copy(deep=True)
        self.instructions_by_class = instructions_by_class

    def get_class_resources(self, class_name: str):
        """Return the resources stored for ``class_name`` in the wrapped DataPack.

        Raises:
            EvitableTransformationError: if the lookup yields nothing, i.e.
                the class is absent or its resources are empty.
        """
        if found := self.data.resources.get(class_name):
            return found
        raise EvitableTransformationError()
def get_class_resources(*, data: DataPack, class_name: str):
    """Return the resources that ``data`` holds for ``class_name``.

    Args:
        data: the DataPack whose ``resources`` mapping is queried.
        class_name: name of the class to look up.

    Raises:
        EvitableTransformationError: if the lookup yields nothing, i.e. the
            class is absent or its resources are empty.
    """
    class_resources = data.resources.get(class_name)
    if class_resources:
        return class_resources
    # A missing key and an empty container are treated the same way.
    raise EvitableTransformationError()


def count_content(
*,
data: DataPack,
instructions_by_class: dict[str, list[CountContentValueInstruction]],
) -> DataPack:
"""Transforms the data."""
context = TransformationContext(data, instructions_by_class)
for class_name, instructions in context.instructions_by_class.items():
target_resources = context.get_class_resources(class_name)

for instruction in instructions:
relation_path = instruction.source.relation_path
referenced_class = get_directly_referenced_class(relation_path)

# Only one element is expected in the path
relation_name = relation_path.elements[0].property
# content_resources keeping the values to be counted are obtained from
# the class that is referenced by the transformation target class through
# relation_name.
content_resources = context.get_class_resources(referenced_class)

transform_resources(
target_resources=target_resources,
"""Apply all count content value transformation instructions."""
data = data.model_copy(deep=True)

for class_name, instructions in instructions_by_class.items():
transform_class(class_name=class_name, data=data, instructions=instructions)
return data


def transform_class(
*, class_name: str, data: DataPack, instructions: list[CountContentValueInstruction]
):
"""Apply the count content value transformations to the specified class."""
# the target prefix refers to resources that will be modified by the transformation
target_resources = get_class_resources(data=data, class_name=class_name)
for instruction in instructions:
relation_path = instruction.source.relation_path
referenced_class = get_directly_referenced_class(relation_path)

# Only one element is expected in the path, validated by `get_directly_referenced_class`
relation_name = relation_path.elements[0].property
# get resources for the class referenced by the relation path
referenced_resources = get_class_resources(
data=data, class_name=referenced_class
)

for target_resource in target_resources.values():
transform_resource(
referenced_resources=referenced_resources,
target_resource=target_resource,
relation_name=relation_name,
content_resources=content_resources,
instruction=instruction,
)

return context.data


def transform_resources(
def transform_resource(
*,
target_resources: dict[ResourceId, Resource],
referenced_resources: dict[ResourceId, Resource],
target_resource: Resource,
relation_name: str,
content_resources: dict[ResourceId, Resource],
instruction: CountContentValueInstruction,
):
"""Transform resources"""
for target_resource in target_resources.values():
target_content = target_resource.content
related_to = target_resource.relations.get(relation_name)
if not related_to:
raise EvitableTransformationError()
count_values = get_count_values(related_to, content_resources, instruction)

target_object = get_target_object(target_content, instruction)
target_property = instruction.target_content.property_name
target_object[target_property] = dict(Counter(count_values))


def get_count_values(
resource_relations: ResourceId | ResourceIdSet,
content_resources: dict[ResourceId, Resource],
instruction: CountContentValueInstruction,
"""Apply the count content value transformation to each resource of a class."""
target_content = target_resource.content
relation_target_ids = target_resource.relations.get(relation_name)
if not relation_target_ids:
raise EvitableTransformationError()

values_to_count = get_values_to_count(
relation_target_ids=relation_target_ids,
referenced_resources=referenced_resources,
content_path=instruction.source.content_path,
)
target_object = get_modification_target(
data=target_content, instruction=instruction
)
target_property = instruction.target_content.property_name
target_object[target_property] = dict(Counter(values_to_count))


def get_values_to_count(
*,
relation_target_ids: ResourceId | ResourceIdSet,
referenced_resources: dict[ResourceId, Resource],
content_path: str,
):
"""Get values to be counted."""
"""Get countable properties from all resources referred to by the relation."""
try:
content_values = [
content_resources[relation].content.get(instruction.source.content_path)
for relation in resource_relations
return [
referenced_resources[resource_id].content.get(content_path)
for resource_id in relation_target_ids
]
return content_values
except KeyError as exc:
raise EvitableTransformationError() from exc


def get_target_object(
content: dict[str, Any], instruction: CountContentValueInstruction
def get_modification_target(
*, data: dict[str, Any], instruction: CountContentValueInstruction
):
"""Get the json object that is to be modify."""
target_object = resolve_data_object_path(
data=content,
path=instruction.target_content.object_path,
)
if (
not isinstance(target_object, dict)
or instruction.target_content.property_name in target_object
):
raise EvitableTransformationError()
"""Get the json object that is to be modified."""
path = instruction.target_content.object_path
property = instruction.target_content.property_name

return target_object
target = resolve_data_object_path(data=data, path=path)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the add_content_properties transformation (data_transform.py), the result of the same function call is assigned to a variable named `object`, which is not very descriptive. It would be good to rename that one too.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I can do it in the main PR.

if not isinstance(target, dict) or property in target:
raise EvitableTransformationError()
return target