Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix translation serializer #112

Merged
merged 7 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from openpecha.utils import get_text_direction_with_lang, write_json


class SimpleTextTranslationSerializer(BaseAlignmentSerializer):
class TextTranslationSerializer(BaseAlignmentSerializer):
def __init__(self):
self.root_json: Dict[str, List] = {
"categories": [],
Expand Down Expand Up @@ -108,7 +108,7 @@ def set_root_content(self):
segments = self.get_texts_from_layer(segment_layer)
self.root_json["books"][0]["content"] = [segments]

def set_translation_content(self):
def set_translation_content(self, is_pecha_display: bool):
"""
Processes:
1. Get the first txt file from root and translation opf
Expand All @@ -131,13 +131,19 @@ def set_translation_content(self):
ann_data = {}
for data in ann:
ann_data[str(data.key().id())] = data.value().get()
if "alignment_mapping" in ann_data:
for map in ann_data["alignment_mapping"]:
root_map = map[0]
if root_map in segments:
segments[root_map].append(str(ann))
else:
segments[root_map] = [str(ann)]

if is_pecha_display:
if "alignment_mapping" in ann_data:
for map in ann_data["alignment_mapping"]:
root_map = map[0]
if root_map in segments:
segments[root_map].append(str(ann))
else:
segments[root_map] = [str(ann)]
else:
if "root_idx_mapping" in ann_data:
root_map = int(ann_data["root_idx_mapping"])
segments[root_map] = [str(ann)]

max_root_idx = max(segments.keys())
translation_segments = []
Expand All @@ -149,15 +155,15 @@ def set_translation_content(self):

self.translation_json["books"][0]["content"] = [translation_segments]

def get_pecha_display_aligment(self):
def get_root_and_translation_layer(self):
"""
Get the root layer and translation layer to serialize the layer(STAM) to JSON
1. First, check whether 'pecha_display_segment_alignments' is present in the metadata (from the translation opf)
2. Select the first meaning segment layer found in each opf
"""
assert isinstance(
self.translation_opf_path, Path
), "Translation opf path is not set for 'get_pecha_display_aligment'"
), "Translation opf path is not set for 'get_root_and_translation_layer'"
pecha = Pecha.from_path(self.translation_opf_path)
if "pecha_display_segment_alignments" in pecha.metadata.source_metadata:
pecha_display_alignment = pecha.metadata.source_metadata[
Expand All @@ -171,9 +177,18 @@ def get_pecha_display_aligment(self):
self.translation_opf_path, Path
), "Translation opf path is not set"

root_layer_path = next(self.root_opf_path.rglob("*.json")).as_posix()
root_jsons = list(self.root_opf_path.rglob("*.json"))
root_layer_path = next(
root_json
for root_json in root_jsons
if root_json.name != "metadata.json"
).as_posix()

translation_jsons = list(self.translation_opf_path.rglob("*.json"))
translation_layer_path = next(
self.translation_opf_path.rglob("*.json")
translation_json
for translation_json in translation_jsons
if translation_json.name != "metadata.json"
).as_posix()

self.root_basename = root_layer_path.split("/")[-2]
Expand All @@ -196,6 +211,7 @@ def serialize(
root_opf_path: Path,
translation_opf_path: Path,
output_path: Path = SERIALIZED_ALIGNMENT_JSON_PATH,
is_pecha_display: bool = True,
) -> Path:

self.root_opf_path = root_opf_path
Expand All @@ -210,11 +226,11 @@ def serialize(
self.set_pecha_category(pecha_title)

# Get the root and translation layer to serialize the layer(STAM) to JSON
self.get_pecha_display_aligment()
self.get_root_and_translation_layer()

# Set the content for source and target and set it to JSON
self.set_root_content()
self.set_translation_content()
self.set_translation_content(is_pecha_display)

# Write the JSON to the output path
json_output_path = output_path / "alignment.json"
Expand Down
Loading
Loading