diff --git a/pedurma/docx_serializer.py b/pedurma/docx_serializer.py index 5aba72d..faf48a2 100644 --- a/pedurma/docx_serializer.py +++ b/pedurma/docx_serializer.py @@ -11,7 +11,7 @@ def split_text(content): - chunks = re.split(r"(\(\d+\) <.*?>)", content) + chunks = re.split(r"(\(\d+\) <.+?>)", content) return chunks @@ -64,11 +64,19 @@ def parse_page(page, note_walker): return page_md, note_walker +def reformat_note_text(note_text): + pub_abv = {"«པེ་»": "P", "«སྣར་»": "N", "«ཅོ་»": "C", "«སྡེ་»": "D"} + for tib_abv, eng_abv in pub_abv.items(): + note_text = note_text.replace(tib_abv, f" {eng_abv} ") + return note_text + + def parse_note(collated_text): note_md = "\n" - notes = re.finditer(r"\((\d+)\) <(.*?)>", collated_text) + notes = re.finditer(r"\((\d+)\) <(.+?)>", collated_text) for note_walker, note in enumerate(notes, 1): - note_md += f"[^{note_walker}]: {note.group(2)}\n" + note_text = reformat_note_text(note.group(2)) + note_md += f"[^{note_walker}]: {note_text}\n" return note_md diff --git a/pedurma/pagination_update.py b/pedurma/pagination_update.py index e20411f..008591b 100644 --- a/pedurma/pagination_update.py +++ b/pedurma/pagination_update.py @@ -2,7 +2,7 @@ from openpecha.utils import download_pecha -from pedurma.texts import get_text_info +from pedurma.texts import get_pecha_paths, get_text_info from pedurma.utils import from_yaml, get_pecha_id, to_yaml @@ -139,7 +139,7 @@ def update_pagination(pecha_id, text_id, pedurma_edit_notes, index, pecha_path): for span in text_info["span"]: vol = span["vol"] pagination_layer = from_yaml( - Path(f"{pecha_path}/{pecha_id}.opf/layers/v{int(vol):03}/Pagination.yml") + (pecha_path / f"{pecha_id}.opf/layers/v{int(vol):03}/Pagination.yml") ) pagination_layer = update_pg_ref(vol, pedurma_edit_notes, pagination_layer) yield vol, pagination_layer @@ -152,13 +152,15 @@ def update_text_pagination(text_id, pedurma_edit_notes, text_mapping=None): text_id (str): text id pedurma_edit_notes (obj): pedurma edit notes obj """ - pecha_id = get_pecha_id(text_id, text_mapping) - pecha_path = download_pecha(pecha_id, needs_update=False) - index = from_yaml(Path(f"{pecha_path}/{pecha_id}.opf/index.yml")) - for vol, new_pagination in update_pagination( - pecha_id, text_id, pedurma_edit_notes, index, pecha_path - ): - new_pagination_yml = to_yaml(new_pagination) - Path( - f"{pecha_path}/{pecha_id}.opf/layers/v{int(vol):03}/Pagination.yml" - ).write_text(new_pagination_yml, encoding="utf-8") + pecha_paths = get_pecha_paths(text_id, text_mapping) + for pecha_type, pecha_path in pecha_paths.items(): + pecha_path = Path(pecha_path) + pecha_id = pecha_path.stem + index = from_yaml((pecha_path / f"{pecha_id}.opf/index.yml")) + for vol, new_pagination in update_pagination( + pecha_id, text_id, pedurma_edit_notes, index, pecha_path + ): + new_pagination_yml = to_yaml(new_pagination) + ( + pecha_path / f"{pecha_id}.opf/layers/v{int(vol):03}/Pagination.yml" + ).write_text(new_pagination_yml, encoding="utf-8") diff --git a/pedurma/reconstruction.py b/pedurma/reconstruction.py index c336881..3743dd3 100644 --- a/pedurma/reconstruction.py +++ b/pedurma/reconstruction.py @@ -862,7 +862,10 @@ def merge_footnotes_per_page(page, foot_notes): except Exception: note = "" marker_walker = get_tib_num(marker_walker) - repl2 = f"({marker_walker}) <{note}>" + if note: + repl2 = f"({marker_walker}) <{note}>" + else: + repl2 = "" if marker: preview_page = preview_page.replace(marker, repl2, 1) preview_page = re.sub("
", r"\n\g<1>", preview_page)