Skip to content

Commit

Permalink
Merge pull request #59 from Esukhia/fix-preview
Browse files (browse the repository at this point in the history)
fix(preview):
  • Loading branch information
kaldan007 authored Dec 23, 2021
2 parents 6bba949 + 455116f commit 624f9e6
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 33 deletions.
32 changes: 23 additions & 9 deletions pedurma/reconstruction.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,9 +20,8 @@
from docx import Document

from pedurma.exceptions import PageNumMissing
from pedurma.pecha import NotesPage
from pedurma.preprocess import preprocess_google_notes, preprocess_namsel_notes
from pedurma.preview_note_layer import update_hybird_pecha_note_layer
from pedurma.text_report import get_text_report
from pedurma.texts import (
get_body_text_from_last_page,
get_page_ann,
Expand Down Expand Up @@ -1036,7 +1035,7 @@ def pecha_path_2_id(pecha_path):
return pecha_path_obj.stem


def get_preview_text(text_id, pecha_paths=None):
def get_reconstructed_text(text_id, pecha_paths=None):
if pecha_paths is None:
pecha_paths = get_pecha_paths(text_id)
pedurmatext = get_pedurma_text_obj(text_id, pecha_paths)
Expand All @@ -1063,9 +1062,6 @@ def get_preview_text(text_id, pecha_paths=None):
cur_vol_preview = get_vol_preview(
dg_body, namsel_body, dg_note_text, namsel_note_text, vol_num
)
update_hybird_pecha_note_layer(
cur_vol_preview, pecha_paths["google"], int(vol_num)
)
preview_text[f"v{int(vol_num):03}"] = cur_vol_preview
dg_body = ""
namsel_body = ""
Expand Down Expand Up @@ -1101,16 +1097,34 @@ def create_docx(text_id, chunks, path):
return output_path


def get_docx_text(text_id, pecha_paths=None, output_path=None):
def get_docx_text(text_id, preview_text, output_path=None):
if not output_path:
(Path.home() / ".collation_docx").mkdir(parents=True, exist_ok=True)
output_path = Path.home() / ".collation_docx"
collation_text = ""
preview_text, google_pecha_id = get_preview_text(text_id, pecha_paths)
for vol_id, text in preview_text.items():
collation_text += f"{text}\n\n"
collation_text = collation_text.replace("\n", "")
collation_text = re.sub(r"(༺.+?༻)", r"\n\g<1>\n", collation_text)
collation_text = re.sub(r"(\d+-\d+)", r"\n\g<1>\n", collation_text)
chunks = split_text(collation_text)
docx_path = create_docx(text_id, chunks, output_path)
return docx_path


def get_preview_text(text_id, docx_output_path, pecha_paths=None):
preview_text_info = {
"preview_text": None,
"google_pecha_id": None,
"docx_output_path": None,
"text_report": None,
}
preview_text, google_pecha_id = get_reconstructed_text(text_id, pecha_paths)
preview_text_info["preview_text"] = preview_text
preview_text_info["google_pecha_id"] = google_pecha_id
preview_text_info["docx_output_path"] = get_docx_text(
text_id, preview_text, docx_output_path
)
preview_text_info["text_report"] = get_text_report(
text_id, pecha_paths, preview_text
)
return preview_text_info
4 changes: 4 additions & 0 deletions pedurma/text_report.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,8 @@

from openpecha.utils import load_yaml

from pedurma.texts import get_pecha_paths


def get_metadata(pecha_path):
pecha_id = Path(pecha_path).stem
Expand Down Expand Up @@ -39,6 +41,8 @@ def get_text_report(text_id, pecha_paths, preview_text):
"total_number_of_footnotes": None,
"download_date": None,
}
if pecha_paths is None:
pecha_paths = get_pecha_paths(text_id)
text_report["title"] = get_text_title(pecha_paths["google"])
number_of_pages = 0
number_of_footnotes = 0
Expand Down
32 changes: 8 additions & 24 deletions tests/preview/test_get_preview.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,11 @@

from pedurma.exceptions import PageNumMissing
from pedurma.pecha import NotesPage, Page
from pedurma.reconstruction import get_docx_text, get_preview_page, get_preview_text
from pedurma.reconstruction import (
get_docx_text,
get_preview_page,
get_reconstructed_text,
)
from pedurma.utils import from_yaml


Expand All @@ -14,16 +18,7 @@ def get_dummy_preview():
namsel_pecha_path = str(Path(__file__).parent / "data" / "P973")
text_id = "D1119"
pecha_paths = {"namsel": namsel_pecha_path, "google": dg_pecha_path}
preview_text = get_preview_text(text_id, pecha_paths)
(
Path(__file__).parent
/ "data"
/ "P972"
/ "P972.opf"
/ "layers"
/ "v001"
/ "PedurmaNote.yml"
).unlink()
preview_text = get_reconstructed_text(text_id, pecha_paths)
return preview_text


Expand Down Expand Up @@ -151,19 +146,8 @@ def test_get_preview_text():
def test_get_docx_text():
text_id = "D1119"
output_path = Path.home()
dg_pecha_path = str(Path(__file__).parent / "data" / "P972")
namsel_pecha_path = str(Path(__file__).parent / "data" / "P973")
pecha_paths = {"namsel": namsel_pecha_path, "google": dg_pecha_path}
docx_path = get_docx_text(text_id, pecha_paths, output_path)
preview_text, google_pecha_id = get_dummy_preview()
docx_path = get_docx_text(text_id, preview_text, output_path)
expected_path = Path.home() / "D1119.docx"
assert docx_path == expected_path
expected_path.unlink()
(
Path(__file__).parent
/ "data"
/ "P972"
/ "P972.opf"
/ "layers"
/ "v001"
/ "PedurmaNote.yml"
).unlink()

0 comments on commit 624f9e6

Please sign in to comment.