Skip to content

Commit

Permalink
Now stub_to_print checks the validity of the generated pdf. This allows early detection of problems where the scan contains image formats that pypdf2 doesn't handle.
Browse files Browse the repository at this point in the history

work related to #8
  • Loading branch information
g-raffy committed Sep 1, 2024
1 parent 044e4e4 commit 3685d4a
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 87 deletions.
4 changes: 3 additions & 1 deletion src/pymusco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@
from .core import TableOfContents
from .core import InstrumentNotFound
from .core import ITrackSelector
from .main import images_to_pdf
from .main import scan_to_stub
from .main import stub_to_print
from .main import split_double_pages
from .main import crop_pdf
from .main import merge_pdf
from .main import remove_unneeded_pdf_password
from .main import StampDesc
from .main import StubContents
# from .tesseract import extract_pdf_text
from .tsauto import load_musician_count
from .tsauto import AutoTrackSelector
from .tssingle import SingleTrackSelector
from .tsmanual import ManualTrackSelector
from .pdf import check_pdf, add_stamp, add_bookmarks
from .pdf import check_pdf, check_pdf_reader, check_pdf_page, dump_pdf_page, add_stamp, add_bookmarks
from .piece import Piece, Catalog, load_piece_description
2 changes: 1 addition & 1 deletion src/pymusco/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ def get_pdf_toc_item_page(pdf_toc_item: Dict[str, Any], pdf_reader: PyPDF2.PdfRe

# at this point, linked_page_indirect_object is of type PyPDF2.generic.IndirectObject, with a value such as:
# IndirectObject(228, 0)
print(dir(pdf_reader))
# print(dir(pdf_reader))
linked_page_object = pdf_reader.resolved_objects[(0, linked_page_indirect_object.idnum)]
# at this point, linked_page_object is of type PyPDF2.generic.DictionaryObject with a value such as :
# {
Expand Down
10 changes: 7 additions & 3 deletions src/pymusco/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from .pdf import extract_pdf_page_main_image
from .pdf import extract_pdf_page
from .core import get_stub_tracks
from .pdf import check_pdf
from .pdf import check_pdf, check_pdf_reader


def md5(fname):
Expand Down Expand Up @@ -359,6 +359,7 @@ def scan_to_stub(src_scanned_pdf_file_path: Path, dst_stub_pdf_file_path: Path,
# break
stamp_descs = stamp_descs if stamp_descs is not None else []
images_to_pdf(StubContents(image_file_paths=scanned_image_file_paths, toc=toc, title=title, stamp_descs=stamp_descs, page_info_line_y_pos=page_info_line_y_pos), dst_stub_pdf_file_path)
check_pdf(dst_stub_pdf_file_path) # ensure that the generated pdf is supported by pypdf2


def stub_to_print(src_stub_file_path: Path, dst_print_file_path: Path, track_selector: ITrackSelector, orchestra: Orchestra):
Expand All @@ -370,6 +371,9 @@ def stub_to_print(src_stub_file_path: Path, dst_print_file_path: Path, track_sel
:param dict(str, int) musician_count: gets the number of musicians for each musical instrument family
:param TableOfContents or None stub_toc: if defined, gets the start page number for each track in the stub
"""

check_pdf(src_stub_file_path)

stub_toc = get_stub_tracks(src_stub_file_path, orchestra)
print(stub_toc)

Expand Down Expand Up @@ -420,11 +424,11 @@ def stub_to_print(src_stub_file_path: Path, dst_print_file_path: Path, track_sel
(first_page_index, last_page_index) = page_range
num_copies = range_to_num_copies[page_range]
log_file.write(f"{num_copies} copies of {'/'.join(range_to_tracks[page_range])}\n")
# print(page_range, num_copies)
# print(page_range, num_copies, range_to_tracks[page_range])
for copy_index in range(num_copies): # @UnusedVariable pylint: disable=unused-variable
for page_index in range(first_page_index, last_page_index + 1):
track_page = stub_pdf.pages[page_index - 1] # -1 to convert 1-based index into 0-based index
# print('adding page %d' % page_index)
# print(f'adding page {page_index}')
print_pdf.add_page(track_page)

log_file.write("\nunprinted tracks :\n\n")
Expand Down
Loading

0 comments on commit 3685d4a

Please sign in to comment.