From e8829eb849c0037bcbf560d6824a32cd4e924e9f Mon Sep 17 00:00:00 2001 From: FFengIll Date: Sat, 11 Mar 2023 12:46:05 +0800 Subject: [PATCH] feat/error_check: add path arguments check and simplify path process. (#15) * feat: add exit 1 for exception in cli. * feat: propagate exception. * test: add shell test for cli. * feat: set default folder to `.` (current). * refactor: better log message. * refactor: simplify path process and check if file exists. * chore: add gitpod extension. * chore: add gitpod extension. --- .gitpod.yml | 4 ++-- cli.py | 15 +++++++++------ pdf_white_cut/analyzer.py | 5 ++--- pdf_white_cut/cutter.py | 40 ++++++++++++++++++++------------------- pdf_white_cut/parser.py | 4 ++-- tests/test_cli.sh | 4 ++++ 6 files changed, 40 insertions(+), 32 deletions(-) create mode 100644 tests/test_cli.sh diff --git a/.gitpod.yml b/.gitpod.yml index b8af527..c2f5233 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -8,6 +8,6 @@ tasks: vscode: extensions: - ms-python.python - - cweijan.vscode-office - + - ms-python.black-formatter + - eamodio.gitlens diff --git a/cli.py b/cli.py index 9a780c3..3a487d9 100644 --- a/cli.py +++ b/cli.py @@ -11,9 +11,12 @@ logger.remove() logger.add(sys.stderr, level="DEBUG") - if args.input and args.output: - edit_pdf(args.input, args.output, args.ignore) - elif args.indir and args.outdir: - batch_edit_pdf(args.indir, args.outdir, args.ignore) - else: - get_parser().print_help() + try: + if args.input and args.output: + edit_pdf(args.input, args.output, args.ignore) + elif args.indir and args.outdir: + batch_edit_pdf(args.indir, args.outdir, args.ignore) + else: + get_parser().print_help() + except: + exit(1) diff --git a/pdf_white_cut/analyzer.py b/pdf_white_cut/analyzer.py index 89b87ba..86ab3d3 100644 --- a/pdf_white_cut/analyzer.py +++ b/pdf_white_cut/analyzer.py @@ -61,7 +61,7 @@ def extract_item_box(item): elif isinstance(item, LTCurve): logger.debug("use itself: {}", item) elif isinstance(item, LTTextBox): - logger.warning("NotImplemented and use itself: {}", item) + logger.warning("use itself since NotImplemented: {}", item) elif isinstance(item, LTTextLine): # there is 2 types of `LTTextLine`: horizontal and vertical text = item.get_text().encode("unicode_escape") @@ -91,7 +91,7 @@ def extract_item_box(item): bbox[3] + item.height / 2, ) elif isinstance(item, LTImage): - logger.warning("NotImplemented and use itself: {}", item) + logger.warning("use itself since NotImplemented: {}", item) elif isinstance(item, LTFigure): logger.debug("analyse LTFigure:{}", item) # for `LTFigure`, the bbox is modified in `PDFMiner` @@ -160,7 +160,6 @@ def extract_pdf_boxs(filename, ignore=0): boxs.append(box) max_box = get_max_box(boxs) - logger.warning("visible bbox: {}", max_box) page_boxs.append(max_box) logger.warning("max visible bbox for the page: {}", max_box) diff --git a/pdf_white_cut/cutter.py b/pdf_white_cut/cutter.py index 9a87529..7c818df 100644 --- a/pdf_white_cut/cutter.py +++ b/pdf_white_cut/cutter.py @@ -37,14 +37,21 @@ def edit_page_box(page, visible_box): logger.info("cut media box to: {}", box) -def edit_pdf(source: str, target: str, ignore=0): +def edit_pdf(source: Path, target: Path, ignore=0): """ edit to cut the white slide of the input pdf file, and output a new pdf file. """ + # guard type + source = Path(source) + target = Path(target) + if source == target: logger.error("{} {}", source, target) raise Exception("input and output can not be the same!") + if not source.exists(): + raise Exception("input file not exists! ({})".format(source)) + try: # MENTION: never move and change the sequence, since file IO. # analyses the visible box of each page, aka the box scale. res=[(x1,y1,x2,y2)] @@ -66,39 +73,34 @@ def edit_pdf(source: str, target: str, ignore=0): edit_page_box(page, box) outpdf.add_page(page) - Path(target).dirname().makedirs_p() + logger.info("output to {}", Path(target)) + target.abspath().dirname().makedirs_p() with open(target, "wb") as outfd: outpdf.write(outfd) - logger.info("output file: {}", target) except UnicodeEncodeError as ue: logger.exception("UnicodeEncodeError while processing file:{}", source) logger.exception(ue) + raise ue except Exception as e: - logger.exception("Some other Error while processing file:{}", source) + logger.exception("Some unknown Error while processing file:{}", source) logger.exception(e) + raise e -def scan_files(folder, glob=""): - """ - scan files under the dir with spec prefix and postfix - """ - files = [] - for item in Path(folder).listdir(glob): - item: "Path" - files.append(item.basename()) - return files - +def batch_edit_pdf(indir: Path, outdir: Path, ignore=0): + # guard type + indir = Path(indir) + outdir = Path(outdir) -def batch_edit_pdf(indir, outdir, ignore=0): if indir == outdir: raise Exception("input and output can not be the same!") - files = scan_files(indir, glob="*.pdf") - logger.info(files) + files = [pdf.basename() for pdf in indir.listdir("*.pdf")] + logger.info("pdf files in spec folder: {}", files) - if not os.path.exists(indir): - os.mkdir(indir) + # guard dir + outdir.makedirs_p() logger.info("input dir: {}", indir) logger.info("output dir: {}", outdir) diff --git a/pdf_white_cut/parser.py b/pdf_white_cut/parser.py index 36c0fb7..6fa05f2 100644 --- a/pdf_white_cut/parser.py +++ b/pdf_white_cut/parser.py @@ -19,7 +19,7 @@ def get_parser(): "-id", help="input directory", action="store", - default="", + default=".", type=str, dest="indir", ) @@ -27,7 +27,7 @@ def get_parser(): "-od", help="output directory", action="store", - default="", + default=".", type=str, dest="outdir", ) diff --git a/tests/test_cli.sh b/tests/test_cli.sh new file mode 100644 index 0000000..922baf2 --- /dev/null +++ b/tests/test_cli.sh @@ -0,0 +1,4 @@ +python cli.py -i cases/input/input.pdf -o output/output.pdf > /dev/null 2>&1 +echo $? "== 0" +python cli.py -i cases/input/non_exist.pdf -o output/output.pdf > /dev/null 2>&1 +echo $? "== 1"