From ab279fe41a51783458d9e17290eec2e886c48763 Mon Sep 17 00:00:00 2001 From: scivision Date: Wed, 13 Dec 2023 10:01:17 -0500 Subject: [PATCH] doc standalone --- README.md | 18 ++++++++++++++---- doc2pdf.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 doc2pdf.py diff --git a/README.md b/README.md index ab9ec66..90bda1c 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # Headless LibreOffice / Microsoft Office [![Actions Status](https://github.com/scivision/office-headless/workflows/ci/badge.svg)](https://github.com/scivision/office-headless/actions) -[![PyPi versions](https://img.shields.io/pypi/pyversions/loutils.svg)](https://pypi.python.org/pypi/loutils) -[![PyPi Download stats](http://pepy.tech/badge/loutils)](http://pepy.tech/project/loutils) +[![PyPI versions](https://img.shields.io/pypi/pyversions/loutils.svg)](https://pypi.python.org/pypi/loutils) +[![PyPI Download stats](http://pepy.tech/badge/loutils)](http://pepy.tech/project/loutils) -Headless (command line) operations by LibreOffice or Microsoft Office on Word, Excel, Powerpoint and most other +Headless (command line) operations by LibreOffice or Microsoft Office on Word, Excel, PowerPoint and most other [formats LibreOffice can handle](https://en.wikipedia.org/wiki/LibreOffice#Supported_file_formats) for: @@ -13,6 +13,16 @@ for: from Python command line using LibreOffice or Microsoft Word +## standalone single file document to PDF + +For reuse in other programs and projects, we made a +[separate standalone script doc2pdf.py](./doc2pdf.py) +to convert any document that LibreOffice can handle to PDF. 
+ +```sh +python doc2pdf.py ~/mydoc.docx +``` + ## .doc / .docx to PDF conversion Convert a directory of .doc / .docx to .pdf by: @@ -40,5 +50,5 @@ For example to print all Markdown files in a directory with Notepad++: python -m loutils.doc2print ~/mydocs -s .md -exe notepad++ ``` -LibreOffice 7.2 finally fixed file globbing, but we use explict for-looping to work with older LibreOffice. +LibreOffice 7.2 finally fixed file globbing, but we use explicit for-looping to work with older LibreOffice. LibreOffice is not thread-safe, so documents are converted or printed one at a time. diff --git a/doc2pdf.py b/doc2pdf.py new file mode 100644 index 0000000..00f290c --- /dev/null +++ b/doc2pdf.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +""" +Single file standalone script to convert a text/document file to a PDF file using LibreOffice. The PDF is written to the optional output directory, defaulting to the input file's directory. +""" + +import subprocess +import shutil +import os +import sys +from pathlib import Path +import argparse + + +def get_lo_exe() -> str: + name = "soffice" + + if os.name == "nt": + path = Path(os.environ["PROGRAMFILES"]) / "LibreOffice/program" + elif sys.platform == "darwin": + path = Path("/Applications/LibreOffice.app/Contents/MacOS") + else: + path = None + + if not (exe := shutil.which(name, path=path)): + raise FileNotFoundError("LibreOffice not found") + + return exe + + +if __name__ == "__main__": + p = argparse.ArgumentParser(description="Convert a document file to PDF using LibreOffice") + p.add_argument("filein", help="Input file") + p.add_argument("out_dir", help="Output directory", nargs="?") + args = p.parse_args() + + filein = Path(args.filein).expanduser().resolve() + + out_dir = Path(args.out_dir).expanduser().resolve() if args.out_dir else filein.parent + + cmd = [get_lo_exe(), "--convert-to", "pdf", "--outdir", str(out_dir), str(filein)] + + subprocess.check_call(cmd)