diff --git a/meta/bindings/python/papermuncher.py b/meta/bindings/python/papermuncher.py new file mode 100644 index 0000000..07b688f --- /dev/null +++ b/meta/bindings/python/papermuncher.py @@ -0,0 +1,141 @@ +import dataclasses as dc +from email.message import Message +from pathlib import Path +from email.parser import BytesParser +import subprocess +import tempfile +from typing import IO +import magic + + +class Loader: + def handleRequest( + self, url: str, headers: dict[str, str] + ) -> tuple[int, dict[str, str], bytes]: + return ( + 404, + { + "mime": "text/html", + }, + b"
404 Not Found", + ) + + +@dc.dataclass +class StaticDir(Loader): + _path: Path + + def __init__(self, path: Path): + self._path = path + + def handleRequest( + self, url: str, headers: dict[str, str] + ) -> tuple[int, dict[str, str], bytes]: + path = self._path / url + if not path.exists(): + return ( + 404, + { + "mime": "text/html", + }, + b"404 Not Found", + ) + with open(path, "rb") as f: + return ( + 200, + { + "mime": magic.Magic(mime=True).from_file(path), + }, + f.read(), + ) + + +def _run( + args: list[str], + loader=Loader(), +) -> bytes: + def _readRequest(fd: IO) -> Message[str, str] | None: + # Read the request header from the file descriptor + parser = BytesParser() + return parser.parse(fd) + + def _sendResponse(fd: IO, status: int, headers: dict[str, str], body: bytes): + fd.write(f"HTTP/2 {status}\r\n".encode()) + for key, value in headers.items(): + fd.write(f"{key}: {value}\r\n".encode()) + fd.write(b"\r\n") + fd.write(body) + + with subprocess.Popen( + args, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) as proc: + stdout = proc.stdout + if stdout is None: + raise ValueError("stdout is None") + + stderr = proc.stderr + if stderr is None: + raise ValueError("stderr is None") + + stdin = proc.stdin + if stdin is None: + raise ValueError("stdin is None") + + while True: + request = _readRequest(stdout) + if request is None: + raise ValueError("request is None") + + if request.preamble is None: + raise ValueError("request.preamble is None") + + preamble = request.preamble.split(" ") + if preamble[0] == b"GET": + _sendResponse(stdin, *loader.handleRequest(preamble[1], dict(request))) + elif preamble[0] == b"POST": + payload = request.get_payload() + if not isinstance(payload, bytes): + raise ValueError("payload is not bytes") + proc.terminate() + return payload + else: + raise ValueError("Invalid request") + + +def find() -> Path: + return Path(__file__).parent / "bin" + + +def print( + document: bytes | str | Path, + mime: str = "text/html", + loader: Loader = StaticDir(Path.cwd()), + bin: Path = find(), + **kwargs: str, +) -> bytes: + extraArgs = [] + for key, value in kwargs.items(): + extraArgs.append(f"--{key}") + extraArgs.append(str(value)) + + if isinstance(document, Path): + return _run( + [str(bin), "print", "-i", str(document), "-o", "out.pdf"] + extraArgs, + loader, + ) + else: + with tempfile.NamedTemporaryFile(delete=False) as f: + if isinstance(document, str): + document = document.encode() + f.write(document) + return _run( + [str(bin), "print", "-i", f.name, "-o", "out.pdf"] + extraArgs, + loader, + ) + return b"" + + +__all__ = ["Loader", "StaticDir", "print"] diff --git a/meta/bindings/python/sample.py b/meta/bindings/python/sample.py new file mode 100644 index 0000000..e1a59c4 --- /dev/null +++ b/meta/bindings/python/sample.py @@ -0,0 +1,12 @@ +import papermuncher + +with open("out.pdf", "wb") as f: + document = """ +Hello, world!
+ """ + f.write( + papermuncher.print( + document, + paper="a4", + ) + ) diff --git a/meta/site/protocol.md b/meta/site/protocol.md new file mode 100644 index 0000000..d97e64c --- /dev/null +++ b/meta/site/protocol.md @@ -0,0 +1,130 @@ +**HTTPipe Specification** + +This document describes the HTTPipe mode of PaperMuncher and the corresponding wire protocol interactions. It outlines how to start PaperMuncher in HTTPipe mode, as well as the format and flow of requests, responses, and result submissions. + +--- + +## 1. Overview + +HTTPipe mode allows PaperMuncher to fetch content, process it, and submit the processed result back via HTTP. The primary use case is converting web pages or other retrievable resources into PDF documents, though the mechanism can be extended for other transformations. + +--- + +## 2. Starting PaperMuncher in HTTPipe Mode + +To start PaperMuncher in HTTPipe mode, use the following command: + +```bash +paper-muncher print -o