Skip to content

Commit

Permalink
removing metadata modification
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelcolvin committed May 24, 2017
1 parent 93ee5c1 commit 9beb7f4
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 71 deletions.
4 changes: 4 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
History
-------

v0.33.0 (2017-05-24)
....................
* remove pdf metadata modification as it can break some pdf viewers

v0.32.0 (2017-05-24)
....................
* set ``cache_dir`` for ``generate_pdf`` by default
Expand Down
2 changes: 1 addition & 1 deletion benchmark/invoice.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Invoice INV-1</title>
<title>Invoice INV-123</title>
<link rel="stylesheet" href="https://secure.tutorcruncher.com/static/css/libraries.css">
<link rel="stylesheet" href="https://secure.tutorcruncher.com/static/css/pdf_styles.css">
</head>
Expand Down
6 changes: 0 additions & 6 deletions benchmark/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@ def go_sync():
for i in range(count):
pdf = generate_pdf(
html,
title='Benchmark',
author='Samuel Colvin',
subject='Mock Invoice',
page_size='A4',
zoom='1.25',
margin_left='8mm',
Expand All @@ -41,9 +38,6 @@ async def go_async():
async def gen(i_):
pdf = await apydf.generate_pdf(
html,
title='Benchmark',
author='Samuel Colvin',
subject='Mock Invoice',
page_size='A4',
zoom='1.25',
margin_left='8mm',
Expand Down
2 changes: 1 addition & 1 deletion pydf/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from distutils.version import StrictVersion

VERSION = StrictVersion('0.32.0')
VERSION = StrictVersion('0.33.0')
50 changes: 3 additions & 47 deletions pydf/wkhtmltopdf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import asyncio
import re
import subprocess
import tempfile

Expand Down Expand Up @@ -47,20 +46,6 @@ def _convert_args(**py_args):
return cmd_args


def _set_meta_data(pdf_content, **kwargs):
fields = [
('Title', kwargs.get('title')),
('Author', kwargs.get('author')),
('Subject', kwargs.get('subject')),
('Creator', kwargs.get('creator')),
('Producer', kwargs.get('producer')),
]
metadata = '\n'.join(f'/{name} ({value})' for name, value in fields if value)
if metadata:
pdf_content = re.sub(b'/Title.*\n.*\n/Producer.*', metadata.encode(), pdf_content, count=1)
return pdf_content


class AsyncPydf:
def __init__(self, *, max_processes=20, loop=None, cache_dir=DFT_CACHE_DIR):
self.semaphore = asyncio.Semaphore(value=max_processes, loop=loop)
Expand All @@ -69,14 +54,7 @@ def __init__(self, *, max_processes=20, loop=None, cache_dir=DFT_CACHE_DIR):
Path.mkdir(cache_dir)
self.cache_dir = cache_dir

async def generate_pdf(self,
html,
title=None,
author=None,
subject=None,
creator=None,
producer=None,
**cmd_args):
async def generate_pdf(self, html, **cmd_args):
cmd_args = [WK_PATH] + _convert_args(cache_dir=self.cache_dir, **cmd_args)
async with self.semaphore:
p = await asyncio.create_subprocess_exec(
Expand All @@ -94,24 +72,10 @@ async def generate_pdf(self,
stderr = await p.stderr.read()
raise RuntimeError('error running wkhtmltopdf, command: {!r}\n'
'response: "{}"'.format(cmd_args, stderr.strip()))

return _set_meta_data(
pdf_content,
title=title,
author=author,
subject=subject,
creator=creator,
producer=producer,
)
return pdf_content


def generate_pdf(html, *,
title: str=None,
author: str=None,
subject: str=None,
creator: str=None,
producer: str=None,
# from here on arguments are passed via the commandline to wkhtmltopdf
cache_dir: Path=DFT_CACHE_DIR,
grayscale: bool=False,
lowquality: bool=False,
Expand Down Expand Up @@ -186,15 +150,7 @@ def generate_pdf(html, *,
if p.returncode != 0 and pdf_content[:4] != b'%PDF':
raise RuntimeError('error running wkhtmltopdf, command: {!r}\n'
'response: "{}"'.format(cmd_args, p.stderr.strip()))

return _set_meta_data(
pdf_content,
title=title,
author=author,
subject=subject,
creator=creator,
producer=producer,
)
return pdf_content


def _string_execute(*args):
Expand Down
22 changes: 6 additions & 16 deletions tests/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,13 @@ def test_generate_pdf_with_html():
assert 'Is this thing on?\n\n\x0c' == text


def test_generate_pdf_with_html_meta_data():
pdf_content = generate_pdf(
'<html><body>Is this thing on?</body></html>',
title='title foobar',
subject='the subject',
author='Samuel Colvin',
creator='this is the creator'
)
def test_pdf_title():
pdf_content = generate_pdf('<html><head><title>the title</title></head><body>hello</body></html>')
assert pdf_content[:4] == b'%PDF'
beginning = pdf_content.decode('utf8', 'ignore')[:300]
print(beginning)
assert """
<<
/Title (title foobar)
/Author (Samuel Colvin)
/Subject (the subject)
/Creator (this is the creator)""" in beginning
text = pdf_text(pdf_content)
title = 'the title'.encode('utf-16be')
assert b'\n/Title (\xfe\xff%s)\n' % title in pdf_content
assert 'hello\n\n\x0c' == text


def test_unicode():
Expand Down

0 comments on commit 9beb7f4

Please sign in to comment.