-
Notifications
You must be signed in to change notification settings - Fork 7
/
issuu-pdf-dl.py
executable file
·76 lines (60 loc) · 2.58 KB
/
issuu-pdf-dl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python3
import urllib.request
import json
import gzip
from argparse import ArgumentParser
from io import BytesIO
from typing import List
from pypdf import PdfWriter, PdfReader
from PIL import Image
def retrieve_images_from_urls(url_list: List[str]) -> list[Image.Image]:
    """Download every URL in ``url_list`` and open each payload as an image.

    Each response body is read fully into memory and wrapped in a ``BytesIO``
    before being handed to ``Image.open``. A URL that fails at any step is
    reported on stdout and skipped, so the returned list can be shorter than
    ``url_list``; the images that did load keep their input order.
    """
    fetched = []
    for page_url in url_list:
        try:
            with urllib.request.urlopen(page_url) as reply:
                payload = BytesIO(reply.read())
                fetched.append(Image.open(payload))
        except Exception as error:
            # Best effort: one bad page must not abort the whole download.
            print(f"Failed to download image from {page_url}: {str(error)}")
    return fetched
def convert_images_to_pdf(images: List[Image.Image], output_path: str):
    """Write ``images`` to ``output_path`` as a single PDF file.

    Every image is first saved in PDF format into a temporary in-memory
    buffer; the first page of that buffer is then added to a shared
    ``PdfWriter``, which is written to disk once all images are processed.
    """
    combined = PdfWriter()
    for picture in images:
        with BytesIO() as buffer:
            picture.save(buffer, format='PDF')
            # Rewind so the reader parses the buffer from its first byte.
            buffer.seek(0)
            first_page = PdfReader(buffer).pages[0]
            combined.add_page(first_page)
    with open(output_path, 'wb') as pdf_file:
        combined.write(pdf_file)
def _split_document_url(document_url: str) -> tuple[str, str]:
    """Return the ``(user, title)`` pair found in an Issuu document URL.

    The user is the first path segment and the title the third, i.e. the
    same positions the URL ``https://<host>/<user>/<x>/<title>`` exposes
    when split on ``/``. Query strings and fragments are ignored.

    Raises:
        ValueError: if the URL path has fewer than three segments.
    """
    from urllib.parse import urlsplit

    # urlsplit only recognises the host when a scheme is present; without
    # this the host name would be counted as the first path segment.
    if not urlsplit(document_url).netloc:
        document_url = "https://" + document_url
    segments = [part for part in urlsplit(document_url).path.split("/") if part]
    if len(segments) < 3:
        raise ValueError(
            f"cannot find user and title in document URL: {document_url!r}"
        )
    return segments[0], segments[2]


def _fetch_document_json(user: str, title: str) -> dict:
    """Download and decode the reader JSON for ``user``'s document ``title``."""
    json_url = f"https://reader3.isu.pub/{user}/{title}/reader3_4.json"
    request = urllib.request.Request(json_url, headers={'Accept-Encoding': 'gzip'})
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(request) as response:
        data = response.read()
        # gzip is requested above, but the server may still answer uncompressed.
        if response.info().get('Content-Encoding') == 'gzip':
            data = gzip.decompress(data)
    return json.loads(data.decode())


def main():
    """Command-line entry point: download an Issuu document as a PDF.

    Reads the document URL (and optional ``--output`` path) from the command
    line, fetches the document's reader JSON, downloads every page image it
    lists and writes them to a PDF. Without ``--output`` the file is written
    to ``./<user>-<title>.pdf``.

    Exits with status 2 when the URL does not contain a user and title, and
    with status 1 when no page image could be downloaded.
    """
    # prog/description are passed by keyword: the second positional parameter
    # of ArgumentParser is ``usage``, which would replace the generated
    # usage line with the description text.
    parser = ArgumentParser(prog='issuu-pdf-dl',
                            description='Download Issuu documents as PDF.')
    parser.add_argument('document_url', help='URL of the document to download', type=str)
    parser.add_argument('--output', '-o', help='output file path', type=str)
    args = parser.parse_args()

    try:
        user, title = _split_document_url(args.document_url)
    except ValueError as error:
        # parser.error prints the usage plus the message and exits with status 2.
        parser.error(str(error))
    print(f"Downloading document '{title}' by '{user}'.")

    json_dict = _fetch_document_json(user, title)
    pages_urls = ["https://" + page["imageUri"] for page in json_dict["document"]["pages"]]
    downloaded_images = retrieve_images_from_urls(pages_urls)

    # retrieve_images_from_urls skips pages it cannot fetch, so compare the
    # counts instead of reporting success unconditionally.
    if not downloaded_images:
        parser.exit(1, "No page images could be downloaded; no PDF was written.\n")
    missing = len(pages_urls) - len(downloaded_images)
    if missing:
        print(f"Warning: {missing} of {len(pages_urls)} pages could not be "
              "downloaded and are missing from the PDF.")

    # Save images as JPG
    # for index, image in enumerate(downloaded_images):
    #     image.save(f"./image{index + 1}.jpg")

    output_path = args.output if args.output else f"./{user}-{title}.pdf"
    convert_images_to_pdf(downloaded_images, output_path)
    print(f"Output PDF file successfully written to: {output_path}.")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()