-
Notifications
You must be signed in to change notification settings - Fork 4
/
make_pdf.py
104 lines (80 loc) · 3.21 KB
/
make_pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Constructs the pdf from the html content. Run it from the textbook root directory:
# python3 htmlbook/make_pdf.py && xpdf {underactuated|manipulation}.pdf
# or, this *should* work (but doesn't):
# xpdf -remote foo "openFile(manipulation.pdf)" &
# python3 htmlbook/make_pdf.py && xpdf -remote foo reload
#
# Requires PrinceXML: https://www.princexml.com/latest/
# on 18.04 it was: sudo apt install prince
# Also requires puppeteer for offline rendering of mathjax, etc.
#
# cd ~
# curl -sL https://deb.nodesource.com/setup_10.x -o nodesource_setup.sh
# sudo bash nodesource_setup.sh
# sudo apt install -y nodejs
# npm i puppeteer --save
#
# Note: I had to manually npm i some of the missing deps on my first try to get
# the installer to complete without errors.
# TODO
# - Remove prince watermark from first page? (How much does it cost?)
# - Bibliography?
# MAYBE
# - prince-books? https://www.princexml.com/doc/prince-for-books/
# - overflow-x warning is coming from highlight.js
# - font-size-adjust comes in through the rendering process. but I could
# investigate more.
import json
import os
import subprocess
import tempfile
def get_file_as_string(filename):
    """Return the entire contents of the text file at *filename*.

    Args:
        filename: Path of the file to read; opened in text mode ("r").

    Returns:
        The file contents as a single string.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, "r") as f:
        return f.read()
def write_file_as_string(filename, s):
    """Write the string *s* to *filename*, replacing any existing content.

    Args:
        filename: Path of the file to write; opened in text mode ("w").
        s: The text to store in the file.
    """
    # The context manager flushes and closes the handle even if write() raises.
    with open(filename, "w") as f:
        f.write(s)
def replace_string_before(s, before_str, with_str):
    """Replace everything preceding the first *before_str* with *with_str*.

    Args:
        s: The string to edit.
        before_str: Marker to search for; it and everything after it are kept.
        with_str: Text that takes the place of whatever came before the marker.

    Returns:
        ``with_str`` followed by ``s`` from the first occurrence of
        ``before_str`` onward, or ``s`` unchanged if the marker is absent.
    """
    loc = s.find(before_str)
    if loc < 0:
        # str.find returns -1 when the marker is missing; slicing with it
        # would keep only the last character of s, so leave s untouched.
        return s
    return with_str + s[loc:]
def replace_string_after(s, after_str, with_str):
    """Replace everything following the first *after_str* with *with_str*.

    Args:
        s: The string to edit.
        after_str: Marker to search for; it and everything before it are kept.
        with_str: Text that takes the place of whatever came after the marker.

    Returns:
        ``s`` up to and including the first occurrence of ``after_str``,
        followed by ``with_str``, or ``s`` unchanged if the marker is absent.
    """
    loc = s.find(after_str)
    if loc < 0:
        # str.find returns -1 when the marker is missing; adding the marker
        # length to it would produce an arbitrary cut point, so leave s as is.
        return s
    return s[: loc + len(after_str)] + with_str
def replace_string_between(s, start_str, end_str, with_str):
    """Replace the text between each *start_str* ... *end_str* pair.

    The delimiters themselves are kept; only the text between them is
    replaced. Scanning proceeds left to right and resumes just after each
    inserted replacement.

    Args:
        s: The string to edit.
        start_str: Opening delimiter.
        end_str: Closing delimiter, searched for after each opening delimiter.
        with_str: Replacement for the text between the delimiters.

    Returns:
        The edited string. An opening delimiter with no closing delimiter
        after it is left untouched, and an empty ``start_str`` returns ``s``
        unchanged.
    """
    if not start_str:
        # An empty delimiter matches at every position; treat it as a no-op.
        return s
    index = 0
    while True:
        found = s.find(start_str, index)
        if found < 0:
            # Compare against -1, not 0, so a delimiter at the very start of
            # the string is still processed.
            break
        start = found + len(start_str)
        end = s.find(end_str, start)
        if end < 0:
            # No closing delimiter: stop rather than splice with index -1.
            break
        s = s[:start] + with_str + s[end:]
        index = start + len(with_str)
    return s
# root should be textbook repo root/book
# It is computed as the parent of the directory containing this script.
root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
basename = os.path.basename(root)

# chapters.json supplies the ordered chapter ids; "index" is always first.
with open(os.path.join(root, "chapters.json")) as chapters_file:
    chapters = json.load(chapters_file)
chapter_ids = ["index"] + chapters["chapter_ids"]

render_script = os.path.join(root, "htmlbook/render_html.js")
stylesheet = os.path.join(root, "htmlbook/pdf.css")
prince_input_files = []

# The context manager removes the temporary directory even if a step fails.
with tempfile.TemporaryDirectory() as tmp_dir:
    # Expose the asset directories next to the temporary HTML copies
    # (presumably so relative references in the pages still resolve).
    for asset in ("htmlbook", "data", "figures"):
        os.symlink(os.path.join(root, asset), os.path.join(tmp_dir, asset))

    for c in chapter_ids:
        filename = os.path.join(tmp_dir, c + ".html")
        s = get_file_as_string(os.path.join(root, c + ".html"))

        # Tweak html for rendering
        # Remove hypothesis
        s = s.replace(
            '<script src="https://hypothes.is/embed.js" async></script>', ""
        )
        # links to data should point to online version
        s = s.replace(
            'href="data/', f'href="http://{basename}.csail.mit.edu/data/'
        )
        write_file_as_string(filename, s)

        # Render the local copy in place. Alternative (renders the online
        # page instead):
        #   node htmlbook/render_html.js http://{basename}.csail.mit.edu/{c}.html {filename}
        # Arguments are passed as a list (no shell), so paths containing
        # spaces are safe; check=True stops the build if rendering fails.
        subprocess.run(
            ["node", render_script, f"file://{filename}", filename],
            check=True,
        )
        prince_input_files.append(filename)

    # Combine every rendered chapter into {basename}.pdf in the current
    # working directory. check=True makes the script exit non-zero on
    # failure, so `make_pdf.py && xpdf ...` does not open a stale PDF.
    subprocess.run(
        ["prince", "-s", stylesheet, *prince_input_files, "-o", f"{basename}.pdf"],
        check=True,
    )