forked from gutenbergtools/autocat3
-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
HTMLFormatter.py
183 lines (137 loc) · 6.17 KB
/
HTMLFormatter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/usr/bin/env python
# -*- mode: python; indent-tabs-mode: nil; -*- coding: iso-8859-1 -*-
"""
HTMLFormatter.py
Copyright 2009-2014 by Marcello Perathoner
Distributable under the GNU General Public License Version 3 or newer.
Produce a HTML page.
"""
from __future__ import unicode_literals
import operator
import cherrypy
import genshi.output
import re
import six
from six.moves import urllib
from libgutenberg.MediaTypes import mediatypes as mt
import BaseSearcher
import BaseFormatter
from i18n_tool import ugettext as _
# filetypes that are not offered on the desktop site
NO_DESKTOP_FILETYPES = [
    'plucker',
    'qioo',
    'rdf',
    'rst',
    'rst.gen',
    'rst.master',
    'tei',
    'cover.medium',
    'cover.small',
]

# mediatypes that mobile devices usually hand over to a separate app
HANDOVER_TYPES = (mt.epub, mt.mobi, mt.pdf)

# mediatypes of self-contained files we can send to dropbox
CLOUD_TYPES = (mt.epub, mt.mobi, mt.pdf)
# Recognizes urls that end in the standard pdf location, i.e.
# 'files/<digits>/<digits>-pdf.pdf'.  The dot before the extension is
# escaped so that only a literal '.pdf' suffix is accepted; an unescaped
# dot would also match any other character in that position.
STD_PDF_MATCH = re.compile (r'files/\d+/\d+-pdf\.pdf$')
class XMLishFormatter (BaseFormatter.BaseFormatter):
    """ Formatter producing XMLish output. """

    def __init__ (self):
        super (XMLishFormatter, self).__init__ ()


    def fix_dc (self, dc, os):
        """ Adjust the file records of dc.

        Drops redundant '.images' files, attaches cloud storage links
        to eligible files and rewrites the filename of generated files.
        """

        def has_std_path (file_obj):
            """ Tell if cloud storage links may be shown for file_obj.

            Only pdf files are checked: their url must match
            STD_PDF_MATCH.  Every other filetype qualifies.
            """
            if file_obj.filetype != 'pdf':
                return True
            return STD_PDF_MATCH.search (file_obj.url)

        def send_url (route, file_obj):
            """ Build the url of the given cloud storage route for file_obj. """
            return os.url (
                route, id = dc.project_gutenberg_id, filetype = file_obj.filetype)

        super (XMLishFormatter, self).fix_dc (dc, os)

        # generated_files always [] AFAICT -esh
        for generated in dc.generated_files:
            generated.help_topic = generated.hr_filetype
            generated.compression = 'none'
            generated.encoding = None

        # index the files whose filetype ends in 'images'
        # (that covers '.images' as well as '.noimages')
        dedupable = {}
        for candidate in dc.files:
            candidate_type = candidate.filetype
            if candidate_type and candidate_type.endswith ('images'):
                dedupable[candidate_type] = candidate

        # formats that are present both with and without images
        pairs = []
        for fmt in ('epub', 'kindle', 'pdf', 'html'):
            images_key = fmt + '.images'
            noimages_key = fmt + '.noimages'
            if images_key in dedupable and noimages_key in dedupable:
                pairs.append ((dedupable[images_key], dedupable[noimages_key]))

        # because of timestamps, identical files may vary by a bit or 2;
        # every pair is compared before anything is removed
        near_equal = [
            abs (img_file.extent - noimg_file.extent) < 3
            for img_file, noimg_file in pairs
        ]
        if any (near_equal):
            # one near-identical pair is enough to drop all paired '.images' files
            for redundant, kept in pairs:
                dc.files.remove (redundant)

        for entry in dc.files + dc.generated_files:
            mediatype = six.text_type (entry.mediatypes[0]).partition (';')[0]

            if mediatype in CLOUD_TYPES and has_std_path (entry):
                entry.dropbox_url = send_url ('dropbox_send', entry)
                entry.gdrive_url = send_url ('gdrive_send', entry)
                entry.msdrive_url = send_url ('msdrive_send', entry)

            # the filenames set below are used as relative links
            if not entry.generated or entry.filetype.startswith ('cover.'):
                continue

            entry.filename = "ebooks/%d.%s" % (dc.project_gutenberg_id, entry.filetype)
            if mediatype in HANDOVER_TYPES:
                query = urllib.parse.urlencode (
                    { 'session_id': str (cherrypy.session.id) } )
                entry.filename = entry.filename + '?' + query


    def format (self, page, os):
        """ Prepare the entries of os and render page. """

        for entry in os.entries:
            if isinstance (entry, BaseSearcher.DC):
                self.fix_dc (entry, os)

        # loop again because fix_dc appends things
        for entry in os.entries:
            if not isinstance (entry, BaseSearcher.Cat):
                continue
            if entry.url:
                entry.icon2 = entry.icon2 or 'next'
            else:
                entry.class_ += 'grayed'

        if os.title_icon:
            os.class_ += 'icon_' + os.title_icon

        by_order = operator.attrgetter ('order')
        os.entries.sort (key = by_order)

        return self.render (page, os)
class HTMLFormatter (XMLishFormatter):
    """ Formatter producing HTML output. """

    CONTENT_TYPE = 'text/html; charset=UTF-8'
    DOCTYPE = 'html5'

    def __init__ (self):
        super (HTMLFormatter, self).__init__ ()


    def get_serializer (self):
        """ Return a genshi HTML serializer that keeps whitespace. """
        serializer = genshi.output.HTMLSerializer (
            doctype = self.DOCTYPE, strip_whitespace = False)
        return serializer


    def fix_dc (self, dc, os):
        """ Add some info to dc for easier templating.

        Sets base_dir and magnetlink on dc and decides which of its
        files are hidden.  Also makes sure that dc 'walks like a cat'.
        """

        super (HTMLFormatter, self).fix_dc (dc, os)

        dc.base_dir = ("/files/%d/" % dc.project_gutenberg_id) if dc.new_filesystem else None
        dc.magnetlink = None

        all_files = dc.files + dc.generated_files
        txt_seen = 0
        visible = 0

        for entry in all_files:
            kind = entry.filetype or ''
            entry.hidden = False
            compressed = entry.compression != 'none'

            if kind in NO_DESKTOP_FILETYPES:
                entry.hidden = True
            if compressed:
                entry.hidden = True

            # hide all txt files but the first one
            if kind.startswith ('txt'):
                if txt_seen:
                    entry.hidden = True
                txt_seen += 1

            # only plain 'txt' files keep their encoding, which is then
            # appended to the human readable filetype
            if kind != 'txt':
                entry.encoding = None
            if entry.encoding:
                entry.hr_filetype += ' ' + entry.encoding.upper ()

            if kind.startswith ('html') and not compressed:
                entry.hr_filetype = 'Read this book online: {}'.format (entry.hr_filetype)

            if not entry.hidden:
                visible += 1

        # if we happened to hide everything, show all files
        if not visible:
            for entry in all_files:
                entry.hidden = False