-
Notifications
You must be signed in to change notification settings - Fork 8
/
lektor_i18n.py
468 lines (400 loc) · 19.4 KB
/
lektor_i18n.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
# -*- coding: utf-8 -*-
#pylint: disable=wrong-import-position
import sys
PY3 = sys.version_info > (3,)
import collections
import datetime
import gettext
import os
from os.path import relpath, join, exists, dirname
from pprint import PrettyPrinter
import re
import tempfile
import time
if PY3:
from urllib.parse import urljoin
else:
from urlparse import urljoin
from lektor.pluginsystem import Plugin
from lektor.db import Page
from lektor.metaformat import tokenize
from lektor.reporter import reporter
from lektor.types.flow import FlowType, process_flowblock_data
from lektor.utils import portable_popen, locate_executable
from lektor.environment import PRIMARY_ALT
from lektor.filecontents import FileContents
from lektor.context import get_ctx
# Matches a "key: value" line. The key may consist of letters, digits, dots,
# underscores and dashes; the dash is placed last in the character class so
# that it is a literal dash and not the range from '.' to '_' (which would
# also admit ':', '/', '@', ... and exclude '-').
command_re = re.compile(r'([a-zA-Z0-9._-]+):\s*(.*?)?\s*$')
# derived from lektor.types.flow but allows more hash signs
block2re = re.compile(r'^###(#+)\s*([^#]*?)\s*###(#+)\s*$')
# Header entry written at the top of the generated POT file. The %(NOW)s and
# %(LANGUAGE)s placeholders are substituted in Translations.as_pot().
POT_HEADER = """msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"Report-Msgid-Bugs-To: \\n"
"POT-Creation-Date: %(NOW)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: %(LANGUAGE)s <LL@li.org>\\n"
"Language: %(LANGUAGE)s\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=UTF-8\\n"
"Content-Transfer-Encoding: 8bit\\n"
"""
# python2/3 compatibility layer
def encode(s):
    """Return `s` untouched on Python 3 and its UTF-8 encoded form on Python 2."""
    if PY3:
        return s
    return s.encode('UTF-8')
def trans(translator, s):
    """Thin gettext translation wrapper to allow compatibility with both Python2
    and 3.

    :param translator: gettext translations object used for the lookup
    :param s: message to translate
    :return: the translated message; an empty message is returned unchanged
    """
    # gettext reserves the empty msgid for the catalog metadata, so looking up
    # '' would return the PO header instead of an empty string.
    if not s:
        return s
    if PY3:
        return translator.gettext(s)
    return translator.ugettext(s)
def truncate(s, length=32):
    """Cut `s` down to `length` characters, appending '..' when it was cut."""
    if len(s) > length:
        return s[:length] + '..'
    return s
#pylint: disable=too-few-public-methods,redefined-variable-type
class TemplateTranslator(object):
    """gettext-like object whose catalog follows the locale of the current
    build context.

    The compiled catalogs are looked up in `<i18npath>/_compiled`. The catalog
    is (re)loaded lazily whenever the locale of the context changes.
    """

    def __init__(self, i18npath):
        """:param i18npath: directory containing the `_compiled` catalogs"""
        self.i18npath = i18npath
        self.__lastlang = None
        self.translator = None
        self.init_translator()

    def init_translator(self):
        """Make `self.translator` match the locale of the current context."""
        ctx = get_ctx()
        if not ctx:
            # No build context: fall back to a translator returning messages
            # unchanged. (A GNUTranslations created without a file has no
            # catalog and fails on every lookup.)
            self.translator = gettext.NullTranslations()
            # Forget the cached locale, otherwise the real catalog would not
            # be reloaded once a context with the same locale shows up again.
            self.__lastlang = None
            return
        if self.translator is None or self.__lastlang != ctx.locale:
            self.__lastlang = ctx.locale
            self.translator = gettext.translation("contents",
                    join(self.i18npath, '_compiled'),
                    languages=[ctx.locale], fallback=True)

    def gettext(self, x):
        """Translate the message `x` into the current locale."""
        self.init_translator() # language could have changed
        return self.translator.gettext(x)

    def ngettext(self, *x):
        """Plural-aware translation; arguments are passed on to `ngettext`."""
        self.init_translator()
        return self.translator.ngettext(*x)
class Translations():
    """Memory of translations"""

    def __init__(self):
        # dict like {'text' : ['source1', 'source2',...],}
        self.translations = collections.OrderedDict()

    def add(self, text, source):
        """Remember that message `text` occurs in `source`.

        Empty messages are ignored: the empty msgid is reserved for the header
        entry and a second one would make the POT file invalid.
        """
        if not text:
            return
        if text not in self.translations:
            self.translations[text] = []
            reporter.report_debug_info('added to translation memory : ', truncate(text))
        if source not in self.translations[text]:
            self.translations[text].append(source)

    def __repr__(self):
        return PrettyPrinter(2).pformat(self.translations)

    def as_pot(self, content_language):
        """returns a POT version of the translation dictionary

        :param content_language: language code inserted into the POT header
        """
        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        # POT-Creation-Date expects a numeric UTC offset (e.g. +0100), not a
        # time zone name. time.timezone/altzone are seconds *west* of UTC.
        if time.daylight and time.localtime().tm_isdst > 0:
            offset = -time.altzone
        else:
            offset = -time.timezone
        sign = '+' if offset >= 0 else '-'
        hours, minutes = divmod(abs(offset) // 60, 60)
        now += '%s%02d%02d' % (sign, hours, minutes)
        # Ordered on purpose: the backslash has to be escaped first, otherwise
        # the backslashes introduced by the other replacements get doubled.
        escapes = (('\\', '\\\\'), ('\n', '\\n'), ('\t', '\\t'), ('"', '\\"'))
        parts = [POT_HEADER % {'LANGUAGE' : content_language, 'NOW' : now}]
        for msg, paths in self.translations.items():
            parts.append("#: %s\n" % " ".join(paths))
            for token, repl in escapes:
                msg = msg.replace(token, repl)
            parts.append('msgid "%s"\n' % msg)
            parts.append('msgstr ""\n\n')
        return ''.join(parts)

    def write_pot(self, pot_filename, language):
        """Write the POT representation to `pot_filename`, creating the parent
        directory if required."""
        directory = os.path.dirname(pot_filename)
        # `directory` is empty for a bare file name; makedirs('') would fail
        if directory and not os.path.exists(directory):
            os.makedirs(directory)
        # The POT header declares UTF-8, so write UTF-8 bytes explicitly
        # instead of relying on the platform's default encoding.
        with open(pot_filename, 'wb') as f:
            f.write(self.as_pot(language).encode('UTF-8'))

    def merge_pot(self, from_filenames, to_filename):
        """Merge the POT files `from_filenames` into `to_filename` using
        `msgcat --use-first`."""
        msgcat = locate_executable('msgcat')
        cmdline = [msgcat, "--use-first"]
        cmdline.extend(from_filenames)
        cmdline.extend(("-o", to_filename))
        reporter.report_debug_info('msgcat cmd line', cmdline)
        portable_popen(cmdline).wait()

    def parse_templates(self, to_filename):
        """Extract messages into `to_filename` with `pybabel extract`, using
        the `babel.cfg` mapping and scanning the current working directory."""
        pybabel = locate_executable('pybabel')
        cmdline = [pybabel, 'extract', '-F', 'babel.cfg', "-o", to_filename, "./"]
        reporter.report_debug_info('pybabel cmd line', cmdline)
        portable_popen(cmdline).wait()
# Module-wide translation memory, filled and written out by the plugin hooks.
translations = Translations() # let's have a singleton
class POFile():
    """Catalog of one language inside the i18n directory.

    Wraps the gettext command line tools to create or update
    `contents+<language>.po` from `contents.pot` and to compile it into
    `_compiled/<language>/LC_MESSAGES/contents.mo`.
    """
    FILENAME_PATTERN = "contents+%s.po"

    def __init__(self, language, i18npath):
        """:param language: language code of the catalog
        :param i18npath: directory holding `contents.pot` and the .po files"""
        self.language = language
        self.i18npath = i18npath

    def _exists(self):
        """Returns True if <language>.po file exists in i18npath"""
        filename = self.FILENAME_PATTERN % self.language
        return exists(join(self.i18npath, filename))

    def _msg_init(self):
        """Generates the first <language>.po file"""
        msginit = locate_executable('msginit')
        cmdline = [msginit, "-i", "contents.pot", "-l", self.language,
                   "-o", self.FILENAME_PATTERN % self.language, "--no-translator"]
        reporter.report_debug_info('msginit cmd line', cmdline)
        portable_popen(cmdline, cwd=self.i18npath).wait()

    def _msg_merge(self):
        """Merges an existing <language>.po file with .pot file"""
        msgmerge = locate_executable('msgmerge')
        cmdline = [msgmerge, self.FILENAME_PATTERN % self.language, "contents.pot",
                   "-U", "-N", "--backup=simple"]
        reporter.report_debug_info('msgmerge cmd line', cmdline)
        portable_popen(cmdline, cwd=self.i18npath).wait()

    def _prepare_locale_dir(self):
        """Prepares the i18n/<language>/LC_MESSAGES/ to store the .mo file ; returns the dirname

        The returned directory name is relative to `i18npath`.

        :raises OSError: if the directory is missing and cannot be created
        """
        directory = join('_compiled', self.language, "LC_MESSAGES")
        full_path = join(self.i18npath, directory)
        try:
            os.makedirs(full_path)
        except OSError:
            # Only "already exists" is harmless; anything else (permissions,
            # a regular file in the way, ...) must not be hidden.
            if not os.path.isdir(full_path):
                raise
        return directory

    def _msg_fmt(self, locale_dirname):
        """Compile an existing <language>.po file into a .mo file"""
        msgfmt = locate_executable('msgfmt')
        cmdline = [msgfmt, self.FILENAME_PATTERN % self.language,
                   "-o", join(locale_dirname, "contents.mo")]
        reporter.report_debug_info('msgfmt cmd line', cmdline)
        portable_popen(cmdline, cwd=self.i18npath).wait()

    def generate(self):
        """Create or update the .po file of the language, then compile it."""
        if self._exists():
            self._msg_merge()
        else:
            self._msg_init()
        locale_dirname = self._prepare_locale_dir()
        self._msg_fmt(locale_dirname)
def line_starts_new_block(line, prev_line):
    """Tell whether `line` separates two blocks of a lektor document.

    A separator is a line made of three or more dashes. As this is also how a
    markdown level 2 heading is underlined, the line only counts as a
    separator when the previous line contains a colon, i.e. when it may be a
    `key: value` pair."""
    if prev_line and ':' in prev_line:
        dashes = line.strip()
        return len(dashes) >= 3 and dashes == u'-' * len(dashes)
    return False # could be a markdown heading
def split_paragraphs(document):
    """Split `document` into paragraphs, i.e. at runs of blank lines.

    `document` is either a string or a list/tuple of lines, which are
    concatenated first."""
    if isinstance(document, (list, tuple)):
        text = ''.join(document) # list of lines
    else:
        text = document
    return re.split('\n(?:\\s*\n){1,}', text)
# We cannot check for unused arguments here, they're mandated by the plugin API.
#pylint:disable=unused-argument
class I18NPlugin(Plugin):
    """Lektor plugin collecting translatable content into gettext catalogs and
    generating translated `contents+<language>.lr` files from them."""
    name = u'i18n'
    description = u'Internationalisation helper'

    def translate_tag(self, s, *args, **kwargs):
        """Translate `s` for the locale of the current build context.

        Registered as the `translate` filter and the `_` global. When the
        context locale is the content language, `s` is recorded in the
        translation memory and returned as is.
        """
        if not self.enabled:
            return s # no operation
        s = s.strip()
        ctx = get_ctx()
        # Without a context there is no locale to translate to; an empty
        # message must not be looked up (gettext would return the PO header).
        if ctx is None or not s:
            return s
        if self.content_language == ctx.locale:
            translations.add(s, '(dynamic)')
            reporter.report_debug_info('added to translation memory (dynamic): ', truncate(s))
            return s
        translator = gettext.translation("contents", join(self.i18npath, '_compiled'),
                                         languages=[ctx.locale], fallback=True)
        return trans(translator, s)

    def choose_language(self, l, language, fallback='en', attribute='language'):
        """Will return from list 'l' the element with attribute 'attribute' set to given 'language'.
        If none is found, will try to return element with attribute 'attribute' set to given 'fallback'.
        Else returns None."""
        language = language.strip().lower()
        fallback = fallback.strip().lower()
        # first pass looks for the requested language, second for the fallback
        for wanted in (language, fallback):
            for item in l:
                if item[attribute].strip().lower() == wanted:
                    return item
        return None

    #pylint: disable=attribute-defined-outside-init
    def on_setup_env(self, **extra):
        """Setup `env` for the plugin

        Reads the plugin configuration, enables the Jinja2 i18n extension and
        registers the template helpers.

        :raises RuntimeError: if the `translations` option is not configured
        """
        # Read configuration
        config = self.get_config()
        truthy = ('true', 'True', '1')
        self.enabled = config.get('enable', 'true') in truthy
        if not self.enabled:
            reporter.report_generic('I18N plugin disabled in configs/i18n.ini')
        self.i18npath = config.get('i18npath', 'i18n')
        self.url_prefix = config.get('url_prefix', 'http://localhost/')
        # whether or not to use a paragraph as smallest translatable unit
        self.trans_parwise = config.get('translate_paragraphwise', 'false') in truthy
        self.content_language = config.get('content', 'en')
        self.env.jinja_env.add_extension('jinja2.ext.i18n')
        self.env.jinja_env.policies['ext.i18n.trimmed'] = True # do a .strip()
        self.env.jinja_env.install_gettext_translations(TemplateTranslator(self.i18npath))
        # ToDo: is this still required
        languages = config.get('translations')
        if languages is None:
            raise RuntimeError('Please specify the "translations" configuration option in configs/i18n.ini')
        # drop empty entries caused by stray commas (e.g. "fr,de,")
        self.translations_languages = [
            lang for lang in languages.replace(' ', '').split(',') if lang]
        if self.content_language not in self.translations_languages:
            self.translations_languages.append(self.content_language)
        self.env.jinja_env.filters['translate'] = self.translate_tag
        self.env.jinja_env.globals['_'] = self.translate_tag
        self.env.jinja_env.globals['choose_language'] = self.choose_language

    def process_node(self, fields, sections, source, zone, root_path):
        """For a given node (), identify all fields to translate, and add new
        fields to translations memory. Flow blocks are handled recursively.

        :param fields: field definitions of the model or flow block
        :param sections: mapping of field name to the lines of its content
        :param source: record the content belongs to
        :param zone: model id or flow block name, used in the source reference
        :param root_path: project root the source file name is made relative to
        """
        for field in fields:
            if ('translate' in field.options) \
            and (source.alt in (PRIMARY_ALT, self.content_language)) \
            and (field.options['translate'] in ('True', 'true', '1', 1)):
                if field.name in sections:
                    section = sections[field.name]
                    # if blockwise, each paragraph is one translatable message,
                    # otherwise each line
                    chunks = (split_paragraphs(section) if self.trans_parwise
                              else [x.strip() for x in section if x.strip()])
                    origin = "%s (%s:%s.%s)" % (
                        urljoin(self.url_prefix, source.url_path),
                        relpath(source.source_filename, root_path),
                        zone, field.name)
                    for chunk in chunks:
                        chunk = chunk.strip('\r\n')
                        # an empty msgid would clash with the POT header entry
                        if chunk:
                            translations.add(chunk, origin)

            if isinstance(field.type, FlowType):
                if field.name in sections:
                    section = sections[field.name]
                    for blockname, blockvalue in process_flowblock_data("".join(section)):
                        flowblockmodel = source.pad.db.flowblocks[blockname]
                        blockcontent = dict(tokenize(blockvalue))
                        self.process_node(flowblockmodel.fields, blockcontent,
                                          source, blockname, root_path)

    def __parse_source_structure(self, lines):
        """Parse structure of source file. In short, there are two types of
        chunks: those which need to be translated ('translatable') and those
        which don't ('raw'). "title: test" could be split into:
        [('raw', 'title: '), ('translatable', 'test')]

        NOTE: There is no guarantee that multiple raw blocks couldn't occur and
        in fact due to implementation details, this actually happens.

        :param lines: lines of the source file, including their line endings
        :return: list of `[kind, text]` pairs; neighbours of the same kind are
            merged
        """
        blocks = []
        count_lines_block = 0 # counting the number of lines of the current block
        is_content = False
        prev_line = None
        for line in lines:
            stripped_line = line.strip()
            if not stripped_line: # empty line
                blocks.append(('raw', '\n'))
                continue
            # line like "---*" or a new block tag
            if line_starts_new_block(stripped_line, prev_line) or \
                    block2re.search(stripped_line):
                count_lines_block = 0
                is_content = False
                blocks.append(('raw', line))
            else:
                count_lines_block += 1
                match = command_re.search(stripped_line)
                # handle first line, while not in content
                if count_lines_block == 1 and not is_content and match:
                    key, value = match.groups()
                    blocks.append(('raw', encode(key) + ':'))
                    if value:
                        blocks.append(('raw', ' '))
                        blocks.append(('translatable', encode(value)))
                    blocks.append(('raw', '\n'))
                else:
                    is_content = True
            if is_content:
                blocks.append(('translatable', line))
            prev_line = line
        # join neighbour blocks of same type
        newblocks = []
        for kind, data in blocks:
            if newblocks and newblocks[-1][0] == kind: # same type, merge
                newblocks[-1][1] += data
            else:
                newblocks.append([kind, data])
        return newblocks

    def on_before_build(self, builder, build_state, source, prog, **extra):
        """Before building a page, produce all its alternatives (=translated pages)
        using the gettext translations available."""
        if not (self.enabled and isinstance(source, Page)
                and source.alt in (PRIMARY_ALT, self.content_language)):
            return
        contents = None
        for fn in source.iter_source_filenames():
            try:
                contents = FileContents(fn)
            except IOError:
                pass # next
        if contents is None:
            return # no source file to translate
        # The source does not depend on the target language: parse it once.
        with contents.open(encoding='utf-8') as file:
            chunks = self.__parse_source_structure(file.readlines())
        # The source is read as UTF-8, so write UTF-8 as well instead of the
        # platform default (Python 2's open() has no `encoding` argument).
        open_kwargs = {'encoding': 'utf-8'} if PY3 else {}
        for language in self.translations_languages:
            translator = gettext.translation("contents",
                    join(self.i18npath, '_compiled'), languages=[language], fallback=True)
            translated_filename = join(dirname(source.source_filename),
                                       "contents+%s.lr" % language)
            with open(translated_filename, "w", **open_kwargs) as f:
                for kind, content in chunks: # see __parse_source_structure
                    if kind == 'raw':
                        f.write(content)
                    elif kind == 'translatable':
                        if self.trans_parwise: # translate per paragraph
                            f.write(self.__trans_parwise(content, translator))
                        else:
                            f.write(self.__trans_linewise(content, translator))
                    else:
                        raise RuntimeError("Unknown chunk type detected, this is a bug")

    def __trans_linewise(self, content, translator):
        """Translate the chunk linewise."""
        lines = []
        for line in content.split('\n'):
            line_stripped = line.strip()
            if not line_stripped:
                # Nothing to translate; looking up '' would return the PO
                # header and inject it into the page.
                lines.append(line)
                continue
            trans_stripline = trans(translator, line_stripped) # translate the stripped version
            # and re-inject the stripped translation into original line (not stripped)
            lines.append(line.replace(line_stripped, trans_stripline, 1))
        return '\n'.join(lines)

    def __trans_parwise(self, content, translator):
        """Extract translatable strings block-wise, query for translation of
        block and re-inject result."""
        result = []
        for paragraph in split_paragraphs(content):
            stripped = paragraph.strip('\n\r')
            # skip empty paragraphs: replace('', x) would insert x between
            # every character and '' translates to the PO header
            if stripped:
                paragraph = paragraph.replace(stripped, trans(translator, stripped))
            result.append(paragraph)
        return '\n\n'.join(result)

    def on_after_build(self, builder, build_state, source, prog, **extra):
        """After a page was built, feed its translatable fields into the
        translation memory."""
        if self.enabled and isinstance(source, Page):
            try:
                text = source.contents.as_text()
            except IOError:
                pass
            else:
                fields = source.datamodel.fields
                sections = dict(tokenize(text.splitlines())) # {'sectionname':[list of section texts]}
                self.process_node(fields, sections, source, source.datamodel.id,
                                  builder.env.root_path)

    def get_templates_pot_filename(self):
        """Return the name of the temporary POT file for template messages,
        creating the file on first use."""
        try:
            return self.pot_templates_filename
        except AttributeError:
            # keep a reference to the file object alongside its name
            self.pot_templates_file = tempfile.NamedTemporaryFile(suffix=".pot", prefix="templates-")
            self.pot_templates_filename = self.pot_templates_file.name
            return self.pot_templates_filename

    def on_before_build_all(self, builder, **extra):
        """Before the build starts, extract the messages of the templates."""
        if self.enabled:
            reporter.report_generic("i18n activated, with main language %s"% self.content_language )
            templates_pot_filename = self.get_templates_pot_filename()
            reporter.report_generic("Parsing templates for i18n into %s" \
                    % relpath(templates_pot_filename, builder.env.root_path))
            translations.parse_templates(templates_pot_filename)

    def on_after_build_all(self, builder, **extra):
        """Once the build process is over :
        - write the translation template `contents.pot` on the filesystem,
        - write all translation contents+<language>.po files """
        if not self.enabled:
            return
        contents_pot_filename = join(builder.env.root_path, self.i18npath, 'contents.pot')
        pots = [contents_pot_filename,
                self.get_templates_pot_filename(),
                join(builder.env.root_path, self.i18npath, 'plugins.pot')]
        # write out contents.pot from web site contents
        translations.write_pot(pots[0], self.content_language)
        reporter.report_generic("%s generated" % relpath(pots[0],
                builder.env.root_path))
        pots = [p for p in pots if os.path.exists(p)] # only keep existing ones
        if len(pots) > 1:
            translations.merge_pot(pots, contents_pot_filename)
            reporter.report_generic("Merged POT files %s" % ', '.join(
                relpath(p, builder.env.root_path) for p in pots))

        for language in self.translations_languages:
            po_file = POFile(language, self.i18npath)
            po_file.generate()