-
Notifications
You must be signed in to change notification settings - Fork 1
/
bbl2html.py
executable file
·108 lines (85 loc) · 3.24 KB
/
bbl2html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env python3
from ris2bib import __version__
import re
import sys
def bbl2html(s):
    r"""Convert the bibliography entries of a .bbl source into HTML snippets.

    Every ``{key} ... \BibitemOpen ... \BibitemShut`` span found in *s* is
    treated as one entry. The first such match is discarded
    (NOTE(review): presumably it belongs to the preamble rather than to a
    real entry -- confirm against a sample .bbl file).

    For each entry the text is processed in three stages:

    1. Whitespace is normalised and an apostrophe that follows a word
       character becomes a typographic one.
    2. Brace groups are peeled off innermost-first and replaced by
       ``<#N>`` placeholders, so that macro arguments can be matched with
       simple regular expressions; known macros are then rewritten as HTML.
    3. Placeholders are expanded again and remaining TeX shorthands
       (``--``, ``~``, ``\ ``, acute and umlaut accents) are translated.

    Args:
        s: Complete contents of the .bbl file as a string.

    Yields:
        ``(key, html)`` tuples, one per entry, in order of appearance.
        ``html`` is not stripped of surrounding whitespace.
    """
    # A macro argument once braces are gone: optional spaces followed by a
    # group placeholder, a single digit, or a space plus one non-space char.
    arg = r' *(<#\d+>|\d| \S)'
    # Trailing spaces and an optional empty group after an argument-less macro.
    noarg = ' *({})?'

    # (pattern, replacement) pairs, applied in this order to every group.
    macros = [
        (r'\\bibf?namefont' + arg, r'\1'),
        (r'\\bib(info|field)' + 2 * arg, r'\3'),
        (r'\\(Eprint|href)' + 2 * arg, r"<a href='\2'>\3</a>"),
        (r'\\emph' + arg, r'<em>\1</em>'),
        (r'\\natexlab' + arg, ''),
        (r'\\textbf' + arg, r'<b>\1</b>'),
        (r'\\textsc' + arg,
         r"<span style='font-variant: small-caps'>\1</span>"),
        # A bare digit maps to the Unicode subscript digit U+2080..U+2089,
        # emitted as a numeric character reference (e.g. 2 -> &#x2082;).
        (r'\\textsubscript(\d)', r'&#x208\1;'),
        (r'\\textsubscript' + arg, r'<sub>\1</sub>'),
        # Zero-width space: an invisible line-break opportunity.
        (r'\\allowbreak' + noarg, '\u200b'),
        (r'\\@' + noarg, ''),
    ]

    entries = re.findall(r'\{([^{}]*?)\}[^{}]*?'
                         r'\\BibitemOpen(.+?)\\BibitemShut', s, re.DOTALL)

    for key, text in entries[1:]:
        text = re.sub(r'\n', r' ', text)
        text = re.sub(r' +', r' ', text)
        text = re.sub(r"(?<=\w)'", r'’', text)

        # Replace brace groups by numbered placeholders, innermost first.
        groups = []
        while '{' in text:
            innermost = re.findall(r'\{[^{]*?\}', text)
            if not innermost:
                # Unbalanced '{': nothing left to extract. Leave it in place
                # instead of looping forever.
                break
            for group in innermost:
                groups.append(group[1:-1])
                text = text.replace(group, '<#%d>' % len(groups))

        # The remaining top-level text is handled like one more group.
        groups.append(text)

        for n, group in enumerate(groups):
            for pattern, replacement in macros:
                group = re.sub(pattern, replacement, group)
            groups[n] = group

        # Expand placeholders from the highest number down, so that outer
        # groups are restored before the inner groups they refer to.
        text = groups[-1]
        for n, group in reversed(list(enumerate(groups, 1))):
            text = text.replace('<#%d>' % n, group)

        text = re.sub(r'--', r'–', text)
        # TeX tie -> no-break space (U+00A0).
        text = re.sub(r'~', '\u00a0', text)
        text = re.sub(r'\\ ', r' ', text)
        text = re.sub(r"\\'([aeiou])", r'&\1acute;', text, flags=re.I)
        text = re.sub(r'\\"([aeiou])', r'&\1uml;', text, flags=re.I)

        yield (key, text)
def main():
    """Command-line entry point: convert a .bbl file into an HTML list.

    Exactly two positional arguments are expected in ``sys.argv``: the input
    .bbl path and the output HTML path. Arguments starting with ``-`` are
    not treated as paths. Without ``--citekeys`` the entries are written as
    an unordered list; with it, as an ordered list whose items carry the
    entry key as ``id``, followed by a script that numbers in-page links to
    those items.

    Raises:
        SystemExit: If the number of positional arguments is not two.
    """
    paths = [arg for arg in sys.argv[1:] if not arg.startswith('-')]
    if len(paths) != 2:
        raise SystemExit(
            'usage: bbl2html.py [--citekeys] INPUT.bbl OUTPUT.html')
    bbl, html = paths

    citekeys = '--citekeys' in sys.argv[1:]

    # NOTE(review): both files use the platform default encoding, and the
    # generated text can contain non-ASCII characters -- consider making the
    # encoding explicit.
    with open(bbl) as infile:
        source = infile.read()

    # The context manager closes the output file even if conversion fails.
    with open(html, 'w') as outfile:
        outfile.write('''<!DOCTYPE html>
<html>
<body>
<%s>
''' % ("ol id='bibliography'" if citekeys else 'ul'))

        for key, entry in bbl2html(source):
            if citekeys:
                outfile.write("<li id='%s'> %s\n" % (key, entry.strip()))
            else:
                outfile.write('<li> %s\n' % entry.strip())

        if citekeys:
            # The script gives every link of the form href='#key' that
            # targets an item of the bibliography a number as its text, in
            # order of first appearance, and then reorders the list items
            # to match (only if at least one such link exists).
            outfile.write('''</ol>
<script>
const links = document.getElementsByTagName('a')
const bib = document.getElementById('bibliography')
const refs = new Array()
for (let i = 0; i < links.length; i++) {
let href = links[i].getAttribute('href')
if (href && href.startsWith('#')) {
let ref = document.getElementById(href.substring(1))
if (ref && bib.contains(ref)) {
links[i].innerText = refs.indexOf(ref) + 1 || refs.push(ref)
}
}
}
if (refs.length) bib.replaceChildren(...refs)
</script>''')
        else:
            outfile.write('</ul>')

        outfile.write('''
</body>
</html>
''')
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()