-
Notifications
You must be signed in to change notification settings - Fork 14
/
display.py
147 lines (135 loc) · 4.65 KB
/
display.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""Align a verse to a given metre."""
from __future__ import absolute_import, division, print_function, unicode_literals
import slp1
from transliteration import transliterate
# Filler character that _Align puts into one aligned string at positions
# where only the other string has a character. AlignVerseToMetre and
# HtmlTableFromAlignment compare against it to recognise such positions.
_GAP_CHAR = '-'
def _Align(s, t, gap_char=None):
    """Find a best (minimum-cost) alignment of strings s and t.

    An alignment pads both strings with gap characters until they have the
    same length. A character of s facing a gap costs `addition_cost`, a
    character of t facing a gap costs `deletion_cost`, and two characters
    facing each other cost 0 if they are equal and 1 otherwise.

    Either string (or both) may be empty; the other one is then aligned
    entirely against gaps.

    Args:
      s: First string.
      t: Second string.
      gap_char: Character used as padding. Defaults to the module-level
        _GAP_CHAR.

    Returns:
      A tuple (aligned_s, aligned_t) of two strings of equal length: s and
      t with gap characters inserted.
    """
    if gap_char is None:
        gap_char = _GAP_CHAR
    m = len(s)
    n = len(t)
    addition_cost = 1  # Cost of adding an extra character in s
    deletion_cost = 1  # Cost of skipping a character of t

    # best[i][j] is the cheapest cost of aligning s[:i] with t[:j].
    # The borders are written directly: a prefix can only be aligned with
    # the empty string by putting a gap opposite every character. (Seeding
    # the table with a sentinel of m * n + 1 capped these cells at 1 when
    # one of the strings was empty.)
    best = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        best[i][0] = addition_cost * i
    for j in range(1, n + 1):
        best[0][j] = deletion_cost * j
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            mismatch_cost = 0 if s[i - 1] == t[j - 1] else 1
            best[i][j] = min(best[i - 1][j] + addition_cost,
                             best[i][j - 1] + deletion_cost,
                             best[i - 1][j - 1] + mismatch_cost)

    # Walk back from the bottom-right corner to recover one optimal
    # alignment. Among equally cheap moves the order of preference is:
    # extra character in s, then skipped character of t, then a pairing.
    # Characters are collected back to front and reversed at the end.
    reversed_s = []
    reversed_t = []
    i = m
    j = n
    while i > 0 or j > 0:
        if i > 0 and best[i][j] == best[i - 1][j] + addition_cost:
            reversed_s.append(s[i - 1])
            reversed_t.append(gap_char)
            i -= 1
        elif j > 0 and best[i][j] == best[i][j - 1] + deletion_cost:
            reversed_s.append(gap_char)
            reversed_t.append(t[j - 1])
            j -= 1
        else:
            # Only the diagonal move is left, so both strings must still
            # have a character and the costs must add up.
            assert i > 0 and j > 0
            assert best[i][j] == (best[i - 1][j - 1] +
                                  (0 if s[i - 1] == t[j - 1] else 1))
            reversed_s.append(s[i - 1])
            reversed_t.append(t[j - 1])
            i -= 1
            j -= 1
    assert i == 0 and j == 0
    aligned_s = ''.join(reversed(reversed_s))
    aligned_t = ''.join(reversed(reversed_t))
    assert len(aligned_s) == len(aligned_t)
    return (aligned_s, aligned_t)
def _SyllabizeVisual(text):
    """Split text into display syllables, one per vowel.

    Every syllable ends with a vowel (a character in slp1.VOWELS) plus
    whatever trailing material is attached to it; everything before the
    first vowel therefore belongs to the first syllable. Concatenating the
    returned pieces gives back `text` exactly, which is asserted. As a
    consequence a non-empty text that contains no vowel at all fails that
    assertion.

    Args:
      text: The string to split.

    Returns:
      A list of strings whose concatenation equals `text`.
    """
    size = len(text)
    syllables = []
    pending = ''  # Characters seen since the last completed syllable.
    pos = 0
    while pos < size:
        ch = text[pos]
        pos += 1
        pending += ch
        if ch not in slp1.VOWELS:
            continue

        # A vowel closes the syllable. Look ahead and also swallow every
        # complete chunk before the next vowel, where a chunk ends at a
        # character outside slp1.ALPHABET or at the end of the text. A
        # chunk that runs into a vowel is left for the next syllable.
        finished_chunks = []
        chunk = ''
        for look in range(pos, size):
            upcoming = text[look]
            if upcoming in slp1.VOWELS:
                break
            chunk += upcoming
            if look + 1 == size or upcoming not in slp1.ALPHABET:
                finished_chunks.append(chunk)
                chunk = ''
        trailing = ''.join(finished_chunks)

        syllables.append(pending + trailing)
        pending = ''
        pos += len(trailing)
    assert ''.join(syllables) == text, (syllables, text)
    return syllables
# Take verse_pattern too, because of additional display chars in display_verse
def AlignVerseToMetre(display_verse, verse_pattern, metre_pattern_lines):
    """Lay out the syllables of a verse against the lines of a metre.

    The verse pattern is aligned with the concatenated metre pattern using
    _Align, and the result is cut into lines following the lengths of
    `metre_pattern_lines`.

    Args:
      display_verse: Sequence of strings; joined with single spaces and
        split into syllables with _SyllabizeVisual. Must yield exactly one
        syllable per element of `verse_pattern` (asserted).
      verse_pattern: Pattern of the verse, one character per syllable.
      metre_pattern_lines: Sequence of strings, the metre pattern of each
        line.

    Returns:
      None if `verse_pattern` is empty. Otherwise a list of lines, each a
      list of 3-tuples (text, verse_char, metre_char). For a position the
      verse does not fill, text and verse_char are both _GAP_CHAR; for a
      position the metre does not have, metre_char is _GAP_CHAR.
    """
    if not verse_pattern:
        return None

    whole_metre = ''.join(metre_pattern_lines)
    aligned_v, aligned_m = _Align(verse_pattern, whole_metre)
    assert len(aligned_v) == len(aligned_m)
    assert len(aligned_v) >= len(verse_pattern)
    assert len(aligned_m) >= len(whole_metre)

    syllables = _SyllabizeVisual(' '.join(display_verse))
    assert len(syllables) == len(verse_pattern)

    last_pos = len(aligned_m) - 1
    rows = [[]]
    line_index = 0
    metre_used = 0      # Metre positions consumed on the current line.
    syllables_used = 0  # Verse syllables placed so far.
    for pos, metre_char in enumerate(aligned_m):
        verse_char = aligned_v[pos]
        if verse_char == _GAP_CHAR:
            rows[-1].append((verse_char, verse_char, metre_char))
        else:
            rows[-1].append((syllables[syllables_used], verse_char, metre_char))
            syllables_used += 1

        if metre_char != _GAP_CHAR:
            metre_used += 1
        if metre_used != len(metre_pattern_lines[line_index]):
            continue

        # The current metre line is complete. Start a new output line,
        # unless all that remains are verse syllables without a metre
        # position (those stay on this line). The very last position
        # always starts a new, empty, output line.
        if pos == last_pos or any(later != _GAP_CHAR
                                  for later in aligned_m[pos + 1:]):
            line_index += 1
            metre_used = 0
            rows.append([])
    return rows
def HtmlTableFromAlignment(alignment):
    """Render an alignment as HTML, one string per line of the alignment.

    Each entry becomes a <span> whose class is "syl" followed by the
    entry's second element. Where the second and third elements differ,
    the text is wrapped in an <abbr> whose title names the third element.

    Args:
      alignment: Iterable of lines; each line is an iterable of entries
        indexed as [0] text, [1] actual mark, [2] expected mark, such as
        the tuples built by AlignVerseToMetre.

    Returns:
      A list of HTML strings, one per line, each ending in ' <br/>\n'.
    """
    # NOTE(review): values are interpolated without HTML escaping; confirm
    # that callers only pass trusted/escaped text.
    html_lines = []
    for row in alignment:
        pieces = []
        for entry in row:
            text, actual, expected = entry[0], entry[1], entry[2]
            if text == _GAP_CHAR:
                shown = '[-]'
            else:
                shown = transliterate.TransliterateForTable(text)
            if actual == expected:
                content = shown
            else:
                content = '<abbr title="Should be %s">%s</abbr>' % (expected,
                                                                    shown)
            pieces.append('<span class="syl%s">%s</span>' % (actual, content))
        html_lines.append('%s <br/>\n' % ''.join(pieces))
    return html_lines