-
Notifications
You must be signed in to change notification settings - Fork 0
/
script.js
133 lines (103 loc) · 7.14 KB
/
script.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// Lookup tables used by the converters in this file.
//
// hebonTransliteration[i] is the romaji reading of hiraganaCombinations[i]; the two arrays
// are parallel and must always have the same length and ordering.
// hiragana[i] and katakana[i] are the same kana in the two scripts (also parallel arrays).
//
// Declared with `const` so they are not created as implicit globals, which throws a
// ReferenceError in strict mode and in ES modules.
const hebonTransliteration = ['a', 'i', 'u', 'e', 'o', 'ka', 'ki', 'ku', 'ke', 'ko', 'kya', 'kyi', 'kyu', 'kye', 'kyo', 'ga', 'gi', 'gu', 'ge', 'go', 'gya', 'gyi', 'gyu', 'gye', 'gyo', 'sa', 'shi', 'su', 'se', 'so', 'sha', 'shi', 'shu', 'she', 'sho', 'za', 'ji', 'zu', 'ze', 'zo', 'ja', 'ji', 'ju', 'je', 'jo', 'ta', 'chi', 'tsu', 'te', 'to', 'cha', 'chi', 'chu', 'che', 'cho', 'da', 'ji', 'zu', 'de', 'do', 'dya', 'dyi', 'dyu', 'dye', 'dyo', 'na', 'ni', 'nu', 'ne', 'no', 'nya', 'nyi', 'nyu', 'nye', 'nyo', 'ha', 'hi', 'fu', 'he', 'ho', 'hya', 'hyi', 'hyu', 'hye', 'hyo', 'ba', 'bi', 'bu', 'be', 'bo', 'bya', 'byi', 'byu', 'bye', 'byo', 'pa', 'pi', 'pu', 'pe', 'po', 'pya', 'pyi', 'pyu', 'pye', 'pyo', 'ma', 'mi', 'mu', 'me', 'mo', 'mya', 'myi', 'myu', 'mye', 'myo', 'ya', 'yu', 'yo', 'ra', 'ri', 'ru', 're', 'ro', 'rya', 'ryi', 'ryu', 'rye', 'ryo', 'wa', 'i', 'e', 'o', 'vu', 'n'];
const hiraganaCombinations = ['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'きゃ', 'きぃ', 'きゅ', 'きぇ', 'きょ', 'が', 'ぎ', 'ぐ', 'げ', 'ご', 'ぎゃ', 'ぎぃ', 'ぎゅ', 'ぎぇ', 'ぎょ', 'さ', 'し', 'す', 'せ', 'そ', 'しゃ', 'しぃ', 'しゅ', 'しぇ', 'しょ', 'ざ', 'じ', 'ず', 'ぜ', 'ぞ', 'じゃ', 'じぃ', 'じゅ', 'じぇ', 'じょ', 'た', 'ち', 'つ', 'て', 'と', 'ちゃ', 'ちぃ', 'ちゅ', 'ちぇ', 'ちょ', 'だ', 'ぢ', 'づ', 'で', 'ど', 'ぢゃ', 'ぢぃ', 'ぢゅ', 'ぢぇ', 'ぢょ', 'な', 'に', 'ぬ', 'ね', 'の', 'にゃ', 'にぃ', 'にゅ', 'にぇ', 'にょ', 'は', 'ひ', 'ふ', 'へ', 'ほ', 'ひゃ', 'ひぃ', 'ひゅ', 'ひぇ', 'ひょ', 'ば', 'び', 'ぶ', 'べ', 'ぼ', 'びゃ', 'びぃ', 'びゅ', 'びぇ', 'びょ', 'ぱ', 'ぴ', 'ぷ', 'ぺ', 'ぽ', 'ぴゃ', 'ぴぃ', 'ぴゅ', 'ぴぇ', 'ぴょ', 'ま', 'み', 'む', 'め', 'も', 'みゃ', 'みぃ', 'みゅ', 'みぇ', 'みょ', 'や', 'ゆ', 'よ', 'ら', 'り', 'る', 'れ', 'ろ', 'りゃ', 'りぃ', 'りゅ', 'りぇ', 'りょ', 'わ', 'ゐ', 'ゑ', 'を', 'ゔ', 'ん'];
const hiragana = ['ぁ','あ','ぃ','い','ぅ','う','ぇ','え','ぉ','お','か','が','き','ぎ','く','ぐ','け','げ','こ','ご','さ','ざ','し','じ','す','ず','せ','ぜ','そ','ぞ','た','だ','ち','ぢ','っ','つ','づ','て','で','と','ど','な','に','ぬ','ね','の','は','ば','ぱ','ひ','び','ぴ','ふ','ぶ','ぷ','へ','べ','ぺ','ほ','ぼ','ぽ','ま','み','む','め','も','ゃ','や','ゅ','ゆ','ょ','よ','ら','り','る','れ','ろ','ゎ','わ','ゐ','ゑ','を','ん','ゔ'];
const katakana = ['ァ','ア','ィ','イ','ゥ','ウ','ェ','エ','ォ','オ','カ','ガ','キ','ギ','ク','グ','ケ','ゲ','コ','ゴ','サ','ザ','シ','ジ','ス','ズ','セ','ゼ','ソ','ゾ','タ','ダ','チ','ヂ','ッ','ツ','ヅ','テ','デ','ト','ド','ナ','ニ','ヌ','ネ','ノ','ハ','バ','パ','ヒ','ビ','ピ','フ','ブ','プ','ヘ','ベ','ペ','ホ','ボ','ポ','マ','ミ','ム','メ','モ','ャ','ヤ','ュ','ユ','ョ','ヨ','ラ','リ','ル','レ','ロ','ヮ','ワ','ヰ','ヱ','ヲ','ン','ヴ'];
// Small ("tiny") kana. The hiragana and katakana lists are in the same order.
const tiny_hira = [...'ぁぃぅぇぉっゃゅょゎ'];
const tiny_kata = [...'ァィゥェォッャュョヮ'];
const all_tiny_kana = [...tiny_hira, ...tiny_kata];
// The vowel-only subsets of the tiny kana.
const tiny_hira_vowels = [...'ぁぃぅぇぉ'];
const tiny_kata_vowels = [...'ァィゥェォ'];
const all_tiny_kana_vowels = [...tiny_hira_vowels, ...tiny_kata_vowels];
// Character ranges used below:
//   ぁ-ゖァ-ヺ            every kana glyph
//   ぁ-ぢつ-ゔァ-ヂツ-ヴ  every kana glyph except the tiny tsu (っ / ッ)
// Alternatives are tried in this order:
//   1. a tiny tsu followed by one kana,
//   2. one kana followed by a tiny vowel or tiny ya/yu/yo,
//   3. a single kana.
const unifiedKanaPattern = /[っッ]{1}[ぁ-ぢつ-ゔァ-ヂツ-ヴ]{1}|[ぁ-ぢつ-ゔァ-ヂツ-ヴ]{1}[ぁぃぅぇぉゃゅょァィゥェォャュョ]{1}|[ぁ-ぢつ-ゔァ-ヂツ-ヴ]{1}/;
/**
 * Translates `item` through a pair of parallel lists.
 *
 * @param {*} item - Value to look up.
 * @param {Array} list1 - List searched for `item`.
 * @param {Array} list2 - List the result is read from, at the position where `item` was found in `list1`.
 * @returns {*} The entry of `list2` at that position, or `item` itself when `list1` does not contain it.
 */
const getMatchingItem = (item, list1, list2) => {
  if (!list1.includes(item)) {
    return item;
  }
  const position = list1.indexOf(item);
  return list2[position];
};
// Plain string methods like substring are used below, presumably for maximum compatibility (the original note was cut off).
/**
 * Replaces `length` characters of `str`, starting at `index`, with `newSubstring`.
 *
 * @param {string} str - Source string.
 * @param {number} index - Position of the first character to replace.
 * @param {number} length - Number of characters to drop.
 * @param {string} newSubstring - Text inserted in place of the dropped characters.
 * @returns {string} The spliced string, or `str` unchanged when `index` lies outside the string.
 */
function replaceAtIndex(str, index, length, newSubstring) {
  if (index < 0 || index >= str.length) {
    return str;
  }
  const before = str.substring(0, index);
  const after = str.substring(index + length);
  return `${before}${newSubstring}${after}`;
}
/**
 * Collects every match of `regex` in `str`.
 *
 * The search runs on a private copy of the pattern, so the caller's RegExp (and its
 * `lastIndex`) is never modified. The copy keeps the caller's flags and adds `g` when it
 * is missing.
 *
 * @param {string} str - Text to search.
 * @param {RegExp|string} regex - Pattern to search for.
 * @returns {RegExpExecArray[]} Match arrays in order of appearance, as returned by `RegExp.prototype.exec`
 *   (each carries `.index`); empty when nothing matches.
 */
function getMatches(str, regex) {
  const subject = String(str);
  // Keep flags such as `i` or `u` instead of discarding them; only add `g` if absent.
  const inheritedFlags = regex instanceof RegExp ? regex.flags : '';
  const flags = inheritedFlags.includes('g') ? inheritedFlags : `${inheritedFlags}g`;
  const regexWithGlobal = new RegExp(regex, flags);
  const matches = [];
  let match;
  while ((match = regexWithGlobal.exec(subject)) !== null) {
    matches.push(match);
    if (match[0] === '') {
      // An empty match leaves lastIndex where it was; step forward manually, otherwise the
      // loop would find the same empty match forever.
      const fullUnicode = regexWithGlobal.unicode || regexWithGlobal.unicodeSets;
      const atAstralChar = fullUnicode && subject.codePointAt(regexWithGlobal.lastIndex) > 0xffff;
      regexWithGlobal.lastIndex += atAstralChar ? 2 : 1;
    }
  }
  return matches;
}
/**
 * Converts katakana in `text` to hiragana.
 *
 * Each UTF-16 unit is looked up in the `katakana` table and swapped for the entry at the same
 * position in `hiragana`; anything not in the table is copied through unchanged.
 *
 * @param {string} text - Input text.
 * @returns {string} `text` with table-listed katakana replaced by hiragana.
 */
function kata2hira(text) {
  return text
    .split("")
    .map((unit) => getMatchingItem(unit, katakana, hiragana))
    .join("");
}
/**
 * Romanizes a two-character string made of a tiny tsu (っ) followed by one kana.
 *
 * The tiny tsu is written by doubling the first letter of the following kana's romaji
 * (っか -> "kka"). Hepburn writes the doubling of "ch" as "tch", so っち becomes "tchi"
 * rather than "cchi".
 *
 * @param {string} hiraString - Two hiragana characters; only the second is looked up.
 * @returns {string} The romaji of the second character with its leading consonant doubled.
 */
function romajiForTsuPair(hiraString) {
  const nonTsu = getMatchingItem(hiraString[1], hiraganaCombinations, hebonTransliteration);
  const doubledLetter = nonTsu.startsWith('ch') ? 't' : nonTsu[0];
  return doubledLetter + nonTsu;
}
/**
 * Romanizes a two-character string made of one kana followed by a tiny vowel (ぁぃぅぇぉ).
 *
 * The result is the consonant onset of the first kana plus the vowel named by the tiny kana.
 * The onset is the kana's romaji with its final vowel removed, so multi-letter onsets
 * survive: ちぇ -> "che", しぇ -> "she", つぁ -> "tsa", ふぁ -> "fa".
 * A first kana whose romaji starts with "u" (う) contributes "w" (うぃ -> "wi"). Any other
 * bare vowel is kept as it is (あぁ -> "aa").
 *
 * @param {string} hiraString - Two hiragana characters: a base kana and a tiny vowel.
 * @returns {string} The combined romaji.
 */
function romajiForVowelPair(hiraString) {
  const tiny_romaji_vowels = ['a', 'i', 'u', 'e', 'o'];
  const baseRomaji = getMatchingItem(hiraString[0], hiraganaCombinations, hebonTransliteration);
  let onset;
  if (baseRomaji[0] === 'u') {
    onset = 'w';
  } else {
    // Drop only the trailing vowel so that "chi" yields "ch", not just "c".
    const withoutVowel = baseRomaji.replace(/[aiueo]$/, '');
    onset = withoutVowel === '' ? baseRomaji[0] : withoutVowel;
  }
  return onset + getMatchingItem(hiraString[1], tiny_hira_vowels, tiny_romaji_vowels);
}
/**
 * Returns the romaji for a hiragana string of one or two characters.
 *
 * Resolution order:
 *   1. an exact entry in the hiraganaCombinations / hebonTransliteration tables,
 *   2. for two-character strings ending in a tiny vowel, romajiForVowelPair,
 *   3. for two-character strings containing a tiny tsu, romajiForTsuPair,
 *   4. otherwise the input itself.
 *
 * The table is consulted first so that explicit entries such as きぇ ("kye") or しぇ ("she")
 * are actually used; checking the tiny-vowel rule first made those entries unreachable.
 *
 * @param {string} hiraString - Hiragana text, normally one match of the kana pattern.
 * @returns {string} The romaji reading, or `hiraString` unchanged when none is known.
 */
function determineRomaji(hiraString) {
  const tablePosition = hiraganaCombinations.indexOf(hiraString);
  if (tablePosition !== -1) {
    return hebonTransliteration[tablePosition];
  }
  if (hiraString.length === 2) {
    if (tiny_hira_vowels.includes(hiraString[1])) {
      return romajiForVowelPair(hiraString);
    }
    if (hiraString.includes("っ")) {
      return romajiForTsuPair(hiraString);
    }
  }
  return hiraString;
}
/**
 * Wraps every match of `regex_pattern` in `text` with ruby markup carrying its romaji:
 * `<ruby>MATCH<rt>ROMAJI</rt></ruby>`.
 *
 * The romaji comes from converting the match to hiragana (kata2hira) and looking it up
 * (determineRomaji). Text between matches is copied through unchanged.
 *
 * Caveat: the wrapped string is the LAST element of the match array (the last capture group,
 * or the whole match when the pattern has no groups), but it is positioned at the start of
 * the whole match. Patterns with capture groups are therefore not placed correctly.
 *
 * @param {string} text - Text to annotate.
 * @param {RegExp} regex_pattern - Pattern selecting the spans to wrap.
 * @returns {string} `text` with ruby markup inserted around each match.
 */
function wrapAllMatchingChars(text, regex_pattern) {
  const pieces = [];
  let cursor = 0;
  for (const found of getMatches(text, regex_pattern)) {
    const target = found[found.length - 1];
    const start = found.index;
    const span = target.length;
    const reading = determineRomaji(kata2hira(target));
    // A match positioned outside the text is left alone.
    if (start < 0 || start >= text.length) {
      continue;
    }
    pieces.push(text.substring(cursor, start));
    pieces.push(`<ruby>${target}<rt>${reading}</rt></ruby>`);
    cursor = start + span;
  }
  pieces.push(text.substring(cursor));
  return pieces.join('');
}
/**
 * Replaces a text node with an annotated copy in which every pattern match is wrapped in
 * ruby markup (see wrapAllMatchingChars).
 *
 * Nothing happens when `node` is not a text node, has no parent, sits directly inside a
 * <ruby> element (so existing annotations are not nested), or contains no match.
 *
 * The node's text is HTML-escaped before the markup is built. The result is parsed with
 * innerHTML, so without escaping any "<", ">" or "&" that the page displays as plain text
 * would be interpreted as live markup.
 * Consequence: the patterns are applied to the escaped text, so a pattern that matches
 * "&", ";", "<" or ">" would see entity sequences instead of the raw characters.
 *
 * @param {Node} node - Candidate DOM node.
 * @param {RegExp[]} regex_patterns - Patterns applied one after another to the node's text.
 * @returns {void}
 */
function addTagsToNodeText(node, regex_patterns) {
  if (node.nodeType !== Node.TEXT_NODE) {
    return;
  }
  const parent = node.parentNode;
  if (!parent || parent.nodeName.toLowerCase() === 'ruby') {
    return;
  }
  const escapedText = node.nodeValue
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
  let markup = escapedText;
  regex_patterns.forEach((pattern) => {
    markup = wrapAllMatchingChars(markup, pattern);
  });
  // No match: keep the original node instead of swapping in an identical copy.
  if (markup === escapedText) {
    return;
  }
  const tempSpan = document.createElement('span');
  tempSpan.innerHTML = markup;
  // insertBefore moves each parsed child out of tempSpan, so firstChild advances by itself.
  while (tempSpan.firstChild) {
    parent.insertBefore(tempSpan.firstChild, node);
  }
  parent.removeChild(node);
}
/**
 * Walks the subtree rooted at `node` depth-first and annotates every text node in it.
 *
 * Children are visited before the node itself. The child list is copied before iterating
 * because `childNodes` is live and addTagsToNodeText replaces text nodes while the walk is
 * running: iterating the live list by index skips a sibling whenever a child is removed
 * without a replacement, and re-visits the freshly inserted nodes.
 *
 * @param {Node} node - Root of the subtree to process.
 * @returns {void}
 */
function checkChildren(node) {
  const childSnapshot = Array.from(node.childNodes);
  for (const childNode of childSnapshot) {
    checkChildren(childNode);
  }
  addTagsToNodeText(node, [unifiedKanaPattern]);
}
// Entry point: starts the recursive traversal of all DOM nodes under <body>.
// If the document is still being parsed (for example the script was loaded from <head>),
// document.body may be null or incomplete, so the walk is postponed until DOMContentLoaded.
// The typeof guard lets this file be loaded outside a browser without throwing.
if (typeof document !== 'undefined') {
  const annotateBody = () => {
    if (document.body) {
      checkChildren(document.body);
    }
  };
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', annotateBody, { once: true });
  } else {
    annotateBody();
  }
}