-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 441465d
Showing
11 changed files
with
317 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
/jyut6ping3.simple.dict.yaml | ||
/background_scripts/dictionary.json.txt | ||
/process.py | ||
/icons/icon.xcf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
BSD 2-Clause License | ||
|
||
Copyright (c) 2020, Cantonese Computational Linguistics Infrastructure Development Workgroup | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are met: | ||
|
||
1. Redistributions of source code must retain the above copyright notice, this | ||
list of conditions and the following disclaimer. | ||
|
||
2. Redistributions in binary form must reproduce the above copyright notice, | ||
this list of conditions and the following disclaimer in the documentation | ||
and/or other materials provided with the distribution. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"extensionName": { | ||
"message": "Inject Jyutping" | ||
}, | ||
"extensionDescription": { | ||
"message": "Add Cantonese pronunciation (Jyutping) on Chinese characters." | ||
}, | ||
"contextMenuItemDoInjectJyutping": { | ||
"message": "Inject Jyutping" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"extensionName": { | ||
"message": "粵拼を注入" | ||
}, | ||
"extensionDescription": { | ||
"message": "漢字に広東語の発音(粵拼)を付ける。" | ||
}, | ||
"contextMenuItemDoInjectJyutping": { | ||
"message": "粵拼を注入" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"extensionName": { | ||
"message": "注入粤拼" | ||
}, | ||
"extensionDescription": { | ||
"message": "为汉字标注粤语发音(粤拼)。" | ||
}, | ||
"contextMenuItemDoInjectJyutping": { | ||
"message": "注入粤拼" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"extensionName": { | ||
"message": "注入粵拼" | ||
}, | ||
"extensionDescription": { | ||
"message": "為漢字標註粵語發音(粵拼)。" | ||
}, | ||
"contextMenuItemDoInjectJyutping": { | ||
"message": "注入粵拼" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
/**
 * Prefix tree keyed by Unicode code points, mapping key strings to value strings.
 */
class Trie {
    constructor() {
        /**
         * Root node of the trie. Every node is a Map object whose keys are
         * code points and whose values are child nodes (also Maps).
         * When a node has a `__trie_val` property, that property is the value
         * string, i.e. the replacement for the key that ends at this node.
         */
        this.t = new Map();
    }

    /**
     * Add one entry to the trie.
     * @param {String} k Key string
     * @param {String} v Value string, i.e. the replacement for the key
     */
    addWord(k, v) {
        let node = this.t;
        for (const ch of k) {
            const codePoint = ch.codePointAt(0);
            let child = node.get(codePoint);
            if (child === undefined) {
                child = new Map();
                node.set(codePoint, child);
            }
            node = child;
        }
        node.__trie_val = v;
    }

    /**
     * Find the longest key stored in the trie that is a prefix of `s`.
     * @param {String} s The string to match against
     * @return {Array|undefined} A pair `[chars, parts]`, where `chars` is the
     * matched prefix split into code-point characters and `parts` is the stored
     * value split on single spaces. Returns undefined when no key matches.
     */
    longestPrefix(s) {
        const walked = [];       // every character followed down the trie so far
        let matchedCount = 0;    // how many of those belong to the longest complete key
        let matchedValue;
        let node = this.t;
        for (const ch of s) {
            node = node.get(ch.codePointAt(0));
            if (node === undefined) {
                break;
            }
            walked.push(ch);
            if (node.__trie_val !== undefined) {
                matchedValue = node.__trie_val;
                matchedCount = walked.length;
            }
        }
        if (matchedCount === 0) {
            return undefined;
        }
        return [walked.slice(0, matchedCount), matchedValue.split(' ')]; // chars, romanization of each char
    }
}
|
||
/**
 * Convert a string into a list of its characters paired with their readings.
 *
 * The string is consumed left to right. At each position the longest key known
 * to the trie is taken; when nothing matches, a single code point is emitted
 * without a reading.
 *
 * @param {Trie} t The trie to look words up in
 * @param {String} s The string to convert
 * @return {Array} Two-dimensional array. Each element is `[char, reading]`,
 * where `reading` is null when the character has no reading.
 */
function convert(t, s) {
    const res = [];
    let rest = s;
    while (rest.length) {
        const prefix = t.longestPrefix(rest);
        if (typeof prefix !== 'undefined') {
            const [cs, rs] = prefix;
            let consumed = 0; // UTF-16 length of the matched prefix
            cs.forEach((c, i) => {
                // A dictionary value may hold fewer (or empty) syllables than the
                // key has characters. Report those characters as having no reading
                // instead of leaking `undefined` / '' to the caller.
                res.push([c, rs[i] || null]);
                consumed += c.length;
            });
            rest = rest.slice(consumed);
        } else {
            const k = rest[Symbol.iterator]().next().value; // Unicode-aware version of rest[0]
            res.push([k, null]);
            rest = rest.slice(k.length);
        }
    }
    return res;
}
|
||
// The single trie shared by all conversion requests.
const t = new Trie();

/**
 * Promise that settles once the bundled dictionary has been read into `t`.
 * It never rejects: a load failure is logged and conversion then runs against
 * whatever the trie contains (possibly nothing).
 */
const dictReady = (async function loadDict() {
    try {
        const response = await fetch(browser.runtime.getURL('background_scripts/dictionary.json.txt'));
        const entries = await response.json();
        for (const [k, v] of entries) {
            t.addWord(k, v);
        }
    } catch (err) {
        console.error(err);
    }
})();

// Answer conversion requests from content scripts. Content scripts start at
// document_end and may ask before the dictionary is loaded, so wait for the
// load to finish; otherwise early pages would get no readings at all.
// Returning a Promise from the listener sends its resolved value as the response.
browser.runtime.onMessage.addListener((data, sender, sendResponse) => {
    return dictReady.then(() => convert(t, data));
});
|
||
// Register the page context-menu entry that triggers injection manually.
browser.contextMenus.create({
    id: "do-inject-jyutping",
    title: browser.i18n.getMessage("contextMenuItemDoInjectJyutping"),
    contexts: ["page"]
});

// When the entry is clicked, ask the content script in that tab to run.
browser.contextMenus.onClicked.addListener(function(info, tab) {
    if (info.menuItemId === "do-inject-jyutping") {
        // sendMessage rejects when the tab has no listening content script;
        // log that instead of leaving an unhandled rejection.
        browser.tabs.sendMessage(tab.id, {type: 'init'})
            .catch(err => console.error(err));
    }
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/* Annotation text (<rt>) of the ruby elements created by the content script. */
ruby.inject-jyutping > rt {
    /* small horizontal gap between neighbouring annotations */
    margin-left: 0.1em;
    margin-right: 0.1em;
    font-size: 0.74em;
    /* reset these properties to their initial values for the annotation */
    font-variant: initial;
    text-transform: initial;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
/**
 * Check if a string contains Chinese characters.
 *
 * The ranges are written as escapes so the pattern survives any re-encoding
 * of this file. Covered: U+3006, U+3007, CJK Unified Ideographs
 * (U+4E00-9FFF), Extension A (U+3400-4DBF), Extension B (U+20000-2A6DF),
 * Extension C (U+2A700-2B73F), Extension D (U+2B740-2B81F),
 * Extension E (U+2B820-2CEAF), Extension F (U+2CEB0-2EBEF) and
 * Extension G (U+30000-3134F).
 * NOTE(review): the upper bounds of Extensions C-G follow the Unicode block
 * boundaries - confirm they match the ranges originally intended.
 *
 * @param {String} s The string to be checked
 * @return {Boolean} If the string contains at least one Chinese character,
 * returns true. Otherwise returns false.
 */
function hasHanChar(s) {
    const r = /[\u3006\u3007\u4E00-\u9FFF\u3400-\u4DBF\u{20000}-\u{2A6DF}\u{2A700}-\u{2B73F}\u{2B740}-\u{2B81F}\u{2B820}-\u{2CEAF}\u{2CEB0}-\u{2EBEF}\u{30000}-\u{3134F}]/u;
    return r.test(s);
}
|
||
/**
 * Determine whether an HTML element should be handled by inject-jyutping
 * by checking its lang tag.
 *
 * Content marked as Japanese, Korean or Vietnamese is left alone. Language
 * tags are case-insensitive, so the tag is lower-cased before comparison.
 * A missing or empty tag is treated as a target.
 *
 * @param {String} lang The lang tag of an HTML element
 * @return {Boolean} If the lang tag is reasonable to be handled, returns
 * true. Otherwise returns false.
 */
function isTargetLang(lang) {
    const tag = String(lang || '').toLowerCase();
    return !tag.startsWith('ja')
        && !tag.startsWith('ko')
        && !tag.startsWith('vi');
}
|
||
/**
 * Build the ruby element for one character: the character itself, followed by
 * its pronunciation in an `rt` element wrapped in `rp` fallback parentheses.
 * @param {String} ch The character in a ruby element
 * @param {String} pronunciation The pronunciation in a ruby element
 * @return {Element} The ruby element
 */
function makeRuby(ch, pronunciation) {
    // <rp> holding one fallback parenthesis
    const createParenthesis = (symbol) => {
        const rp = document.createElement('rp');
        rp.appendChild(document.createTextNode(symbol));
        return rp;
    };

    const ruby = document.createElement('ruby');
    ruby.classList.add('inject-jyutping');
    ruby.innerText = ch;

    const annotation = document.createElement('rt');
    annotation.lang = 'yue-Latn';
    annotation.innerText = pronunciation;

    for (const child of [createParenthesis('('), annotation, createParenthesis(')')]) {
        ruby.appendChild(child);
    }
    return ruby;
}
|
||
/**
 * Walk the subtree below a node and replace every text node that contains
 * Chinese characters with ruby-annotated content.
 *
 * @param {Node} currentNode The node whose children are processed
 * @param {Boolean} langMatched Whether the language inherited from the
 * ancestors is one that should be annotated. Overridden when `currentNode`
 * carries its own non-empty `lang`.
 * @return {Promise} Resolves when the subtree has been processed.
 */
async function recursiveConvert(currentNode, langMatched) {
    // Elements whose text must not be rewritten. TEXTAREA is included because
    // its child text is its default value, not rendered markup.
    const SKIPPED_TAGS = ['RUBY', 'OPTION', 'NOSCRIPT', 'SCRIPT', 'STYLE', 'TEXTAREA'];
    if (SKIPPED_TAGS.includes(currentNode.tagName)) {
        return;
    }

    let matched = langMatched;
    if (currentNode.lang && currentNode.lang.length) {
        matched = isTargetLang(currentNode.lang);
    }

    // [replacement fragment, original text node] pairs, applied after the walk
    const ret = [];

    // childNodes is a live list and this loop awaits, so iterate over a snapshot.
    for (const node of Array.from(currentNode.childNodes)) {
        if (node.nodeType === Node.TEXT_NODE) {
            if (!matched) {
                // Skip only this text node. Sibling elements may declare their
                // own lang and still need to be visited.
                continue;
            }

            const s = node.nodeValue;

            if (hasHanChar(s)) {
                const nodesFragment = document.createDocumentFragment();
                for (const [k, v] of await convert__(s)) {
                    if (v === null) {
                        nodesFragment.appendChild(document.createTextNode(k));
                    } else {
                        nodesFragment.appendChild(makeRuby(k, v));
                    }
                }
                ret.push([nodesFragment, node]);
            }
        } else {
            await recursiveConvert(node, matched);
        }
    }

    for (const [nodesFragment, node] of ret) {
        // The page (or a concurrent run) may have moved or removed the node
        // while we were awaiting; replaceChild would throw in that case.
        if (node.parentNode === currentNode) {
            currentNode.replaceChild(nodesFragment, node);
        }
    }
}
|
||
/**
 * Annotate the whole document, starting from its body.
 *
 * The starting language is taken from the body's `lang`, falling back to the
 * root element's `lang`. Documents without a body are skipped.
 *
 * @return {Promise} Resolves when the document has been processed.
 */
async function convert_() {
    const body = document.body;
    if (!body) {
        // document.body is null in documents that have no <body>
        // (this script is injected into every URL), so there is nothing to do.
        return;
    }
    const root = document.documentElement;
    const lang = body.lang || (root && root.lang) || '';
    await recursiveConvert(body, isTargetLang(lang));
}
|
||
// ================ | ||
|
||
/**
 * Send a string through the extension's runtime messaging channel and
 * return the response.
 * @param {String} s The string to send
 * @return {Promise} Resolves with whatever the receiver responds.
 */
async function convert__(s) {
    const response = await browser.runtime.sendMessage(s);
    return response;
}
|
||
// Annotate the page once as soon as this script runs. Failures are logged
// rather than left as unhandled promise rejections.
convert_().catch(err => console.error(err));

// Re-run the annotation when an 'init' message arrives.
browser.runtime.onMessage.addListener(message => {
    if (message && message.type === 'init') {
        convert_().catch(err => console.error(err));
    }
});
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
{ | ||
"manifest_version": 2, | ||
"name": "__MSG_extensionName__", | ||
"version": "0.0.1rc1", | ||
"description": "__MSG_extensionDescription__", | ||
"icons": { | ||
"96": "icons/96.png" | ||
}, | ||
"content_scripts": [ | ||
{ | ||
"matches": ["<all_urls>"], | ||
"js": ["content_scripts/index.js"], | ||
"css": ["content_scripts/index.css"], | ||
"all_frames": true, | ||
"run_at": "document_end" | ||
} | ||
], | ||
"background": { | ||
"scripts": ["background_scripts/index.js"], | ||
"persistent": true | ||
}, | ||
"permissions": ["contextMenus"], | ||
"default_locale": "en" | ||
} |