diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..14c36be --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/jyut6ping3.simple.dict.yaml +/background_scripts/dictionary.json.txt +/process.py +/icons/icon.xcf diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d0e8a13 --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +BSD 2-Clause License + +Copyright (c) 2020, Cantonese Computational Linguistics Infrastructure Development Workgroup +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/**
 * Prefix tree (trie) keyed by Unicode code points. Each stored word maps to a
 * value string; `longestPrefix` splits that value on single spaces to obtain
 * one reading per character.
 */
class Trie {
  constructor() {
    /**
     * Every node of the trie is a Map object.
     * Keys are code points; values are child nodes (which are Maps as well).
     * A node that terminates a stored word carries a `__trie_val` property
     * holding the value string, i.e. the replacement for that word.
     */
    this.t = new Map();
  }

  /**
   * Add one entry to the trie.
   * @param {String} k Key string
   * @param {String} v Value string, i.e. the replacement for the key
   */
  addWord(k, v) {
    let node = this.t;
    for (const c of k) {
      const cp = c.codePointAt(0);
      let child = node.get(cp);
      if (typeof child === 'undefined') {
        child = new Map();
        node.set(cp, child);
      }
      node = child;
    }
    node.__trie_val = v;
  }

  /**
   * Find the longest stored key that is a prefix of `s`.
   * @param {String} s String to match against the stored keys
   * @return {Array|undefined} `[chars, readings]`, where `chars` holds the
   * matched characters (one string per code point) and `readings` is the
   * stored value split on single spaces. Returns `undefined` when no stored
   * key is a prefix of `s`.
   */
  longestPrefix(s) {
    const walked = [];
    let matchedLength = 0;
    let matchedValue;
    let node = this.t;
    for (const c of s) {
      node = node.get(c.codePointAt(0));
      if (typeof node === 'undefined') {
        break;
      }
      walked.push(c);
      if (typeof node.__trie_val !== 'undefined') {
        // Remember the deepest node seen so far that terminates a word.
        matchedLength = walked.length;
        matchedValue = node.__trie_val;
      }
    }
    if (matchedLength === 0) {
      return undefined;
    }
    return [walked.slice(0, matchedLength), matchedValue.split(' ')];
  }
}

/**
 * Convert a string into a list of characters paired with their readings.
 * @param {Trie} t The trie to look words up in
 * @param {String} s The string to convert
 * @return {Array} Two-dimensional array. Each element is `[char, reading]`;
 * `reading` is `null` when the character has no reading.
 */
function convert(t, s) {
  const res = [];
  let rest = s;
  while (rest.length) {
    const prefix = t.longestPrefix(rest);
    if (typeof prefix !== 'undefined') {
      const [cs, rs] = prefix;
      let consumed = 0;
      cs.forEach((c, i) => {
        // An entry may list fewer readings than characters (or contain an
        // empty one); report those as null so consumers never see undefined.
        res.push([c, rs[i] ? rs[i] : null]);
        consumed += c.length; // UTF-16 length, so astral characters count as 2
      });
      rest = rest.slice(consumed);
    } else {
      const k = rest[Symbol.iterator]().next().value; // Unicode-aware version of rest[0]
      res.push([k, null]);
      rest = rest.slice(k.length);
    }
  }
  return res;
}
/**
 * Check if a string contains Chinese characters.
 * @param {String} s The string to be checked
 * @return {Boolean} If the string contains at least one Chinese character,
 * returns true. Otherwise returns false.
 */
function hasHanChar(s) {
  const r = /[〆〇一-鿿㐀-䶿𠀀-𪛟𪜀-𫜿𫝀-𫠟𫠠-𬺯𬺰-𮯯𰀀-𱍏]/u;
  return r.test(s);
}

/**
 * Determine whether an HTML element should be handled by inject-jyutping
 * by checking its lang tag.
 *
 * Language tags are compared case-insensitively and only by their primary
 * subtag, so `JA`, `ja-JP` and `ja_JP` are all rejected, while an unrelated
 * tag that merely starts with the same letters (e.g. `kok`) is accepted.
 * @param {String} lang The lang tag of an HTML element
 * @return {Boolean} If the lang tag is reasonable to be handled, returns
 * true. Otherwise returns false. An empty or non-string tag carries no
 * language information and is treated as reasonable.
 */
function isTargetLang(lang) {
  if (typeof lang !== 'string') {
    return true;
  }
  const excludedLanguages = ['ja', 'ko', 'vi'];
  const [primarySubtag] = lang.trim().toLowerCase().split(/[-_]/, 1);
  return !excludedLanguages.includes(primarySubtag);
}
/**
 * Create a ruby element with the character and the pronunciation.
 * The pronunciation is wrapped in `rp` parentheses and placed in an `rt`
 * element tagged with the `yue-Latn` language.
 * @param {String} ch The character in a ruby element
 * @param {String} pronunciation The pronunciation in a ruby element
 * @return {Element} The ruby element
 */
function makeRuby(ch, pronunciation) {
  const ruby = document.createElement('ruby');
  ruby.classList.add('inject-jyutping');
  // Plain text nodes are used throughout instead of innerText.
  ruby.appendChild(document.createTextNode(ch));

  const rpLeft = document.createElement('rp');
  rpLeft.appendChild(document.createTextNode('('));
  ruby.appendChild(rpLeft);

  const rt = document.createElement('rt');
  rt.lang = 'yue-Latn';
  rt.appendChild(document.createTextNode(pronunciation));
  ruby.appendChild(rt);

  const rpRight = document.createElement('rp');
  rpRight.appendChild(document.createTextNode(')'));
  ruby.appendChild(rpRight);

  return ruby;
}

// Elements whose content must never be annotated.
const IGNORED_TAG_NAMES = new Set(['RUBY', 'OPTION', 'NOSCRIPT', 'SCRIPT', 'STYLE']);

/**
 * Walk the subtree below `currentNode` and replace every text node that
 * contains Chinese characters with text and ruby elements carrying the
 * pronunciation.
 * @param {Node} currentNode The node whose children are processed
 * @param {Boolean} langMatched Whether the inherited language allows
 * conversion; overridden when `currentNode` has its own non-empty lang
 */
async function recursiveConvert(currentNode, langMatched) {
  if (IGNORED_TAG_NAMES.has(currentNode.tagName)) {
    return;
  }

  const matched = currentNode.lang && currentNode.lang.length
    ? isTargetLang(currentNode.lang)
    : langMatched;

  // Replacements are collected first and applied after the loop, so the
  // child list is not modified while it is being walked.
  const replacements = [];

  // Snapshot the children: the list is live and the page may change it
  // while a conversion is awaited.
  for (const node of Array.from(currentNode.childNodes)) {
    if (node.nodeType !== Node.TEXT_NODE) {
      await recursiveConvert(node, matched);
      continue;
    }

    if (!matched) {
      // Skip only this text node. Later siblings may be elements with their
      // own lang attribute and still have to be visited.
      continue;
    }

    const s = node.nodeValue;
    if (!hasHanChar(s)) {
      continue;
    }

    const nodesFragment = document.createDocumentFragment();
    for (const [k, v] of await convert__(s)) {
      // A missing reading (null or undefined) leaves the character as text.
      nodesFragment.appendChild(v == null ? document.createTextNode(k) : makeRuby(k, v));
    }
    replacements.push([nodesFragment, node]);
  }

  for (const [nodesFragment, node] of replacements) {
    // The node may have been detached while conversions were awaited.
    if (node.parentNode === currentNode) {
      currentNode.replaceChild(nodesFragment, node);
    }
  }
}

/**
 * Convert the whole document, starting at `document.body`. The initial
 * language is taken from the body's lang, falling back to the lang of the
 * root element. Does nothing when the document has no body.
 */
async function convert_() {
  const body = document.body;
  if (!body) {
    return;
  }
  const root = document.documentElement;
  await recursiveConvert(body, isTargetLang(body.lang || root.lang));
}
convert__(s) { + return await browser.runtime.sendMessage(s); +} + +(async () => await convert_())(); + +browser.runtime.onMessage.addListener(message => { + if (message.type === 'init') { + convert_(); + } +}); diff --git a/icons/96.png b/icons/96.png new file mode 100644 index 0000000..d1135c5 Binary files /dev/null and b/icons/96.png differ diff --git a/manifest.json b/manifest.json new file mode 100644 index 0000000..41d1f3a --- /dev/null +++ b/manifest.json @@ -0,0 +1,24 @@ +{ + "manifest_version": 2, + "name": "__MSG_extensionName__", + "version": "0.0.1rc1", + "description": "__MSG_extensionDescription__", + "icons": { + "96": "icons/96.png" + }, + "content_scripts": [ + { + "matches": ["<all_urls>"], + "js": ["content_scripts/index.js"], + "css": ["content_scripts/index.css"], + "all_frames": true, + "run_at": "document_end" + } + ], + "background": { + "scripts": ["background_scripts/index.js"], + "persistent": true + }, + "permissions": ["contextMenus"], + "default_locale": "en" +}