Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
ayaka14732 committed Sep 4, 2020
0 parents commit 441465d
Show file tree
Hide file tree
Showing 11 changed files with 317 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/jyut6ping3.simple.dict.yaml
/background_scripts/dictionary.json.txt
/process.py
/icons/icon.xcf
25 changes: 25 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
BSD 2-Clause License

Copyright (c) 2020, Cantonese Computational Linguistics Infrastructure Development Workgroup
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11 changes: 11 additions & 0 deletions _locales/en/messages.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"extensionName": {
"message": "Inject Jyutping"
},
"extensionDescription": {
"message": "Add Cantonese pronunciation (Jyutping) on Chinese characters."
},
"contextMenuItemDoInjectJyutping": {
"message": "Inject Jyutping"
}
}
11 changes: 11 additions & 0 deletions _locales/ja/messages.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"extensionName": {
"message": "粵拼を注入"
},
"extensionDescription": {
"message": "漢字に広東語の発音(粵拼)を付ける。"
},
"contextMenuItemDoInjectJyutping": {
"message": "粵拼を注入"
}
}
11 changes: 11 additions & 0 deletions _locales/zh_CN/messages.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"extensionName": {
"message": "注入粤拼"
},
"extensionDescription": {
"message": "为汉字标注粤语发音(粤拼)。"
},
"contextMenuItemDoInjectJyutping": {
"message": "注入粤拼"
}
}
11 changes: 11 additions & 0 deletions _locales/zh_TW/messages.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"extensionName": {
"message": "注入粵拼"
},
"extensionDescription": {
"message": "為漢字標註粵語發音(粵拼)。"
},
"contextMenuItemDoInjectJyutping": {
"message": "注入粵拼"
}
}
102 changes: 102 additions & 0 deletions background_scripts/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
class Trie {
  constructor() {
    /**
     * Root node of the trie. Every node is a Map object.
     * Keys are code points; values are child nodes (which are Maps too).
     * A node that carries a `__trie_val` property marks the end of a word;
     * the property holds the value string, i.e. the replacement for the word.
     */
    this.t = new Map();
  }

  /**
   * Add one entry to the trie.
   * @param {String} k Key string
   * @param {String} v Value string, i.e. the replacement for the key
   */
  addWord(k, v) {
    let node = this.t;
    for (const ch of k) {
      const codePoint = ch.codePointAt(0);
      let child = node.get(codePoint);
      if (child === undefined) {
        child = new Map();
        node.set(codePoint, child);
      }
      node = child;
    }
    node.__trie_val = v;
  }

  /**
   * Find the longest word stored in the trie that is a prefix of `s`.
   * @param {String} s The string to be matched from its beginning
   * @return {Array|undefined} A pair `[chars, readings]`, where `chars` is the
   * matched prefix split into characters and `readings` is the stored value
   * split on single spaces. Returns undefined when no stored word is a prefix.
   */
  longestPrefix(s) {
    const matched = []; // characters belonging to the longest word seen so far
    let pending = []; // characters walked since the last complete word
    let value;
    let node = this.t;

    for (const ch of s) {
      const child = node.get(ch.codePointAt(0));
      if (child === undefined) {
        break;
      }
      node = child;
      pending.push(ch);
      if (node.__trie_val !== undefined) {
        // A longer complete word was reached: commit the pending characters.
        value = node.__trie_val;
        matched.push(...pending);
        pending = [];
      }
    }

    return matched.length > 0 ? [matched, value.split(' ')] : undefined;
  }
}

/**
 * Convert a string into a list of its characters paired with their readings.
 *
 * The string is consumed from left to right. At each position the longest
 * dictionary word starting there is looked up in the trie; when none matches,
 * a single character is emitted without a reading.
 *
 * @param {Trie} t The trie to look words up in
 * @param {String} s The string to be converted
 * @return {Array} Two-dimensional array. Each element is a pair
 * `[character, reading]`; `reading` is null when no reading is available.
 */
function convert(t, s) {
  const res = [];
  while (s.length) {
    const prefix = t.longestPrefix(s);
    if (typeof prefix !== 'undefined') {
      const [cs, rs] = prefix;
      let consumed = 0; // number of UTF-16 code units covered by cs
      for (let i = 0; i < cs.length; i++) {
        // An entry may list fewer readings than characters. Report such
        // characters with null, the same marker used for unmatched characters,
        // rather than leaking undefined to the caller.
        const reading = rs[i];
        res.push([cs[i], reading === undefined ? null : reading]);
        consumed += cs[i].length;
      }
      s = s.slice(consumed);
    } else {
      const k = s[Symbol.iterator]().next().value; // Unicode-aware version of s[0]
      res.push([k, null]);
      s = s.slice(k.length);
    }
  }
  return res;
}

// The trie used to answer conversion requests. It starts out empty and is
// filled by loadDict() once the dictionary has been fetched.
const t = new Trie();

/**
 * Fetch the dictionary file shipped with the extension, parse it as JSON and
 * add every [key, value] entry to the trie. Failures are logged to the console.
 */
(function loadDict() {
  const dictionaryUrl = browser.runtime.getURL('background_scripts/dictionary.json.txt');

  const fillTrie = (entries) => {
    for (const [word, reading] of entries) {
      t.addWord(word, reading);
    }
  };

  fetch(dictionaryUrl)
    .then((response) => response.json())
    .then(fillTrie)
    .catch((err) => console.error(err));
})();

// Answer conversion requests: the message payload is the string to convert,
// and the reply is the list of [character, reading] pairs produced by convert().
browser.runtime.onMessage.addListener((data, sender, sendResponse) => {
  sendResponse(convert(t, data));
});

// Register the context menu entry shown on pages.
browser.contextMenus.create({
  contexts: ["page"],
  id: "do-inject-jyutping",
  title: browser.i18n.getMessage("contextMenuItemDoInjectJyutping"),
});

// When that entry is clicked, tell the tab it was clicked in to start injecting.
browser.contextMenus.onClicked.addListener(({ menuItemId }, tab) => {
  if (menuItemId !== "do-inject-jyutping") {
    return;
  }
  browser.tabs.sendMessage(tab.id, { type: 'init' });
});
7 changes: 7 additions & 0 deletions content_scripts/index.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
 * Appearance of the <rt> annotation that is a direct child of a
 * ruby.inject-jyutping element: a smaller font, a little horizontal
 * spacing, and font-variant / text-transform reset to their initial values
 * (presumably so that page styles such as small-caps or uppercase do not
 * alter the annotation text).
 */
ruby.inject-jyutping > rt {
font-size: 0.74em;
font-variant: initial;
margin-left: 0.1em;
margin-right: 0.1em;
text-transform: initial;
}
111 changes: 111 additions & 0 deletions content_scripts/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/**
 * Tell whether a string contains any Chinese character.
 * @param {String} s The string to inspect
 * @return {Boolean} true when at least one character of the string falls in
 * the character class below, false otherwise.
 */
function hasHanChar(s) {
  const hanPattern = /[〆〇一-鿿㐀-䶿𠀀-𪛟𪜀-𫜿𫝀-𫠟𫠠-𬺯𬺰-𮯯𰀀-𱍏]/u;
  return s.search(hanPattern) !== -1;
}

/**
 * Determine whether an HTML element should be handled by inject-jyutping
 * by checking its lang tag.
 *
 * Tags beginning with `ja`, `ko` or `vi` are rejected. Language tags are
 * case-insensitive, so the comparison ignores case (`JA`, `Ko-KR`, ... are
 * rejected as well).
 *
 * @param {String} lang The lang tag of an HTML element
 * @return {Boolean} If the lang tag is reasonable to be handled, returns
 * true. Otherwise returns false.
 */
function isTargetLang(lang) {
  const normalized = lang.toLowerCase();
  return !['ja', 'ko', 'vi'].some((prefix) => normalized.startsWith(prefix));
}

/**
 * Build a ruby element showing a character together with its pronunciation.
 * The result has the shape
 * `<ruby class="inject-jyutping">ch<rp>(</rp><rt lang="yue-Latn">pronunciation</rt><rp>)</rp></ruby>`.
 * @param {String} ch The character in a ruby element
 * @param {String} pronunciation The pronunciation in a ruby element
 * @return {Element} The ruby element
 */
function makeRuby(ch, pronunciation) {
  // Fallback parenthesis wrapped in an <rp> element.
  const makeRp = (text) => {
    const rp = document.createElement('rp');
    rp.appendChild(document.createTextNode(text));
    return rp;
  };

  const rt = document.createElement('rt');
  rt.lang = 'yue-Latn';
  rt.innerText = pronunciation;

  const ruby = document.createElement('ruby');
  ruby.classList.add('inject-jyutping');
  // The base text is set before any child is appended.
  ruby.innerText = ch;
  for (const child of [makeRp('('), rt, makeRp(')')]) {
    ruby.appendChild(child);
  }
  return ruby;
}

/**
 * Walk the subtree rooted at a node and replace every text node containing
 * Chinese characters with ruby-annotated content.
 *
 * Subtrees rooted at RUBY, OPTION, NOSCRIPT, SCRIPT and STYLE elements are
 * left untouched. A non-empty `lang` on a node overrides the language
 * decision inherited from its ancestors.
 *
 * @param {Node} currentNode The node whose children are processed
 * @param {Boolean} langMatched Whether the inherited language is one that
 * should be annotated
 * @return {Promise} Resolves once the whole subtree has been processed
 */
async function recursiveConvert(currentNode, langMatched) {
  // ignore certain HTML elements
  const ignoredTags = ['RUBY', 'OPTION', 'NOSCRIPT', 'SCRIPT', 'STYLE'];
  if (ignoredTags.includes(currentNode.tagName)) {
    return;
  }

  if (currentNode.lang && currentNode.lang.length) {
    langMatched = isTargetLang(currentNode.lang);
  }

  // Replacements are collected first and applied after the loop, so the
  // child list is not modified while it is being iterated.
  const ret = [];

  for (const node of currentNode.childNodes) {
    if (node.nodeType === Node.TEXT_NODE) {
      if (!langMatched) {
        // Skip only this text node. Sibling elements may declare their own
        // lang and must still be visited.
        continue;
      }

      const s = node.nodeValue;

      if (hasHanChar(s)) {
        const nodesFragment = document.createDocumentFragment();
        for (const [k, v] of await convert__(s)) {
          if (v === null) {
            nodesFragment.appendChild(document.createTextNode(k));
          } else {
            nodesFragment.appendChild(makeRuby(k, v));
          }
        }
        ret.push([nodesFragment, node]);
      }
    } else {
      await recursiveConvert(node, langMatched);
    }
  }

  for (const [nodesFragment, node] of ret) {
    // The page may have moved or removed the node while conversions were
    // awaited; replaceChild would throw for a node that is no longer a child.
    if (node.parentNode === currentNode) {
      currentNode.replaceChild(nodesFragment, node);
    }
  }
}

/**
 * Annotate the whole document, starting from its body.
 *
 * The initial language is taken from the body's `lang`, falling back to the
 * `lang` of the root element. Documents without a body are left untouched.
 *
 * @return {Promise} Resolves once the document has been processed
 */
async function convert_() {
  const { body, documentElement: root } = document;
  if (!body) {
    // document.body is null for documents that are not HTML (e.g. SVG or
    // XML); there is nothing to annotate there.
    return;
  }
  await recursiveConvert(body, isTargetLang(body.lang || root.lang));
}

// ================

/**
 * Ask the background script to convert a string.
 * @param {String} s The string to be converted
 * @return {Promise} Resolves with whatever the background script replies
 */
async function convert__(s) {
  const reply = await browser.runtime.sendMessage(s);
  return reply;
}

// Annotate the page once when this content script runs.
(async () => {
  await convert_();
})();

// Annotate again whenever an 'init' message is received.
browser.runtime.onMessage.addListener(({ type }) => {
  if (type !== 'init') {
    return;
  }
  convert_();
});
Binary file added icons/96.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
24 changes: 24 additions & 0 deletions manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"manifest_version": 2,
"name": "__MSG_extensionName__",
"version": "0.0.1rc1",
"description": "__MSG_extensionDescription__",
"icons": {
"96": "icons/96.png"
},
"content_scripts": [
{
"matches": ["<all_urls>"],
"js": ["content_scripts/index.js"],
"css": ["content_scripts/index.css"],
"all_frames": true,
"run_at": "document_end"
}
],
"background": {
"scripts": ["background_scripts/index.js"],
"persistent": true
},
"permissions": ["contextMenus"],
"default_locale": "en"
}

0 comments on commit 441465d

Please sign in to comment.