Skip to content

Commit

Permalink
feat(module): update dist to use new format
Browse files Browse the repository at this point in the history
  • Loading branch information
rhahao authored Apr 13, 2022
1 parent edfee58 commit 8e9af02
Show file tree
Hide file tree
Showing 8 changed files with 560 additions and 36 deletions.
14 changes: 0 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,6 @@ import { loadEPUB } from 'jw-epub-parser';
const epubJW = await loadEPUB('/path/to/file.epub');
```

If you want to use this module in a Node.js app, first do the following:

- Install jsdom and global-jsdom

```js
npm i jsdom global-jsdom
```

- Enable the DOM in Node.js using global-jsdom

```js
import 'global-jsdom/register';
```

### loadEPUB(epubData)

function: asynchronous
Expand Down
13 changes: 12 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
"rollup": "^2.69.0"
},
"dependencies": {
"jszip": "^3.9.1"
"jszip": "^3.9.1",
"path-browserify": "^1.0.1"
}
}
52 changes: 36 additions & 16 deletions rollup.config.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,41 @@
import commonjs from '@rollup/plugin-commonjs';
import { nodeResolve } from '@rollup/plugin-node-resolve';

// Single-entry build configuration, shown by the diff as the version being
// replaced: src/index.js is bundled once and written out in two formats.
const config = {
input: 'src/index.js',
output: [
{
// ES module build.
file: 'dist/index.js',
format: 'es',
},
{
// CommonJS build; `exports: 'named'` keeps the named exports as-is.
file: 'dist/index.cjs',
format: 'cjs',
exports: 'named',
},
],
plugins: [nodeResolve(), commonjs()],
// Listed packages are left as imports instead of being bundled.
external: ['jszip', 'jsdom'],
};
// Build configuration as an array of three independent bundles: one for the
// browser entry point and two (ES module + CommonJS) for the Node entry point.
const config = [
{
// Browser bundle: src/browser.js -> dist/index.js (ES module).
input: 'src/browser.js',
output: [
{
file: 'dist/index.js',
format: 'es',
},
],
plugins: [nodeResolve(), commonjs()],
// Left as imports instead of being bundled.
external: ['jszip', 'path-browserify'],
},
{
// Node bundle, ES module flavour: src/node.js -> dist/index.node.js.
input: 'src/node.js',
output: [
{
file: 'dist/index.node.js',
format: 'es',
},
],
plugins: [nodeResolve(), commonjs()],
// Left as imports instead of being bundled.
external: ['jszip', 'global-jsdom/register'],
},
{
// Node bundle, CommonJS flavour: src/node.js -> dist/index.node.cjs.
input: 'src/node.js',
output: [
{
file: 'dist/index.node.cjs',
format: 'cjs',
// Keep the named exports as-is in the CommonJS output.
exports: 'named',
},
],
plugins: [nodeResolve(), commonjs()],
external: ['jszip', 'global-jsdom/register'],
},
];

export default config;
244 changes: 244 additions & 0 deletions src/browser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
import JSZip from 'jszip';
import * as path from 'path-browserify';

// Parsed documents accepted by isValidMwbSched. Filled by initEpub and handed
// to parseEpub from loadEPUB. Module-level, so shared by every loadEPUB call.
let validMwbFiles = [];
// First four characters of the third `_`-separated segment of the EPUB file
// name; set in loadEPUB and copied into the result by parseEpub.
let mwbYear;

/**
 * Parses a Meeting Workbook EPUB supplied as a File-like object (anything
 * carrying a `name` property that JSZip can read).
 *
 * @param {object} epubInput - File/Blob-like input; its `name` must match the
 *   naming accepted by `isValidEpubNaming`.
 * @returns {Promise<object>} The object produced by `parseEpub`.
 * @throws {Error} When the input is not File-like (including `null` and
 *   `undefined`), when the file name is not accepted, when the archive cannot
 *   be read or breaks one of the `initEpub` limits, or when the archive holds
 *   no valid schedule document.
 */
const loadEPUB = async (epubInput) => {
  // Anything without a `name` (a path string, null, undefined, ...) is not a
  // browser File. Guard before the property access so a missing input yields
  // the same explanatory error instead of a TypeError.
  if (!epubInput || !epubInput.name) {
    throw new Error(
      'You are using the browser version of the module. Please switch to the node version if needed'
    );
  }

  if (!isValidEpubNaming(epubInput.name)) {
    throw new Error('The selected epub file has an incorrect naming.');
  }

  // The third `_`-separated segment of the file name starts with the year.
  mwbYear = epubInput.name.split('_')[2].substring(0, 4);

  // Discard documents collected by an earlier call so results never mix.
  validMwbFiles.length = 0;

  // Plain awaits: any failure while reading or scanning the archive rejects
  // the returned promise instead of leaving it pending forever.
  const zip = await JSZip.loadAsync(epubInput);
  await initEpub(zip);

  if (validMwbFiles.length === 0) {
    // Thrown as an Error (same message as before) so every failure of this
    // function can be handled the same way by callers.
    throw new Error(
      'The file you provided is not a valid Meeting Workbook EPUB file. Please make sure that the file is correct.'
    );
  }

  return parseEpub(validMwbFiles);
};

/**
 * Tells whether a file name follows the expected workbook pattern:
 * `mwb_`, one to three letters, `_`, a year 2020-2029, a month 01-12 and the
 * `.epub` extension (case-insensitive).
 *
 * @param {string} name - File name to check.
 * @returns {boolean} `true` when the whole name matches the pattern.
 */
const isValidEpubNaming = (name) =>
  /^mwb_[A-Z][A-Z]?[A-Z]?_202\d(0[1-9]|1[0-2])\.epub$/i.test(name);

/**
 * Walks every entry of an opened EPUB archive, enforcing safety limits, and
 * appends each parsed document accepted by `isValidMwbSched` to the
 * module-level `validMwbFiles` array.
 *
 * @param {object} zip - Archive object as returned by `JSZip.loadAsync`.
 * @returns {Promise<void>}
 * @throws {Error} When the archive has more than 300 entries, when an entry
 *   name resolves outside the extraction root, or when the accumulated
 *   uncompressed size exceeds 20 MB. `validMwbFiles` is emptied first.
 */
const initEpub = async (zip) => {
  const MAX_FILES = 300;
  const MAX_SIZE = 20000000; // 20 MB
  const targetDirectory = 'archive_tmp';

  // Never leave partially collected documents behind when bailing out.
  const abort = (message) => {
    validMwbFiles.length = 0;
    throw new Error(message);
  };

  let fileCount = 0;
  let totalSize = 0;

  for (const file of Object.keys(zip.files)) {
    fileCount++;
    if (fileCount > MAX_FILES) {
      abort('Reached max. number of files');
    }

    // Prevent ZipSlip path traversal (S6096). The separator is part of the
    // comparison so that a sibling such as "archive_tmp_evil/x" is rejected.
    const resolvedPath = path.join(targetDirectory, file);
    const staysInside =
      resolvedPath === targetDirectory ||
      resolvedPath.startsWith(`${targetDirectory}/`);
    if (!staysInside) {
      abort('Path traversal detected');
    }

    // zip.file() yields null for folder entries; they carry no content.
    const entry = zip.file(file);
    if (!entry) {
      continue;
    }

    // 'uint8array' is used instead of 'nodebuffer' because the latter needs
    // Node.js; the length is the uncompressed size in bytes either way.
    const bytes = await entry.async('uint8array');
    totalSize += bytes.length;
    if (totalSize > MAX_SIZE) {
      abort('Reached max. size');
    }

    if (isValidFilename(file)) {
      const content = await getHtmlRawString(zip, file);

      const parser = new window.DOMParser();
      const htmlDoc = parser.parseFromString(content, 'text/html');

      if (isValidMwbSched(htmlDoc)) {
        validMwbFiles.push(htmlDoc);
      }
    }
  }
};

/**
 * Tells whether an archive entry name designates a candidate schedule page:
 * an `.xhtml` file under `OEBPS/` whose name, up to its first dot, consists
 * only of decimal digits.
 *
 * @param {string} name - Entry name inside the archive.
 * @returns {boolean} `true` for names such as `OEBPS/202022001.xhtml`.
 */
const isValidFilename = (name) => {
  // Requiring the separator guarantees there is a segment after "OEBPS", so
  // names like "OEBPS.xhtml" are rejected instead of crashing below.
  if (!name.startsWith('OEBPS/') || !name.endsWith('.xhtml')) {
    return false;
  }

  const baseName = name.split('/')[1].split('.')[0];

  // Explicit digit test: the coercing isNaN() treats '' (and forms such as
  // '1e3' or '0x1A') as numbers, which let "OEBPS/.xhtml" through.
  return /^\d+$/.test(baseName);
};

/**
 * Reads one archive entry as text.
 *
 * @param {object} zip - Archive object exposing `file(name)`.
 * @param {string} filename - Name of the entry to read.
 * @returns {Promise<string>} The entry content requested as `'string'`.
 */
const getHtmlRawString = async (zip, filename) => {
  const entry = zip.file(filename);
  return entry.async('string');
};

/**
 * Tells whether a parsed document contains all three markers looked up here:
 * an element whose class contains `treasures`, one whose class contains
 * `ministry` and one whose class contains `christianLiving`.
 *
 * @param {Document} htmlDoc - Parsed document to inspect.
 * @returns {boolean} `true` only when every marker is found.
 */
const isValidMwbSched = (htmlDoc) => {
  const requiredSelectors = [
    `[class*=treasures]`,
    `[class*=ministry]`,
    `[class*=christianLiving]`,
  ];

  // All three lookups are performed before the results are combined.
  const found = requiredSelectors.map((selector) =>
    Boolean(htmlDoc.querySelector(selector))
  );

  return found.every(Boolean);
};

/**
 * Extracts the weekly schedule data from the parsed documents.
 *
 * For every document the text of all `<p>` elements found inside `.pGroup`
 * elements is flattened into one list, and the individual fields are then
 * picked from fixed positions of that list. Positions after the AYF parts
 * shift with the number of `<li>` elements under `#section3`.
 *
 * @param {Array<Document>} htmlDocs - Parsed documents, one per week.
 * @returns {{weeksCount: number, mwbYear: *, weeksData: Array<object>}}
 *   `mwbYear` is the module-level value; each `weeksData` item carries
 *   `weekDate`, `weeklyBibleReading`, the three song numbers, the TGW and AYF
 *   sources and the LC sources.
 * @throws {TypeError} When an expected element, list position or song number
 *   is missing from a document.
 */
const parseEpub = (htmlDocs) => {
  // First run of digits in a text, e.g. the number of a song.
  const firstNumber = (text) => text.match(/(\d+)/)[0];

  const weeksCount = htmlDocs.length;
  const weeksData = [];

  for (let a = 0; a < weeksCount; a++) {
    const htmlItem = htmlDocs[a];
    const weekItem = {};

    // get week date
    weekItem.weekDate = htmlItem.getElementsByTagName('h1').item(0).textContent;

    // get weekly Bible Reading
    weekItem.weeklyBibleReading = htmlItem
      .getElementsByTagName('h2')
      .item(0).textContent;

    // get number of assignments in Apply Yourself Parts
    const cnAYF = htmlItem
      .querySelector('#section3')
      .querySelectorAll('li').length;

    // get number of assignments in Living as Christians Parts:
    // six list items are read as two parts, anything else as one
    const lcLiLength = htmlItem
      .querySelector('#section4')
      .querySelectorAll('li').length;
    const cnLC = lcLiLength === 6 ? 2 : 1;

    // get elements with meeting schedule data: pGroup
    let src = '';
    htmlItem.querySelectorAll('.pGroup').forEach((pGroup) => {
      pGroup.querySelectorAll('p').forEach((p) => {
        src += `|${p.textContent}`;
      });
    });

    // Replace non-breaking spaces with regular ones. Strings are immutable,
    // so the result has to be assigned back.
    src = src.replace(/\u00A0/g, ' ');

    // src starts with '|', so toSplit[0] is empty and toSplit[n] is the text
    // of the n-th paragraph.
    const toSplit = src.split('|');

    // First song
    weekItem.songFirst = firstNumber(toSplit[1]);

    // 10min TGW Source
    weekItem.tgw10Talk = toSplit[3].trim();

    // Bible Reading Source
    weekItem.tgwBRead = toSplit[7].trim();

    // AYF Part Count
    weekItem.ayfCount = cnAYF;

    // AYF1 Source
    weekItem.ayfPart1 = toSplit[8].trim();

    if (cnAYF > 1) {
      // AYF2 Source
      weekItem.ayfPart2 = toSplit[9].trim();
    }

    if (cnAYF > 2) {
      // AYF3 Source
      weekItem.ayfPart3 = toSplit[10].trim();
    }

    if (cnAYF > 3) {
      // AYF4 Source
      weekItem.ayfPart4 = toSplit[11].trim();
    }

    // Middle song: comes right after the last AYF part
    let nextIndex = cnAYF > 3 ? 12 : cnAYF > 2 ? 11 : cnAYF > 1 ? 10 : 9;
    weekItem.songMiddle = firstNumber(toSplit[nextIndex]);

    // LC Part Count
    weekItem.lcCount = cnLC;

    // 1st LC part
    nextIndex++;
    weekItem.lcPart1 = toSplit[nextIndex].trim();

    if (cnLC === 2) {
      // 2nd LC part
      nextIndex++;
      weekItem.lcPart2 = toSplit[nextIndex].trim();
    }

    // CBS Source
    nextIndex++;
    weekItem.lcCBS = toSplit[nextIndex].trim();

    // Concluding Song: one paragraph is skipped between the CBS and the song
    nextIndex += 2;
    weekItem.songConclude = firstNumber(toSplit[nextIndex]);

    weeksData.push(weekItem);
  }

  return { weeksCount, mwbYear, weeksData };
};

export { loadEPUB };
2 changes: 0 additions & 2 deletions src/index.js → src/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -260,5 +260,3 @@ const parseEpub = (htmlDocs) => {

return obj;
};

export { loadEPUB };
Loading

0 comments on commit 8e9af02

Please sign in to comment.