diff --git a/README.md b/README.md
index 88eec8e8..3200a887 100644
--- a/README.md
+++ b/README.md
@@ -20,20 +20,6 @@ import { loadEPUB } from 'jw-epub-parser';
 const epubJW = await loadEPUB('/path/to/file.epub');
 ```
 
-If you want to use this module in a node apps, first do the following:
-
-- Install jsdom and global-jsdom
-
-```js
-npm i jsdom global-jsdom
-```
-
-- Enables DOM in Node.js using global-jsdom
-
-```js
-import 'global-jsdom/register';
-```
-
 ### loadEPUB(epubData)
 
 function: asynchronous
diff --git a/package-lock.json b/package-lock.json
index 8cb6ce85..ef633982 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,7 +9,8 @@
       "version": "1.7.1",
       "license": "MIT",
       "dependencies": {
-        "jszip": "^3.9.1"
+        "jszip": "^3.9.1",
+        "path-browserify": "^1.0.1"
       },
       "devDependencies": {
         "@babel/preset-env": "^7.16.11",
@@ -7936,6 +7937,11 @@
       "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==",
       "dev": true
     },
+    "node_modules/path-browserify": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/path-browserify/-/path-browserify-1.0.1.tgz",
+      "integrity": "sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g=="
+    },
     "node_modules/path-exists": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@@ -15518,6 +15524,11 @@
       "integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==",
       "dev": true
     },
+    "path-browserify": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/path-browserify/-/path-browserify-1.0.1.tgz",
+      "integrity": "sha512-b7uo2UCUOYZcnF/3ID0lulOJi/bafxa1xPe7ZPsammBSpjSWQkjNxlt635YGS2MiR9GjvuXCtz2emr3jbsz98g=="
+    },
     "path-exists": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
diff --git a/package.json b/package.json
index ef36971b..f7a335c5 100644
--- a/package.json
+++ b/package.json
@@ -46,6 +46,7 @@
     "rollup": "^2.69.0"
   },
   "dependencies": {
-    "jszip": "^3.9.1"
+    "jszip": "^3.9.1",
+    "path-browserify": "^1.0.1"
   }
 }
diff --git a/rollup.config.js b/rollup.config.js
index da311a92..5ad788e7 100644
--- a/rollup.config.js
+++ b/rollup.config.js
@@ -1,21 +1,41 @@
 import commonjs from '@rollup/plugin-commonjs';
 import { nodeResolve } from '@rollup/plugin-node-resolve';
 
-const config = {
-  input: 'src/index.js',
-  output: [
-    {
-      file: 'dist/index.js',
-      format: 'es',
-    },
-    {
-      file: 'dist/index.cjs',
-      format: 'cjs',
-      exports: 'named',
-    },
-  ],
-  plugins: [nodeResolve(), commonjs()],
-  external: ['jszip', 'jsdom'],
-};
+const config = [
+  {
+    input: 'src/browser.js',
+    output: [
+      {
+        file: 'dist/index.js',
+        format: 'es',
+      },
+    ],
+    plugins: [nodeResolve(), commonjs()],
+    external: ['jszip', 'path-browserify'],
+  },
+  {
+    input: 'src/node.js',
+    output: [
+      {
+        file: 'dist/index.node.js',
+        format: 'es',
+      },
+    ],
+    plugins: [nodeResolve(), commonjs()],
+    external: ['jszip', 'global-jsdom/register'],
+  },
+  {
+    input: 'src/node.js',
+    output: [
+      {
+        file: 'dist/index.node.cjs',
+        format: 'cjs',
+        exports: 'named',
+      },
+    ],
+    plugins: [nodeResolve(), commonjs()],
+    external: ['jszip', 'global-jsdom/register'],
+  },
+];
 
 export default config;
diff --git a/src/browser.js b/src/browser.js
new file mode 100644
index 00000000..da88b13a
--- /dev/null
+++ b/src/browser.js
@@ -0,0 +1,249 @@
+import JSZip from 'jszip';
+import * as path from 'path-browserify';
+
+// Module-level parsing state; loadEPUB resets it at the start of every call.
+let validMwbFiles = [];
+let mwbYear;
+
+/**
+ * Parse a Meeting Workbook EPUB supplied as a blob-like object.
+ *
+ * @param {object} epubInput - blob-like object exposing a `name` property
+ * @returns {Promise<object>} `{ weeksCount, mwbYear, weeksData }`
+ * @throws {Error} on bad naming, non-blob input, or no valid schedule found
+ */
+const loadEPUB = async (epubInput) => {
+  // Drop documents left over from a previous call so results never mix.
+  validMwbFiles = [];
+
+  // The browser build only accepts a blob; paths need the node build.
+  if (!epubInput.name) {
+    throw new Error(
+      'You are using the browser version of the module. Please switch to the node version if needed'
+    );
+  }
+  if (!isValidEpubNaming(epubInput.name)) {
+    throw new Error('The selected epub file has an incorrect naming.');
+  }
+  mwbYear = epubInput.name.split('_')[2].substring(0, 4);
+
+  // Awaiting directly (instead of wrapping in `new Promise`) lets failures
+  // from JSZip or initEpub reject this call rather than leaving it pending.
+  const zip = await JSZip.loadAsync(epubInput);
+  await initEpub(zip);
+
+  if (validMwbFiles.length === 0) {
+    throw new Error(
+      'The file you provided is not a valid Meeting Workbook EPUB file. Please make sure that the file is correct.'
+    );
+  }
+
+  return parseEpub(validMwbFiles);
+};
+
+/** Check `name` against `mwb_<1-3 letters>_202<Y><MM>.epub` (case-insensitive). */
+const isValidEpubNaming = (name) => {
+  const regex = /^mwb_[A-Z][A-Z]?[A-Z]?_202\d(0[1-9]|1[0-2])\.epub$/i;
+  return regex.test(name);
+};
+
+/**
+ * Walk the archive and collect every valid schedule page in validMwbFiles.
+ *
+ * @param {object} zip - archive object resolved by JSZip.loadAsync
+ * @throws {Error} when a file-count, size or path-traversal limit is hit
+ */
+const initEpub = async (zip) => {
+  const MAX_FILES = 300;
+  const MAX_SIZE = 20000000; // 20 MB
+  const targetDirectory = 'archive_tmp';
+
+  let fileCount = 0;
+  let totalSize = 0;
+
+  try {
+    for (const [file, entry] of Object.entries(zip.files)) {
+      fileCount++;
+      if (fileCount > MAX_FILES) {
+        throw new Error('Reached max. number of files');
+      }
+
+      // Prevent ZipSlip path traversal (S6096). Compare against the directory
+      // plus separator so a sibling such as `archive_tmp2` is not accepted.
+      const resolvedPath = path.join(targetDirectory, file);
+      if (
+        resolvedPath !== targetDirectory &&
+        !resolvedPath.startsWith(targetDirectory + path.sep)
+      ) {
+        throw new Error('Path traversal detected');
+      }
+
+      // Folder entries carry no content to measure or parse.
+      if (entry.dir) {
+        continue;
+      }
+
+      // `nodebuffer` needs Node.js; `uint8array` works in browsers.
+      const contentSize = await entry.async('uint8array');
+      totalSize += contentSize.length;
+      if (totalSize > MAX_SIZE) {
+        throw new Error('Reached max. size');
+      }
+
+      if (isValidFilename(file)) {
+        const content = await getHtmlRawString(zip, file);
+
+        const parser = new window.DOMParser();
+        const htmlDoc = parser.parseFromString(content, 'text/html');
+
+        if (isValidMwbSched(htmlDoc)) {
+          validMwbFiles.push(htmlDoc);
+        }
+      }
+    }
+  } catch (err) {
+    // Never leave partially collected documents behind after a failure.
+    validMwbFiles.length = 0;
+    throw err;
+  }
+};
+
+/** Accept only `OEBPS/<digits>.xhtml` entries. */
+const isValidFilename = (name) => /^OEBPS\/\d+\.xhtml$/.test(name);
+
+/** Read an archive entry as a string. */
+const getHtmlRawString = async (zip, filename) => {
+  const content = await zip.file(filename).async('string');
+
+  return content;
+};
+
+/** A schedule page must contain all three meeting sections. */
+const isValidMwbSched = (htmlDoc) => {
+  const isValidTGW = htmlDoc.querySelector(`[class*=treasures]`) !== null;
+  const isValidAYF = htmlDoc.querySelector(`[class*=ministry]`) !== null;
+  const isValidLC = htmlDoc.querySelector(`[class*=christianLiving]`) !== null;
+
+  return isValidTGW && isValidAYF && isValidLC;
+};
+
+/**
+ * Extract the weekly schedule data from the collected schedule pages.
+ *
+ * @param {Document[]} htmlDocs - parsed schedule pages, one per week
+ * @returns {object} `{ weeksCount, mwbYear, weeksData }`
+ */
+const parseEpub = (htmlDocs) => {
+  const obj = {};
+  const weeksData = [];
+  const weeksCount = htmlDocs.length;
+
+  obj.weeksCount = weeksCount;
+  obj.mwbYear = mwbYear;
+
+  for (let a = 0; a < weeksCount; a++) {
+    const weekItem = {};
+
+    const htmlItem = htmlDocs[a];
+
+    // get week date
+    const wdHtml = htmlItem.getElementsByTagName('h1').item(0);
+    const weekDate = wdHtml.textContent;
+
+    weekItem.weekDate = weekDate;
+
+    // get weekly Bible Reading
+    const wbHtml = htmlItem.getElementsByTagName('h2').item(0);
+    weekItem.weeklyBibleReading = wbHtml.textContent;
+
+    let src = '';
+
+    // get number of assignments in Apply Yourself Parts
+    const cnAYF = htmlItem
+      .querySelector('#section3')
+      .querySelectorAll('li').length;
+
+    // get number of assignments in Living as Christians Parts
+    const lcLiLength = htmlItem
+      .querySelector('#section4')
+      .querySelectorAll('li').length;
+    const cnLC = lcLiLength === 6 ? 2 : 1;
+
+    // get elements with meeting schedule data: pGroup
+    const pGroupData = htmlItem.querySelectorAll('.pGroup');
+    pGroupData.forEach((pGroup) => {
+      const pgData = pGroup.querySelectorAll('p');
+      pgData.forEach((p) => {
+        src += '|' + p.textContent;
+      });
+    });
+
+    // String.replace returns a new string, so the result must be kept.
+    src = src.replace(/\u00A0/g, ' '); // remove non-breaking space
+    const toSplit = src.split('|');
+
+    // First song
+    weekItem.songFirst = toSplit[1].match(/(\d+)/)[0];
+
+    // 10min TGW Source
+    weekItem.tgw10Talk = toSplit[3].trim();
+
+    // Bible Reading Source
+    weekItem.tgwBRead = toSplit[7].trim();
+
+    // AYF Part Count
+    weekItem.ayfCount = cnAYF;
+
+    // AYF1 Source
+    weekItem.ayfPart1 = toSplit[8].trim();
+
+    if (cnAYF > 1) {
+      // AYF2 Source
+      weekItem.ayfPart2 = toSplit[9].trim();
+    }
+
+    if (cnAYF > 2) {
+      // AYF3 Source
+      weekItem.ayfPart3 = toSplit[10].trim();
+    }
+
+    if (cnAYF > 3) {
+      // AYF4 Source
+      weekItem.ayfPart4 = toSplit[11].trim();
+    }
+
+    // Middle song
+    let nextIndex = cnAYF > 3 ? 12 : cnAYF > 2 ? 11 : cnAYF > 1 ? 10 : 9;
+    weekItem.songMiddle = toSplit[nextIndex].match(/(\d+)/)[0];
+
+    // LC Part Count
+    weekItem.lcCount = cnLC;
+
+    // 1st LC part
+    nextIndex++;
+    weekItem.lcPart1 = toSplit[nextIndex].trim();
+
+    if (cnLC === 2) {
+      // 2nd LC part
+      nextIndex++;
+      weekItem.lcPart2 = toSplit[nextIndex].trim();
+    }
+
+    // CBS Source
+    nextIndex++;
+    weekItem.lcCBS = toSplit[nextIndex].trim();
+
+    // Concluding Song
+    nextIndex++;
+    nextIndex++;
+    weekItem.songConclude = toSplit[nextIndex].match(/(\d+)/)[0];
+
+    weeksData.push(weekItem);
+  }
+
+  obj.weeksData = weeksData;
+
+  return obj;
+};
+
+export { loadEPUB };
diff --git a/src/index.js b/src/common.js
similarity index 99%
rename from src/index.js
rename to src/common.js
index 4ea76930..7d97b50a 100644
--- a/src/index.js
+++ b/src/common.js
@@ -260,5 +260,3 @@ const parseEpub = (htmlDocs) => {
 
   return obj;
 };
-
-export { loadEPUB };
diff --git a/src/node.js b/src/node.js
new file mode 100644
index 00000000..2587e94b
--- /dev/null
+++ b/src/node.js
@@ -0,0 +1,251 @@
+import 'global-jsdom/register';
+import JSZip from 'jszip';
+import * as fs from 'fs';
+import * as path from 'path';
+
+// Module-level parsing state; loadEPUB resets it at the start of every call.
+let validMwbFiles = [];
+let mwbYear;
+
+/**
+ * Parse a Meeting Workbook EPUB supplied as a blob-like object or a file path.
+ *
+ * @param {object|string} epubInput - object exposing `name`, or a path string
+ * @returns {Promise<object>} `{ weeksCount, mwbYear, weeksData }`
+ * @throws {Error} on bad naming, unreadable file, or no valid schedule found
+ */
+const loadEPUB = async (epubInput) => {
+  // Drop documents left over from a previous call so results never mix.
+  validMwbFiles = [];
+
+  // check if we receive path or blob
+  const isBlob = Boolean(epubInput.name);
+  const fileName = isBlob ? epubInput.name : path.basename(epubInput);
+
+  if (!isValidEpubNaming(fileName)) {
+    throw new Error('The selected epub file has an incorrect naming.');
+  }
+  mwbYear = fileName.split('_')[2].substring(0, 4);
+
+  const data = isBlob ? epubInput : await fs.promises.readFile(epubInput);
+
+  // Awaiting directly (instead of wrapping in `new Promise`) lets failures
+  // from JSZip or initEpub reject this call rather than leaving it pending.
+  const zip = await JSZip.loadAsync(data);
+  await initEpub(zip);
+
+  if (validMwbFiles.length === 0) {
+    throw new Error(
+      'The file you provided is not a valid Meeting Workbook EPUB file. Please make sure that the file is correct.'
+    );
+  }
+
+  return parseEpub(validMwbFiles);
+};
+
+/** Check `name` against `mwb_<1-3 letters>_202<Y><MM>.epub` (case-insensitive). */
+const isValidEpubNaming = (name) => {
+  const regex = /^mwb_[A-Z][A-Z]?[A-Z]?_202\d(0[1-9]|1[0-2])\.epub$/i;
+  return regex.test(name);
+};
+
+/**
+ * Walk the archive and collect every valid schedule page in validMwbFiles.
+ *
+ * @param {object} zip - archive object resolved by JSZip.loadAsync
+ * @throws {Error} when a file-count, size or path-traversal limit is hit
+ */
+const initEpub = async (zip) => {
+  const MAX_FILES = 300;
+  const MAX_SIZE = 20000000; // 20 MB
+  const targetDirectory = 'archive_tmp';
+
+  let fileCount = 0;
+  let totalSize = 0;
+
+  try {
+    for (const [file, entry] of Object.entries(zip.files)) {
+      fileCount++;
+      if (fileCount > MAX_FILES) {
+        throw new Error('Reached max. number of files');
+      }
+
+      // Prevent ZipSlip path traversal (S6096). Compare against the directory
+      // plus separator so a sibling such as `archive_tmp2` is not accepted.
+      const resolvedPath = path.join(targetDirectory, file);
+      if (
+        resolvedPath !== targetDirectory &&
+        !resolvedPath.startsWith(targetDirectory + path.sep)
+      ) {
+        throw new Error('Path traversal detected');
+      }
+
+      // Folder entries carry no content to measure or parse.
+      if (entry.dir) {
+        continue;
+      }
+
+      // Decompress the entry to count its real size against the limit.
+      const contentSize = await entry.async('nodebuffer');
+      totalSize += contentSize.length;
+      if (totalSize > MAX_SIZE) {
+        throw new Error('Reached max. size');
+      }
+
+      if (isValidFilename(file)) {
+        const content = await getHtmlRawString(zip, file);
+
+        const parser = new window.DOMParser();
+        const htmlDoc = parser.parseFromString(content, 'text/html');
+
+        if (isValidMwbSched(htmlDoc)) {
+          validMwbFiles.push(htmlDoc);
+        }
+      }
+    }
+  } catch (err) {
+    // Never leave partially collected documents behind after a failure.
+    validMwbFiles.length = 0;
+    throw err;
+  }
+};
+
+/** Accept only `OEBPS/<digits>.xhtml` entries. */
+const isValidFilename = (name) => /^OEBPS\/\d+\.xhtml$/.test(name);
+
+/** Read an archive entry as a string. */
+const getHtmlRawString = async (zip, filename) => {
+  const content = await zip.file(filename).async('string');
+
+  return content;
+};
+
+/** A schedule page must contain all three meeting sections. */
+const isValidMwbSched = (htmlDoc) => {
+  const isValidTGW = htmlDoc.querySelector(`[class*=treasures]`) !== null;
+  const isValidAYF = htmlDoc.querySelector(`[class*=ministry]`) !== null;
+  const isValidLC = htmlDoc.querySelector(`[class*=christianLiving]`) !== null;
+
+  return isValidTGW && isValidAYF && isValidLC;
+};
+
+/**
+ * Extract the weekly schedule data from the collected schedule pages.
+ *
+ * @param {Document[]} htmlDocs - parsed schedule pages, one per week
+ * @returns {object} `{ weeksCount, mwbYear, weeksData }`
+ */
+const parseEpub = (htmlDocs) => {
+  const obj = {};
+  const weeksData = [];
+  const weeksCount = htmlDocs.length;
+
+  obj.weeksCount = weeksCount;
+  obj.mwbYear = mwbYear;
+
+  for (let a = 0; a < weeksCount; a++) {
+    const weekItem = {};
+
+    const htmlItem = htmlDocs[a];
+
+    // get week date
+    const wdHtml = htmlItem.getElementsByTagName('h1').item(0);
+    const weekDate = wdHtml.textContent;
+
+    weekItem.weekDate = weekDate;
+
+    // get weekly Bible Reading
+    const wbHtml = htmlItem.getElementsByTagName('h2').item(0);
+    weekItem.weeklyBibleReading = wbHtml.textContent;
+
+    let src = '';
+
+    // get number of assignments in Apply Yourself Parts
+    const cnAYF = htmlItem
+      .querySelector('#section3')
+      .querySelectorAll('li').length;
+
+    // get number of assignments in Living as Christians Parts
+    const lcLiLength = htmlItem
+      .querySelector('#section4')
+      .querySelectorAll('li').length;
+    const cnLC = lcLiLength === 6 ? 2 : 1;
+
+    // get elements with meeting schedule data: pGroup
+    const pGroupData = htmlItem.querySelectorAll('.pGroup');
+    pGroupData.forEach((pGroup) => {
+      const pgData = pGroup.querySelectorAll('p');
+      pgData.forEach((p) => {
+        src += '|' + p.textContent;
+      });
+    });
+
+    // String.replace returns a new string, so the result must be kept.
+    src = src.replace(/\u00A0/g, ' '); // remove non-breaking space
+    const toSplit = src.split('|');
+
+    // First song
+    weekItem.songFirst = toSplit[1].match(/(\d+)/)[0];
+
+    // 10min TGW Source
+    weekItem.tgw10Talk = toSplit[3].trim();
+
+    // Bible Reading Source
+    weekItem.tgwBRead = toSplit[7].trim();
+
+    // AYF Part Count
+    weekItem.ayfCount = cnAYF;
+
+    // AYF1 Source
+    weekItem.ayfPart1 = toSplit[8].trim();
+
+    if (cnAYF > 1) {
+      // AYF2 Source
+      weekItem.ayfPart2 = toSplit[9].trim();
+    }
+
+    if (cnAYF > 2) {
+      // AYF3 Source
+      weekItem.ayfPart3 = toSplit[10].trim();
+    }
+
+    if (cnAYF > 3) {
+      // AYF4 Source
+      weekItem.ayfPart4 = toSplit[11].trim();
+    }
+
+    // Middle song
+    let nextIndex = cnAYF > 3 ? 12 : cnAYF > 2 ? 11 : cnAYF > 1 ? 10 : 9;
+    weekItem.songMiddle = toSplit[nextIndex].match(/(\d+)/)[0];
+
+    // LC Part Count
+    weekItem.lcCount = cnLC;
+
+    // 1st LC part
+    nextIndex++;
+    weekItem.lcPart1 = toSplit[nextIndex].trim();
+
+    if (cnLC === 2) {
+      // 2nd LC part
+      nextIndex++;
+      weekItem.lcPart2 = toSplit[nextIndex].trim();
+    }
+
+    // CBS Source
+    nextIndex++;
+    weekItem.lcCBS = toSplit[nextIndex].trim();
+
+    // Concluding Song
+    nextIndex++;
+    nextIndex++;
+    weekItem.songConclude = toSplit[nextIndex].match(/(\d+)/)[0];
+
+    weeksData.push(weekItem);
+  }
+
+  obj.weeksData = weeksData;
+
+  return obj;
+};
+
+export { loadEPUB };
diff --git a/test/index.test.mjs b/test/index.test.mjs
index 5024cbac..f8a6670a 100644
--- a/test/index.test.mjs
+++ b/test/index.test.mjs
@@ -1,6 +1,5 @@
-import 'global-jsdom/register';
 import { expect } from 'chai';
-import { loadEPUB } from '../dist/index.cjs';
+import { loadEPUB } from '../dist/index.node.js';
 
 const expData = {
   mwbYear: '2021',