Skip to content

Commit

Permalink
feat(module): support parsing mwb starting 202401
Browse files Browse the repository at this point in the history
  • Loading branch information
rhahao authored Nov 6, 2023
1 parent e5d81f5 commit 303ec44
Show file tree
Hide file tree
Showing 10 changed files with 152 additions and 333 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ node_modules
dist
sample

.env
.env

*.epub
2 changes: 1 addition & 1 deletion example/sample.js
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,13 @@ export const fetchData = async (language, issue, pub) => {

if (issue && pub) {
const url = JW_CDN + new URLSearchParams({ langwritten: language, pub, output: 'json', issue });


const res = await fetch(url);

if (res.status === 200) {
const result = await res.json();
const hasEPUB = result.files[language].EPUB;

const issueFetch = { issueDate: issue, currentYear: issue.substring(0, 4), language, hasEPUB: hasEPUB };

data = await fetchIssueData(issueFetch);
Expand Down
30 changes: 1 addition & 29 deletions src/common/enhanced_parse_utils.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
import dateFormat from 'dateformat';
import {
extractAYFAssignment,
extractCBSSource,
extractLCAssignment,
extractMonthName,
extractTGWBibleReading,
extractTGWTalk,
extractWTStudyDate,
} from './parsing_rules.js';
import { extractMonthName, extractWTStudyDate } from './parsing_rules.js';

export const getMWBWeekDateEnhanced = (weekDate, mwbYear, lang) => {
const { varDay, monthIndex } = extractMonthName(weekDate, lang);
Expand All @@ -16,26 +8,6 @@ export const getMWBWeekDateEnhanced = (weekDate, mwbYear, lang) => {
return dateFormat(schedDate, 'yyyy/mm/dd');
};

/**
 * Enhanced-parsing accessor for the TGW talk source.
 *
 * Forwards both arguments, unchanged, to `extractTGWTalk` and returns
 * whatever that helper returns.
 *
 * @param {string} src - source text of the part
 * @param {*} lang - language identifier handed on to the extractor
 * @returns {*} result of `extractTGWTalk(src, lang)`
 */
export const getMWBTGWTalkEnhanced = (src, lang) => extractTGWTalk(src, lang);

/**
 * Enhanced-parsing accessor for the TGW Bible reading source.
 *
 * Forwards both arguments, unchanged, to `extractTGWBibleReading` and
 * returns whatever that helper returns.
 *
 * @param {string} src - source text of the part
 * @param {*} lang - language identifier handed on to the extractor
 * @returns {*} result of `extractTGWBibleReading(src, lang)`
 */
export const getMWBTGWBibleReadingEnhanced = (src, lang) => extractTGWBibleReading(src, lang);

/**
 * Enhanced-parsing accessor for an AYF part.
 *
 * Forwards both arguments, unchanged, to `extractAYFAssignment` and
 * returns whatever that helper returns.
 *
 * @param {string} src - source text of the part
 * @param {*} lang - language identifier handed on to the extractor
 * @returns {*} result of `extractAYFAssignment(src, lang)`
 */
export const getMWBAYFEnhanced = (src, lang) => extractAYFAssignment(src, lang);

/**
 * Enhanced-parsing accessor for an LC part.
 *
 * Forwards both arguments, unchanged, to `extractLCAssignment` and
 * returns whatever that helper returns.
 *
 * @param {string} src - source text of the part
 * @param {*} lang - language identifier handed on to the extractor
 * @returns {*} result of `extractLCAssignment(src, lang)`
 */
export const getMWBLCEnhanced = (src, lang) => extractLCAssignment(src, lang);

/**
 * Enhanced-parsing accessor for the CBS source.
 *
 * Forwards both arguments, unchanged, to `extractCBSSource` and returns
 * whatever that helper returns.
 *
 * @param {string} src - source text of the part
 * @param {*} lang - language identifier handed on to the extractor
 * @returns {*} result of `extractCBSSource(src, lang)`
 */
export const getMWBCBSEnhanced = (src, lang) => extractCBSSource(src, lang);

export const getWTStudyDateEnhanced = (src, lang) => {
const { varDay, monthIndex, varYear } = extractWTStudyDate(src, lang);
const schedDate = new Date(varYear, monthIndex, varDay);
Expand Down
1 change: 0 additions & 1 deletion src/common/epub_validation.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ export const isValidEPUBIssue = (input) => {
const issue = +epubFilename.split('_')[2].split('.epub')[0];

if (type === 'mwb' && issue < 202207) valid = false;
if (type === 'mwb' && issue >= 202401) valid = false;
if (type === 'w' && issue < 202304) valid = false;

return valid;
Expand Down
64 changes: 57 additions & 7 deletions src/common/html_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,76 @@ export const getMWBWeeklyBibleReading = (htmlItem) => {
};

/**
 * Counts the AYF parts found in one weekly schedule document.
 *
 * Two layouts are supported:
 *  - pre-2024 mwb: the document has a `#section3` element and every `li`
 *    beneath it counts as one part;
 *  - 2024 onward: there is no `#section3`, so elements carrying the
 *    `du-color--gold-700` class are counted instead.
 *
 * The previous version dereferenced `#section3` unconditionally before the
 * layout check, which threw a TypeError on 2024 files and left the
 * layout-aware logic unreachable.
 *
 * @param {{querySelector: Function, querySelectorAll: Function}} htmlItem -
 *   parsed HTML document (or element) for a single week
 * @returns {number} number of AYF parts
 */
export const getMWBAYFCount = (htmlItem) => {
  const legacySection = htmlItem.querySelector('#section3');

  // pre-2024 mwb
  if (legacySection) {
    return legacySection.querySelectorAll('li').length;
  }

  // 2024 onward
  // NOTE(review): the "- 1" presumably discounts the section heading, which
  // appears to share the gold class with the parts — confirm against a real
  // 2024 issue. With no gold elements at all this yields -1.
  return htmlItem.querySelectorAll('.du-color--gold-700').length - 1;
};

/**
 * Counts the LC parts found in one weekly schedule document.
 *
 * Two layouts are supported:
 *  - pre-2024 mwb: the document has a `#section4` element; exactly six `li`
 *    descendants means two parts, any other number means one part;
 *  - 2024 onward: there is no `#section4`, so `h3.du-color--maroon-600`
 *    headings are counted instead.
 *
 * The previous version dereferenced `#section4` unconditionally before the
 * layout check, which threw a TypeError on 2024 files and left the
 * layout-aware logic unreachable.
 *
 * @param {{querySelector: Function, querySelectorAll: Function}} htmlItem -
 *   parsed HTML document (or element) for a single week
 * @returns {number} number of LC parts
 */
export const getMWBLCCount = (htmlItem) => {
  const legacySection = htmlItem.querySelector('#section4');

  // pre-2024 mwb
  if (legacySection) {
    // NOTE(review): the 6 -> 2 mapping is carried over as-is; presumably the
    // list also holds non-part items (songs, CBS, ...) — confirm.
    const listItemCount = legacySection.querySelectorAll('li').length;
    return listItemCount === 6 ? 2 : 1;
  }

  // 2024 onward
  // NOTE(review): the "- 1" presumably discounts one maroon h3 that is not
  // an LC part — confirm against a real 2024 issue.
  return htmlItem.querySelectorAll('h3.du-color--maroon-600').length - 1;
};

export const getMWBSources = (htmlItem) => {
let src = '';

// pre-2024 mwb
// get elements with meeting schedule data: pGroup
const pGroupData = htmlItem.querySelectorAll('.pGroup');
pGroupData.forEach((pGroup) => {
for (const pGroup of pGroupData) {
const liData = pGroup.querySelectorAll('li');
liData.forEach((li) => {
for (const li of liData) {
const firstP = li.querySelector('p');
src += '|' + firstP.textContent;
});
});
}
}

// 2024 onward
// get elements with meeting schedule data: h3
if (src.length === 0) {
const h3Texts = htmlItem.querySelectorAll('h3');

for (const h3 of h3Texts) {
src += '|' + h3.textContent;
const nextElement = h3.nextElementSibling;
if (nextElement) {
const tmp = nextElement.querySelector('.du-color--textSubdued');
if (tmp) {
const firstP = tmp.querySelector('p');
src += ' ' + firstP.textContent;
}
}
}

const sepBeforeBR = src.split('|', 5).join('|').length;
src = src.substring(0, sepBeforeBR) + '|junk|junk' + src.substring(sepBeforeBR);
}

src = src.replaceAll(/\u00A0/g, ' '); // remove non-breaking space

Expand Down
22 changes: 19 additions & 3 deletions src/common/html_validation.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,26 @@ export const getHTMLString = async (zip, filename) => {

export const isValidMWBSchedule = (htmlDoc) => {
let valid = false;
let isValidTGW = false;
let isValidAYF = false;
let isValidLC = false;

const isValidTGW = htmlDoc.querySelector(`[class*=treasures]`) ? true : false;
const isValidAYF = htmlDoc.querySelector(`[class*=ministry]`) ? true : false;
const isValidLC = htmlDoc.querySelector(`[class*=christianLiving]`) ? true : false;
// pre-2024 mwb

isValidTGW = htmlDoc.querySelector(`[class*=treasures]`) ? true : false;
if (isValidTGW) {
isValidAYF = htmlDoc.querySelector(`[class*=ministry]`) ? true : false;
isValidLC = htmlDoc.querySelector(`[class*=christianLiving]`) ? true : false;
}

// 2024 onward
if (!isValidTGW) {
isValidTGW = htmlDoc.querySelector('.du-color--teal-700') ? true : false;
if (isValidTGW) {
isValidAYF = htmlDoc.querySelector('.du-color--gold-700') ? true : false;
isValidLC = htmlDoc.querySelector('.du-color--maroon-600') ? true : false;
}
}

if (isValidTGW === true && isValidAYF === true && isValidLC === true) {
valid = true;
Expand Down
51 changes: 16 additions & 35 deletions src/common/language_rules.js
Original file line number Diff line number Diff line change
@@ -1,43 +1,24 @@
const languages = window.jw_epub_parser.languages;

/**
 * Builds the month lookup table for one language.
 *
 * Each entry pairs a zero-based month index (0 = January ... 11 = December)
 * with the `<month>Variations` value stored for that language in the
 * module-level `languages` table.
 *
 * The previous body contained the same `return [...]` twice; the second
 * copy was unreachable and has been dropped.
 *
 * @param {string} lang - key into `languages`
 * @returns {Array<{index: number, name: *}>} twelve entries in calendar order
 * @throws {TypeError} when `languages` has no entry for `lang`
 */
export const getMonthNames = (lang) => {
  const locale = languages[lang];

  // Property names read from the locale entry, in calendar order so the
  // array position doubles as the month index.
  const variationKeys = [
    'januaryVariations',
    'februaryVariations',
    'marchVariations',
    'aprilVariations',
    'mayVariations',
    'juneVariations',
    'julyVariations',
    'augustVariations',
    'septemberVariations',
    'octoberVariations',
    'novemberVariations',
    'decemberVariations',
  ];

  return variationKeys.map((key, index) => ({ index, name: locale[key] }));
};

/**
 * Reads the `tgwTalk10Variations` value stored for a language.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].tgwTalk10Variations`, as stored
 */
export const getTGWTalkVariations = (lang) => {
  const { tgwTalk10Variations } = languages[lang];
  return tgwTalk10Variations;
};

/**
 * Reads the `tgwBibleReadingVariations` value stored for a language.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].tgwBibleReadingVariations`, as stored
 */
export const getTGWBibleReadingVariations = (lang) => {
  const { tgwBibleReadingVariations } = languages[lang];
  return tgwBibleReadingVariations;
};

/**
 * Collects the assignment-name variation values stored for a language.
 *
 * The returned array always has eight slots, in this fixed order:
 * initial call video, return visit video, memorial invitation video,
 * initial call, return visit, bible study, talk, memorial invitation.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {Array<*>} the eight `...Variations` values, as stored
 */
export const getAssignmentsName = (lang) => {
  const locale = languages[lang];

  return [
    locale.initialCallVideoVariations,
    locale.returnVisitVideoVariations,
    locale.memorialInvitationVideoVariations,
    locale.initialCallVariations,
    locale.returnVisitVariations,
    locale.bibleStudyVariations,
    locale.talkVariations,
    locale.memorialInvitationVariations,
  ];
};

/**
 * Reads the `assignmentAyfVariations` value stored for a language.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].assignmentAyfVariations`, as stored
 */
export const getAssignmentsVariations = (lang) => {
  const { assignmentAyfVariations } = languages[lang];
  return assignmentAyfVariations;
};

/**
 * Reads the `assignmentLcVariations` value stored for a language.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].assignmentLcVariations`, as stored
 */
export const getLivingPartsVariations = (lang) => {
  const { assignmentLcVariations } = languages[lang];
  return assignmentLcVariations;
};

/**
 * Reads the `cbsVariations` value stored for a language.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].cbsVariations`, as stored
 */
export const getCBSVariations = (lang) => {
  const { cbsVariations } = languages[lang];
  return cbsVariations;
};

/**
 * Reads the `concludingSongVariations` value stored for a language.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].concludingSongVariations`, as stored
 */
export const getConcludingSongFormat = (lang) => {
  const { concludingSongVariations } = languages[lang];
  return concludingSongVariations;
};

/**
 * Reads the `studyArticleDateVariations` value stored for a language.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].studyArticleDateVariations`, as stored
 */
export const getStudyArticleDateVariations = (lang) => {
  const { studyArticleDateVariations } = languages[lang];
  return studyArticleDateVariations;
};

/**
 * Reads the `partMinutesSeparatorVariations` value stored for a language.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].partMinutesSeparatorVariations`, as stored
 */
export const getPartMinutesSeparatorVariations = (lang) => {
  const { partMinutesSeparatorVariations } = languages[lang];
  return partMinutesSeparatorVariations;
};
42 changes: 17 additions & 25 deletions src/common/parser.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
import languages from '../locales/languages.js';
import {
getMWBAYFEnhanced,
getMWBCBSEnhanced,
getMWBLCEnhanced,
getMWBTGWBibleReadingEnhanced,
getMWBTGWTalkEnhanced,
getMWBWeekDateEnhanced,
getWTStudyDateEnhanced,
} from './enhanced_parse_utils.js';
import { getMWBWeekDateEnhanced, getWTStudyDateEnhanced } from './enhanced_parse_utils.js';
import { extractEPUBFiles, getHTMLDocs, validateEPUBContents } from './epub_jszip.js';
import {
getEPUBData,
Expand All @@ -30,7 +22,7 @@ import {
getWStudyDate,
getWStudyTitle,
} from './html_utils.js';
import { extractLastSong, extractSongNumber } from './parsing_rules.js';
import { extractLastSong, extractSongNumber, extractSourceEnhanced } from './parsing_rules.js';

export const startParse = async (epubInput) => {
let result = {};
Expand Down Expand Up @@ -128,15 +120,15 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
// 10min TGW Source
tmpSrc = splits[3].trim();
if (isEnhancedParsing) {
weekItem.mwb_tgw_talk = getMWBTGWTalkEnhanced(tmpSrc, mwbLang);
weekItem.mwb_tgw_talk = extractSourceEnhanced(tmpSrc, mwbLang).type;
} else {
weekItem.mwb_tgw_talk = tmpSrc;
}

//Bible Reading Source
tmpSrc = splits[7].trim();
if (isEnhancedParsing) {
weekItem.mwb_tgw_bread = getMWBTGWBibleReadingEnhanced(tmpSrc, mwbLang);
weekItem.mwb_tgw_bread = extractSourceEnhanced(tmpSrc, mwbLang).src;
} else {
weekItem.mwb_tgw_bread = tmpSrc;
}
Expand All @@ -150,7 +142,7 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
//AYF1 Source
tmpSrc = splits[8].trim();
if (isEnhancedParsing) {
const partEnhanced = getMWBAYFEnhanced(tmpSrc, mwbLang);
const partEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_ayf_part1 = partEnhanced.src;
weekItem.mwb_ayf_part1_time = partEnhanced.time;
weekItem.mwb_ayf_part1_type = partEnhanced.type;
Expand All @@ -162,7 +154,7 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
if (cnAYF > 1) {
tmpSrc = splits[9].trim();
if (isEnhancedParsing) {
const partEnhanced = getMWBAYFEnhanced(tmpSrc, mwbLang);
const partEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_ayf_part2 = partEnhanced.src;
weekItem.mwb_ayf_part2_time = partEnhanced.time;
weekItem.mwb_ayf_part2_type = partEnhanced.type;
Expand All @@ -175,7 +167,7 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
if (cnAYF > 2) {
tmpSrc = splits[10].trim();
if (isEnhancedParsing) {
const partEnhanced = getMWBAYFEnhanced(tmpSrc, mwbLang);
const partEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_ayf_part3 = partEnhanced.src;
weekItem.mwb_ayf_part3_time = partEnhanced.time;
weekItem.mwb_ayf_part3_type = partEnhanced.type;
Expand All @@ -188,7 +180,7 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
if (cnAYF > 3) {
tmpSrc = splits[11].trim();
if (isEnhancedParsing) {
const partEnhanced = getMWBAYFEnhanced(tmpSrc, mwbLang);
const partEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_ayf_part4 = partEnhanced.src;
weekItem.mwb_ayf_part4_time = partEnhanced.time;
weekItem.mwb_ayf_part4_type = partEnhanced.type;
Expand All @@ -212,11 +204,11 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {

tmpSrc = splits[nextIndex].trim();
if (isEnhancedParsing) {
const lcEnhanced = getMWBLCEnhanced(tmpSrc, mwbLang);
weekItem.mwb_lc_part1 = lcEnhanced.title;
const lcEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_lc_part1 = lcEnhanced.type;
weekItem.mwb_lc_part1_time = lcEnhanced.time;
if (lcEnhanced.content && lcEnhanced.content !== '') {
weekItem.mwb_lc_part1_content = lcEnhanced.content;
if (lcEnhanced.src && lcEnhanced.src !== '') {
weekItem.mwb_lc_part1_content = lcEnhanced.src;
}
} else {
weekItem.mwb_lc_part1 = tmpSrc;
Expand All @@ -228,11 +220,11 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
tmpSrc = splits[nextIndex].trim();

if (isEnhancedParsing) {
const lcEnhanced = getMWBLCEnhanced(tmpSrc, mwbLang);
weekItem.mwb_lc_part2 = lcEnhanced.title;
const lcEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_lc_part2 = lcEnhanced.type;
weekItem.mwb_lc_part2_time = lcEnhanced.time;
if (lcEnhanced.content && lcEnhanced.content !== '') {
weekItem.mwb_lc_part2_content = lcEnhanced.content;
if (lcEnhanced.src && lcEnhanced.src !== '') {
weekItem.mwb_lc_part2_content = lcEnhanced.src;
}
} else {
weekItem.mwb_lc_part2 = tmpSrc;
Expand All @@ -244,7 +236,7 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
tmpSrc = splits[nextIndex].trim();

if (isEnhancedParsing) {
weekItem.mwb_lc_cbs = getMWBCBSEnhanced(tmpSrc, mwbLang);
weekItem.mwb_lc_cbs = extractSourceEnhanced(tmpSrc, mwbLang).src;
} else {
weekItem.mwb_lc_cbs = tmpSrc;
}
Expand Down
Loading

0 comments on commit 303ec44

Please sign in to comment.