Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(module): support parsing mwb starting 202401 #527

Merged
merged 2 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ node_modules
dist
sample

.env
.env

*.epub
2 changes: 1 addition & 1 deletion example/sample.js
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,13 @@ export const fetchData = async (language, issue, pub) => {

if (issue && pub) {
const url = JW_CDN + new URLSearchParams({ langwritten: language, pub, output: 'json', issue });


const res = await fetch(url);

if (res.status === 200) {
const result = await res.json();
const hasEPUB = result.files[language].EPUB;

const issueFetch = { issueDate: issue, currentYear: issue.substring(0, 4), language, hasEPUB: hasEPUB };

data = await fetchIssueData(issueFetch);
Expand Down
30 changes: 1 addition & 29 deletions src/common/enhanced_parse_utils.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
import dateFormat from 'dateformat';
import {
extractAYFAssignment,
extractCBSSource,
extractLCAssignment,
extractMonthName,
extractTGWBibleReading,
extractTGWTalk,
extractWTStudyDate,
} from './parsing_rules.js';
import { extractMonthName, extractWTStudyDate } from './parsing_rules.js';

export const getMWBWeekDateEnhanced = (weekDate, mwbYear, lang) => {
const { varDay, monthIndex } = extractMonthName(weekDate, lang);
Expand All @@ -16,26 +8,6 @@ export const getMWBWeekDateEnhanced = (weekDate, mwbYear, lang) => {
return dateFormat(schedDate, 'yyyy/mm/dd');
};

/**
 * Thin pass-through to `extractTGWTalk`.
 *
 * @param {*} src - value forwarded unchanged as the extractor's first argument
 * @param {*} lang - value forwarded unchanged as the extractor's second argument
 * @returns {*} whatever `extractTGWTalk(src, lang)` returns
 */
export const getMWBTGWTalkEnhanced = (src, lang) => extractTGWTalk(src, lang);

/**
 * Thin pass-through to `extractTGWBibleReading`.
 *
 * @param {*} src - value forwarded unchanged as the extractor's first argument
 * @param {*} lang - value forwarded unchanged as the extractor's second argument
 * @returns {*} whatever `extractTGWBibleReading(src, lang)` returns
 */
export const getMWBTGWBibleReadingEnhanced = (src, lang) => extractTGWBibleReading(src, lang);

/**
 * Thin pass-through to `extractAYFAssignment`.
 *
 * @param {*} src - value forwarded unchanged as the extractor's first argument
 * @param {*} lang - value forwarded unchanged as the extractor's second argument
 * @returns {*} whatever `extractAYFAssignment(src, lang)` returns
 */
export const getMWBAYFEnhanced = (src, lang) => extractAYFAssignment(src, lang);

/**
 * Thin pass-through to `extractLCAssignment`.
 *
 * @param {*} src - value forwarded unchanged as the extractor's first argument
 * @param {*} lang - value forwarded unchanged as the extractor's second argument
 * @returns {*} whatever `extractLCAssignment(src, lang)` returns
 */
export const getMWBLCEnhanced = (src, lang) => extractLCAssignment(src, lang);

/**
 * Thin pass-through to `extractCBSSource`.
 *
 * @param {*} src - value forwarded unchanged as the extractor's first argument
 * @param {*} lang - value forwarded unchanged as the extractor's second argument
 * @returns {*} whatever `extractCBSSource(src, lang)` returns
 */
export const getMWBCBSEnhanced = (src, lang) => extractCBSSource(src, lang);

export const getWTStudyDateEnhanced = (src, lang) => {
const { varDay, monthIndex, varYear } = extractWTStudyDate(src, lang);
const schedDate = new Date(varYear, monthIndex, varDay);
Expand Down
1 change: 0 additions & 1 deletion src/common/epub_validation.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ export const isValidEPUBIssue = (input) => {
const issue = +epubFilename.split('_')[2].split('.epub')[0];

if (type === 'mwb' && issue < 202207) valid = false;
if (type === 'mwb' && issue >= 202401) valid = false;
if (type === 'w' && issue < 202304) valid = false;

return valid;
Expand Down
64 changes: 57 additions & 7 deletions src/common/html_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,76 @@ export const getMWBWeeklyBibleReading = (htmlItem) => {
};

/**
 * Count the AYF parts found in one week's HTML.
 *
 * Two page layouts are supported:
 *  - pre-2024 mwb: a `#section3` element exists and the count is the number
 *    of `li` elements inside it;
 *  - 2024 onward: there is no `#section3`, so the count is the number of
 *    `.du-color--gold-700` elements minus one.
 *    NOTE(review): the "- 1" presumably discounts the section heading, which
 *    would carry the same class — confirm against a 2024 sample.
 *
 * @param {{querySelector: Function, querySelectorAll: Function}} htmlItem
 *   parsed HTML node for one week
 * @returns {number} number of AYF parts
 */
export const getMWBAYFCount = (htmlItem) => {
  const legacySection = htmlItem.querySelector('#section3');

  // pre-2024 mwb
  if (legacySection) {
    return legacySection.querySelectorAll('li').length;
  }

  // 2024 onward: only reached when #section3 is absent, so the lookup above
  // must never be dereferenced unconditionally.
  return htmlItem.querySelectorAll('.du-color--gold-700').length - 1;
};

/**
 * Count the LC parts found in one week's HTML.
 *
 * Two page layouts are supported:
 *  - pre-2024 mwb: a `#section4` element exists; the result is 2 when it
 *    contains exactly six `li` elements and 1 otherwise;
 *  - 2024 onward: there is no `#section4`, so the result is the number of
 *    `h3.du-color--maroon-600` elements minus one.
 *    NOTE(review): the "- 1" presumably discounts one fixed heading that is
 *    not a counted part — confirm against a 2024 sample.
 *
 * @param {{querySelector: Function, querySelectorAll: Function}} htmlItem
 *   parsed HTML node for one week
 * @returns {number} number of LC parts
 */
export const getMWBLCCount = (htmlItem) => {
  const legacySection = htmlItem.querySelector('#section4');

  // pre-2024 mwb
  if (legacySection) {
    const itemCount = legacySection.querySelectorAll('li').length;
    return itemCount === 6 ? 2 : 1;
  }

  // 2024 onward: only reached when #section4 is absent, so the lookup above
  // must never be dereferenced unconditionally.
  return htmlItem.querySelectorAll('h3.du-color--maroon-600').length - 1;
};

export const getMWBSources = (htmlItem) => {
let src = '';

// pre-2024 mwb
// get elements with meeting schedule data: pGroup
const pGroupData = htmlItem.querySelectorAll('.pGroup');
pGroupData.forEach((pGroup) => {
for (const pGroup of pGroupData) {
const liData = pGroup.querySelectorAll('li');
liData.forEach((li) => {
for (const li of liData) {
const firstP = li.querySelector('p');
src += '|' + firstP.textContent;
});
});
}
}

// 2024 onward
// get elements with meeting schedule data: h3
if (src.length === 0) {
const h3Texts = htmlItem.querySelectorAll('h3');

for (const h3 of h3Texts) {
src += '|' + h3.textContent;
const nextElement = h3.nextElementSibling;
if (nextElement) {
const tmp = nextElement.querySelector('.du-color--textSubdued');
if (tmp) {
const firstP = tmp.querySelector('p');
src += ' ' + firstP.textContent;
}
}
}

const sepBeforeBR = src.split('|', 5).join('|').length;
src = src.substring(0, sepBeforeBR) + '|junk|junk' + src.substring(sepBeforeBR);
}

src = src.replaceAll(/\u00A0/g, ' '); // remove non-breaking space

Expand Down
22 changes: 19 additions & 3 deletions src/common/html_validation.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,26 @@ export const getHTMLString = async (zip, filename) => {

export const isValidMWBSchedule = (htmlDoc) => {
let valid = false;
let isValidTGW = false;
let isValidAYF = false;
let isValidLC = false;

const isValidTGW = htmlDoc.querySelector(`[class*=treasures]`) ? true : false;
const isValidAYF = htmlDoc.querySelector(`[class*=ministry]`) ? true : false;
const isValidLC = htmlDoc.querySelector(`[class*=christianLiving]`) ? true : false;
// pre-2024 mwb

isValidTGW = htmlDoc.querySelector(`[class*=treasures]`) ? true : false;
if (isValidTGW) {
isValidAYF = htmlDoc.querySelector(`[class*=ministry]`) ? true : false;
isValidLC = htmlDoc.querySelector(`[class*=christianLiving]`) ? true : false;
}

// 2024 onward
if (!isValidTGW) {
isValidTGW = htmlDoc.querySelector('.du-color--teal-700') ? true : false;
if (isValidTGW) {
isValidAYF = htmlDoc.querySelector('.du-color--gold-700') ? true : false;
isValidLC = htmlDoc.querySelector('.du-color--maroon-600') ? true : false;
}
}

if (isValidTGW === true && isValidAYF === true && isValidLC === true) {
valid = true;
Expand Down
51 changes: 16 additions & 35 deletions src/common/language_rules.js
Original file line number Diff line number Diff line change
@@ -1,43 +1,24 @@
// Language table, read once at module load from the browser global
// `window.jw_epub_parser.languages`. The exported helpers in this file index
// it with their `lang` argument.
const languages = window.jw_epub_parser.languages;

/**
 * Build the month lookup list for one language.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {Array<{index: number, name: *}>} twelve entries in calendar
 *   order; `index` runs 0-11 and `name` is the language record's
 *   `<month>Variations` value
 * @throws {TypeError} when `languages[lang]` is missing
 */
export const getMonthNames = (lang) => {
  // Resolve the language record once instead of once per month.
  const locale = languages[lang];

  return [
    { index: 0, name: locale.januaryVariations },
    { index: 1, name: locale.februaryVariations },
    { index: 2, name: locale.marchVariations },
    { index: 3, name: locale.aprilVariations },
    { index: 4, name: locale.mayVariations },
    { index: 5, name: locale.juneVariations },
    { index: 6, name: locale.julyVariations },
    { index: 7, name: locale.augustVariations },
    { index: 8, name: locale.septemberVariations },
    { index: 9, name: locale.octoberVariations },
    { index: 10, name: locale.novemberVariations },
    { index: 11, name: locale.decemberVariations },
  ];
};

/**
 * Look up the `tgwTalk10Variations` entry of a language record.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].tgwTalk10Variations`
 */
export const getTGWTalkVariations = (lang) => {
  const { tgwTalk10Variations } = languages[lang];
  return tgwTalk10Variations;
};

/**
 * Look up the `tgwBibleReadingVariations` entry of a language record.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].tgwBibleReadingVariations`
 */
export const getTGWBibleReadingVariations = (lang) => {
  const { tgwBibleReadingVariations } = languages[lang];
  return tgwBibleReadingVariations;
};

/**
 * Collect the assignment-name entries of a language record into one array.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {Array<*>} eight values, in the fixed order listed in the body
 */
export const getAssignmentsName = (lang) => {
  const locale = languages[lang];

  // The order is part of the contract: callers receive the values positionally.
  const orderedKeys = [
    'initialCallVideoVariations',
    'returnVisitVideoVariations',
    'memorialInvitationVideoVariations',
    'initialCallVariations',
    'returnVisitVariations',
    'bibleStudyVariations',
    'talkVariations',
    'memorialInvitationVariations',
  ];

  return orderedKeys.map((key) => locale[key]);
};

/**
 * Look up the `assignmentAyfVariations` entry of a language record.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].assignmentAyfVariations`
 */
export const getAssignmentsVariations = (lang) => {
  const { assignmentAyfVariations } = languages[lang];
  return assignmentAyfVariations;
};

/**
 * Look up the `assignmentLcVariations` entry of a language record.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].assignmentLcVariations`
 */
export const getLivingPartsVariations = (lang) => {
  const { assignmentLcVariations } = languages[lang];
  return assignmentLcVariations;
};

/**
 * Look up the `cbsVariations` entry of a language record.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].cbsVariations`
 */
export const getCBSVariations = (lang) => {
  const { cbsVariations } = languages[lang];
  return cbsVariations;
};

/**
 * Look up the `concludingSongVariations` entry of a language record.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].concludingSongVariations`
 */
export const getConcludingSongFormat = (lang) => {
  const { concludingSongVariations } = languages[lang];
  return concludingSongVariations;
};

/**
 * Look up the `studyArticleDateVariations` entry of a language record.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].studyArticleDateVariations`
 */
export const getStudyArticleDateVariations = (lang) => {
  const { studyArticleDateVariations } = languages[lang];
  return studyArticleDateVariations;
};

/**
 * Look up the `partMinutesSeparatorVariations` entry of a language record.
 *
 * @param {string} lang - key into the module-level `languages` table
 * @returns {*} `languages[lang].partMinutesSeparatorVariations`
 */
export const getPartMinutesSeparatorVariations = (lang) => {
  const { partMinutesSeparatorVariations } = languages[lang];
  return partMinutesSeparatorVariations;
};
42 changes: 17 additions & 25 deletions src/common/parser.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
import languages from '../locales/languages.js';
import {
getMWBAYFEnhanced,
getMWBCBSEnhanced,
getMWBLCEnhanced,
getMWBTGWBibleReadingEnhanced,
getMWBTGWTalkEnhanced,
getMWBWeekDateEnhanced,
getWTStudyDateEnhanced,
} from './enhanced_parse_utils.js';
import { getMWBWeekDateEnhanced, getWTStudyDateEnhanced } from './enhanced_parse_utils.js';
import { extractEPUBFiles, getHTMLDocs, validateEPUBContents } from './epub_jszip.js';
import {
getEPUBData,
Expand All @@ -30,7 +22,7 @@ import {
getWStudyDate,
getWStudyTitle,
} from './html_utils.js';
import { extractLastSong, extractSongNumber } from './parsing_rules.js';
import { extractLastSong, extractSongNumber, extractSourceEnhanced } from './parsing_rules.js';

export const startParse = async (epubInput) => {
let result = {};
Expand Down Expand Up @@ -128,15 +120,15 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
// 10min TGW Source
tmpSrc = splits[3].trim();
if (isEnhancedParsing) {
weekItem.mwb_tgw_talk = getMWBTGWTalkEnhanced(tmpSrc, mwbLang);
weekItem.mwb_tgw_talk = extractSourceEnhanced(tmpSrc, mwbLang).type;
} else {
weekItem.mwb_tgw_talk = tmpSrc;
}

//Bible Reading Source
tmpSrc = splits[7].trim();
if (isEnhancedParsing) {
weekItem.mwb_tgw_bread = getMWBTGWBibleReadingEnhanced(tmpSrc, mwbLang);
weekItem.mwb_tgw_bread = extractSourceEnhanced(tmpSrc, mwbLang).src;
} else {
weekItem.mwb_tgw_bread = tmpSrc;
}
Expand All @@ -150,7 +142,7 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
//AYF1 Source
tmpSrc = splits[8].trim();
if (isEnhancedParsing) {
const partEnhanced = getMWBAYFEnhanced(tmpSrc, mwbLang);
const partEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_ayf_part1 = partEnhanced.src;
weekItem.mwb_ayf_part1_time = partEnhanced.time;
weekItem.mwb_ayf_part1_type = partEnhanced.type;
Expand All @@ -162,7 +154,7 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
if (cnAYF > 1) {
tmpSrc = splits[9].trim();
if (isEnhancedParsing) {
const partEnhanced = getMWBAYFEnhanced(tmpSrc, mwbLang);
const partEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_ayf_part2 = partEnhanced.src;
weekItem.mwb_ayf_part2_time = partEnhanced.time;
weekItem.mwb_ayf_part2_type = partEnhanced.type;
Expand All @@ -175,7 +167,7 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
if (cnAYF > 2) {
tmpSrc = splits[10].trim();
if (isEnhancedParsing) {
const partEnhanced = getMWBAYFEnhanced(tmpSrc, mwbLang);
const partEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_ayf_part3 = partEnhanced.src;
weekItem.mwb_ayf_part3_time = partEnhanced.time;
weekItem.mwb_ayf_part3_type = partEnhanced.type;
Expand All @@ -188,7 +180,7 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
if (cnAYF > 3) {
tmpSrc = splits[11].trim();
if (isEnhancedParsing) {
const partEnhanced = getMWBAYFEnhanced(tmpSrc, mwbLang);
const partEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_ayf_part4 = partEnhanced.src;
weekItem.mwb_ayf_part4_time = partEnhanced.time;
weekItem.mwb_ayf_part4_type = partEnhanced.type;
Expand All @@ -212,11 +204,11 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {

tmpSrc = splits[nextIndex].trim();
if (isEnhancedParsing) {
const lcEnhanced = getMWBLCEnhanced(tmpSrc, mwbLang);
weekItem.mwb_lc_part1 = lcEnhanced.title;
const lcEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_lc_part1 = lcEnhanced.type;
weekItem.mwb_lc_part1_time = lcEnhanced.time;
if (lcEnhanced.content && lcEnhanced.content !== '') {
weekItem.mwb_lc_part1_content = lcEnhanced.content;
if (lcEnhanced.src && lcEnhanced.src !== '') {
weekItem.mwb_lc_part1_content = lcEnhanced.src;
}
} else {
weekItem.mwb_lc_part1 = tmpSrc;
Expand All @@ -228,11 +220,11 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
tmpSrc = splits[nextIndex].trim();

if (isEnhancedParsing) {
const lcEnhanced = getMWBLCEnhanced(tmpSrc, mwbLang);
weekItem.mwb_lc_part2 = lcEnhanced.title;
const lcEnhanced = extractSourceEnhanced(tmpSrc, mwbLang);
weekItem.mwb_lc_part2 = lcEnhanced.type;
weekItem.mwb_lc_part2_time = lcEnhanced.time;
if (lcEnhanced.content && lcEnhanced.content !== '') {
weekItem.mwb_lc_part2_content = lcEnhanced.content;
if (lcEnhanced.src && lcEnhanced.src !== '') {
weekItem.mwb_lc_part2_content = lcEnhanced.src;
}
} else {
weekItem.mwb_lc_part2 = tmpSrc;
Expand All @@ -244,7 +236,7 @@ export const parseMWBSchedule = (htmlItem, mwbYear, mwbLang) => {
tmpSrc = splits[nextIndex].trim();

if (isEnhancedParsing) {
weekItem.mwb_lc_cbs = getMWBCBSEnhanced(tmpSrc, mwbLang);
weekItem.mwb_lc_cbs = extractSourceEnhanced(tmpSrc, mwbLang).src;
} else {
weekItem.mwb_lc_cbs = tmpSrc;
}
Expand Down
Loading
Loading