From db0ae9d3fa9b62a35f16169ccc48aa0c4886c11e Mon Sep 17 00:00:00 2001 From: Henry Wang Date: Thu, 30 May 2024 16:51:22 +0100 Subject: [PATCH] fix(route): IEEE journal routes (#15720) * fix: IEEE journal routes * feat: add feed image * chore: preprint -> earlyAccess * chore: remove cookie jar --- lib/routes/ieee/earlyaccess.ts | 100 ------------------ lib/routes/ieee/journal.ts | 117 +++++++++------------- lib/routes/ieee/recent.ts | 107 -------------------- lib/routes/ieee/templates/description.art | 2 +- 4 files changed, 51 insertions(+), 275 deletions(-) delete mode 100644 lib/routes/ieee/earlyaccess.ts delete mode 100644 lib/routes/ieee/recent.ts diff --git a/lib/routes/ieee/earlyaccess.ts b/lib/routes/ieee/earlyaccess.ts deleted file mode 100644 index 9f18bc0b02d97..0000000000000 --- a/lib/routes/ieee/earlyaccess.ts +++ /dev/null @@ -1,100 +0,0 @@ -import { Route } from '@/types'; -import { getCurrentPath } from '@/utils/helpers'; -const __dirname = getCurrentPath(import.meta.url); - -import cache from '@/utils/cache'; -import got from '@/utils/got'; -import { load } from 'cheerio'; -import path from 'node:path'; -import { art } from '@/utils/render'; - -import { CookieJar } from 'tough-cookie'; -const cookieJar = new CookieJar(); - -export const route: Route = { - path: '/journal/:journal/earlyaccess/:sortType?', - categories: ['journal'], - example: '/ieee/journal/5306045/earlyaccess', - parameters: { journal: 'Issue code, the number of the `isnumber` in the URL', sortType: 'Sort Type, default: `vol-only-seq`, the part of the URL after `sortType`' }, - features: { - requireConfig: false, - requirePuppeteer: false, - antiCrawler: false, - supportBT: false, - supportPodcast: false, - supportScihub: false, - }, - name: 'Early Access Journal', - maintainers: ['5upernova-heng'], - handler, -}; - -async function handler(ctx) { - const isnumber = ctx.req.param('journal'); - const sortType = ctx.req.param('sortType') ?? 'vol-only-seq'; - const host = 'https://ieeexplore.ieee.org'; - const jrnlUrl = `${host}/xpl/tocresult.jsp?isnumber=${isnumber}`; - - const response = await got(`${host}/rest/publication/home/metadata?issueid=${isnumber}`, { - cookieJar, - }).json(); - const punumber = response.publicationNumber; - const volume = response.currentIssue.volume; - const jrnlName = response.displayTitle; - - const response2 = await got - .post(`${host}/rest/search/pub/${punumber}/issue/${isnumber}/toc`, { - cookieJar, - json: { - punumber, - isnumber, - sortType, - rowsPerPage: '100', - }, - }) - .json(); - let list = response2.records.map((item) => { - const $2 = load(item.articleTitle); - const title = $2.text(); - const link = item.htmlLink; - const doi = item.doi; - let authors = 'Do not have author'; - if (Object.hasOwn(item, 'authors')) { - authors = item.authors.map((itemAuth) => itemAuth.preferredName).join('; '); - } - const abstract = Object.hasOwn(item, 'abstract') ? item.abstract : ''; - return { - title, - link, - authors, - doi, - volume, - abstract, - }; - }); - - const renderDesc = (item) => - art(path.join(__dirname, 'templates/description.art'), { - item, - }); - list = await Promise.all( - list.map((item) => - cache.tryGet(item.link, async () => { - if (item.abstract !== '') { - const response3 = await got(`${host}${item.link}`); - const { abstract } = JSON.parse(response3.body.match(/metadata=(.*);/)[1]); - const $3 = load(abstract); - item.abstract = $3.text(); - item.description = renderDesc(item); - } - return item; - }) - ) - ); - - return { - title: jrnlName, - link: jrnlUrl, - item: list, - }; -} diff --git a/lib/routes/ieee/journal.ts b/lib/routes/ieee/journal.ts index f8e38085b9aa6..c8ffa34713796 100644 --- a/lib/routes/ieee/journal.ts +++ b/lib/routes/ieee/journal.ts @@ -2,89 +2,72 @@ import { Route } from '@/types'; import { getCurrentPath } from '@/utils/helpers'; const __dirname = getCurrentPath(import.meta.url); -import cache from '@/utils/cache'; import got from '@/utils/got'; -import { load } from 'cheerio'; import path from 'node:path'; import { art } from '@/utils/render'; -import { CookieJar } from 'tough-cookie'; -const cookieJar = new CookieJar(); +const ieeeHost = 'https://ieeexplore.ieee.org'; export const route: Route = { - path: ['/:journal/latest/vol/:sortType?', '/journal/:journal/:sortType?'], - name: 'Unknown', - maintainers: [], + name: 'IEEE Journal Articles', + maintainers: ['HenryQW'], + categories: ['journal'], + path: '/journal/:punumber/:earlyAccess?', + parameters: { + punumber: 'Publication Number, look for `punumber` in the URL', + earlyAccess: 'Optional, set any value to get early access articles', + }, + example: '/ieee/journal/6287639/preprint', handler, }; async function handler(ctx) { - const punumber = ctx.req.param('journal'); - const sortType = ctx.req.param('sortType') ?? 'vol-only-seq'; - const host = 'https://ieeexplore.ieee.org'; - const jrnlUrl = `${host}/xpl/mostRecentIssue.jsp?punumber=${punumber}`; + const publicationNumber = ctx.req.param('punumber'); + const earlyAccess = !!ctx.req.param('earlyAccess'); - const response = await got(`${host}/rest/publication/home/metadata?pubid=${punumber}`, { - cookieJar, - }).json(); - const volume = response.currentIssue.volume; - const isnumber = response.currentIssue.issueNumber; - const jrnlName = response.displayTitle; + const metadata = await fetchMetadata(publicationNumber); + const { displayTitle, currentIssue, preprintIssue, coverImagePath } = metadata; + const { issueNumber, volume } = earlyAccess ? preprintIssue : currentIssue; - const response2 = await got - .post(`${host}/rest/search/pub/${punumber}/issue/${isnumber}/toc`, { - cookieJar, - json: { - punumber, - isnumber, - sortType, - rowsPerPage: '100', - }, - }) - .json(); - let list = response2.records.map((item) => { - const $2 = load(item.articleTitle); - const title = $2.text(); - const link = item.htmlLink; - const doi = item.doi; - let authors = 'Do not have author'; - if (Object.hasOwn(item, 'authors')) { - authors = item.authors.map((itemAuth) => itemAuth.preferredName).join('; '); - } - let abstract = ''; - Object.hasOwn(item, 'abstract') ? (abstract = item.abstract) : (abstract = ''); - return { - title, - link, - authors, - doi, - volume, - abstract, - }; - }); + const tocData = await fetchTOCData(publicationNumber, issueNumber); + const list = tocData.records.map((item) => { + const mappedItem = mapRecordToItem(volume)(item); - const renderDesc = (item) => - art(path.join(__dirname, 'templates/description.art'), { - item, + mappedItem.description = art(path.join(__dirname, 'templates/description.art'), { + item: mappedItem, }); - list = await Promise.all( - list.map((item) => - cache.tryGet(item.link, async () => { - if (item.abstract !== '') { - const response3 = await got(`${host}${item.link}`); - const { abstract } = JSON.parse(response3.body.match(/metadata=(.*);/)[1]); - const $3 = load(abstract); - item.abstract = $3.text(); - item.description = renderDesc(item); - } - return item; - }) - ) - ); + + return mappedItem; + }); return { - title: jrnlName, - link: jrnlUrl, + title: displayTitle, + link: `${ieeeHost}/xpl/tocresult.jsp?isnumber=${issueNumber}`, item: list, + image: `${ieeeHost}${coverImagePath}`, }; } + +async function fetchMetadata(punumber) { + const response = await got(`${ieeeHost}/rest/publication/home/metadata?pubid=${punumber}`); + return response.data; +} + +async function fetchTOCData(punumber, isnumber) { + const response = await got.post(`${ieeeHost}/rest/search/pub/${punumber}/issue/${isnumber}/toc`, { + json: { punumber, isnumber, rowsPerPage: '100' }, + }); + return response.data; +} + +function mapRecordToItem(volume) { + return (item) => ({ + abstract: item.abstract || '', + authors: item.authors ? item.authors.map((author) => author.preferredName).join('; ') : '', + description: '', + doi: item.doi, + link: item.htmlLink, + title: item.articleTitle || '', + volume, + }); +} diff --git a/lib/routes/ieee/recent.ts b/lib/routes/ieee/recent.ts deleted file mode 100644 index b5f75bc7eef52..0000000000000 --- a/lib/routes/ieee/recent.ts +++ /dev/null @@ -1,107 +0,0 @@ -import { Route } from '@/types'; -import { getCurrentPath } from '@/utils/helpers'; -const __dirname = getCurrentPath(import.meta.url); - -import cache from '@/utils/cache'; -import got from '@/utils/got'; -import { load } from 'cheerio'; -import path from 'node:path'; -import { art } from '@/utils/render'; - -import { CookieJar } from 'tough-cookie'; -const cookieJar = new CookieJar(); - -export const route: Route = { - path: ['/:journal/latest/date/:sortType?', '/journal/:journal/recent/:sortType?'], - name: 'Unknown', - maintainers: [], - handler, -}; - -async function handler(ctx) { - const punumber = ctx.req.param('journal'); - const sortType = ctx.req.param('sortType') ?? 'vol-only-seq'; - const host = 'https://ieeexplore.ieee.org'; - const jrnlUrl = `${host}/xpl/mostRecentIssue.jsp?punumber=${punumber}`; - - const date = new Date(); - const year = date.getFullYear(); - const month = date.getMonth() + 1; - let strYM, endYM; - const snap = 2; - if (1 <= month && month <= snap) { - month - snap + 12 < 10 ? (strYM = year - 1 + '0' + (month - snap + 12)) : (strYM = year - 1 + '' + (month - snap + 12)); - endYM = year + '0' + month; - } else if (snap < month && month < 10) { - month - snap < 10 ? (strYM = year + '0' + (month - snap)) : (strYM = year + '' + (month - snap)); - endYM = year + '0' + month; - } else { - month - snap < 10 ? (strYM = year + '0' + (month - snap)) : (strYM = year + '' + (month - snap)); - endYM = year + '' + month; - } - - const response = await got(`${host}/rest/publication/home/metadata?pubid=${punumber}`, { - cookieJar, - }).json(); - const volume = response.currentIssue.volume; - const isnumber = response.currentIssue.issueNumber; - const jrnlName = response.displayTitle; - - const response2 = await got - .post(`${host}/rest/search/pub/${punumber}/issue/${isnumber}/toc`, { - cookieJar, - json: { - punumber, - isnumber, - sortType, - rowsPerPage: '100', - ranges: [strYM + `01_` + endYM + `31_Search Latest Date`], - }, - }) - .json(); - let list = response2.records.map((item) => { - const $2 = load(item.articleTitle); - const title = $2.text(); - const link = item.htmlLink; - const doi = item.doi; - let authors = 'Do not have author'; - if (Object.hasOwn(item, 'authors')) { - authors = item.authors.map((itemAuth) => itemAuth.preferredName).join('; '); - } - let abstract = ''; - Object.hasOwn(item, 'abstract') ? (abstract = item.abstract) : (abstract = ''); - return { - title, - link, - authors, - doi, - volume, - abstract, - }; - }); - - const renderDesc = (item) => - art(path.join(__dirname, 'templates/description.art'), { - item, - }); - list = await Promise.all( - list.map((item) => - cache.tryGet(item.link, async () => { - if (item.abstract !== '') { - const response3 = await got(`${host}${item.link}`); - const { abstract } = JSON.parse(response3.body.match(/metadata=(.*);/)[1]); - const $3 = load(abstract); - item.abstract = $3.text(); - item.description = renderDesc(item); - } - return item; - }) - ) - ); - - return { - title: `${jrnlName} - Recent`, - link: jrnlUrl, - item: list, - }; -} diff --git a/lib/routes/ieee/templates/description.art b/lib/routes/ieee/templates/description.art index 04367cc08d3da..a9e8c5da22219 100644 --- a/lib/routes/ieee/templates/description.art +++ b/lib/routes/ieee/templates/description.art @@ -3,7 +3,7 @@

{{ item.authors }}
- https://doi.org/{{ item.doi }}
+ https://doi.org/{{ item.doi }}
Volume {{ item.volume }}