From dcbb36b5a80c55c179d228a558c5a839b07bcb34 Mon Sep 17 00:00:00 2001 From: IITII Date: Tue, 28 Nov 2023 14:23:05 +0800 Subject: [PATCH] feat: hentaiComic support --- config/config.js | 3 ++ libs/download/index.js | 4 ++ libs/download/sites/HentaiComic.js | 53 ++++++++++++++++++++++++++ libs/download/sites/HentaiComicTags.js | 50 ++++++++++++++++++++++++ services/tasks/TaskRunner.js | 2 + services/utils/support_urls_utils.js | 22 +++++++++-- 6 files changed, 131 insertions(+), 3 deletions(-) create mode 100644 libs/download/sites/HentaiComic.js create mode 100644 libs/download/sites/HentaiComicTags.js diff --git a/config/config.js b/config/config.js index 13385a3..05ddeac 100644 --- a/config/config.js +++ b/config/config.js @@ -158,6 +158,9 @@ const config = { // 4kup kupLimit: 3, kupTagsLimit: 1, + // hentaiComic + hentaiComicLimit: 3, + hentaiComicTagsLimit: 1, // 图片 header headLimit: 20, // 上/下一页 diff --git a/libs/download/index.js b/libs/download/index.js index f47d679..e3461b9 100644 --- a/libs/download/index.js +++ b/libs/download/index.js @@ -36,6 +36,8 @@ const m131 = require('./sites/M131'), m131Tags = require('./sites/M131Tags') const kup = require('./sites/kup.js'), kupTags = require('./sites/kupTags.js') +const hentaiComic = require('./sites/HentaiComic'), + hentaiComicTags = require('./sites/HentaiComicTags') module.exports = { @@ -67,4 +69,6 @@ module.exports = { m131Tags, kup, kupTags, + hentaiComic, + hentaiComicTags, } diff --git a/libs/download/sites/HentaiComic.js b/libs/download/sites/HentaiComic.js new file mode 100644 index 0000000..84bda08 --- /dev/null +++ b/libs/download/sites/HentaiComic.js @@ -0,0 +1,53 @@ +/** + * @author IITII + * @date 2023/11/28 + */ +'use strict' + +const path = require('path') +const {getImgArr, arrToAbsUrl, droppedPage, urlTextsToAbs} = require('../dl_utils') +const {titleFormat} = require('../../utils') +const {uniq} = require('lodash') + +async function getImageArray(url) { + let res = await getImgArr(url, handle_dom) + let m = url.match(/photos-index-aid-(\d+).html/) + if (!m) { + m = url.match(/photos-slide-aid-(\d+).html/) + } + if (m) { + let url1 = `https://www.hentaicomic.ru/photos-gallery-aid-${m[1]}.html` + let res1 = await getImgArr(url1, handle_dom) + res.imgs = res.imgs.concat(res1.imgs) + res.cost += res1.cost + } + return res +} + +async function handle_dom($, original) { + let title, imgs, otherPages, related, tags, denyPages, urls, external + denyPages = '«,»'.split(',') + + title = $('title').text()?.replace(/ ?- ?列表 ?- ?紳士漫畫-專註分享漢化本子\|邪惡漫畫/, '') + imgs = $.text()?.split('\n').filter(_ => _.includes('imglist')).filter(_ => _.includes('fast_img_host'))[0] + // imgs = imgs.replace(/^document.writeln\("(\s+)?/, '').replace(/;?"\);/, '') + imgs = imgs?.split('},').map(_ => { + let m = _.match(/"\/\/([\s\S]+)\\",/) + return m ? m[1] : '' + }).filter(_ => !!_).map(_ => `http://${_}`) + + title = titleFormat(title) + imgs = uniq(imgs) + imgs = arrToAbsUrl(imgs, original) + + const res = {title, imgs,} + return Promise.resolve(res) +} + +let url = 'https://www.hentaicomic.ru/photos-slide-aid-227644.html' +// url = 'https://www.hentaicomic.ru/photos-gallery-aid-227644.html' +getImageArray(url).then(console.log).catch(console.error) + +module.exports = { + getImageArray, +} diff --git a/libs/download/sites/HentaiComicTags.js b/libs/download/sites/HentaiComicTags.js new file mode 100644 index 0000000..cc45b27 --- /dev/null +++ b/libs/download/sites/HentaiComicTags.js @@ -0,0 +1,50 @@ +/** + * @author IITII + * @date 2022/11/03 + */ +'use strict' + +const {get_dom, getTagImgArr, urlTextsToAbs} = require('../dl_utils') +const {titleFormat} = require('../../utils') +const pic = require('./HentaiComic.js') + +module.exports = { + getTagUrls, + getImageArray, +} + +async function getTagUrls(url) { + return get_dom(url, handle_dom) +} + +async function getImageArray(url) { + return getTagImgArr(url, getTagUrls, pic.getImageArray) +} + +async function handle_dom($, original) { + let title, posters, url_texts, urls, texts + + title = $('title').text()?.replace(/搜索[::] ?/, '') + title = title?.replace(/ ?- ?紳士漫畫-專註分享漢化本子\|邪惡漫畫/, '') + // title = title.replace('') + url_texts = $('.gallary_wrap .pic_box img') + urls = $('.gallary_wrap .pic_box a').map((i, el) => el.attribs['href']).get() + url_texts = url_texts.map((i, el) => { + let poster = el.attribs['data-src'] || el.attribs['src'], + text = el.attribs.alt, + url = urls[i] + text = text.replace(/<\/?em>/g, '') + url = url.match(/photos-index-aid-(\d+).html/) + url = url ? `https://www.hentaicomic.ru/photos-slide-aid-${url[1]}.html` : urls[i] + return {url, text, poster} + }).get() + + title = titleFormat(title) + url_texts = urlTextsToAbs(url_texts, original, true) + const res = {title, imgs: url_texts} + return Promise.resolve(res) +} + +let url = 'https://www.hentaicomic.ru/search/?q=Kitkatkitty&f=_all&s=create_time_DESC&syn=yes' +// url = 'https://www.hentaicomic.ru/albums-index-cate-3.html' +getTagUrls(url).then(console.log) diff --git a/services/tasks/TaskRunner.js b/services/tasks/TaskRunner.js index 5553132..16d1da0 100644 --- a/services/tasks/TaskRunner.js +++ b/services/tasks/TaskRunner.js @@ -50,6 +50,7 @@ const supRaw = [ [...supportUrlArr[27]], [...supportUrlArr[29]], [...supportUrlArr[31]], + [...supportUrlArr[33]], ], supRaw_flat = supRaw.flat(Infinity), handle_limit = [ @@ -71,6 +72,7 @@ const supRaw = [ [download.asianTags, check.all], [download.m131Tags, check.all], [download.kupTags, check.all], + [download.hentaiComicTags, check.all], ] // const special_url_raw = [0,3,3,3,7,8,9,9,12,12,13,14,14,14,14,14,14,15,16,16,16] const special_url = [ diff --git a/services/utils/support_urls_utils.js b/services/utils/support_urls_utils.js index ac66fd1..c1d9c3d 100644 --- a/services/utils/support_urls_utils.js +++ b/services/utils/support_urls_utils.js @@ -6,6 +6,10 @@ const {clip} = require('../../config/config'), download = require('../../libs/download') +/** + * 二维数组形式, 按 tag/搜索, 详情页 格式排序. + * tag/搜索 之类的网址放前面 + */ const supRaw = [ ['https://telegra.ph/',], [ @@ -114,8 +118,12 @@ const supRaw = [ ['https://www.mmm131.com/',], ['https://www.4kup.net/search?q='], ['https://www.4kup.net/',], + ['https://www.hentaicomic.ru/search/?q=', 'https://www.hentaicomic.ru/albums-index-cate-3.html'], + ['https://www.hentaicomic.ru/',], ], + // flat 之后方便判断用户输入的网址是否支持 supRaw_flat = supRaw.flat(Infinity), + // 和网址一一对应, 指定 handler 和 limit handle_limit = [ [download.telegraph, clip.telegrafLimit], [download.eveiraTags, clip.eveLimit], @@ -150,7 +158,10 @@ const supRaw = [ [download.m131, clip.m131Limit], [download.kupTags, clip.kupTagsLimit], [download.kup, clip.kupLimit], + [download.hentaiComicTags, clip.hentaiComicTagsLimit], + [download.hentaiComic, clip.hentaiComicLimit], ] +// 特定 url 完全匹配, 指定 handler const special_url = [ [/^https?:\/\/everia\.club\/?$/, 1], [/^https?:\/\/junmeitu\.com\/beauty\/?$/, 5], @@ -179,6 +190,7 @@ const special_url = [ [/^https?:\/\/www\.mmm131\.com\/mingxing\/?$/, 29], [/^https?:\/\/www\.4kup\.net\/?$/, 31], ] +// 聚合网址搜索, 字符串替换关键字 const searchArr = [ 'https://www.24fa.com/search.aspx?keyword={##}&where=title', 'https://junmeitu.com/search/{##}-1.html', @@ -188,7 +200,8 @@ const searchArr = [ 'https://tu.acgbox.org/index.php/search/{##}/', 'https://xx.knit.bid/search/?s={##}', 'https://asiantolick.com/search/{##}', - 'https://www.4kup.net/search?q={##}&max-results=18' + 'https://www.4kup.net/search?q={##}&max-results=18', + 'https://www.hentaicomic.ru/search/?q={##}&f=_all&s=create_time_DESC&syn=yes', ] let distinct_host = supRaw_flat.map(u => new URL(u)) @@ -206,6 +219,9 @@ function isSupport(text) { return text && supRaw_flat.some(_ => text.includes(_)) } +/** + * 按类型, 提供支持的 handle_limit 的 handler 下标 + */ function filterSupStart(arr, img_or_tags = 'mix') { let mix mix = arr.filter(_ => supRaw_flat.some(s => _.startsWith(s))) @@ -213,10 +229,10 @@ function filterSupStart(arr, img_or_tags = 'mix') { let allowArr = [] switch (img_or_tags) { case 'img': - allowArr = [0, 2, 4, 6, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32] + allowArr = [0, 2, 4, 6, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34] break case 'tags': - allowArr = [1, 3, 5, 8, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31] + allowArr = [1, 3, 5, 8, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33] break case 'mix': default: