Skip to content

Commit

Permalink
feat: hentaiComic support
Browse files Browse the repository at this point in the history
  • Loading branch information
IITII committed Nov 28, 2023
1 parent 6bb1f69 commit dcbb36b
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 3 deletions.
3 changes: 3 additions & 0 deletions config/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ const config = {
// 4kup
kupLimit: 3,
kupTagsLimit: 1,
// hentaiComic
hentaiComicLimit: 3,
hentaiComicTagsLimit: 1,
// 图片 header
headLimit: 20,
// 上/下一页
Expand Down
4 changes: 4 additions & 0 deletions libs/download/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ const m131 = require('./sites/M131'),
m131Tags = require('./sites/M131Tags')
const kup = require('./sites/kup.js'),
kupTags = require('./sites/kupTags.js')
const hentaiComic = require('./sites/HentaiComic'),
hentaiComicTags = require('./sites/HentaiComicTags')


module.exports = {
Expand Down Expand Up @@ -67,4 +69,6 @@ module.exports = {
m131Tags,
kup,
kupTags,
hentaiComic,
hentaiComicTags,
}
53 changes: 53 additions & 0 deletions libs/download/sites/HentaiComic.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/**
* @author IITII <ccmejx@gmail.com>
* @date 2023/11/28
*/
'use strict'

const path = require('path')
const {getImgArr, arrToAbsUrl, droppedPage, urlTextsToAbs} = require('../dl_utils')
const {titleFormat} = require('../../utils')
const {uniq} = require('lodash')

/**
 * Collect the images of one album.
 *
 * The given page is always scraped. When the URL is an album "index" or
 * "slide" page, the matching "gallery" page of the same album id is scraped
 * as well and its images/cost are merged into the first result.
 *
 * @param {string} url album page URL
 * @returns {Promise<object>} result of `getImgArr` for `url`, with `imgs` and
 *   `cost` extended by the gallery page when one was fetched
 */
async function getImageArray(url) {
  const res = await getImgArr(url, handle_dom)
  // Dot is escaped on purpose: an unescaped `.` would also accept e.g. "...-12xhtml".
  const m = url.match(/photos-(?:index|slide)-aid-(\d+)\.html/)
  if (!m) {
    return res
  }
  // NOTE(review): presumably only the gallery page carries the full image list — confirm.
  const galleryUrl = `https://www.hentaicomic.ru/photos-gallery-aid-${m[1]}.html`
  const gallery = await getImgArr(galleryUrl, handle_dom)
  res.imgs = res.imgs.concat(gallery.imgs)
  res.cost += gallery.cost
  return res
}

/**
 * DOM callback for an album page: extracts the album title and image URLs.
 *
 * @param {Function} $ cheerio-style root; `$('title').text()` and `$.text()` are used
 * @param {string} original URL of the page, passed on to `arrToAbsUrl`
 * @returns {Promise<{title: string, imgs: string[]}>}
 */
async function handle_dom($, original) {
  // Drop the site-wide suffix so only the album name is left.
  const rawTitle = $('title').text()?.replace(/ ?- ?列表 ?- ?紳士漫畫-專註分享漢化本子\|邪惡漫畫/, '')

  // The image list sits on a single line of page text that mentions both
  // `imglist` and `fast_img_host`.
  const listLine = $.text()
    ?.split('\n')
    .find(line => line.includes('imglist') && line.includes('fast_img_host'))

  // Each entry ends with `},`; the URL is the protocol-relative part between
  // `"//` and the next `\",`. The capture is lazy so that a later `\",` in the
  // same entry (e.g. after a caption) is not swallowed into the URL.
  // NOTE(review): URLs are forced to http:// although the site is served over https — confirm.
  const found = (listLine ?? '')
    .split('},')
    .map(entry => entry.match(/"\/\/([\s\S]+?)\\",/)?.[1])
    .filter(Boolean)
    .map(hostAndPath => `http://${hostAndPath}`)

  return {
    title: titleFormat(rawTitle),
    imgs: arrToAbsUrl(uniq(found), original),
  }
}

let url = 'https://www.hentaicomic.ru/photos-slide-aid-227644.html'
// url = 'https://www.hentaicomic.ru/photos-gallery-aid-227644.html'
getImageArray(url).then(console.log).catch(console.error)

module.exports = {
getImageArray,
}
50 changes: 50 additions & 0 deletions libs/download/sites/HentaiComicTags.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/**
* @author IITII <ccmejx@gmail.com>
* @date 2022/11/03
*/
'use strict'

const {get_dom, getTagImgArr, urlTextsToAbs} = require('../dl_utils')
const {titleFormat} = require('../../utils')
const pic = require('./HentaiComic.js')

module.exports = {
getTagUrls,
getImageArray,
}

/**
 * Load a search/tag listing page and parse it with this file's `handle_dom`.
 *
 * @param {string} url listing page URL
 * @returns {Promise<*>} whatever `get_dom` resolves to for that page
 */
async function getTagUrls(url) {
  const listing = await get_dom(url, handle_dom)
  return listing
}

/**
 * Entry point for search/tag pages: hands the URL to `getTagImgArr` together
 * with the listing parser (`getTagUrls`) and the album scraper
 * (`pic.getImageArray`).
 *
 * @param {string} url listing page URL
 * @returns {Promise<*>} whatever `getTagImgArr` resolves to
 */
async function getImageArray(url) {
  const albums = await getTagImgArr(url, getTagUrls, pic.getImageArray)
  return albums
}

/**
 * DOM callback for a search/tag listing page: extracts the page title and one
 * `{url, text, poster}` entry per listed album.
 *
 * Images and anchors inside `.gallary_wrap .pic_box` are paired by position.
 * Entries whose anchor has no `href` (or that have no anchor at all) are
 * dropped instead of throwing; a missing `alt` yields an empty text.
 *
 * @param {Function} $ cheerio-style selector function
 * @param {string} original URL of the page, passed on to `urlTextsToAbs`
 * @returns {Promise<{title: string, imgs: Array<{url: string, text: string, poster: string}>}>}
 */
async function handle_dom($, original) {
  // Strip the search prefix and the site-wide suffix from the page title.
  // NOTE(review): the character class lists ':' twice; one of them was
  // possibly meant to be the full-width colon — confirm.
  let title = $('title').text()?.replace(/搜索[::] ?/, '')
  title = title?.replace(/ ?- ?紳士漫畫-專註分享漢化本子\|邪惡漫畫/, '')

  const hrefs = $('.gallary_wrap .pic_box a').map((i, el) => el.attribs['href']).get()
  const entries = $('.gallary_wrap .pic_box img').map((i, el) => {
    // Prefer the `data-src` attribute and fall back to the plain `src`.
    const poster = el.attribs['data-src'] || el.attribs['src']
    // Search results wrap the matched keyword in <em>; strip those tags.
    const text = (el.attribs.alt ?? '').replace(/<\/?em>/g, '')
    const href = hrefs[i]
    // Rewrite album "index" links to the "slide" page of the same album id.
    const m = href?.match(/photos-index-aid-(\d+)\.html/)
    const url = m ? `https://www.hentaicomic.ru/photos-slide-aid-${m[1]}.html` : href
    return {url, text, poster}
  }).get().filter(entry => !!entry.url)

  title = titleFormat(title)
  const url_texts = urlTextsToAbs(entries, original, true)
  return {title, imgs: url_texts}
}

// Manual smoke test. Guarded so that it only runs when this file is executed
// directly (`node HentaiComicTags.js`); without the guard every `require` of
// this module would trigger a network request and log its result.
if (require.main === module) {
  let url = 'https://www.hentaicomic.ru/search/?q=Kitkatkitty&f=_all&s=create_time_DESC&syn=yes'
  // url = 'https://www.hentaicomic.ru/albums-index-cate-3.html'
  // Report failures instead of leaving the rejection unhandled.
  getTagUrls(url).then(console.log).catch(console.error)
}
2 changes: 2 additions & 0 deletions services/tasks/TaskRunner.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ const supRaw = [
[...supportUrlArr[27]],
[...supportUrlArr[29]],
[...supportUrlArr[31]],
[...supportUrlArr[33]],
],
supRaw_flat = supRaw.flat(Infinity),
handle_limit = [
Expand All @@ -71,6 +72,7 @@ const supRaw = [
[download.asianTags, check.all],
[download.m131Tags, check.all],
[download.kupTags, check.all],
[download.hentaiComicTags, check.all],
]
// const special_url_raw = [0,3,3,3,7,8,9,9,12,12,13,14,14,14,14,14,14,15,16,16,16]
const special_url = [
Expand Down
22 changes: 19 additions & 3 deletions services/utils/support_urls_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
const {clip} = require('../../config/config'),
download = require('../../libs/download')

/**
* 二维数组形式, 按 tag/搜索, 详情页 格式排序.
* tag/搜索 之类的网址放前面
*/
const supRaw = [
['https://telegra.ph/',],
[
Expand Down Expand Up @@ -114,8 +118,12 @@ const supRaw = [
['https://www.mmm131.com/',],
['https://www.4kup.net/search?q='],
['https://www.4kup.net/',],
['https://www.hentaicomic.ru/search/?q=', 'https://www.hentaicomic.ru/albums-index-cate-3.html'],
['https://www.hentaicomic.ru/',],
],
// flat 之后方便判断用户输入的网址是否支持
supRaw_flat = supRaw.flat(Infinity),
// 和网址一一对应, 指定 handler 和 limit
handle_limit = [
[download.telegraph, clip.telegrafLimit],
[download.eveiraTags, clip.eveLimit],
Expand Down Expand Up @@ -150,7 +158,10 @@ const supRaw = [
[download.m131, clip.m131Limit],
[download.kupTags, clip.kupTagsLimit],
[download.kup, clip.kupLimit],
[download.hentaiComicTags, clip.hentaiComicTagsLimit],
[download.hentaiComic, clip.hentaiComicLimit],
]
// 特定 url 完全匹配, 指定 handler
const special_url = [
[/^https?:\/\/everia\.club\/?$/, 1],
[/^https?:\/\/junmeitu\.com\/beauty\/?$/, 5],
Expand Down Expand Up @@ -179,6 +190,7 @@ const special_url = [
[/^https?:\/\/www\.mmm131\.com\/mingxing\/?$/, 29],
[/^https?:\/\/www\.4kup\.net\/?$/, 31],
]
// 聚合网址搜索, 字符串替换关键字
const searchArr = [
'https://www.24fa.com/search.aspx?keyword={##}&where=title',
'https://junmeitu.com/search/{##}-1.html',
Expand All @@ -188,7 +200,8 @@ const searchArr = [
'https://tu.acgbox.org/index.php/search/{##}/',
'https://xx.knit.bid/search/?s={##}',
'https://asiantolick.com/search/{##}',
'https://www.4kup.net/search?q={##}&max-results=18'
'https://www.4kup.net/search?q={##}&max-results=18',
'https://www.hentaicomic.ru/search/?q={##}&f=_all&s=create_time_DESC&syn=yes',
]

let distinct_host = supRaw_flat.map(u => new URL(u))
Expand All @@ -206,17 +219,20 @@ function isSupport(text) {
return text && supRaw_flat.some(_ => text.includes(_))
}

/**
* 按类型, 提供支持的 handle_limit 的 handler 下标
*/
function filterSupStart(arr, img_or_tags = 'mix') {
let mix
mix = arr.filter(_ => supRaw_flat.some(s => _.startsWith(s)))
mix.push(...arr.filter(_ => special_url.some(s => s[0].test(_))))
let allowArr = []
switch (img_or_tags) {
case 'img':
allowArr = [0, 2, 4, 6, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]
allowArr = [0, 2, 4, 6, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34]
break
case 'tags':
allowArr = [1, 3, 5, 8, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
allowArr = [1, 3, 5, 8, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33]
break
case 'mix':
default:
Expand Down

0 comments on commit dcbb36b

Please sign in to comment.