feat: hentaiComic support

IITII · Nov 28, 2023 · dcbb36b · dcbb36b
1 parent 6bb1f69
commit dcbb36b
Show file tree

Hide file tree

Showing 6 changed files with 131 additions and 3 deletions.
diff --git a/config/config.js b/config/config.js
@@ -158,6 +158,9 @@ const config = {
     // 4kup
     kupLimit: 3,
     kupTagsLimit: 1,
+    // hentaiComic
+    hentaiComicLimit: 3,
+    hentaiComicTagsLimit: 1,
     // 图片 header
     headLimit: 20,
     // 上/下一页

diff --git a/libs/download/index.js b/libs/download/index.js
@@ -36,6 +36,8 @@ const m131 = require('./sites/M131'),
   m131Tags = require('./sites/M131Tags')
 const kup = require('./sites/kup.js'),
   kupTags = require('./sites/kupTags.js')
+const hentaiComic = require('./sites/HentaiComic'),
+  hentaiComicTags = require('./sites/HentaiComicTags')
 
 
 module.exports = {
@@ -67,4 +69,6 @@ module.exports = {
   m131Tags,
   kup,
   kupTags,
+  hentaiComic,
+  hentaiComicTags,
 }
diff --git a/libs/download/sites/HentaiComic.js b/libs/download/sites/HentaiComic.js
@@ -0,0 +1,53 @@
+/**
+ * @author IITII <ccmejx@gmail.com>
+ * @date 2023/11/28
+ */
+'use strict'
+
+const path = require('path')
+const {getImgArr, arrToAbsUrl, droppedPage, urlTextsToAbs} = require('../dl_utils')
+const {titleFormat} = require('../../utils')
+const {uniq} = require('lodash')
+
+async function getImageArray(url) {
+  let res = await getImgArr(url, handle_dom)
+  let m = url.match(/photos-index-aid-(\d+).html/)
+  if (!m) {
+    m = url.match(/photos-slide-aid-(\d+).html/)
+  }
+  if (m) {
+    let url1 = `https://www.hentaicomic.ru/photos-gallery-aid-${m[1]}.html`
+    let res1 = await getImgArr(url1, handle_dom)
+    res.imgs = res.imgs.concat(res1.imgs)
+    res.cost += res1.cost
+  }
+  return res
+}
+
+async function handle_dom($, original) {
+  let title, imgs, otherPages, related, tags, denyPages, urls, external
+  denyPages = '«,»'.split(',')
+
+  title = $('title').text()?.replace(/ ?- ?列表 ?- ?紳士漫畫-專註分享漢化本子\|邪惡漫畫/, '')
+  imgs = $.text()?.split('\n').filter(_ => _.includes('imglist')).filter(_ => _.includes('fast_img_host'))[0]
+  // imgs = imgs.replace(/^document.writeln\("(\s+)?/, '').replace(/;?"\);/, '')
+  imgs = imgs?.split('},').map(_ => {
+    let m = _.match(/"\/\/([\s\S]+)\\",/)
+    return m ? m[1] : ''
+  }).filter(_ => !!_).map(_ => `http://${_}`)
+
+  title = titleFormat(title)
+  imgs = uniq(imgs)
+  imgs = arrToAbsUrl(imgs, original)
+
+  const res = {title, imgs,}
+  return Promise.resolve(res)
+}
+
+let url = 'https://www.hentaicomic.ru/photos-slide-aid-227644.html'
+// url = 'https://www.hentaicomic.ru/photos-gallery-aid-227644.html'
+getImageArray(url).then(console.log).catch(console.error)
+
+module.exports = {
+  getImageArray,
+}
diff --git a/libs/download/sites/HentaiComicTags.js b/libs/download/sites/HentaiComicTags.js
@@ -0,0 +1,50 @@
+/**
+ * @author IITII <ccmejx@gmail.com>
+ * @date 2022/11/03
+ */
+'use strict'
+
+const {get_dom, getTagImgArr, urlTextsToAbs} = require('../dl_utils')
+const {titleFormat} = require('../../utils')
+const pic = require('./HentaiComic.js')
+
+module.exports = {
+  getTagUrls,
+  getImageArray,
+}
+
+async function getTagUrls(url) {
+  return get_dom(url, handle_dom)
+}
+
+async function getImageArray(url) {
+  return getTagImgArr(url, getTagUrls, pic.getImageArray)
+}
+
+async function handle_dom($, original) {
+  let title, posters, url_texts, urls, texts
+
+  title = $('title').text()?.replace(/搜索[：:] ?/, '')
+  title = title?.replace(/ ?- ?紳士漫畫-專註分享漢化本子\|邪惡漫畫/, '')
+  // title = title.replace('')
+  url_texts = $('.gallary_wrap .pic_box img')
+  urls = $('.gallary_wrap .pic_box a').map((i, el) => el.attribs['href']).get()
+  url_texts = url_texts.map((i, el) => {
+    let poster = el.attribs['data-src'] || el.attribs['src'],
+      text = el.attribs.alt,
+      url = urls[i]
+    text = text.replace(/<\/?em>/g, '')
+    url = url.match(/photos-index-aid-(\d+).html/)
+    url = url ? `https://www.hentaicomic.ru/photos-slide-aid-${url[1]}.html` : urls[i]
+    return {url, text, poster}
+  }).get()
+
+  title = titleFormat(title)
+  url_texts = urlTextsToAbs(url_texts, original, true)
+  const res = {title, imgs: url_texts}
+  return Promise.resolve(res)
+}
+
+let url = 'https://www.hentaicomic.ru/search/?q=Kitkatkitty&f=_all&s=create_time_DESC&syn=yes'
+// url = 'https://www.hentaicomic.ru/albums-index-cate-3.html'
+getTagUrls(url).then(console.log)
diff --git a/services/tasks/TaskRunner.js b/services/tasks/TaskRunner.js
@@ -50,6 +50,7 @@ const supRaw = [
     [...supportUrlArr[27]],
     [...supportUrlArr[29]],
     [...supportUrlArr[31]],
+    [...supportUrlArr[33]],
   ],
   supRaw_flat = supRaw.flat(Infinity),
   handle_limit = [
@@ -71,6 +72,7 @@ const supRaw = [
     [download.asianTags, check.all],
     [download.m131Tags, check.all],
     [download.kupTags, check.all],
+    [download.hentaiComicTags, check.all],
   ]
 // const special_url_raw = [0,3,3,3,7,8,9,9,12,12,13,14,14,14,14,14,14,15,16,16,16]
 const special_url = [

diff --git a/services/utils/support_urls_utils.js b/services/utils/support_urls_utils.js
@@ -6,6 +6,10 @@
 const {clip} = require('../../config/config'),
   download = require('../../libs/download')
 
+/**
+ * 二维数组形式, 按 tag/搜索, 详情页 格式排序.
+ * tag/搜索 之类的网址放前面
+ */
 const supRaw = [
     ['https://telegra.ph/',],
     [
@@ -114,8 +118,12 @@ const supRaw = [
     ['https://www.mmm131.com/',],
     ['https://www.4kup.net/search?q='],
     ['https://www.4kup.net/',],
+    ['https://www.hentaicomic.ru/search/?q=', 'https://www.hentaicomic.ru/albums-index-cate-3.html'],
+    ['https://www.hentaicomic.ru/',],
   ],
+  // flat 之后方便判断用户输入的网址是否支持
   supRaw_flat = supRaw.flat(Infinity),
+  // 和网址一一对应, 指定 handler 和 limit
   handle_limit = [
     [download.telegraph, clip.telegrafLimit],
     [download.eveiraTags, clip.eveLimit],
@@ -150,7 +158,10 @@ const supRaw = [
     [download.m131, clip.m131Limit],
     [download.kupTags, clip.kupTagsLimit],
     [download.kup, clip.kupLimit],
+    [download.hentaiComicTags, clip.hentaiComicTagsLimit],
+    [download.hentaiComic, clip.hentaiComicLimit],
   ]
+// 特定 url 完全匹配, 指定 handler
 const special_url = [
   [/^https?:\/\/everia\.club\/?$/, 1],
   [/^https?:\/\/junmeitu\.com\/beauty\/?$/, 5],
@@ -179,6 +190,7 @@ const special_url = [
   [/^https?:\/\/www\.mmm131\.com\/mingxing\/?$/, 29],
   [/^https?:\/\/www\.4kup\.net\/?$/, 31],
 ]
+// 聚合网址搜索, 字符串替换关键字
 const searchArr = [
   'https://www.24fa.com/search.aspx?keyword={##}&where=title',
   'https://junmeitu.com/search/{##}-1.html',
@@ -188,7 +200,8 @@ const searchArr = [
   'https://tu.acgbox.org/index.php/search/{##}/',
   'https://xx.knit.bid/search/?s={##}',
   'https://asiantolick.com/search/{##}',
-  'https://www.4kup.net/search?q={##}&max-results=18'
+  'https://www.4kup.net/search?q={##}&max-results=18',
+  'https://www.hentaicomic.ru/search/?q={##}&f=_all&s=create_time_DESC&syn=yes',
 ]
 
 let distinct_host = supRaw_flat.map(u => new URL(u))
@@ -206,17 +219,20 @@ function isSupport(text) {
   return text && supRaw_flat.some(_ => text.includes(_))
 }
 
+/**
+ * 按类型, 提供支持的 handle_limit 的 handler 下标
+ */
 function filterSupStart(arr, img_or_tags = 'mix') {
   let mix
   mix = arr.filter(_ => supRaw_flat.some(s => _.startsWith(s)))
   mix.push(...arr.filter(_ => special_url.some(s => s[0].test(_))))
   let allowArr = []
   switch (img_or_tags) {
     case 'img':
-      allowArr = [0, 2, 4, 6, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]
+      allowArr = [0, 2, 4, 6, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34]
       break
     case 'tags':
-      allowArr = [1, 3, 5, 8, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
+      allowArr = [1, 3, 5, 8, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33]
       break
     case 'mix':
     default: