Skip to content

Commit

Permalink
fix(route/sis001): add cookie to solve anti-crawler (DIYgod#17259)
Browse files Browse the repository at this point in the history
* fix(route/sis001): add cookie to resolve anti-crawler

* fix desc

* move base url to env config

* update doc

* fix: cache cookie

---------
  • Loading branch information
keocheung authored Oct 23, 2024
1 parent bc020b6 commit ed8fa7a
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 18 deletions.
6 changes: 6 additions & 0 deletions lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ export type Config = {
scihub: {
host?: string;
};
sis001: {
baseUrl?: string;
};
skeb: {
bearerToken?: string;
};
Expand Down Expand Up @@ -663,6 +666,9 @@ const calculateValue = () => {
scihub: {
host: envs.SCIHUB_HOST || 'https://sci-hub.se/',
},
sis001: {
baseUrl: envs.SIS001_BASE_URL || 'https://sis001.com',
},
skeb: {
bearerToken: envs.SKEB_BEARER_TOKEN,
},
Expand Down
15 changes: 9 additions & 6 deletions lib/routes/sis001/author.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { config } from '@/config';
import { load } from 'cheerio';
import { baseUrl, getThread } from './common';
import type { Context } from 'hono';
import { getCookie, getThread } from './common';

export const route: Route = {
path: '/author/:id?',
Expand All @@ -22,11 +24,12 @@ export const route: Route = {
handler,
};

async function handler(ctx) {
async function handler(ctx: Context) {
const { id = '13131575' } = ctx.req.param();
const url = `${baseUrl}/forum/space.php?uid=${id}`;
const url = `${config.sis001.baseUrl}/forum/space.php?uid=${id}`;

const response = await got(url);
const cookie = await getCookie(url);
const response = await got(url, { headers: { cookie } });
const $ = load(response.data);

const username = $('div.bg div.title').text().replace('的个人空间', '');
Expand All @@ -37,12 +40,12 @@ async function handler(ctx) {
item = $(item);
return {
title: item.text(),
link: `${baseUrl}/forum/${item.attr('href')}`,
link: `${config.sis001.baseUrl}/forum/${item.attr('href')}`,
author: username,
};
});

items = await Promise.all(items.map((item) => cache.tryGet(item.link, async () => await getThread(item))));
items = await Promise.all(items.map((item) => cache.tryGet(item.link, async () => await getThread(cookie, item))));

return {
title: `${username}的主题`,
Expand Down
42 changes: 38 additions & 4 deletions lib/routes/sis001/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,45 @@ import got from '@/utils/got';
import { load } from 'cheerio';
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';
import { DataItem } from '@/types';
import CryptoJS from 'crypto-js';
import cache from '@/utils/cache';
import { config } from '@/config';

const baseUrl = 'https://www.sis001.com';
/**
 * Obtain the cookie header value that is sent along with later requests to the site.
 *
 * Fetches `url` without a cookie and looks for `toNumbers("<hex>")` calls in the
 * response body. When exactly three are present they are taken, in order, as the
 * AES key, the IV and the ciphertext; the decrypted value (hex-encoded) becomes
 * the value of the `CeRaHigh1` cookie.
 *
 * The result is stored through `cache.tryGet` under the fixed key `sis001:cookie`
 * with `config.cache.routeExpire` as its expiry, so the key does not depend on `url`.
 *
 * @param url - Page to request in order to receive the challenge script.
 * @returns `CeRaHigh1=<hex>` on success, or an empty string when the response does
 *          not contain exactly three `toNumbers("…")` values.
 */
function getCookie(url: string): Promise<string> {
    return cache.tryGet(
        'sis001:cookie',
        async () => {
            const response = await got(url);
            const rsp = response.data;

            // Collect every hex argument passed to toNumbers("…") in the returned page.
            const matches: string[] = Array.from(String(rsp).matchAll(/toNumbers\("([a-fA-F0-9]+)"\)/g), (m) => m[1]);

            // Anything other than key + IV + ciphertext means no usable challenge was found.
            if (matches.length !== 3) {
                return '';
            }

            const key = CryptoJS.enc.Hex.parse(matches[0]);
            const iv = CryptoJS.enc.Hex.parse(matches[1]);
            const encrypted = CryptoJS.enc.Hex.parse(matches[2]);

            // AES.decrypt expects a CipherParams object (or a string), not a bare object literal.
            const cipherParams = CryptoJS.lib.CipherParams.create({ ciphertext: encrypted });
            const decrypted = CryptoJS.AES.decrypt(cipherParams, key, { iv, padding: CryptoJS.pad.NoPadding });

            return 'CeRaHigh1=' + decrypted.toString(CryptoJS.enc.Hex);
        },
        config.cache.routeExpire,
        // NOTE(review): presumably disables expiry refresh on cache hit — confirm against cache.tryGet.
        false
    );
}

async function getThread(cookie: string, item: DataItem) {
const response = await got(item.link, { headers: { cookie } });
const $ = load(response.data);

item.category = $('.posttags a')
Expand Down Expand Up @@ -35,4 +69,4 @@ async function getThread(item) {
return item;
}

export { baseUrl, getThread };
export { getCookie, getThread };
18 changes: 10 additions & 8 deletions lib/routes/sis001/forum.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { config } from '@/config';
import { load } from 'cheerio';
import { baseUrl, getThread } from './common';
import type { Context } from 'hono';
import { getCookie, getThread } from './common';

export const route: Route = {
path: '/forum/:id?',
Expand All @@ -18,16 +20,16 @@ export const route: Route = {
supportScihub: false,
},
name: '子版块',
maintainers: [],
maintainers: ['TonyRL'],
handler,
};

async function handler(ctx) {
async function handler(ctx: Context) {
const { id = 76 } = ctx.req.param();
const url = `${baseUrl}/forum/forum-${id}-1.html`;

const response = await got(url);
const url = `${config.sis001.baseUrl}/forum/forum-${id}-1.html`;

const cookie = await getCookie(url);
const response = await got(url, { headers: { cookie } });
const $ = load(response.data);

let items = $('form table')
Expand All @@ -39,12 +41,12 @@ async function handler(ctx) {
item = $(item);
return {
title: item.find('th em').text() + ' ' + item.find('span a').eq(0).text(),
link: new URL(item.find('span a').eq(0).attr('href'), `${baseUrl}/forum/`).href,
link: new URL(item.find('span a').eq(0).attr('href'), `${config.sis001.baseUrl}/forum/`).href,
author: item.find('.author a').text(),
};
});

items = await Promise.all(items.map((item) => cache.tryGet(item.link, async () => await getThread(item))));
items = await Promise.all(items.map((item) => cache.tryGet(item.link, async () => await getThread(cookie, item))));

return {
title: $('head title').text(),
Expand Down
3 changes: 3 additions & 0 deletions lib/routes/sis001/namespace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@ import type { Namespace } from '@/types';
/**
 * Namespace metadata for the sis001 routes.
 *
 * `description` is a `:::tip` block telling deployers that the routes default to
 * `https://sis001.com` and that the `SIS001_BASE_URL` environment variable can be
 * set to use a different address.
 */
export const namespace: Namespace = {
    name: '第一会所',
    url: 'sis001.com',
    description: `:::tip
第一会所有多个备用网址,本路由默认使用\`https://sis001.com\`,若该网址无法访问,可以在部署实例的时候通过\`SIS001_BASE_URL\`环境变量配置要使用的地址,如\`https://www.sis001.com\`等。
:::`,
};

0 comments on commit ed8fa7a

Please sign in to comment.