diff --git a/src/index.mjs b/src/index.mjs index 8bf2fa8..bc749b0 100644 --- a/src/index.mjs +++ b/src/index.mjs @@ -1,13 +1,7 @@ import console from "console"; import express from "express"; import process from "process"; -import puppeteer from "puppeteer-core"; -import { fetch } from "undici"; -import { - asyncExpressMiddleware, - createCommonSearchAPI, - defaultUserAgent, -} from "./utils.mjs"; +import { createCommonSearchAPI } from "./utils.mjs"; if (process.env.PW_REMOTE_URL === undefined) { console.error("PW_REMOTE_URL is required"); @@ -18,56 +12,12 @@ const app = express(); app.get( "/sogou", - asyncExpressMiddleware(async (req, res) => { - const { search } = req.query; - const browser = await puppeteer.connect({ - browserWSEndpoint: process.env.PW_REMOTE_URL, - }); - const page = await browser.newPage(); - await page.setUserAgent(defaultUserAgent()); - await page.goto(`https://www.sogou.com/web?query=${search}`, { - waitUntil: "networkidle0", - timeout: 30_000, - referer: "https://www.sogou.com/", - }); - const results = await page.$(".results"); - const cards = await results.$$(".vrwrap"); - - const refLinks = await Promise.all( - cards.map(async (card) => { - const item = await card.evaluate((node) => { - const linkEle = node.querySelector("h3 a"); - if (!linkEle) return; - const link = linkEle.href; - - // get text - const title = linkEle.innerText; - const description = node.querySelector(".space-txt")?.innerText; - const img = node.querySelector("img")?.src; - return { - title, - link, - description, - img, - }; - }); - if (!item?.link) return; - const res = await fetch(process.env.TF_URL + "/extract", { - method: "POST", - headers: { - "Content-Type": "application/json", - "User-Agent": defaultUserAgent(), - }, - body: JSON.stringify({ url: item.link }), - }); - if (!res.ok) return item; - const data = await res.json(); - if (!data.text) return item; - return { ...item, text: data.text }; - }), - ); - await browser.close(); - return res.json(refLinks.filter(Boolean)); + createCommonSearchAPI({ + urlPrefix: "https://www.sogou.com/web?query=", + resultsItemSelector: ".results .vrwrap", + titleSelector: "h3 a", + linkSelector: "h3 a", + descriptionSelector: ".space-txt", }), ); diff --git a/src/utils.mjs b/src/utils.mjs index 62bf316..2436114 100644 --- a/src/utils.mjs +++ b/src/utils.mjs @@ -61,6 +61,10 @@ export function createCommonSearchAPI(options) { }; }, options); if (!item?.link) return; + if (!process.env.TF_URL) { + console.warn("TF_URL is not set, skipping text extraction"); + return item; + } const res = await fetch(process.env.TF_URL + "/extract", { method: "POST", headers: {