Skip to content

Commit

Permalink
chore: update
Browse files: browse the repository at this point in the history
  • Loading branch information
Soontao committed May 29, 2024
1 parent 681fe37 commit b4e4a69
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 57 deletions.
64 changes: 7 additions & 57 deletions src/index.mjs
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
import console from "console";
import express from "express";
import process from "process";
import puppeteer from "puppeteer-core";
import { fetch } from "undici";
import {
asyncExpressMiddleware,
createCommonSearchAPI,
defaultUserAgent,
} from "./utils.mjs";
import { createCommonSearchAPI } from "./utils.mjs";

if (process.env.PW_REMOTE_URL === undefined) {
console.error("PW_REMOTE_URL is required");
@@ -18,56 +12,12 @@ const app = express();

app.get(
"/sogou",
asyncExpressMiddleware(async (req, res) => {
const { search } = req.query;
const browser = await puppeteer.connect({
browserWSEndpoint: process.env.PW_REMOTE_URL,
});
const page = await browser.newPage();
await page.setUserAgent(defaultUserAgent());
await page.goto(`https://www.sogou.com/web?query=${search}`, {
waitUntil: "networkidle0",
timeout: 30_000,
referer: "https://www.sogou.com/",
});
const results = await page.$(".results");
const cards = await results.$$(".vrwrap");

const refLinks = await Promise.all(
cards.map(async (card) => {
const item = await card.evaluate((node) => {
const linkEle = node.querySelector("h3 a");
if (!linkEle) return;
const link = linkEle.href;

// get text
const title = linkEle.innerText;
const description = node.querySelector(".space-txt")?.innerText;
const img = node.querySelector("img")?.src;
return {
title,
link,
description,
img,
};
});
if (!item?.link) return;
const res = await fetch(process.env.TF_URL + "/extract", {
method: "POST",
headers: {
"Content-Type": "application/json",
"User-Agent": defaultUserAgent(),
},
body: JSON.stringify({ url: item.link }),
});
if (!res.ok) return item;
const data = await res.json();
if (!data.text) return item;
return { ...item, text: data.text };
}),
);
await browser.close();
return res.json(refLinks.filter(Boolean));
createCommonSearchAPI({
urlPrefix: "https://www.sogou.com/web?query=",
resultsItemSelector: ".results .vrwrap",
titleSelector: "h3 a",
linkSelector: "h3 a",
descriptionSelector: ".space-txt",
}),
);

Expand Down
4 changes: 4 additions & 0 deletions src/utils.mjs
Original file line number Diff line number Diff line change
@@ -61,6 +61,10 @@ export function createCommonSearchAPI(options) {
};
}, options);
if (!item?.link) return;
if (!process.env.TF_URL) {
console.warn("TF_URL is not set, skipping text extraction");
return item;
}
const res = await fetch(process.env.TF_URL + "/extract", {
method: "POST",
headers: {
Expand Down

0 comments on commit b4e4a69

Please sign in to comment.