From a77a2efdd77127fabf3544c192b572f2dbfcb7fe Mon Sep 17 00:00:00 2001 From: Theo Sun Date: Wed, 29 May 2024 20:50:59 +0800 Subject: [PATCH] feat: text detail --- package.json | 3 ++- src/index.mjs | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index c4eb9dc..9b28c52 100644 --- a/package.json +++ b/package.json @@ -21,6 +21,7 @@ "dependencies": { "express": "^4.19.2", "puppeteer-core": "^22.9.0", - "turndown": "^7.1.3" + "turndown": "^7.1.3", + "undici": "^6.18.1" } } diff --git a/src/index.mjs b/src/index.mjs index 3c89fa4..6c590d0 100644 --- a/src/index.mjs +++ b/src/index.mjs @@ -2,6 +2,7 @@ import console from "console"; import express from "express"; import process from "process"; import puppeteer from "puppeteer-core"; +import { fetch } from "undici"; import { asyncExpressMiddleware, defaultUserAgent } from "./utils.mjs"; if (process.env.PW_REMOTE_URL === undefined) { @@ -27,12 +28,14 @@ app.get( }); const results = await page.$(".results"); const cards = await results.$$(".vrwrap"); + const refLinks = await Promise.all( - cards.map((card) => { - return card.evaluate((node) => { + cards.map(async (card) => { + const item = await card.evaluate((node) => { const linkEle = node.querySelector("h3 a"); if (!linkEle) return; const link = linkEle.href; + // get text const title = linkEle.innerText; const description = node.querySelector(".space-txt")?.innerText; @@ -44,6 +47,19 @@ app.get( img, }; }); + if (!item?.link) return; + const res = await fetch(process.env.TF_URL + "/extract", { + method: "POST", + headers: { + "Content-Type": "application/json", + "User-Agent": defaultUserAgent(), + }, + body: JSON.stringify({ url: item.link }), + }); + if (!res.ok) return item; + const data = await res.json(); + if (!data.text) return item; + return { ...item, text: data.text }; }), ); await browser.close();