From 6ccdbe59d70057b18b0752f6becb371a14e39424 Mon Sep 17 00:00:00 2001 From: Mohamed Bassem Date: Sat, 23 Nov 2024 14:20:17 +0000 Subject: [PATCH] fix(workers): Set a timeout on the screenshot call and completely skip it if screenshotting is disabled --- apps/workers/crawlerWorker.ts | 45 +++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/apps/workers/crawlerWorker.ts b/apps/workers/crawlerWorker.ts index 2dad98b7..376e50ea 100644 --- a/apps/workers/crawlerWorker.ts +++ b/apps/workers/crawlerWorker.ts @@ -277,18 +277,31 @@ async function crawlPage(jobId: string, url: string) { logger.info(`[Crawler][${jobId}] Finished waiting for the page to load.`); - const [htmlContent, screenshot] = await Promise.all([ - page.content(), - page.screenshot({ - // If you change this, you need to change the asset type in the store function. - type: "png", - encoding: "binary", - fullPage: serverConfig.crawler.fullPageScreenshot, - }), - ]); - logger.info( - `[Crawler][${jobId}] Finished capturing page content and a screenshot. FullPageScreenshot: ${serverConfig.crawler.fullPageScreenshot}`, - ); + const htmlContent = await page.content(); + logger.info(`[Crawler][${jobId}] Successfully fetched the page content.`); + + let screenshot: Buffer | undefined = undefined; + if (serverConfig.crawler.storeScreenshot) { + screenshot = await Promise.race([ + page + .screenshot({ + // If you change this, you need to change the asset type in the store function. + type: "png", + encoding: "binary", + fullPage: serverConfig.crawler.fullPageScreenshot, + }) + .catch(() => undefined), + new Promise((f) => setTimeout(f, 5000)), + ]); + if (!screenshot) { + logger.warn(`[Crawler][${jobId}] Failed to capture the screenshot.`); + } else { + logger.info( + `[Crawler][${jobId}] Finished capturing page content and a screenshot. FullPageScreenshot: ${serverConfig.crawler.fullPageScreenshot}`, + ); + } + } + return { htmlContent, screenshot, @@ -336,7 +349,7 @@ function extractReadableContent( } async function storeScreenshot( - screenshot: Buffer, + screenshot: Buffer | undefined, userId: string, jobId: string, ) { @@ -346,6 +359,12 @@ async function storeScreenshot( ); return null; } + if (!screenshot) { + logger.info( + `[Crawler][${jobId}] Skipping storing the screenshot as it's empty.`, + ); + return null; + } const assetId = newAssetId(); const contentType = "image/png"; const fileName = "screenshot.png";