Skip to content

Commit

Permalink
fix(workers): Set a timeout on the screenshot call and completely skip it if screenshotting is disabled
Browse files Browse the repository at this point in the history
  • Loading branch information
MohamedBassem committed Nov 23, 2024
1 parent 378ad9b commit 6ccdbe5
Showing 1 changed file with 32 additions and 13 deletions.
45 changes: 32 additions & 13 deletions apps/workers/crawlerWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -277,18 +277,31 @@ async function crawlPage(jobId: string, url: string) {

logger.info(`[Crawler][${jobId}] Finished waiting for the page to load.`);

const [htmlContent, screenshot] = await Promise.all([
page.content(),
page.screenshot({
// If you change this, you need to change the asset type in the store function.
type: "png",
encoding: "binary",
fullPage: serverConfig.crawler.fullPageScreenshot,
}),
]);
logger.info(
`[Crawler][${jobId}] Finished capturing page content and a screenshot. FullPageScreenshot: ${serverConfig.crawler.fullPageScreenshot}`,
);
const htmlContent = await page.content();
logger.info(`[Crawler][${jobId}] Successfully fetched the page content.`);

let screenshot: Buffer | undefined = undefined;
if (serverConfig.crawler.storeScreenshot) {
screenshot = await Promise.race<Buffer | undefined>([
page
.screenshot({
// If you change this, you need to change the asset type in the store function.
type: "png",
encoding: "binary",
fullPage: serverConfig.crawler.fullPageScreenshot,
})
.catch(() => undefined),
new Promise((f) => setTimeout(f, 5000)),
]);
if (!screenshot) {
logger.warn(`[Crawler][${jobId}] Failed to capture the screenshot.`);
} else {
logger.info(
`[Crawler][${jobId}] Finished capturing page content and a screenshot. FullPageScreenshot: ${serverConfig.crawler.fullPageScreenshot}`,
);
}
}

return {
htmlContent,
screenshot,
Expand Down Expand Up @@ -336,7 +349,7 @@ function extractReadableContent(
}

async function storeScreenshot(
screenshot: Buffer,
screenshot: Buffer | undefined,
userId: string,
jobId: string,
) {
Expand All @@ -346,6 +359,12 @@ async function storeScreenshot(
);
return null;
}
if (!screenshot) {
logger.info(
`[Crawler][${jobId}] Skipping storing the screenshot as it's empty.`,
);
return null;
}
const assetId = newAssetId();
const contentType = "image/png";
const fileName = "screenshot.png";
Expand Down

0 comments on commit 6ccdbe5

Please sign in to comment.