-
Notifications
You must be signed in to change notification settings - Fork 1
/
instrumentation.ts
147 lines (140 loc) · 5.72 KB
/
instrumentation.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import prisma from "@/lib/prisma";
import { startLocationScraping } from "@/scrapper/location-scrapping";
import { startPackageScraping } from "@/scrapper/package-scrapping";
import { startFlightScraping } from "@/scrapper/flights-scraping";
import { startHotelScraping } from "@/scrapper/hotels-scrapping";
// WebSocket endpoint read from the SBR_WS_ENDPOINT environment variable; it is
// passed to puppeteer.connect() as `browserWSEndpoint` by the worker below.
// NOTE(review): this is `undefined` when the variable is not set — confirm the
// deployment always provides it.
const SBR_WS_ENDPOINT = process.env.SBR_WS_ENDPOINT;
/**
 * Next.js instrumentation hook.
 *
 * When running in the Node.js runtime, starts a BullMQ worker on the
 * "importQueue" queue. Each job connects to the remote scraping browser,
 * opens `job.data.url` and dispatches on `job.data.jobType.type`:
 *
 * - "location": scrapes the package list and enqueues one "package" job per
 *   package that does not already have a job row for its URL.
 * - "package":  scrapes a single package and stores it as a trip, unless a
 *   trip with that id already exists.
 * - "flight":   scrapes flights and stores one row per flight.
 * - "hotels":   scrapes hotels and stores one row per hotel.
 *
 * Any error is logged and the job row is marked as "failed"; the error is not
 * rethrown, so BullMQ itself still sees the job as completed.
 * The browser connection is always closed once the job is finished.
 */
export async function register() {
  //This if statement is important, read here: https://nextjs.org/docs/app/building-your-application/optimizing/instrumentation
  if (process.env.NEXT_RUNTIME !== "nodejs") {
    return;
  }

  // Imported lazily so these Node-only modules are only loaded in the Node.js runtime.
  const { Worker } = await import("bullmq");
  const puppeteer = await import("puppeteer");
  const { connection } = await import("@/lib/redis");
  const { importQueue } = await import("@/lib/queue");

  new Worker(
    "importQueue",
    async (job) => {
      console.log("Connecting to Scraping Browser...");

      // Declared outside `try` so the `finally` block can close it.
      let browser: Awaited<ReturnType<typeof puppeteer.connect>> | undefined;

      try {
        browser = await puppeteer.connect({
          browserWSEndpoint: SBR_WS_ENDPOINT,
        });
        const page = await browser.newPage();

        // Opens the job's URL in the page, logging before and after.
        const openJobUrl = async () => {
          console.log("Connected! Navigating to " + job.data.url);
          await page.goto(job.data.url, { timeout: 120000 });
          console.log("Navigated! Scraping page content...");
        };

        // Flags this job's database row as successfully finished.
        const markJobComplete = () =>
          prisma.jobs.update({
            where: { id: job.data.id },
            data: { isComplete: true, status: "complete" },
          });

        const jobKind = job.data.jobType.type;

        if (jobKind === "location") {
          await openJobUrl();
          const packages = await startLocationScraping(page);
          await markJobComplete();

          for (const pkg of packages) {
            const packageUrl = `https://packages.yatra.com/holidays/intl/details.htm?packageId=${pkg?.id}`;
            const existingJob = await prisma.jobs.findFirst({
              where: { url: packageUrl },
            });
            if (!existingJob) {
              const createdJob = await prisma.jobs.create({
                data: {
                  url: packageUrl,
                  jobType: { type: "package" },
                },
              });
              // Awaited so an enqueue failure is handled by the catch below
              // instead of becoming an unhandled promise rejection.
              await importQueue.add("package", {
                ...createdJob,
                packageDetails: pkg,
              });
            }
          }
        } else if (jobKind === "package") {
          const alreadyScrapped = await prisma.trips.findUnique({
            where: { id: job.data.packageDetails.id },
          });
          if (!alreadyScrapped) {
            await openJobUrl();
            const pkg = await startPackageScraping(
              page,
              job.data.packageDetails
            );
            // @ts-ignore
            await prisma.trips.create({ data: pkg });
          }
          // Mark the job complete even when the trip already existed;
          // otherwise its row would stay pending forever.
          await markJobComplete();
        } else if (jobKind === "flight") {
          console.log("in flight scraping");
          await openJobUrl();
          const flights = await startFlightScraping(page);
          await markJobComplete();

          for (const flight of flights) {
            await prisma.flights.create({
              data: {
                name: flight.airlineName,
                logo: flight.airlineLogo,
                from: job.data.jobType.source,
                to: job.data.jobType.destination,
                departureTime: flight.departureTime,
                arrivalTime: flight.arrivalTime,
                duration: flight.flightDuration,
                price: flight.price,
                jobId: job.data.id,
              },
            });
          }
        } else if (jobKind === "hotels") {
          await openJobUrl();
          const hotels = await startHotelScraping(
            page,
            browser,
            job.data.location
          );
          console.log(`Scraping Complete, ${hotels.length} hotels found.`);
          await markJobComplete();
          console.log("Job Marked as complete.");
          console.log("Starting Loop for Hotels");

          for (const hotel of hotels) {
            await prisma.hotels.create({
              data: {
                name: hotel.title,
                image: hotel.photo,
                price: hotel.price,
                jobId: job.data.id,
                location: job.data.location.toLowerCase(),
              },
            });
            console.log(`${hotel.title} inserted in DB.`);
          }
          console.log("COMPLETE.");
        }
      } catch (error) {
        console.log(error);
        await prisma.jobs.update({
          where: { id: job.data.id },
          data: { isComplete: true, status: "failed" },
        });
      } finally {
        // Release the remote browser session; without this every job leaks
        // one connection. `browser` is undefined if connecting itself failed.
        if (browser) {
          try {
            await browser.close();
            console.log("Browser closed successfully");
          } catch (closeError) {
            // A failed close must not mask the job's own outcome.
            console.log(closeError);
          }
        }
      }
    },
    {
      connection,
      concurrency: 10,
      removeOnComplete: { count: 1000 },
      removeOnFail: { count: 5000 },
    }
  );
}