-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrapFashion.js
68 lines (59 loc) · 1.98 KB
/
scrapFashion.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import puppeteer from "puppeteer";
import postToGhost from "./ghost.js";
/**
 * Scrape one article from a listing page and hand it to `postToGhost`.
 *
 * Steps performed:
 *  1. Launch a headless browser and open `url`.
 *  2. Collect the `href` of every element matching `IdClassesTags`.
 *  3. Navigate to the link at position `linkIndex`.
 *  4. Read the title (text), body (HTML) and image (`src`) from the elements
 *     matching the three article selectors; only the first match of each is
 *     passed on.
 *  5. Call `postToGhost(image, title, body, "published")` and log the title.
 *
 * If there is no usable link at `linkIndex`, a warning is logged and nothing
 * is posted. The browser is closed in every case, including on errors.
 *
 * @param {string} url - Address of the listing page.
 * @param {string} IdClassesTags - CSS selector matching the article links.
 * @param {string} articletitle - CSS selector for the article title element(s).
 * @param {string} articlebody - CSS selector for the article body element(s).
 * @param {string} articleimage - CSS selector for the article image element(s).
 * @param {number} [linkIndex=8] - Zero-based position of the listing link to follow.
 * @returns {Promise<void>} Resolves once the article was posted or skipped.
 * @throws Rejects with whatever puppeteer or `postToGhost` throws.
 */
export default async function ScrapData(
  url,
  IdClassesTags,
  articletitle,
  articlebody,
  articleimage,
  linkIndex = 8
) {
  // timeout: 0 disables puppeteer's navigation timeout.
  const navigationOptions = { waitUntil: "load", timeout: 0 };

  const browser = await puppeteer.launch({ headless: true });
  try {
    const page = await browser.newPage();
    await page.goto(url, navigationOptions);

    // Callbacks given to page.evaluate run inside the page, so they must be
    // self-contained and receive their selector as an explicit argument.
    const links = await page.evaluate(
      (selector) =>
        [...document.querySelectorAll(selector)].map((anchor) => anchor.href),
      IdClassesTags
    );

    // Guard against listings with too few matches (or matches without href)
    // instead of navigating to an undefined URL.
    const articleUrl = Array.isArray(links) ? links[linkIndex] : undefined;
    if (typeof articleUrl !== "string" || articleUrl === "") {
      console.warn(
        `No link at index ${linkIndex} for selector "${IdClassesTags}" on ${url}; nothing published`
      );
      return;
    }

    await page.goto(articleUrl, navigationOptions);

    // Title: text before the first "|" (e.g. drops a "| Site name" suffix).
    const fullArticleTitle = await page.evaluate(
      (selector) =>
        [...document.querySelectorAll(selector)].map((element) =>
          element.textContent.split("|")[0].trim()
        ),
      articletitle
    );

    // Body: inner HTML, also cut at the first "|".
    // NOTE(review): truncating the body at "|" looks inherited from the title
    // logic; kept unchanged here — confirm whether it is intended.
    const fullArticleBody = await page.evaluate(
      (selector) =>
        [...document.querySelectorAll(selector)].map((element) =>
          element.innerHTML.split("|")[0].trim()
        ),
      articlebody
    );

    // Image: the src attribute, stringified (a missing attribute yields "null").
    const fullArticleImage = await page.evaluate(
      (selector) =>
        [...document.querySelectorAll(selector)].map(
          (element) => `${element.getAttribute("src")}`
        ),
      articleimage
    );

    // Await so the log below only appears after postToGhost has finished and
    // so a failure surfaces to the caller (harmless if it is synchronous).
    await postToGhost(
      fullArticleImage[0],
      fullArticleTitle[0],
      fullArticleBody[0],
      "published"
    );
    console.log(`${fullArticleTitle} has been published`);
  } finally {
    // Always release the browser process, even when a step above throws.
    await browser.close();
  }
}