-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathtemplate.js
87 lines (68 loc) · 2.91 KB
/
template.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/**
 * Scraping configuration.
 *
 * Only `url` is active. Every other option is listed commented out, next to
 * a note on its default value; uncomment an option to set it.
 */
export const config = {
  // Specify the URL to start scraping from.
  url: "https://example.com/",

  // Enable rendering with headless browser. (default = false)
  // browser: true,

  // Specify if browser should be headless or not. (default = true)
  // headless: false,

  // Specify the multiple URLs to start scraping from. (default = [])
  // urls: [
  //   "https://anothersite.com/",
  //   "https://yetanother.com/",
  // ],

  // Specify how deep links should be followed. (default = 0, no follow)
  // depth: 5,

  // Specify the css selectors to follow. (default = ["a[href]"])
  // follow: [".next > a", ".related a"],

  // Specify the allowed domains. ['*'] for all. (default = domain from url)
  // allowedDomains: ["example.com", "anothersite.com"],

  // Specify the blocked domains. (default = none)
  // blockedDomains: ["somesite.com"],

  // Specify the allowed URLs as regex. (default = all allowed)
  // Note: Backslashes must be doubled inside a string literal; a lone "\d"
  // evaluates to a plain "d" and would no longer match digits.
  // allowedURLs: ["/posts", "/articles/\\d+"],

  // Specify the blocked URLs as regex. (default = none)
  // blockedURLs: ["/admin"],

  // Specify the rate in requests per minute. (default = no rate limit)
  // rate: 60,

  // Specify the number of concurrent requests. (default = no limit)
  // concurrency: 1,

  // Specify a single HTTP(S) proxy URL. (default = no proxy)
  // Note: Not compatible with browser mode.
  // proxy: "http://someproxy.com:8043",

  // Specify multiple HTTP(S) proxy URLs. (default = no proxy)
  // Note: Not compatible with browser mode.
  // proxies: [
  //   "http://someproxy.com:8043",
  //   "http://someotherproxy.com:8043",
  // ],

  // Enable file-based request caching. (default = no cache)
  // cache: "file",

  // Specify the HTTP request header. (default = none)
  // headers: {
  //   "Authorization": "Bearer ...",
  //   "User-Agent": "Mozilla ...",
  // },

  // Use the cookie store of your local browser. (default = off)
  // Options: "chrome" | "edge" | "firefox"
  // cookies: "chrome",

  // Specify the output options.
  // output: {
  //   // Specify the output file. (default = stdout)
  //   file: "results.json",
  //
  //   // Specify the output format. (default = json)
  //   // Options: "json" | "ndjson"
  //   format: "json",
  // },
};
/**
 * Builds the scraped record for one document.
 *
 * Looks up "h1" and "a" through `doc.find` and reports the heading text
 * together with the link's text and its `href` passed through `absoluteURL`.
 *
 * @param {object} context
 * @param {object} context.doc - Exposes `find(selector)`; the returned value
 *   must offer `text()` and `attr(name)`.
 * @param {Function} context.absoluteURL - Called with the link's `href`
 *   attribute value; presumably resolves it to an absolute URL (confirm
 *   against the runtime that supplies it).
 * @returns {{ title: *, link: { text: *, url: * } }} The extracted data.
 */
export default function({ doc, absoluteURL }) {
  const heading = doc.find("h1");
  const anchor = doc.find("a");

  // Evaluate in the same order as the fields appear in the result.
  const title = heading.text();
  const text = anchor.text();
  const url = absoluteURL(anchor.attr("href"));

  return { title, link: { text, url } };
}