-
Notifications
You must be signed in to change notification settings - Fork 20
/
index.js
124 lines (107 loc) · 2.96 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
var request = require('request');
var cheerio = require('cheerio');
var queryString = require('querystring');
var flatten = require('lodash.flatten');
// var fs = require('fs');
// Endpoint every search request is sent to; the encoded query string is
// appended directly after the trailing '?'.
// NOTE(review): this is plain http — confirm whether https should be used.
var baseURL = 'http://images.google.com/search?';
// Substrings that mark a script body as worth scanning for image references
// (matched case-insensitively by containsAnyImageFileExtension).
var imageFileExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg'];
/**
 * Runs an image search and reports the image references found in the inline
 * scripts of the result page.
 *
 * @param {string|Object} opts - Either the search term itself, or an options
 *   object with the fields below.
 * @param {string} opts.searchTerm - Text to search for.
 * @param {string} [opts.queryStringAddition] - Appended verbatim to the end
 *   of the request URL, so it must carry its own leading '&'.
 * @param {string|string[]} [opts.filterOutDomains] - Extra domains to exclude
 *   in addition to the always-excluded 'gstatic.com'.
 * @param {string} [opts.userAgent] - Replaces the default User-Agent header.
 * @param {function} done - Node-style callback `(error, results)`. On success
 *   `results` is an array of `{ url, width, height }` objects. Request errors
 *   and errors raised while parsing the response are both passed as `error`.
 */
function gis(opts, done) {
  let searchTerm;
  let queryStringAddition;
  let filterOutDomains = ['gstatic.com'];
  let userAgent =
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/117.0';

  if (typeof opts === 'string') {
    searchTerm = opts;
  } else {
    searchTerm = opts.searchTerm;
    queryStringAddition = opts.queryStringAddition;
    if (opts.filterOutDomains) {
      // concat (not spread) so that a single string is accepted as well as an
      // array, and so the default list is never mutated.
      filterOutDomains = filterOutDomains.concat(opts.filterOutDomains);
    }
    if (opts.userAgent) {
      userAgent = opts.userAgent;
    }
  }

  // `q` is deliberately the last key: the "-site:" exclusions are appended
  // to the end of the URL and therefore become part of the q value.
  let url =
    baseURL +
    queryString.stringify({
      tbm: 'isch',
      q: searchTerm
    });
  url += encodeURIComponent(
    ' ' + filterOutDomains.map(addSiteExcludePrefix).join(' ')
  );
  if (queryStringAddition) {
    url += queryStringAddition;
  }

  const reqOpts = {
    url,
    headers: {
      'User-Agent': userAgent
    }
  };

  request(reqOpts, parseGISResponse);

  function parseGISResponse(error, response, body) {
    if (error) {
      done(error);
      return;
    }

    let refs;
    try {
      refs = extractImageRefs(body);
    } catch (parseError) {
      // Report parse failures through the callback instead of letting them
      // escape from the request callback as an uncaught exception.
      done(parseError);
      return;
    }
    // Called outside the try block so an exception thrown by `done` itself is
    // not caught and fed back into `done` a second time.
    done(error, refs);
  }

  // Collects image references from every inline script that mentions an
  // image file extension.
  function extractImageRefs(body) {
    const $ = cheerio.load(body);
    const scripts = $('script');
    const scriptContents = [];

    for (let i = 0; i < scripts.length; ++i) {
      const firstChild = scripts[i].children[0];
      const content = firstChild && firstChild.data;
      // Skip empty scripts and children that carry no text.
      if (typeof content === 'string' && containsAnyImageFileExtension(content)) {
        scriptContents.push(content);
      }
    }

    return flatten(scriptContents.map(collectImageRefs));
  }

  // Finds every `["http...",<number>,<number>]` triple in one script body.
  function collectImageRefs(content) {
    const refs = [];
    const re = /\["(http.+?)",(\d+),(\d+)\]/g;
    let match;

    while ((match = re.exec(content)) !== null) {
      const ref = {
        url: decodeStringEscapes(match[1]),
        // The first number is used as height and the second as width.
        // NOTE(review): presumably the page emits [url, height, width] — confirm.
        width: +match[3],
        height: +match[2]
      };
      if (domainIsOK(ref.url)) {
        refs.push(ref);
      }
    }

    return refs;
  }

  // The URL text is captured from inside a JavaScript string literal, so
  // characters may still be escaped (e.g. "\u003d" for "="). Decode them so
  // callers receive a usable URL; if the text is not a valid JSON string body
  // it is returned unchanged.
  function decodeStringEscapes(raw) {
    if (raw.indexOf('\\') === -1) {
      return raw;
    }
    try {
      return JSON.parse('"' + raw + '"');
    } catch (e) {
      return raw;
    }
  }

  // True when none of the excluded domains occurs anywhere in the URL.
  function domainIsOK(candidateURL) {
    return filterOutDomains.every(skipDomainIsNotInURL);

    function skipDomainIsNotInURL(skipDomain) {
      return candidateURL.indexOf(skipDomain) === -1;
    }
  }
}
/**
 * Turns a domain into a search operator that excludes it.
 *
 * @param {string} s - Domain to exclude.
 * @returns {string} The domain prefixed with '-site:'.
 */
function addSiteExcludePrefix(s) {
  const excludeOperator = '-site:';
  return excludeOperator + s;
}
/**
 * Reports whether the text mentions any known image file extension,
 * ignoring letter case.
 *
 * @param {string} s - Text to scan.
 * @returns {boolean} True if at least one entry of imageFileExtensions
 *   occurs in the lower-cased text.
 */
function containsAnyImageFileExtension(s) {
  const haystack = s.toLowerCase();
  for (const extension of imageFileExtensions) {
    if (haystack.includes(extension)) {
      return true;
    }
  }
  return false;
}
module.exports = gis;