-
Notifications
You must be signed in to change notification settings - Fork 26
/
scrapper.js
89 lines (74 loc) · 1.91 KB
/
scrapper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
var http = require('http');
var config = require(__dirname + '/config.js');
var parseXML = require('xml2js').parseString;
exports.scrap = function (feed, callback) {
http.get(feed.url, function (res) {
var body = "";
res.on('data', function (chunk) {
body += chunk;
});
res.on('end', function () {
// Got all response, now parsing...
if (!body || res.statusCode !== 200)
return callback({message: "Invalid Feed"});
parseXML(body, function (err, rss) {
if (err)
return callback({message: "Invalid Feed"});
feed = parseRSS(rss);
if (!feed)
feed = parseAtom(rss);
if (!feed)
return callback({message: "Invalid Feed"});
callback(err, feed);
});
});
}).on('error', function (error) {
console.log("error while getting feed", error);
callback(error, null);
});
}
/**
 * Builds the scraper's feed object from an XML document that was parsed into
 * a JavaScript object and is expected to have the RSS layout
 * (`rss.channel[0].item[...]`).
 *
 * At most `config.maxItems` items are returned, in document order.
 *
 * @param {Object} rss - Parsed XML document.
 * @returns {?Object} `{name, description, link, items}` where each item is
 *   `{title, link, description}`; `null` when any expected field is missing.
 *   `name`, `description` and `link` are passed through exactly as the parser
 *   produced them (NOTE(review): presumably one-element arrays, unlike the
 *   item fields which are unwrapped with `[0]` - confirm what callers expect).
 */
var parseRSS = function (rss) {
    try {
        var channel = rss.rss.channel[0];
        var entries = channel.item;
        var items = [];
        // Stop at the configured cap or at the end of the list. The bound is
        // the full length: `length - 1` would always drop the last item.
        for (var i = 0; i < config.maxItems && i < entries.length; i++) {
            items.push({
                title: entries[i].title[0],
                link: entries[i].link[0],
                description: entries[i].description[0]
            });
        }
        return {
            name: channel.title,
            description: channel.description,
            link: channel.link,
            items: items
        };
    }
    catch (e) { // If not all the fields are inside the feed
        return null;
    }
};
/**
 * Builds the scraper's feed object from an XML document that was parsed into
 * a JavaScript object and is expected to have the Atom layout
 * (`feed.entry[...]`).
 *
 * At most `config.maxItems` entries are returned, in document order.
 *
 * @param {Object} rss - Parsed XML document (named `rss` for symmetry with
 *   `parseRSS`, but read through its `feed` property).
 * @returns {?Object} `{name, description, link, items}` where each item is
 *   `{title, link, description}` and `description` at feed level is always
 *   the literal "No description"; `null` when any expected field is missing
 *   (the error is logged to the console).
 */
var parseAtom = function (rss) {
    // A text element arrives either as a plain string (no attributes) or as an
    // object holding its text under `_` (attributes present); accept both so
    // that attribute-less <title>/<content> elements do not become undefined.
    var textOf = function (node) {
        return typeof node === 'string' ? node : node._;
    };
    try {
        var entries = rss.feed.entry;
        var items = [];
        // Stop at the configured cap or at the end of the list. The bound is
        // the full length: `length - 1` would always drop the last entry.
        for (var i = 0; i < config.maxItems && i < entries.length; i++) {
            items.push({
                title: textOf(entries[i].title[0]),
                link: entries[i].link[0].$.href,
                description: textOf(entries[i].content[0])
            });
        }
        return {
            name: rss.feed.title,
            description: "No description",
            link: rss.feed.link[0].$.href,
            items: items
        };
    }
    catch (e) { // If not all the fields are inside the feed
        console.log(e);
        return null;
    }
};