forked from CodeforLeipzig/wo-ist-markt-scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.js
62 lines (55 loc) · 2.29 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
const cheerio = require('cheerio')
const fs = require('fs')
exports.scrape = function (content) {
//const $ = cheerio.load(fs.readFileSync(url))
const $ = cheerio.load(content)
const table = $('#tabborder_21351').find("div > table").first()
const weekDayLit = { 'Montag': 'Mo', 'Dienstag': 'Tu', 'Mittwoch': 'We',
'Donnerstag': 'Th', 'Freitag': 'Fr', 'Samstag': 'Sa', 'Sonntag': 'Su' }
const weekDayMap = new Map(Object.entries(weekDayLit))
const config = []
table.find('tr > td[class="td-0"]').each((index, td) => handleName(config, td))
table.find('tr > td[class="td-1"]').each((index, td) => handleWeekDays(config, td, index, weekDayMap))
table.find('tr > td[class="td-last td-2"]').each((index, td) => handleOpeningHours(config, td, index))
const configObj = {
markets: config
}
fs.writeFileSync('./config.json', JSON.stringify(configObj, null, 2), 'utf-8')
}
/**
 * Appends a new market entry to `config`, named after the trimmed text of the
 * cell's first child node.
 *
 * @param {Array<Object>} config - list of market entries; mutated in place
 * @param {Object} td - table cell node whose `children[0].data` holds the name
 * @returns {undefined}
 */
const handleName = (config, td) => {
  const firstChild = td.children[0]
  const name = firstChild.data.trim()
  config.push({ name })
}
/**
 * Reads the weekday cell of one table row and stores the abbreviated weekdays
 * as an array on `config[index].openingHours`.
 *
 * The cell text is split on ' und ' and every part is looked up in
 * `weekDayMap`. A cell that is blank, or that has no child node at all, yields
 * the single placeholder 'Niemals'. Parts that are not keys of `weekDayMap`
 * (including the placeholder) are kept verbatim, so that later string
 * concatenation never produces the literal text "undefined".
 *
 * @param {Array<Object>} config - list of market entries; the entry at `index` is mutated
 * @param {Object} td - table cell node; the `data` of its first child is read
 * @param {number} index - position of the market entry belonging to this cell
 * @param {Map<string, string>} weekDayMap - weekday name to abbreviation
 * @returns {undefined}
 */
const handleWeekDays = function(config, td, index, weekDayMap) {
  // A cell without any child node has no text; treat it like a blank cell
  // instead of throwing before the fallback below can apply
  const textNode = td.children[0]
  const weekDayStr = textNode ? textNode.data.trim() : ''
  const weekDays = weekDayStr ? weekDayStr.split(' und ') : [ 'Niemals' ]
  const market = config[index]
  market.openingHours = weekDays.map(wd => weekDayMap.has(wd) ? weekDayMap.get(wd) : wd)
}
/**
 * Reads the opening-hours cell of one table row and replaces
 * `config[index].openingHours` (expected to be the array of weekdays stored
 * earlier) with a single formatted string.
 *
 * The cell text is split on ' und ' into hour ranges; in each range the first
 * ' Uhr' is removed and ' bis ' becomes '-'. A cell that is blank, or that has
 * no child node at all, yields the single range 'Niemals'.
 *
 * - One range: all weekdays share it, e.g. 'Mo, Fr 8-13'.
 * - Several ranges: the n-th weekday gets the n-th range, and weekdays beyond
 *   the last range reuse the last one, e.g. 'Tu 8-13; Th 9-12; Sa 9-12'.
 *
 * @param {Array<Object>} config - list of market entries; the entry at `index` is mutated
 * @param {Object} td - table cell node; the `data` of its first child is read
 * @param {number} index - position of the market entry belonging to this cell
 * @returns {undefined}
 */
const handleOpeningHours = function(config, td, index) {
  const market = config[index]
  // No weekday list on the entry means nothing to enumerate
  const weekDays = Array.isArray(market.openingHours) ? market.openingHours : []

  // A cell without any child node has no text; treat it like a blank cell
  // instead of throwing before the fallback below can apply
  const textNode = td.children[0]
  const hourRangesStr = textNode ? textNode.data.trim() : ''
  const hourRangeStrs = hourRangesStr ? hourRangesStr.split(' und ') : [ 'Niemals' ]
  const hourRanges = hourRangeStrs.map(str => str.replace(' Uhr', '').split(' bis ').join('-'))

  if (hourRanges.length === 1) {
    // String() renders each entry exactly as plain string concatenation would
    market.openingHours = weekDays.map(String).join(', ') + ' ' + hourRanges[0]
    return
  }

  const lastRange = hourRanges.length - 1
  market.openingHours = weekDays
    .map((day, dayIndex) => day + ' ' + hourRanges[Math.min(dayIndex, lastRange)])
    .join('; ')
}