This repository has been archived by the owner on Mar 25, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
101 lines (60 loc) · 1.9 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
var table_name = 'documents'
var fs = require('fs')
var url = require('url')
var store = require('./store')
var scraper = require('./scraper')
var indexer = require('./indexer')
var log = require('./logger')
// ---------------------------------------------------------------------------
// Entry script: restores the backlog, creates the SQL dump file, wires the
// queue 'drain' handlers, queues the initial work and saves state on exit.
// ---------------------------------------------------------------------------

// Load backlog to store
log.load();

// Set up data dump file.
// The timestamp is rendered as ISO-8601 with ':' swapped for '-' so the file
// name contains no spaces, colons or parentheses (the default Date string
// conversion produces all three). Both names are declared locally instead of
// leaking as implicit globals.
var iso_date = new Date().toISOString().replace(/:/g, '-');
store.data_filename = 'data' + iso_date + '.sql';
var sql_insert_query = 'INSERT INTO ' + table_name + ' () VALUES ';
try {
  // Synchronous on purpose: nothing is queued until the dump file exists.
  fs.writeFileSync(store.data_filename, sql_insert_query);
} catch (e) {
  // Without a writable dump file there is nothing useful to do; report and stop.
  console.log(e);
  process.exit(1);
}

// Dump the collected documents when the scraper queue gets empty.
// NOTE(review): the JSON is appended to 'message.txt', not to
// store.data_filename - confirm which file is the intended target.
// TODO: the process restart mentioned in the original comment is not implemented.
scraper.on('drain', function () {
  fs.appendFile('message.txt', JSON.stringify(store.documents, null, '\t'), function (err) {
    // Thrown from an async callback, so a write failure surfaces as an uncaught exception.
    if (err) throw err;
    console.log('Data File: Write Complete');
  });
  // Runs before the append above has completed (appendFile is asynchronous).
  console.log(store.backlog.last.doc);
});

// No action is taken yet when the indexer queue drains.
indexer.on('drain', function () {
});

/** Queue Pages **/
// NOTE(review): start_indexer is referenced as a bare identifier and called
// with no arguments - confirm a function of that name is in scope at this
// point and that queuing without an explicit page range is intended.
start_indexer();

/** Queue Certificates **/
// Pass uri_list
scraper.queue(['http://www.adene.pt/sce/certificados/SCE0000144093728', 'http://www.adene.pt/sce/certificados/DCR0000001000809']);

/** On Exit **/
process.on('exit', function (code) {
  console.log('\n## EXITING - Code ' + code + ' ##');
  // Save from store.
  // NOTE(review): 'exit' listeners may only do synchronous work; verify that
  // log.save() does not rely on asynchronous I/O.
  log.save();
});
/**
 * Build the URI of one page of the certificate index listing.
 *
 * @param {number} page_number - value placed in the `page` query parameter.
 * @returns {string} constants.index.uri + 'page=<page_number>&' + constants.index.querystring
 */
function get_index_uri(page_number) {
  return constants.index.uri + 'page=' + page_number + '&' + constants.index.querystring;
}

/**
 * Queue a range of index pages on the indexer.
 *
 * Builds one URI per page in [page_number, offset) and hands the whole list
 * to indexer.queue() in a single call. When the range is empty (including
 * when either argument is undefined) an empty list is queued.
 *
 * @param {number} page_number - first page to queue (inclusive).
 * @param {number} offset - upper bound of the range (exclusive); despite the
 *   name it is compared against the page counter, not added to it.
 */
function start_indexer(page_number, offset) {
  // Declared locally so the list and the counter do not leak as globals.
  var page_list = [];
  for (var i = page_number; i < offset; i++) {
    page_list.push(get_index_uri(i));
  }
  indexer.queue(page_list);
}

// The helpers are function declarations so they are hoisted: the script calls
// start_indexer() by its bare name before this point in the file is reached.
// `lib` keeps exposing both under their original property names.
var lib = {
  get_index_uri: get_index_uri,
  start_indexer: start_indexer
};
/**
 * Fixed request settings for the certificate index listing.
 *
 * constants.index.uri         - base address of the listing, ending in '?'.
 * constants.index.querystring - search-form parameters appended after the
 *                               `page` parameter when an index URI is built.
 */
var constants = {};
constants.index = {};
constants.index.uri = 'http://www.adene.pt/sce/micro/certificados-energeticos?';
constants.index.querystring = 'tipo_cert=Todos&tipo_ed=Todos&morada=&concelho=all&distrito=all&freguesia=all&conservatoria=&conservatoria_nr=&artigo=&fracao=&numero=&op=Pesquisar&form_build_id=form-qpN7d8_HPQqSQJGhFxB024FLI8tBZLX_naofWt_Mwlo&form_id=certificados_webservice_form';