-
Notifications
You must be signed in to change notification settings - Fork 3
/
parser.js
56 lines (49 loc) · 1.45 KB
/
parser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
'use strict';
const Wxr = require('wxr');
const $ = require('cheerio');
const path = require('path');
const fs = require('fs');
// Prefix prepended to every article image URL (see parseArticle).
const imgUrlFix = 'http://read.html5.qq.com/image?src=forum&q=5&r=0&imgflag=7&imageUrl=';
// Directory scanned for saved article pages.
const outputDir = path.join(__dirname, 'out');
const importer = new Wxr();
// Names of every .html file encountered, whether or not it imported cleanly.
const files = [];

// Convert each saved page into a post titled after its file name.
fs.readdirSync(outputDir).forEach((name) => {
  if (!name.endsWith('.html')) {
    return;
  }
  files.push(name);
  // File name without the trailing ".html".
  const title = name.slice(0, -5);
  try {
    // Reading and parsing sit inside the try so that one unreadable or
    // malformed page is reported and skipped instead of aborting the whole
    // export, matching how addPost failures are already handled.
    const raw = fs.readFileSync(path.join(outputDir, name), 'utf-8');
    const article = parseArticle(raw);
    importer.addPost({
      title,
      date: article.date,
      contentEncoded: article.content,
    });
  } catch (ex) {
    // Tag the error with the article it belongs to before logging it.
    ex.articleTitle = title;
    console.error(ex);
  }
});
/**
 * Extracts the post date and body HTML from one saved article page.
 *
 * Every image inside `#js_content` that carries a `data-src` attribute gets a
 * `src` attribute set to `imgUrlFix` followed by that value, and its
 * `data-src` attribute is removed.
 *
 * @param {string} raw - HTML source of the article page.
 * @returns {{date: string, content: (string|null)}} `date` is the text of
 *   `#post-date` ('' when there is none); `content` is the inner HTML of
 *   `#js_content`, or null when that element is not found.
 */
function parseArticle(raw) {
  // Parse once and reuse the tree for both lookups.
  const page = $(raw);
  const contentNode = page.find('#js_content');

  // Rewrite attributes on the parsed tree instead of string-replacing the
  // serialized HTML: serialization may entity-encode attribute values
  // (e.g. `&` becomes `&amp;`), so a textual search for the raw URL can miss,
  // and `$` sequences in a replacement string are interpreted by
  // String.prototype.replace.
  contentNode.find('img').each((index, img) => {
    const image = $(img);
    // Raw element nodes expose attributes via the attribute API; there is no
    // `data` map on them unless `.data()` has been called first.
    const sourceUrl = image.attr('data-src');
    if (!sourceUrl) {
      return;
    }
    // NOTE(review): the URL is appended unencoded, as before — confirm the
    // image endpoint expects a raw URL after `imageUrl=`.
    image.attr('src', imgUrlFix + sourceUrl);
    image.removeAttr('data-src');
  });

  return {
    date: page.find('#post-date').text() || '',
    content: contentNode.html(),
  };
}
// Serialize everything added to the importer into result.xml next to this
// script, then report how many .html files were seen.
const resultPath = path.join(__dirname, 'result.xml');
const serializedPosts = importer.stringify();
fs.writeFileSync(resultPath, serializedPosts, 'utf-8');
console.log(`${files.length} files done.`);