This repository has been archived by the owner on Nov 17, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
build_blog.py
109 lines (83 loc) · 3.3 KB
/
build_blog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import re
from types import SimpleNamespace
import unicodedata
import os
import yaml
import markdown
import jinja2
from jinja2 import Environment, FileSystemLoader
# Location of the build settings file, given as a relative path.
PATH_TO_CONFIG = './config.yaml'

# Matches a front-matter block at the very start of a document: an opening
# line of three dashes, the metadata lines (captured lazily as group 1), and a
# closing line made of three dashes or three dots.
# from https://github.com/waylan/docdata/blob/master/docdata/yamldata.py
METADATA_RE = re.compile(
    r'^-{3}[ \t]*\n(.*?\n)(?:\.{3}|-{3})[ \t]*\n',
    flags=re.DOTALL | re.UNICODE,
)

# Articles are plain attribute bags; no dedicated class is defined for them.
Article = SimpleNamespace
def extract_article_data(source):
    """Split an article source into front matter, title and Markdown text.

    The source must start with a front-matter block (as matched by
    ``METADATA_RE``) that holds a ``publication date`` key, followed by
    Markdown text whose first line is a level-one heading (a single ``#``
    followed by the title).

    Args:
        source: Full text of the article file.

    Returns:
        An ``Article`` with ``title`` (the heading text, stripped),
        ``pub_date`` (the ``publication date`` value as loaded by YAML),
        ``text`` (the Markdown following the front matter, heading included)
        and ``metadata`` (the remaining front-matter keys).

    Raises:
        ValueError: if the front-matter block is missing or is not a
            key/value mapping, or if the text does not start with a
            level-one heading.
        KeyError: if the front matter has no ``publication date`` key.
    """
    raw_metadata = METADATA_RE.match(source)
    if raw_metadata is None:
        raise ValueError('article does not start with a front-matter block')

    metadata = yaml.safe_load(raw_metadata.group(1))
    # A block made only of blank lines or comments loads as None, and a bare
    # scalar or list cannot provide named keys either.
    if not isinstance(metadata, dict):
        raise ValueError('article front matter is not a key/value mapping')
    pub_date = metadata.pop('publication date')

    text = source[raw_metadata.end():].lstrip()
    heading = re.match(r'^#{1}([^#].*)\n?', text)
    if heading is None:
        raise ValueError('article text does not start with a level-one heading')
    title = heading.group(1).strip()

    return Article(title=title, pub_date=pub_date, text=text, metadata=metadata)
def render_markdown(text):
    """Convert Markdown *text* to HTML using the ``markdown`` package."""
    html = markdown.markdown(text)
    return html
def render_article(title, body, pub_date, metadata):
    """Render one article page with the ``article.html`` template.

    Args:
        title: Article title.
        body: Article body, handed to the template unchanged.
        pub_date: Publication date, handed to the template unchanged.
        metadata: Mapping of extra values exposed to the template under
            their own keys.

    Returns:
        The rendered page as a string.
    """
    env = Environment(
        loader=FileSystemLoader('templates'),
    )
    template = env.get_template('article.html')
    # Build the context as a single dict instead of mixing explicit keywords
    # with **metadata: a metadata key named "title", "body" or "pub_date"
    # would otherwise raise "TypeError: got multiple values for keyword
    # argument". The explicit arguments are applied last, so they win.
    context = dict(metadata)
    context.update(title=title, body=body, pub_date=pub_date)
    return template.render(context)
def render_index(article_list):
    """Render the index page from ``index.html``, newest article first.

    Note that *article_list* itself is sorted in place, by descending
    ``pub_date``, before being handed to the template.
    """
    # XXX environment could be created once
    loader = FileSystemLoader('templates')
    env = Environment(loader=loader)
    template = env.get_template('index.html')

    # Reorder the caller's list so the most recent publication comes first.
    article_list.sort(key=lambda article: article.pub_date, reverse=True)
    return template.render(article_list=article_list)
# from https://stackoverflow.com/a/517974/3025740
def remove_accents(input_str):
    """Return *input_str* with its combining marks removed.

    The string is first decomposed with NFKD normalization, which turns an
    accented letter into a base letter followed by combining marks; every
    combining character is then dropped.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    base_chars = (ch for ch in decomposed if unicodedata.combining(ch) == 0)
    return "".join(base_chars)
def slugify(x):
    """Build a lowercase, accent-free slug from the string *x*.

    Each non-word character (anything that is not alphanumeric or an
    underscore) is replaced by a dash, accents are then stripped with
    ``remove_accents`` and the result is lower-cased. Consecutive dashes are
    deliberately left as they are, so slugs keep the exact value they always
    had.
    """
    # Raw string: '\W' inside a normal literal is an invalid escape sequence,
    # which recent Python versions warn about and will eventually reject.
    dashed = re.sub(r'\W', '-', x)
    return remove_accents(dashed).lower()
def build_page(path_to_source):
    """Build the HTML page for a single article source file.

    Args:
        path_to_source: Path of the article source file (assumed to be
            UTF-8 encoded).

    Returns:
        A ``(page, article)`` tuple: the rendered HTML page and the article
        object, on which ``filename`` and ``href`` have both been set to
        ``<pub_date>-<slug>.html``.
    """
    # Decode explicitly as UTF-8: the platform default encoding is not UTF-8
    # everywhere and would garble or reject accented text.
    with open(path_to_source, encoding='utf-8') as f:
        source = f.read()

    article = extract_article_data(source)
    body = render_markdown(article.text)
    page = render_article(article.title, body, article.pub_date, article.metadata)

    # A "slug" key in the metadata overrides the slug derived from the title.
    if "slug" in article.metadata:
        slug = article.metadata["slug"]
    else:
        slug = slugify(article.title)

    # An f-string also copes with a slug that YAML loaded as a non-string
    # (for example a number), where "+" concatenation would raise TypeError.
    article.filename = f'{article.pub_date}-{slug}.html'
    article.href = article.filename
    return (page, article)
def remove_all_html_files(path_to_dir):
    """Delete every entry of *path_to_dir* whose name ends with ``.html``.

    Only the directory's own entries are examined; nothing is done
    recursively.
    """
    html_names = [
        entry for entry in os.listdir(path_to_dir) if entry.endswith('.html')
    ]
    for html_name in html_names:
        os.remove(path_to_dir + '/' + html_name)
def build_blog(path_to_sources, path_to_destination):
    """Rebuild the whole site: one page per article plus ``index.html``.

    Existing ``.html`` files in the destination are removed first. Every
    entry of *path_to_sources* is then built with ``build_page`` and written
    under its ``article.filename``; finally the index is rendered from all
    the articles and written as ``index.html``.

    Args:
        path_to_sources: Directory holding the article source files.
        path_to_destination: Directory receiving the generated HTML files;
            it is created if it does not exist yet.

    Raises:
        NotADirectoryError: if *path_to_sources* is not a directory.
    """
    if not os.path.isdir(path_to_sources):
        # More specific than a bare Exception, yet still a subclass of it, so
        # existing "except Exception" handlers keep working.
        raise NotADirectoryError('not a directory: "%s"' % path_to_sources)

    # A first build has no output directory yet: create it rather than fail
    # when listing its content.
    os.makedirs(path_to_destination, exist_ok=True)
    remove_all_html_files(path_to_destination)

    article_list = []
    for source_file in os.listdir(path_to_sources):
        page, article = build_page(os.path.join(path_to_sources, source_file))
        article_list.append(article)
        # Encode explicitly as UTF-8 so the output does not depend on the
        # locale of the machine running the build.
        page_path = os.path.join(path_to_destination, article.filename)
        with open(page_path, 'w', encoding='utf-8') as f:
            f.write(page)

    index = render_index(article_list)
    index_path = os.path.join(path_to_destination, 'index.html')
    with open(index_path, 'w', encoding='utf-8') as f:
        f.write(index)
def get_config(path_to_config):
    """Read a flat ``key: value`` configuration file into a dict.

    This is a minimal line-based parser, not a YAML loader: each meaningful
    line is split on its first ``': '`` and both sides are kept as strings.
    Blank lines and lines starting with ``#`` are ignored.

    Args:
        path_to_config: Path of the configuration file.

    Returns:
        A dict mapping each key to its value, both as strings.

    Raises:
        ValueError: if a meaningful line contains no ``': '`` separator.
    """
    config = {}
    with open(path_to_config, encoding='utf-8') as f:
        for line_number, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            # partition() splits on the first separator only, so a value may
            # itself contain ': ' without breaking the parse.
            key, separator, value = line.partition(': ')
            if not separator:
                raise ValueError(
                    '%s:%d: expected "key: value", got %r'
                    % (path_to_config, line_number, line)
                )
            config[key] = value
    return config
if __name__ == "__main__":
    # Script entry point: load the build settings, then regenerate the site
    # from the configured source directory into the configured build
    # directory.
    config = get_config(PATH_TO_CONFIG)
    build_blog(
        path_to_sources=config['path_to_source'],
        path_to_destination=config['path_to_build'],
    )