-
Notifications
You must be signed in to change notification settings - Fork 0
/
markdown-readability.py
89 lines (73 loc) · 2.63 KB
/
markdown-readability.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# SPDX-License-Identifier: CC0-1.0
# SPDX-FileCopyrightText: 2024 The Foundation for Public Code <info@publiccode.net>
# markdown-readability.py: experiment in checking readability of jekyll text
import io
import os
import re
import sys
import markdown
import textstat
# flatten an element tree to plain text (drops links and all other markup)
def unmark_element(element, stream=None):
    """Write the text content of ``element`` and its descendants to a stream.

    Only text is emitted: the element's own text, then each child
    (recursively, including the child's tail text), then the element's
    tail text. Tags and attributes are never written.

    Args:
        element: An ElementTree-style element (has ``text``, ``tail`` and
            iterates over its children).
        stream: Optional writable text buffer to append to. A new
            ``io.StringIO`` is created when omitted.

    Returns:
        The full contents of the stream after this element was written.
    """
    buffer = io.StringIO() if stream is None else stream

    leading_text = element.text
    if leading_text:
        buffer.write(leading_text)

    # Children share the same buffer so text lands in document order.
    for child in element:
        unmark_element(child, buffer)

    trailing_text = element.tail
    if trailing_text:
        buffer.write(trailing_text)

    return buffer.getvalue()
# Patch Markdown to add plaintext as an output format: unmark_element is
# registered as the serializer for the new "plain" format name.
# NOTE(review): the registration presumably has to happen before the instance
# below is created, since that instance asks for output_format="plain".
markdown.Markdown.output_formats["plain"] = unmark_element
# Single module-level converter; unmark() calls convert() on this instance.
__md = markdown.Markdown(output_format="plain")
# NOTE(review): presumably stops convert() from trying to strip a wrapping
# top-level tag, which plain-text output does not contain; confirm against
# the Python-Markdown source.
__md.stripTopLevelTags = False
def unmark(text):
    """Convert Markdown source to plain text using the module's converter.

    Args:
        text: Markdown source as a string.

    Returns:
        Whatever the shared "plain"-format Markdown instance produces for
        ``text``.
    """
    # The converter instance is shared by every call. Reset it first so state
    # kept from the previous document (e.g. reference-style link definitions)
    # cannot influence the conversion of this one.
    __md.reset()
    return __md.convert(text)
# jekyll markdown has YAML "frontmatter" which precedes the actual markdown
def strip_frontmatter(filepath):
    """Return the text of a markdown file with any leading front matter removed.

    Front matter is recognised only when the very first line is ``---`` and a
    later line closes it with another ``---`` (trailing whitespace and a
    missing final newline are tolerated). Everything after the closing marker
    is returned unchanged, including any later ``---`` horizontal rules.
    A file without a complete front matter block is returned in full instead
    of being reduced to an empty string.

    Args:
        filepath: Path of the markdown file to read (decoded as UTF-8).

    Returns:
        The markdown body as a single string.
    """
    with open(filepath, encoding="utf-8") as md_file:
        raw_lines = md_file.readlines()

    # No opening marker on line one: the file has no front matter to strip.
    if not raw_lines or raw_lines[0].rstrip() != "---":
        return "".join(raw_lines)

    # Look for the closing marker; the body starts on the line after it.
    for index, line in enumerate(raw_lines[1:], start=1):
        if line.rstrip() == "---":
            return "".join(raw_lines[index + 1:])

    # Opening marker that is never closed: treat the file as plain markdown.
    return "".join(raw_lines)
def score_readability(filepath):
    """Print readability scores for one Jekyll markdown file.

    The file's front matter is stripped, the remaining markdown is flattened
    to plain text, and then the file path followed by one line per selected
    textstat metric is printed to stdout.

    Args:
        filepath: Path of the markdown file to score.
    """
    plain_text = unmark(strip_frontmatter(filepath))
    # print(plain_text)
    print(filepath)

    # textstat functions to report, in output order. The commented-out names
    # are the other metrics this experiment had disabled; uncomment to enable.
    selected_metrics = (
        "flesch_kincaid_grade",
        "gunning_fog",
        "text_standard",
        # "flesch_reading_ease",
        # "smog_index",
        # "coleman_liau_index",
        # "automated_readability_index",
        # "dale_chall_readability_score",
        # "difficult_words",
        # "linsear_write_formula",
        # "fernandez_huerta",
        # "szigriszt_pazos",
        # "gutierrez_polini",
        # "crawford",
        # "gulpease_index",
        # "osman",
    )
    for metric_name in selected_metrics:
        score = getattr(textstat, metric_name)(plain_text)
        print(f"{metric_name}: ", score)
def main(argv=None):
    """Score every ``.md`` file directly inside the given directory.

    Args:
        argv: Argument vector (program name followed by the markdown
            directory). Defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 1 when the directory argument is
        missing or is not a directory.
    """
    if argv is None:
        argv = sys.argv

    # A missing argument gets the same usage message as a bad directory,
    # instead of an IndexError traceback.
    if len(argv) < 2 or not os.path.isdir(argv[1]):
        program = argv[0] if argv else "markdown-readability.py"
        print(f"usage: {program} path/to/markdown/dir", file=sys.stderr)
        return 1

    markdown_dir = argv[1]
    # os.listdir() returns entries in arbitrary order; sort so that repeated
    # runs list the files in the same sequence.
    filelist = [
        os.path.join(markdown_dir, name)
        for name in sorted(os.listdir(markdown_dir))
        if name.endswith(".md")
    ]

    for filepath in filelist:
        print("\n")
        score_readability(filepath)
    return 0


if __name__ == "__main__":
    sys.exit(main())