forked from twoscoops/two-scoops-of-django-1.8
-
Notifications
You must be signed in to change notification settings - Fork 0
/
getlinks.py
84 lines (61 loc) · 2.26 KB
/
getlinks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
Originally blogged at http://twoscoopspress.com/blogs/news/11935321-the-appendix-that-didnt-survive
This version has been modified from the code in that blog post to handle the \\linkdiff command.
"""
# getlinks.py
import glob
import re
# Regex to find all the links defined in LaTeX: \link{...}, \linkdiff{...}, or
# \link followed by up to four letters (case-insensitive). The text between
# the braces may not contain a closing brace or a newline, so every match ends
# at its own closing brace and several links on one line are each found.
regex = re.compile(r'\\link[a-z]{0,4}\{[^}\n]*\}', re.IGNORECASE)
def strip_http_things(link):
    """Return ``link`` with its protocol and ``www.`` markers removed.

    Every occurrence of ``https://``, ``http://`` and ``www.`` is deleted,
    wherever it appears in the string. The result sorts by domain and is
    'cleaner' to display:
    e.g. http://www.djangopackages.com is returned as djangopackages.com
    """
    # Order matters only in that it mirrors a chained replace: the secure
    # scheme first, then the plain one, then the "www." host prefix.
    for noise in ("https://", "http://", "www."):
        link = link.replace(noise, "")
    return link
def cleaned_url_comparison(a, b):
    """Old-style comparison function that orders links by their cleaned form.

    Both arguments are passed through strip_http_things() first, so the
    domain leads the ordering rather than the protocol prefix.
    See http://docs.python.org/2/library/functions.html#sorted

    Returns 1 if cleaned ``a`` sorts after cleaned ``b``, 0 if the two
    cleaned strings are equal, and -1 otherwise.
    """
    left = strip_http_things(a)
    right = strip_http_things(b)
    if left == right:
        return 0
    return 1 if left > right else -1
def main():
    """Collect the unique LaTeX links used in ``chapters/*.tex``, sorted.

    Every file matching ``chapters/*.tex`` (relative to the current working
    directory) is read and scanned with the module-level ``regex``. Each match
    is cut back to its first closing brace, links containing ``:8000`` are
    skipped, and ``www.2scoops.co`` is rewritten to ``2scoops.co``.

    Returns:
        A list of link strings, still wrapped in their LaTeX link command,
        with exact duplicates removed. The list is ordered by the value of
        strip_http_things() for each link; links whose cleaned forms are
        equal are ordered by their raw text so the result is the same on
        every run.
    """
    links = set()  # a set drops exact duplicates as they are collected

    # Find all the LaTeX files in the chapters directory
    for filename in glob.glob("chapters/*.tex"):
        with open(filename) as f:
            text = f.read()

        for match in regex.findall(text):
            # Keep only the text up to the first closing brace, in case the
            # match ran on past the end of the link. Trimming BEFORE the port
            # check stops unrelated trailing text from discarding a good link.
            link = match[:match.index('}') + 1]
            if ':8000' in link:  # Ignore the links to port 8000
                continue
            links.add(link.replace("www.2scoops.co", "2scoops.co"))

    # This removes MOST of the duplicates: the same address written with a
    # different protocol or "www." prefix still counts as a separate entry.
    #
    # Sort with a key function rather than sorted(cmp=...), which only exists
    # on Python 2. Keying on the cleaned link gives the same domain-first
    # order as comparing cleaned links pairwise; the raw link is a tie-breaker.
    return sorted(links, key=lambda link: (strip_http_things(link), link))
if __name__ == "__main__":
    # Script entry point: write every collected link to links.html as an
    # unordered HTML list, showing the cleaned link as the anchor text.

    # Matches the LaTeX command that opens a link (\link{, \linkdiff{, or
    # \link plus up to four letters, any case) -- the same command spellings
    # the link-finding regex accepts. A raw string keeps the backslash
    # literal instead of relying on an invalid "\l" escape sequence.
    wrapper = re.compile(r'\\link[a-z]{0,4}\{', re.IGNORECASE)

    # Build the whole document before touching the output file, so that
    # links.html is not truncated if collecting the links fails.
    items = []
    for link in main():
        # strip LaTeX elements: the opening command and the closing brace
        link = wrapper.sub("", link).replace("}", "")
        # Add to the unordered list
        items.append(""" <li><a href="{}">{}</a></li>\n""".format(
            link,
            strip_http_things(link)
        ))
    text = "<ul>\n" + "".join(items) + "</ul>\n"

    with open("links.html", "w") as f:
        f.write(text)