-
Notifications
You must be signed in to change notification settings - Fork 4
/
mirror_hackage.py
148 lines (113 loc) · 3.83 KB
/
mirror_hackage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# requires python-requests.org
import sys
import requests
import os
import hashlib
import traceback
if len(sys.argv) < 2:
sys.stderr.write("error: require path to mirror base as first argument")
raise SystemExit(1)
base = sys.argv[1]
os.chdir(base)
def mkdir_p(d):
if not os.path.isdir(d):
os.mkdir(d)
mkdir_p("archive")
mkdir_p("archive/package")
mkdir_p("index")
if len(sys.argv) > 2:
url_base = sys.argv[2]
if not url_base.endswith("/"):
url_base += "/"
else:
url_base = "http://hackage.haskell.org/packages/archive/"
# Thanks stackoverflow..
class BColors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
def start_line(l):
sys.stderr.write(l.ljust(60) + "...")
sys.stderr.flush()
def end_okay():
sys.stderr.write(BColors.OKGREEN + "[OK]".rjust(10) + BColors.ENDC + "\n")
def end_finished():
sys.stderr.write(BColors.OKBLUE + "[DONE]".rjust(10) + BColors.ENDC + "\n")
def end_starting():
sys.stderr.write(BColors.WARNING + "[START]".rjust(10) + BColors.ENDC + "\n")
def end_fail(why):
sys.stderr.write(BColors.FAIL + "[FAIL]".rjust(10) + BColors.ENDC + "\n -> " + why + "\n")
raise SystemExit(1)
def information(i):
sys.stderr.write(BColors.WARNING + " --> " + i + "\n" + BColors.ENDC)
start_line("[1] Downloading log")
resp = requests.get(url_base + "log")
if resp.status_code != 200:
end_fail("log download got HTTP status code: %d" % resp.status_code)
log_data = resp.content.splitlines()
end_okay()
mark = open("_mark", "r").read().strip() if os.path.isfile("_mark") else "(missing)"
start_line("[2] Scanning for %r" % mark)
pos = -1
def make_log_checksums():
global pos # woot side efffects!
csum = ""
for x, l in enumerate(log_data):
csum = (hashlib.sha1(csum + l).hexdigest())
if csum == mark:
assert pos == -1, "wtfbbq?"
pos = x
yield l, csum
steps = list(make_log_checksums())
pos += 1
todo = len(log_data) - pos
end_okay()
information("%d new uploads to mirror" % todo)
# Mirror each
start_line("[3] Mirroring new packages")
end_starting()
def mirror_package(full, package, version):
# 1. Make directory
mkdir_p("index/%s" % package)
mkdir_p("index/%s/%s" % (package, version))
cabal_r = requests.get(url_base + "%s/%s/%s.cabal" % (package, version, package))
if cabal_r.status_code == 404:
information("Package %s seems to have been removed! Skipping..." % full)
return
assert cabal_r.status_code == 200, "non-200 from cabal download"
with open("index/%s/%s/%s.cabal" % (package, version, package), "w") as f:
f.write(cabal_r.content)
data_r = requests.get(url_base + "%s/%s/%s.tar.gz" % (package, version, full))
assert data_r.status_code == 200, "non-200 from tarball download"
with open("archive/package/%s.tar.gz" % (full,), "w") as f:
f.write(data_r.content)
for x, (pline, csum) in enumerate(steps[pos:]):
# Download all we need..
package, version = pline.split()[-2:]
fullpack = "%s-%s" % (package, version)
# Actually mirror
start_line(" %s (%d/%d)" % (fullpack, x+1, todo))
try:
mirror_package(fullpack, package, version)
except:
information("Exception during mirror:\n%s" % traceback.format_exc())
end_fail("Aborting due to exception")
else:
end_okay()
# Update bookmark at each stage
# (paranoid: atomic rename)
with open("_temp_mark", "w") as f:
f.write(csum)
os.rename("_temp_mark", "_mark")
start_line(" Mirroring new packages")
end_finished()
start_line("[4] Building index")
end_starting()
os.chdir("index")
os.system("tar cfz 00-index.tar.gz *")
os.rename("00-index.tar.gz", "../archive/00-index.tar.gz")
start_line(" Building index")
end_finished()