-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgeo-rep-status-compare.py
executable file
·367 lines (318 loc) · 12.1 KB
/
geo-rep-status-compare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
#!/usr/bin/python
# $Id$
import sys,os,re
from datetime import datetime, timedelta
import string
def usage():
    """Print the command-line help text to standard output."""
    # Only the first entry is dynamic: it embeds the name the script was
    # invoked with (sys.argv[0]).
    help_lines = (
        "Usage:\n" + sys.argv[0] + " {-h | --help} | [-d] {layout} {georep_status_f1} {georep_status_f2}",
        "\n=== Gluster geo-replication status check comparison tool ===",
        "* Parse output for two geo-replication status reports",
        "* Report the time difference between the two, as well as the progress rates",
        "* Report any errors identified",
        "\nArguments:",
        " -h|--help Display this help text",
        " -d[dbg_level] Enable debugging",
        " {layout} Volume layout (i.e. 4x3)",
        " {georep_status_f1/2} Files containing geo-replication status output for a single session",
        " - time is taken either from modification time of file, or from single",
        " line timestamp of format yyyy-mm-dd hh:mm:ss on a line in file",
        "--- v1.0 --- pmladd@gmail.com",
    )
    for text in help_lines:
        print(text)
#####################
# Argument processing
#####################
# Command-line arguments after the script name; consumed left to right.
# After this section: dbg_lvl is the debug level (0 = off), layout is the
# ["subvolumes", "replicas"] string pair and args holds the two file names.
args = sys.argv[1:]

# An explicit help request is a successful run.
if args and args[0] in ("-h", "--help"):
    usage()
    sys.exit(0)

# Too few arguments is a usage error, so report it with a non-zero status.
if len(args) < 3:
    usage()
    sys.exit(1)

# Optional leading "-d" (level 1) or "-dN" (level N) debug switch.
dbgarg = re.match(r"^-d(\d*)", args[0])
dbg_lvl = 0
if dbgarg:
    dbg_lvl = int(dbgarg.group(1)) if dbgarg.group(1) else 1
    args = args[1:]

if len(args) != 3:
    usage()
    sys.exit(1)

# The layout must look like "4x3"; reject anything else here instead of
# failing later when the two counts are converted with int().
if not re.match(r"^\d+x\d+$", args[0]):
    print("[E] Invalid layout '%s': expected {subvolumes}x{replicas}, e.g. 4x3" % (args[0],))
    usage()
    sys.exit(1)
layout = args[0].split('x')
args = args[1:]
######################
# Print debug messages
######################
def debug(*args):
    """Print a debug message if the active debug level is high enough.

    The first positional argument is the minimum debug level at which the
    message is shown; the remaining arguments are printed on one line,
    separated by single spaces. The active level is the module-level
    dbg_lvl.
    """
    threshold = args[0]
    if dbg_lvl < threshold:
        return
    print(" ".join(str(item) for item in args[1:]))
######################################
# Convert timestamp to datetime object
######################################
def ts_to_datetime(ts):
    """Convert a "YYYY-MM-DD HH:MM:SS" timestamp string to a datetime.

    The placeholder string "N/A" yields None. Any other string is handed
    to datetime.strptime, which raises ValueError if it does not match
    the expected format.
    """
    if ts != "N/A":
        return datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
    return None
###################################
# Read through status file
# Return parsed array and timestamp
###################################
def parse_file(f):
    """Parse one geo-replication status report.

    f is an open file object for the report. Blank lines, the column
    header line and dashed separator lines are skipped. A line holding
    only a "yyyy-mm-dd hh:mm:ss" timestamp sets the report time; without
    one, the file's modification time is used. Every other line must be
    a status row.

    Returns a (rows, file_time) tuple where rows is a list of dicts with
    the keys master_node, master_vol, master_brick, slave_user, slave,
    slave_node, status, crawl_status and last_synced, and file_time is a
    datetime.

    Prints the offending line and exits with status 2 if a line cannot
    be parsed.
    """
    parsed_file = []
    blank_line = re.compile(r"^\s*$")
    header_line = re.compile(
        r"^\s*MASTER NODE\s+MASTER VOL\s+MASTER BRICK\s+SLAVE USER\s+SLAVE"
        r"\s+SLAVE NODE\s+STATUS\s+CRAWL STATUS\s+LAST_SYNCED\s*$")
    sep_line = re.compile(r"^\s*-+\s*$")
    tstamp_line = re.compile(
        r"^\s*(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})\s*$")
    # The status alternative is spelled "Initializing" with the trailing
    # dots escaped and optional, so it matches the literal text only.
    status_line = re.compile(
        r"^\s*"
        r"(?P<master_node>\S+)\s+"
        r"(?P<master_vol>\S+)\s+"
        r"(?P<master_brick>\S+)\s+"
        r"(?P<slave_user>\S+)\s+"
        r"(?P<slave>\S+)\s+"
        r"(?P<slave_node>\S+)\s+"
        r"(?P<status>Active|Passive|Faulty|Stopped|Initializing(?:\.\.\.)?)\s+"
        r"(?P<crawl_status>N/A|Changelog Crawl|History Crawl|Hybrid Crawl)\s+"
        r"(?P<last_synced>(N/A)|(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2}))\s*$")

    # Default report time; replaced if the file carries a timestamp line.
    file_time = datetime.fromtimestamp(os.path.getmtime(f.name))

    for line in f:
        debug(4, line)
        if blank_line.match(line):
            debug(3, "blank line")
            continue
        if header_line.match(line):
            debug(3, "header line")
            continue
        if sep_line.match(line):
            debug(3, "sep line")
            continue
        if tstamp_line.match(line) is not None:
            debug(3, "time line")
            file_time = ts_to_datetime(line.strip())
            continue
        parsed_line = status_line.match(line)
        if parsed_line is None:
            print("Unable to parse line:\n" + line)
            sys.exit(2)
        debug(3, "status line: ", parsed_line.groupdict())
        parsed_file.append(parsed_line.groupdict())
    return parsed_file, file_time
#######################
# Build dict of volumes
#######################
def volumes_for(lines):
    """Group parsed status rows by master volume.

    lines is an iterable of row dicts, each with a 'master_vol' key.
    Returns a dict mapping each volume name to the list of its rows, in
    the order they were encountered.
    """
    volumes = {}
    for line in lines:
        # Append in place rather than rebuilding the list for every row.
        volumes.setdefault(line['master_vol'], []).append(line)
    return volumes
######################
# Build dict of sessions
######################
def sessions_for_volume(lines):
    """Group parsed status rows by slave (one group per session).

    lines is an iterable of row dicts, each with a 'slave' key. Returns
    a dict mapping each slave value to the list of its rows, in the
    order they were encountered.
    """
    sessions = {}
    for line in lines:
        # Append in place rather than rebuilding the list for every row.
        sessions.setdefault(line['slave'], []).append(line)
    return sessions
######################
# Build dict of masters
######################
def masters_for_session(lines):
    """Index a session's status rows by (master_node, master_brick).

    lines is an iterable of row dicts with 'master_node' and
    'master_brick' keys. Returns a dict mapping each (node, brick) tuple
    to its row. If the same node/brick pair occurs twice, an error is
    printed and the script exits with status 5.
    """
    masters = {}
    for row in lines:
        key = (row['master_node'], row['master_brick'])
        if key in masters:
            print("[E] **** Error: duplicate master/brick combination!")
            # "%s %s ..." keeps the single space the print statement put
            # between its comma-separated items.
            print("%s %s %s %s" % (" Master: ", row['master_node'], " Brick: ", row['master_brick']))
            exit(5)
        masters[key] = row
    return masters
######################
# Print node counts
######################
def print_nodereport():
    """Print the node/brick count summary.

    Reads the module-level values subvol_count, replica_count,
    brick_count, now_nodecount and be4_nodecount, so it must only be
    called after those have been assigned.
    """
    print("\n===== Nodes =====")
    report_rows = (
        ("Subvolumes: ", subvol_count),
        ("Replication factor: ", replica_count),
        ("Total bricks: ", brick_count),
        ("now # nodes: ", now_nodecount),
        ("prev # nodes: ", be4_nodecount),
    )
    for label, value in report_rows:
        # "%s %s" keeps the single space the print statement put between
        # its comma-separated items, so the output layout is unchanged.
        print("%s %s" % (label, value))
######################
# Print volume details
######################
def print_volume(volume, lines):
    """Print every session of one volume with a line per master brick.

    volume is the volume name used in the heading; lines is the list of
    status rows belonging to that volume. Rows are grouped per slave
    with sessions_for_volume and indexed per (node, brick) with
    masters_for_session, then listed in sorted (node, brick) order.
    The last_synced value is not part of this listing.
    """
    print("%s %s %s" % ("Volume ", volume, ": ============"))
    for session, session_rows in sessions_for_volume(lines).items():
        print("%s %s" % ("Session: ", session))
        by_master = masters_for_session(session_rows)
        for _key, row in sorted(by_master.items()):
            fields = (
                "\tNode: ", row['master_node'],
                " ||| Brick:", row['master_brick'],
                " ||| Slave:", row['slave_node'],
                " ||| Status:", row['status'],
                " ||| Crawl Status:", row['crawl_status'],
            )
            # Single-space join mirrors the separator of the print
            # statement's comma-separated items.
            print(" ".join(str(part) for part in fields))
########################
# Main line code
########################
# Parse both status reports. Each result is a (rows, report_time) tuple.
# The files are closed as soon as they have been parsed.
with open(args[0]) as f1:
    f1_parsed = parse_file(f1)
debug(2, f1_parsed)
with open(args[1]) as f2:
    f2_parsed = parse_file(f2)
debug(2, f2_parsed)

# Time calculations
print("===== Times =====")
elapsed_time = abs(f1_parsed[1] - f2_parsed[1])
print("F1 time:  %s" % (f1_parsed[1],))
print("F2 time:  %s" % (f2_parsed[1],))
print("Elapsed time:  %s" % (elapsed_time,))

# Reports with identical times cannot be compared: there is no elapsed
# interval to measure progress against.
if f1_parsed[1] == f2_parsed[1]:
    print("[E] Times on both files are the same. Comparison not possible")
    sys.exit(6)

# "now" is the more recent report, "be4" the earlier one, regardless of
# the order in which the files were given on the command line.
if f1_parsed[1] > f2_parsed[1]:
    now_parsed, be4_parsed = f1_parsed, f2_parsed
else:
    now_parsed, be4_parsed = f2_parsed, f1_parsed
now_time = now_parsed[1]
be4_time = be4_parsed[1]
#############
# Node checks
#############
# Note: These checks assume that there is only one session contained in both files
# Another way to get the volume topology other than as a command line argument
# will be needed if this code is enhanced to handle multiple volumes or sessions
# Topology expected from the command-line layout: one Active brick per
# subvolume, every remaining brick Passive.
subvol_count = int(layout[0])
replica_count = int(layout[1])
brick_count = subvol_count * replica_count
expected_active_count = subvol_count
expected_passive_count = brick_count - subvol_count

# Number of status rows found in each report.
now_nodecount = len(now_parsed[0])
be4_nodecount = len(be4_parsed[0])

# Each entry pairs a failed-check flag with the message reported for it.
node_checks = (
    (now_nodecount != be4_nodecount,
     "[W] **** Warning: node counts not equal ****\n"),
    (now_nodecount != brick_count,
     "[E] **** Error: now nodecount not equal to brick count ****\n"),
    (be4_nodecount != brick_count,
     "[W] **** Warning: previous nodecount not equal to brick count ****\n"),
)
node_errors = "".join(message for failed, message in node_checks if failed)
if node_errors or dbg_lvl >= 1:
    print_nodereport()
    print(node_errors)

# Session details: rows grouped per volume; listed only when debugging.
now_volumes = volumes_for(now_parsed[0])
be4_volumes = volumes_for(be4_parsed[0])
if dbg_lvl >= 1:
    print("\n===== Sessions =====")
    for vol_name, vol_rows in now_volumes.items():
        print_volume(vol_name, vol_rows)
    for vol_name, vol_rows in be4_volumes.items():
        print_volume(vol_name, vol_rows)
####################
# Correlate Sessions
####################
def _sole_item(mapping):
    """Return the (key, value) pair of a dict known to hold one entry."""
    return next(iter(mapping.items()))


def _split_by_status(masters):
    """Split a masters dict into (actives, passives, others) by row status."""
    actives, passives, others = {}, {}, {}
    for master, line in masters.items():
        if line['status'] == "Active":
            actives[master] = line
        elif line['status'] == "Passive":
            passives[master] = line
        else:
            others[master] = line
    return actives, passives, others


# Both reports must describe exactly one volume, and the same one.
if len(now_volumes) != 1 or len(be4_volumes) != 1:
    print("Correlating sessions with multiple volumes not currently supported")
    sys.exit(2)
now_volume, now_vol_lines = _sole_item(now_volumes)
be4_volume, be4_vol_lines = _sole_item(be4_volumes)
debug(2, "volumes: ", now_volume, be4_volume)
if now_volume != be4_volume:
    print("[E] Status files contain sessions from different volumes")
    sys.exit(3)

# Both reports must describe exactly one session, to the same slave.
now_sessions = sessions_for_volume(now_vol_lines)
be4_sessions = sessions_for_volume(be4_vol_lines)
if len(now_sessions) != 1 or len(be4_sessions) != 1:
    print("Correlating sessions with multiple sessions is not currently supported")
    sys.exit(2)
now_session, now_session_lines = _sole_item(now_sessions)
be4_session, be4_session_lines = _sole_item(be4_sessions)
debug(2, "slave sessions: ", now_session, be4_session)
if now_session != be4_session:
    print("[E] Status files contain sessions to different slaves")
    sys.exit(4)

now_masters = masters_for_session(now_session_lines)
be4_masters = masters_for_session(be4_session_lines)
debug(3, "now masters: ", now_masters.keys())
debug(3, "be4 masters: ", be4_masters.keys())

# Separate out by type
now_actives, now_passives, now_others = _split_by_status(now_masters)
be4_actives, be4_passives, be4_others = _split_by_status(be4_masters)

# Check active count == subvolume count
if len(now_actives) != expected_active_count:
    print("[E] Error: Current active count ( %s ) not equal to subvolume count ( %s )"
          % (len(now_actives), expected_active_count))
if len(be4_actives) != expected_active_count:
    print("[W] Warning: Previous active count ( %s ) not equal to subvolume count ( %s )"
          % (len(be4_actives), expected_active_count))

# Check passive count == (brick count - subvolume count)
if len(now_passives) != expected_passive_count:
    print("[E] Error: Current passive count ( %s ) not equal to expected passive count ( %s )"
          % (len(now_passives), expected_passive_count))
if len(be4_passives) != expected_passive_count:
    print("[W] Warning: Previous passive count ( %s ) not equal to expected passive count ( %s )"
          % (len(be4_passives), expected_passive_count))

# Report on any nodes not in active or passive
if now_others:
    print("[E] Error: Some current bricks not in Active/Passive status:")
    for master, line in sorted(now_others.items()):
        print("Brick:  %s  Status:  %s" % (master, line['status']))
if be4_others:
    print("[W] Warning: Some previous bricks not in Active/Passive status:")
    for master, line in sorted(be4_others.items()):
        print("\tBrick:  %s  Status:  %s" % (master, line['status']))

# Progress of every brick that is Active in both reports.
for master, line in now_actives.items():
    if master not in be4_actives:
        print("%s not matched in be4" % (master,))
        continue
    last_sync = ts_to_datetime(line['last_synced'])
    prev_sync = ts_to_datetime(be4_actives[master]['last_synced'])
    # ts_to_datetime returns None for "N/A"; no arithmetic is possible then.
    if last_sync is None or prev_sync is None:
        print("Node: %s ||| Last synced time not available - progress cannot be computed"
              % (master,))
        continue
    behind_time = now_time - last_sync
    progress = last_sync - prev_sync
    # Ratio of synced time advanced to wall-clock time elapsed between the
    # two reports; about 1.0 means replication is keeping pace.
    progress_ratio = progress.total_seconds() / elapsed_time.total_seconds()
    print("Node: %s Current: %s ||| Behind: %s ||| Progress: %s ||| Progress Ratio: %.4f"
          % (master, last_sync, behind_time, progress, progress_ratio))
    if progress_ratio < 0.95:
        print("[E] Error: Progress ratio is signifigantly less than 1.0 - geo-replication is falling behind")