-
Notifications
You must be signed in to change notification settings - Fork 0
/
truncate2msg_headers.py
executable file
·45 lines (34 loc) · 1.32 KB
/
truncate2msg_headers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#!/usr/bin/env python3
# Use this to repair binary files whose writer was killed before it
# finished writing the msg_headers dataset.
# Quoting Peter: "The msgs dataset is written before the msg_headers dataset, so
# this [length mismatch] is possible if the writer process was killed before it
# finished writing the data. It should be safe to truncate to the shorter length
# if this is the case"
import argparse
import shutil
import h5py
def main(argv=None):
    """Copy an HDF5 file, truncating ``msgs`` to the length of ``msg_headers``.

    The input file is never modified. The output is a byte copy of the
    input in which the ``msgs`` dataset has been replaced by its first
    ``len(msg_headers)`` entries.

    Args:
        argv: Command-line arguments to parse (without the program name).
            ``None`` (the default) makes argparse read ``sys.argv[1:]``.

    Raises:
        ValueError: If ``msg_headers`` is longer than ``msgs``; truncating
            ``msgs`` cannot repair that kind of mismatch.
        SystemExit: Raised by argparse on invalid or missing arguments.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('-i', '--input', required=True)
    ap.add_argument('-o', '--output', required=True)
    args = ap.parse_args(argv)

    # Open the input explicitly read-only (older h5py releases defaulted to
    # a writable mode) and close it again before the file is copied.
    with h5py.File(args.input, 'r') as infile:
        nheaders = len(infile['msg_headers'])
        nmsgs = len(infile['msgs'])
        # Explicit check instead of `assert`, which is stripped by `python -O`.
        if nheaders > nmsgs:
            raise ValueError(
                f"msg_headers ({nheaders}) is longer than msgs ({nmsgs}); "
                "cannot repair by truncating msgs"
            )
        # Only load the data when something actually has to be cut off.
        truncated = infile['msgs'][:nheaders] if nheaders < nmsgs else None

    # Copy the whole file and then swap out 'msgs', rather than rebuilding
    # the file key by key: the key-by-key approach had trouble with groups.
    shutil.copy(args.input, args.output)
    if truncated is None:
        # Lengths already agree; the plain copy is the desired result.
        return

    with h5py.File(args.output, 'a') as outfile:
        # NOTE: assigning an array creates a fresh dataset, so any
        # attributes or storage settings of the old 'msgs' are not kept.
        del outfile['msgs']
        outfile['msgs'] = truncated
# Run the truncation tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()