-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_rmdulpEL.py
executable file
·58 lines (52 loc) · 1.47 KB
/
data_rmdulpEL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/python3
import sys
# Remove rows with repeated keys from a whitespace-separated data file.
#
# Expected file layout (as checked/used by the code below):
#   line 0 : a comment line whose first word starts with '#'
#   line 1 : the column names; the keys given on the command line are
#            looked up in this line to find their column positions
#   line 2+: the data rows
#
# Only ADJACENT rows are compared.  When consecutive rows agree on every key
# column, the earlier rows are dropped and the last row of the run is kept.
# The result is written to "<filename>.rmdulpEL", one tab after every word.

USAGE = "Usage: data_rmdulp filename key1 key2 ..."
HEADER_ROWS = 2  # row 0: '#' comment line, row 1: column names


def _read_rows(path):
    """Return the file at *path* as a list of whitespace-split rows."""
    with open(path, "r") as readfile:
        return [line.split() for line in readfile]


def _key_columns(names, keys):
    """Return the column index of each key in *names*; exit if one is missing."""
    col_keys = []
    for key in keys:
        try:
            col_keys.append(names.index(key))
        except ValueError:
            sys.exit("Key {!r} not found in the column-name line: {}".format(key, names))
    return col_keys


def _row_key(row, col_keys):
    """Return the tuple of key values of *row*, or None if a key column is missing."""
    try:
        return tuple(row[col] for col in col_keys)
    except IndexError:
        # Blank or short line: it has no complete key, so it is never a duplicate.
        return None


def _drop_earlier_duplicates(data, col_keys, row_begin=HEADER_ROWS):
    """Return *data* without the data rows whose keys equal those of the next row.

    Rows before *row_begin* (the header) are always kept.  Within a run of
    consecutive rows sharing the same key values only the last one survives.
    Rows lacking a key column are kept unchanged.
    """
    head, body = data[:row_begin], data[row_begin:]
    keys = [_row_key(row, col_keys) for row in body]
    last = len(body) - 1
    kept = [
        row
        for i, row in enumerate(body)
        if i == last or keys[i] is None or keys[i] != keys[i + 1]
    ]
    return head + kept


def _write_rows(path, data):
    """Write *data* to *path*, each word followed by a tab, one row per line."""
    with open(path, "w") as writefile:
        for row in data:
            writefile.write("".join(word + "\t" for word in row) + "\n")


def main(argv=None):
    """Run the duplicate-removal tool.

    argv: full argument vector (program name, filename, key1, key2, ...);
          defaults to sys.argv.
    Exits via sys.exit (SystemExit) on bad usage, a malformed header, or an
    unknown key.  On success writes "<filename>.rmdulpEL".
    """
    if argv is None:
        argv = sys.argv

    print('----------- remove data with repeated keys ----------')
    print('------- (data appeared earlier being deleted) -------')
    if len(argv) < 3 or argv[1] == '--help':
        sys.exit(USAGE)

    filename = argv[1]
    keys = argv[2:]
    print("keys used for removing dulplicates: ", keys)

    data = _read_rows(filename)

    # An empty file or a blank first line has no data[0][0] to inspect.
    if not data or not data[0]:
        sys.exit("Check if your 1st line starts with #?")
    if data[0][0][0] != '#':
        print('data[0][0]:\t', data[0][0])
        sys.exit("Check if your 1st line starts with #?")
    if len(data) < HEADER_ROWS:
        sys.exit("Check if your 2nd line holds the column names?")

    col_keys = _key_columns(data[HEADER_ROWS - 1], keys)
    print('col_keys-1:\t\t', col_keys)

    # remove duplicated lines
    data = _drop_earlier_duplicates(data, col_keys)

    _write_rows(filename + ".rmdulpEL", data)
    print('------------------------------------------------------')


if __name__ == "__main__":
    main()