-
Notifications
You must be signed in to change notification settings - Fork 1
/
clean.py
41 lines (33 loc) · 1.4 KB
/
clean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import csv
from utils import get_valid_colum_indices
class Cleaner():
    """Namespace for the CSV column-extraction / row-filtering routine."""

    @staticmethod
    def clean(file, clean_columns, remove):
        """Write a reduced copy of a CSV file next to the original.

        The output path is built by dropping the last seven characters of
        ``file`` and appending ``"clean.csv"``.  The output contains a header
        row made of ``clean_columns`` followed by one row per kept input row,
        holding only the cells of those columns.

        Args:
            file: Path of the input CSV file (comma-delimited, first row is
                the header).
            clean_columns: Names of the columns to copy to the output.  The
                list is not modified.
            remove: Iterable of strings, or a falsy value to disable
                filtering.  When truthy, the "Message" column is also looked
                up and every row whose "Message" cell contains any of these
                strings is skipped.

        Returns:
            None.  Nothing is written when the input has no header row or
            when the column lookup helper returns ``None``.
        """
        print("Cleaning {}".format(file))
        print("For columns {}".format(clean_columns))

        # NOTE(review): the fixed 7-character cut presumably targets paths
        # ending in "raw.csv" -- confirm against callers.
        new_file = file[0:-7] + "clean.csv"

        # Work on copies so the caller's list is never mutated, and so the
        # header written below matches the data rows exactly.
        output_columns = list(clean_columns)
        lookup_columns = list(output_columns)
        if remove:
            # The filter column is only needed for the lookup, not the output.
            lookup_columns.append("Message")

        # newline='' is what the csv module requires for correct handling of
        # line endings and of newlines embedded in quoted fields.
        with open(file, 'r', newline='') as raw_file:
            reader = csv.reader(raw_file, delimiter=',')
            headers = next(reader, None)
            if headers is None:
                # Empty input: report it instead of leaking StopIteration.
                print("no header row found in {}".format(file))
                return

            # Assumes the helper returns one index per requested column, in
            # request order, or None when a column is missing -- TODO confirm
            # against utils.get_valid_colum_indices.
            indices = get_valid_colum_indices(headers, lookup_columns)
            if indices is None:
                print("invalid column specified for in {}".format(file))
                return

            keep_indices = indices[:len(output_columns)]
            # Last looked-up column is "Message" when filtering is enabled.
            message_index = indices[-1] if remove else None

            with open(new_file, 'w', newline='') as clean_file:
                writer = csv.writer(clean_file, delimiter=',')
                writer.writerow(output_columns)
                for row in reader:
                    if remove:
                        message = row[message_index]
                        if any(term in message for term in remove):
                            continue
                    writer.writerow([row[idx] for idx in keep_indices])
        print("Done")