-
Notifications
You must be signed in to change notification settings - Fork 0
/
data-cleanup.py
73 lines (52 loc) · 1.78 KB
/
data-cleanup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import csv
import json
# Helper function for retrieving the year from the dataset
def get_year(item):
    """Return the value stored under the "Year" key of *item*.

    Raises KeyError if *item* has no "Year" key.
    """
    return item["Year"]
# Load the CSV file into a dictionary of rows keyed 1..N in file order.
# newline="" is what the csv module asks for, so that it can interpret
# line endings (including ones inside quoted fields) itself.
with open("MPVDatasetDownload.csv", encoding="utf-8", newline="") as csvf:
    data = dict(enumerate(csv.DictReader(csvf), start=1))
# Tally killings, charges and convictions per year.
IN_PROGRESS_YEAR = "2021"

stats = []
# Maps a year to its entry in `stats`, so each row costs one dict lookup
# instead of a rescan of the list.
stats_by_year = {}

for row in data.values():
    # Use the last two characters of the date field as the year and
    # prefix the century.
    year = "20" + row["Date of Incident (month/day/year)"][-2:]

    # Skip data for 2021 that's still in progress
    if year == IN_PROGRESS_YEAR:
        continue

    entry = stats_by_year.get(year)
    if entry is None:
        entry = {
            "Year": year,
            "Killings": 0,
            "Charges": 0,
            "Convictions": 0,
        }
        stats.append(entry)
        stats_by_year[year] = entry

    entry["Killings"] += 1

    charge_status = row["Criminal Charges?"]
    # "Charged, Convicted" also contains "Charged", so a conviction is
    # counted as a charge as well.
    if "Charged" in charge_status:
        entry["Charges"] += 1
    if "Charged, Convicted" in charge_status:
        entry["Convictions"] += 1

# Entries were appended in the order each year first appears in the file;
# flip that order for the output.
stats.reverse()

print(json.dumps(stats, indent=4, sort_keys=True))
# Save the dataset to a CSV file.
# newline="" lets the csv module control line endings (otherwise blank
# rows appear on Windows); the with-block closes the file even if a
# write fails part-way.
with open("data-summary.csv", "w", newline="", encoding="utf-8") as data_file:
    csv_writer = csv.writer(data_file)
    if stats:
        # The first entry's keys serve as the header row.
        csv_writer.writerow(stats[0].keys())
        csv_writer.writerows(item.values() for item in stats)