# clean_election_data.py
import pandas as pd
import re
# Files read and written when this module is run as a script.
INPUT_CSV = "Valgdata_-_personlige_stemmer.csv"
COLUMN_DUMP = "test_2.txt"
OUTPUT_CSV = "out.csv"

# Leading columns of the long-format result, in output order.
OUT_COLUMNS = [
    "Gruppe", "KredsNr", "StorKredsNr", "LandsdelsNr",
    "Stemmer", "Valg", "Parti", "Rang",
]

# Ordered (pattern, fixed party) rules; the FIRST matching rule wins, so the
# order below is significant.  When the fixed party is None the party label
# is taken from the pattern's "parti" group.  Patterns without a "rang"
# group describe per-party (or overall) totals.
# The "." placeholder rules catch a party character that is not a regex word
# character; they are mapped to "Ø" (Enhedslisten) and, for the variant with
# a trailing ".1", to "Å" (Alternativet).
# NOTE(review): the ".1" suffix looks like pandas' suffix for a repeated
# column name, and its "." is deliberately left unescaped to keep the
# original matching behaviour -- confirm against the real header.
_COLUMN_RULES = (
    (re.compile(r"^FV(?P<valg>\d{4}) - personlige stemmer i alt$",
                re.IGNORECASE), "Total"),
    (re.compile(r"^FV(?P<valg>\d{4}) - (?P<parti>\w{1}) - personlige stemmer i alt$",
                re.IGNORECASE), None),
    (re.compile(r"^FV(?P<valg>\d{4}) - (?P<parti>\w{2}) - personlige stemmer i alt$",
                re.IGNORECASE), None),
    (re.compile(r"^FV(?P<valg>\d{4}) - .{1} - personlige stemmer i alt$",
                re.IGNORECASE), "Ø"),
    (re.compile(r"^FV(?P<valg>\d{4}) - .{1} - personlige stemmer i alt.1$",
                re.IGNORECASE), "Å"),
    (re.compile(r"^FV(?P<valg>\d{4})\s-\s(?P<parti>\w{1})-(?P<rang>\d{2})$"), None),
    (re.compile(r"^FV(?P<valg>\d{4})\s-\s(?P<parti>\w{2})-(?P<rang>\d{2})$"), None),
    (re.compile(r"^FV(?P<valg>\d{4})\s-\s.-(?P<rang>\d{2})$"), "Ø"),
    (re.compile(r"^FV(?P<valg>\d{4})\s-\s.-(?P<rang>\d{2}).1$"), "Å"),
)


def classify_column(name):
    """Decode a vote-column header into its ``(valg, parti, rang)`` labels.

    Args:
        name: Column header, e.g. ``"FV2015 - A-01"`` or
            ``"FV2015 - personlige stemmer i alt"``.

    Returns:
        A tuple ``(valg, parti, rang)`` of strings, where ``valg`` is the
        four digits following ``FV``, ``parti`` is the party label (or
        ``"Total"``) and ``rang`` is the two-digit rank (or ``"Total"`` for
        columns that hold totals).  ``None`` if the header matches no rule.
    """
    for pattern, fixed_party in _COLUMN_RULES:
        match = pattern.search(name)
        if match is None:
            continue
        groups = match.groupdict()
        parti = groups["parti"] if fixed_party is None else fixed_party
        # The rank is read from the matched digits rather than from a fixed
        # string offset, so every single-character party variant yields the
        # same two-digit rank.
        return groups["valg"], parti, groups.get("rang", "Total")
    return None


def reshape_votes(data):
    """Turn the wide vote table into one row per (input row, vote column).

    The first four columns of ``data`` are repeated for every recognised
    vote column; the column's values go into ``Stemmer`` and the labels
    decoded from its header into ``Valg``, ``Parti`` and ``Rang``.  Columns
    whose header matches no rule are ignored.

    Args:
        data: Wide ``DataFrame`` whose first four columns identify a row.

    Returns:
        A new ``DataFrame`` with a fresh 0..n-1 index.  Its columns start
        with ``OUT_COLUMNS``; any differently named identifier columns
        follow after them.
    """
    id_columns = data.iloc[:, :4]
    frames = []
    for column in data.columns:
        labels = classify_column(column)
        if labels is None:
            continue
        valg, parti, rang = labels
        # Scalars broadcast to the frame's length, so any number of input
        # rows is supported.
        frames.append(
            id_columns.assign(
                Stemmer=data[column].to_numpy(),
                Parti=parti,
                Rang=rang,
                Valg=valg,
            )
        )

    if not frames:
        return pd.DataFrame(columns=OUT_COLUMNS)

    # A single concat replaces the repeated DataFrame.append calls, which
    # no longer exist in pandas 2.x and copied the result on every pass.
    combined = pd.concat(frames, ignore_index=True)
    extra = [name for name in combined.columns if name not in OUT_COLUMNS]
    return combined.reindex(columns=OUT_COLUMNS + extra)


def main():
    """Read the input CSV, dump its column names, and write the long table."""
    data = pd.read_csv(INPUT_CSV, sep='";"', engine="python")

    # Plain-text listing of the parsed headers, one per line.
    with open(COLUMN_DUMP, "w") as dump:
        dump.writelines(f"{name}\n" for name in data.columns)

    reshape_votes(data).to_csv(OUTPUT_CSV, sep=";")


if __name__ == "__main__":
    main()