-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot.py
119 lines (106 loc) · 3.95 KB
/
plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
###############################################################################
# LIBRARIES AND DEPENDENCIES #
###############################################################################
import pandas as pd
###############################################################################
# AUXILIARY FUNCTIONS #
###############################################################################
def combine(start_date, end_date):
    """Stack the daily warning files into a single date-by-reach alert table.

    For every day from ``start_date`` to ``end_date`` (both inclusive) the
    file ``geoglows_warnings/YYYY_MM_DD.csv`` is read, its ``alert`` codes are
    translated to numbers (``'R0'`` -> 0, ``'R2'`` -> 2, ..., ``'R100'`` ->
    100) and the day becomes one row of the result.

    Args:
        start_date: First day to load; any value ``pandas.date_range`` accepts.
        end_date: Last day to load (inclusive).

    Returns:
        pandas.DataFrame: one row per day, labelled with a ``'YYYY-MM-DD'``
        string, and one column per ``comid`` value found in the files. Alert
        codes that are not in the map, and reaches absent from a given day's
        file, show up as NaN. An empty DataFrame is returned when the range
        contains no days.

    Raises:
        Any error raised by ``pandas.read_csv`` (for example when a daily
        file is missing or lacks the ``comid``/``alert`` columns) propagates
        unchanged.
    """
    # Numeric value assigned to each alert code found in the CSV files.
    alert_map = {'R0': 0, 'R2': 2, 'R5': 5, 'R10': 10, 'R25': 25, 'R50': 50, 'R100': 100}
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')

    # Collect the per-day rows and concatenate once at the end: calling
    # pd.concat inside the loop re-copies every previous day on each
    # iteration (quadratic cost over a full year of files).
    daily_rows = []
    for date in date_range:
        csv_file = f"geoglows_warnings/{date.strftime('%Y_%m_%d')}.csv"
        df = pd.read_csv(csv_file, usecols=['comid', 'alert'])
        df["alert"] = df["alert"].map(alert_map)
        # The value column is renamed to the day label so that, after the
        # transpose, the day becomes the row label. The id column is renamed
        # to 'date', which leaves the column axis named 'date' exactly as
        # previous versions of this function did.
        df = df.rename(columns={
            'alert': date.strftime('%Y-%m-%d'),
            'comid': 'date',
        })
        daily_rows.append(df.set_index('date').T)

    # pd.concat refuses an empty list, so an empty range is handled explicitly.
    if not daily_rows:
        return pd.DataFrame()
    return pd.concat(daily_rows, ignore_index=False)
# Generate event matrix
def event_matrix(comid, combined_data):
    """List the alert events recorded for one reach.

    An event is a maximal run of consecutive rows whose alert value is
    greater than zero. Rows with a zero or missing (NaN) alert are treated as
    "no alert" and therefore end the current event. A run that is already
    active on the very first row is reported as its own event instead of
    being merged with the no-alert rows.

    Args:
        comid: Label of the column of ``combined_data`` to analyse.
        combined_data: Table with one row per day and one column per reach.
            The index values must be parseable by ``pandas.to_datetime`` and
            the selected column must hold numeric alert values.

    Returns:
        pandas.DataFrame: one row per event, in row order, with columns
        ``start`` (first date of the event), ``end`` (last date of the event)
        and ``alert`` (highest alert value reached). The index is 0..k-1.
        The frame is empty when the reach never has a positive alert.

    Raises:
        KeyError: if ``comid`` is not a column of ``combined_data``.
    """
    comid_data = pd.DataFrame({
        "date": pd.to_datetime(combined_data.index.tolist()),
        "alert": combined_data[comid].to_list(),
    })

    # A day belongs to an event when its alert is positive; NaN compares
    # False here, so missing days count as "no alert".
    active = comid_data["alert"] > 0

    # An event starts on an active day whose previous day was not active.
    # Treating the (non-existent) day before the first row as inactive makes
    # an alert present on the first row open event number 1.
    previous_active = active.shift(1, fill_value=False).astype(bool)
    starts = active & ~previous_active

    # Running count of starts numbers the events 1, 2, 3, ...; inactive days
    # are forced to 0 so that they never share an id with a real event.
    comid_data["event"] = starts.cumsum().where(active, 0)

    result = comid_data.groupby('event').agg(
        start=('date', 'min'),
        end=('date', 'max'),
        alert=('alert', 'max')
    ).reset_index()

    # Drop the "no alert" group by id rather than by its alert value, so a
    # group made only of missing values cannot slip through.
    result = result[result['event'] != 0].drop(columns=['event'])
    return result.reset_index(drop=True)
# Summarize the event matrix
def _rp_label(level):
    """Return the ``RP_<level>`` column label, writing integral levels as ints."""
    try:
        # 2.0 and 2 must give the same label, otherwise reaches whose alert
        # column was upcast to float would get their own 'RP_2.0' columns.
        if float(level).is_integer():
            level = int(level)
    except (TypeError, ValueError):
        pass
    return f"RP_{level}"


def summary(comid, event_matrix):
    """Count the events of one reach by their peak alert value.

    Args:
        comid: Identifier used as the row label of the result.
        event_matrix: Table with an ``alert`` column holding one value per
            event (the parameter name shadows the module-level function of
            the same name inside this body).

    Returns:
        pandas.DataFrame: a single row labelled ``comid`` with one column
        ``RP_<alert>`` per distinct alert value, ordered by ascending alert
        value, holding the number of events with that value. When
        ``event_matrix`` has no rows the result is a single row with no
        columns.
    """
    # Work on the counts Series directly: its index holds the alert values
    # and its values the counts in every pandas version, whereas the column
    # names produced by value_counts().reset_index() changed in pandas 2.0.
    counts = event_matrix['alert'].value_counts().sort_index()
    counts.index = [_rp_label(level) for level in counts.index]
    return counts.to_frame(name=comid).T
###############################################################################
# MAIN CONTROLLER #
###############################################################################
def main(year):
    """Build and save the per-reach event summary for one calendar year.

    Reads the reach identifiers from the ``comid`` column of
    ``geoglows_reachs_ids.xlsx``, loads the daily warnings from January 1st
    to December 31st of ``year``, summarises the events of every reach and
    writes the table to ``geoglows_analysis/results_comids-<year>.csv``.
    Each reach identifier is printed as it is processed.

    Args:
        year: Year to analyse; it is interpolated into the date strings and
            the output file name, so a string or an int both work.

    Returns:
        None. The result is written to disk.
    """
    import os  # local import: only needed to make sure the output folder exists

    # Read drainage network
    drainage = pd.read_excel("geoglows_reachs_ids.xlsx")["comid"].to_list()

    # Setup date range
    start_date = f'{year}-01-01'
    end_date = f'{year}-12-31'

    # Combined data
    combined_data = combine(start_date, end_date)

    # Gather one summary row per reach and concatenate once at the end;
    # concatenating inside the loop re-copies all previous rows every time.
    rows = []
    for comid in drainage:
        print(comid)
        em = event_matrix(comid, combined_data)
        rows.append(summary(comid, em))

    # pd.concat rejects an empty list, so an empty reach list yields an
    # empty table just as before.
    combined_out = pd.concat(rows, ignore_index=False) if rows else pd.DataFrame()

    # Reaches without events of a given alert level get a count of 0.
    combined_out = combined_out.fillna(0)

    # Avoid losing the whole computation because the folder is missing.
    os.makedirs("geoglows_analysis", exist_ok=True)
    combined_out.to_csv(f"geoglows_analysis/results_comids-{year}.csv", index=True)
# Run the analysis for every year from 2014 through 2019, in order.
for analysis_year in range(2014, 2020):
    main(str(analysis_year))