-
Notifications
You must be signed in to change notification settings - Fork 0
/
merge.py
67 lines (45 loc) · 2.71 KB
/
merge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas as pd
import numpy as np
# Load the three flight tables (United, AA and Dl CSV files), in that order.
ua_flights, aa_flights, dl_flights = (
    pd.read_csv(csv_path)
    for csv_path in ('Unitedflights.csv', 'AAflights.csv', 'Dlflights.csv')
)
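# Disabled check, kept for reference: for each aa_flights row whose airline is
# 'United Airlines', look for a matching row in ua_flights (same flight number,
# times and ports) and count the matches.
#i=0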
#for index, flight in aa_flights[aa_flights['airline']=='United Airlines'].iterrows():
# if ((ua_flights['flightn'] == "UA "+str(flight['flightn'])) & (ua_flights['dep_time'] == str(flight['dep_time']).lower()) & (ua_flights['arr_time'] == str(flight['arr_time']).lower()) & (ua_flights['arr_port'] == str(flight['arr_port'])) & (ua_flights['dep_port'] == str(flight['dep_port']))).any():
# print("Found flight")
# i += 1
# print(i)
# else:
# print("Could not find flight",flight)
# Rewrite the "p.m." / "a.m." markers in the United time columns as "PM" / "AM".
for time_col in ('dep_time', 'arr_time'):
    with_pm = ua_flights[time_col].replace(
        to_replace='([p])[.]([m])[.]', value='PM', regex=True
    )
    ua_flights[time_col] = with_pm.replace(
        to_replace='([a])[.]([m])[.]', value='AM', regex=True
    )

# Write the cleaned United table back over the file it was read from.
ua_flights.to_csv('Unitedflights.csv', header=True, index=False)

# Stack the AA rows on top of the United rows (row-wise concatenation).
all_data = pd.concat([aa_flights, ua_flights], axis=0)
# Append a capacity column to all_data.
# Capacities.csv is read for its 'labels' column (matched against each
# flight's 'craft_type') and its 'capacities' column (the value to attach).
capacities = pd.read_csv('Capacities.csv')

# Build the label -> capacity lookup once instead of filtering the whole
# capacities table for every flight row. keep='first' mirrors the previous
# behaviour of taking the first matching row when a label is repeated;
# rows without a label can never match a craft type and are dropped.
capacity_by_label = (
    capacities.dropna(subset=['labels'])
    .drop_duplicates(subset='labels', keep='first')
    .set_index('labels')['capacities']
)

# A craft type without a capacity entry used to surface as a bare IndexError
# from `.values[0]`; report which types are missing instead.
unknown_types = sorted(
    {
        str(craft)
        for craft in all_data['craft_type'].unique()
        if craft not in capacity_by_label.index
    }
)
if unknown_types:
    raise KeyError(
        f"No capacity listed in Capacities.csv for craft type(s): {unknown_types}"
    )

# Insert a plain array so the values are placed positionally and are not
# re-aligned on all_data's index.
flight_capacities = all_data['craft_type'].map(capacity_by_label).to_numpy()
all_data.insert(7, "capacities", flight_capacities)
# Convert the AM/PM times in all_data to 24-hour "HH:MM" strings.
#
# Each column is assigned as a whole. The previous form,
# `all_data[col].iloc[i] = ...`, is chained assignment: pandas does not
# guarantee that it writes through to the frame, and under Copy-on-Write it
# silently changes nothing. Values are still parsed one at a time, so every
# entry is interpreted independently of its neighbours. A plain list is
# assigned so nothing is re-aligned on all_data's index, which may repeat
# labels after the row-wise concat.
for time_col in ('arr_time', 'dep_time'):
    all_data[time_col] = [
        pd.to_datetime(raw_time).strftime('%H:%M')
        for raw_time in all_data[time_col]
    ]
# Rename the Dl table's columns to the names used by the other flight tables.
delta_column_names = {
    "number": "flightn",
    "depart": "dep_time",
    "origin": "dep_port",
    "arrive": "arr_time",
    "destination": "arr_port",
    "type": "craft_type",
    "capacity": "capacities",
}
dl_flights = dl_flights.rename(columns=delta_column_names)

# Row by row: trim whitespace around the flight number and rewrite both time
# cells as 24-hour "HH:MM" strings.
for row_label in dl_flights.index:
    dl_flights.at[row_label, 'flightn'] = dl_flights.at[row_label, 'flightn'].strip()
    for time_col in ('dep_time', 'arr_time'):
        parsed_time = pd.to_datetime(dl_flights.at[row_label, time_col])
        dl_flights.at[row_label, time_col] = parsed_time.strftime('%H:%M')
# Give every row of dl_flights the same airline label, as the second column.
airline_labels = ["Delta Airlines"] * len(dl_flights)
dl_flights.insert(1, "airline", airline_labels)

# Add the dl_flights rows below the rows already collected in all_data.
all_data = pd.concat([all_data, dl_flights], axis=0)
# Recompute the capacity of every row in all_data from the capacities table
# and replace the existing 'capacities' column with the recomputed values.

# Build the label -> capacity lookup once instead of filtering the whole
# capacities table for every flight row. keep='first' mirrors the previous
# behaviour of taking the first matching row when a label is repeated;
# rows without a label can never match a craft type and are dropped.
capacity_by_label = (
    capacities.dropna(subset=['labels'])
    .drop_duplicates(subset='labels', keep='first')
    .set_index('labels')['capacities']
)

# A craft type without a capacity entry used to surface as a bare IndexError
# from `.values[0]`; report which types are missing instead.
unknown_types = sorted(
    {
        str(craft)
        for craft in all_data['craft_type'].unique()
        if craft not in capacity_by_label.index
    }
)
if unknown_types:
    raise KeyError(
        f"No capacity listed for craft type(s): {unknown_types}"
    )

# Plain array: inserted positionally, never re-aligned on all_data's index.
L = all_data['craft_type'].map(capacity_by_label).to_numpy()

# DataFrame.drop returns a new frame and leaves the original untouched, so
# the result has to be re-bound. Previously it was discarded, the old column
# stayed in place, and the insert below failed because 'capacities' already
# existed.
all_data = all_data.drop(columns='capacities')
all_data.insert(7, "capacities", L)

# Save the merged table.
all_data.to_csv('AllFlights.csv', index=False, header=True)