-
Notifications
You must be signed in to change notification settings - Fork 0
/
inference.py
184 lines (140 loc) · 7.23 KB
/
inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
from datetime import datetime , timedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pycaret.regression import *
import requests
from wwo_hist import retrieve_hist_data
# Ask for the month to forecast; the prompt's own example uses the MM-YYYY form.
date = input("enter the month and year for which you want to predict the sales of the product [EXAMPLE : 02-2020 (FEB 2020)]: ")
# Show the selectable product groups, one literal per displayed row.
print(
    "The Products are : \n"
    " ALCOHOL , BREAD , CHEMISTRY , CHEWING_GUM_LOLIPOPS \n"
    " CHIPS_FLAKES , CIGARETTES , COFFEE TEA \n"
    " DAIRY_CHESSE , DRINK_JUICE , GENERAL , GENERAL_FOOD \n"
    " GENERAL_ITEMS , GROATS_RICE_PASTA , ICE_CREAMS_FROZEN, KETCH_CONCETRATE_MUSTARD_MAJO_HORSERADISH \n"
    " OCCASIONAL , POULTRY , SPICES , SWEETS , TABLETS , VEGETABLES"
)
group = input("Enter the group of products for which you want to predict the sales for the above entered month : ")
# Let pandas parse the typed text, then keep only the calendar-date part.
converted_datetime = pd.to_datetime(date).date()
# Given the date the user entered, report how many days its month has, so the
# caller can work out the last day of that month.
def dateaddition(converted_datetime):
    """Return the number of days in the month of ``converted_datetime``.

    The year and month are read from ``str(converted_datetime)``, split on
    ``"-"``, so a ``datetime.date`` or a ``"YYYY-MM-DD"`` string both work.

    Args:
        converted_datetime: Value whose string form starts with ``YYYY-MM``.

    Returns:
        int: Length of that month in days (28, 29, 30 or 31).

    Raises:
        IndexError: If the string form has no ``-`` separated month part.
        ValueError: If the year or month is not an integer, or the month is
            outside 1-12.
    """
    # Local import keeps this function self-contained; the file's top-level
    # imports do not include ``calendar``.
    import calendar

    parts = str(converted_datetime).split("-")
    year = int(parts[0])
    month = int(parts[1])
    # monthrange applies the full Gregorian leap-year rule (century years
    # included) and knows every month length, so July correctly yields 31.
    return calendar.monthrange(year, month)[1]
# The weather request below takes its dates as "DD-Mon-YYYY" text.
start_date = converted_datetime.strftime("%d-%b-%Y")
# Step forward by (days in the month - 1) from the entered date to reach the
# end of the requested month.
end_date = pd.to_datetime(
    converted_datetime + timedelta(days=dateaddition(converted_datetime) - 1)
).date()
# A month that is still running is cut off at today's date.
end_date = min(end_date, datetime.now().date()).strftime("%d-%b-%Y")

# Settings for the historical-weather request.
# NOTE(review): frequency is presumably in hours (24 = one record per day) -
# confirm against the wwo_hist documentation.
frequency = 24
# NOTE(review): this credential is committed in source; consider loading it
# from the environment instead.
api_key = '12b2c18a34194a8ca93113127200405'
location_list = ['Mlawa']
hist_weather_data = retrieve_hist_data(
    api_key,
    location_list,
    start_date,
    end_date,
    frequency,
    location_label=False,
    export_csv=True,
    store_df=True,
)
# The downloaded weather data is read back from the file "Mlawa.csv".
monthly_weather_data = pd.read_csv("Mlawa.csv")
def final_weather(monthly_weather_data):
    """Reduce a table of weather rows to five monthly feature values.

    Args:
        monthly_weather_data: DataFrame holding at least the numeric columns
            ``maxtempC``, ``mintempC``, ``FeelsLikeC``, ``HeatIndexC``,
            ``cloudcover`` and ``humidity``. Any other columns are ignored,
            and the frame itself is not modified.

    Returns:
        numpy.ndarray: Five floats, in this order:
            0. mean of the per-row ``(maxtempC + mintempC) / 2``
            1. mean ``FeelsLikeC``
            2. heat-index category (0, 1 or 2) of the mean ``HeatIndexC``
            3. cloud-cover category (0, 1, 2 or 3) of the mean ``cloudcover``
            4. mean ``humidity``
        Means are rounded to two decimals before being categorised.

    Raises:
        KeyError: If one of the required columns is missing.

    NOTE(review): the category bands below close a gap (-2 <= heat index < -1)
    and restore the missing cloud category 2. Confirm that the models were
    trained with the same encoding.
    """
    def mean_data(data):
        # ``DataFrame.items()`` replaces ``iteritems()``, which pandas 2.0 removed.
        column_means = []
        for key, value in data.items():
            column_means.append(value.mean())
            print(key)
        return column_means

    # Bucket the mean heat index into three contiguous bands.
    def cat_heat(heatindex):
        if heatindex < -2:
            return 0
        if heatindex <= 14:
            return 1
        return 2

    # Bucket the mean cloud cover into four bands split at 25, 50 and 75.
    def cat_cloud(cloudcover):
        if cloudcover < 25:
            return 0
        if cloudcover < 50:
            return 1
        if cloudcover < 75:
            return 2
        return 3

    # Select only the columns that feed the features, so extra or renamed
    # unrelated columns in the CSV cannot break this step.
    features = monthly_weather_data[
        ["FeelsLikeC", "HeatIndexC", "cloudcover", "humidity"]
    ].copy()
    features.insert(
        0,
        "avg_temp",
        (monthly_weather_data["maxtempC"] + monthly_weather_data["mintempC"]) / 2,
    )

    monthly_averages = np.around(mean_data(features), 2)
    monthly_averages[2] = cat_heat(monthly_averages[2])
    monthly_averages[3] = cat_cloud(monthly_averages[3])
    return monthly_averages
# Collapse the downloaded weather rows into the five monthly feature values.
monthly_weather_data = final_weather(monthly_weather_data)

# Normalise the typed group once, so the CSV lookup, the "group" feature and
# the model choice all use the same upper-case name.
group_key = group.strip().upper()

final_data = pd.read_csv("GROUP_OF_ITEMS_FINAL/" + group_key + ".csv")
final_data = final_data.drop(["Unnamed: 0"], axis=1)

# Attach the same weather and calendar features to every product row.
# NOTE(review): the meaning of the constant 9 for "ishol/week" is not visible
# in this file - confirm against the training pipeline.
final_data["ishol/week"] = 9
final_data["group"] = group_key
final_data["monthly_Avgtemp"] = monthly_weather_data[0]
final_data["monthly_avg_FeelsLikeC"] = monthly_weather_data[1]
final_data["monthly_avg_HeatIndexC"] = monthly_weather_data[2]
final_data["monthly_avg_cloudcover"] = monthly_weather_data[3]
final_data["monthly_avg_humidity"] = monthly_weather_data[4]

# Saved model used for each product group. The previous
# `group == "A" or "B" or ...` chain was always truthy (a non-empty string is
# true), so "MODELS/gb" was loaded for every group.
GROUPS_BY_MODEL_PATH = {
    "MODELS/gb": (
        "ALCOHOL",
        "KETCH_CONCETRATE_MUSTARD_MAJO_HORSERADISH",
        "SPICES",
        "GENERAL",
        "BREAD",
        "CHEWING_GUM_LOLIPOPS",
        "GENERAL_FOOD",
    ),
    "MODELS/extreme_gb": (
        "COFFEE TEA",
        "CIGARETTES",
        "CHIPS_FLAKES",
        "ICE_CREAMS_FROZEN",
        "POULTRY",
        "SWEETS",
    ),
    "MODELS/adab": ("GROATS_RICE_PASTA", "OCCASIONAL"),
    "MODELS/rf": ("CHEMISTRY", "GENERAL_ITEMS", "VEGETABLES"),
    "MODELS/catb": ("DAIRY_CHESSE",),
}
MODEL_PATH_BY_GROUP = {
    group_name: model_path
    for model_path, group_names in GROUPS_BY_MODEL_PATH.items()
    for group_name in group_names
}
# Groups without an explicit entry (e.g. DRINK_JUICE, TABLETS) keep using
# "MODELS/gb", which is the model the script loaded for them before.
loaded_model = load_model(MODEL_PATH_BY_GROUP.get(group_key, "MODELS/gb"))

pred = predict_model(loaded_model, data=final_data)

# The predicted label is exponentiated and rounded to whole units.
# NOTE(review): presumably the models were fitted on log-quantities - confirm.
final_sales = np.round(np.exp(pred["Label"]), 0)
pred["Label"] = final_sales

# Print one row per product: index, name, predicted quantity.
fmt = '{:<8}{:<80}{}'
print(fmt.format('', 'NAME', 'QUANTITY'))
for i, (name, sales) in enumerate(zip(pred["name"], final_sales)):
    print(fmt.format(i, name, sales))