-
Notifications
You must be signed in to change notification settings - Fork 0
/
Walmart Analysis wSql.sql
351 lines (281 loc) · 7.85 KB
/
Walmart Analysis wSql.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
CREATE DATABASE IF NOT EXISTS WalmartSales;
CREATE TABLE IF NOT EXISTS Sales(
invoice_id VARCHAR(30) NOT NULL PRIMARY KEY,
branch VARCHAR(5) NOT NULL,
city VARCHAR(30) NOT NULL,
customer_type VARCHAR(30) NOT NULL,
gender VARCHAR(30) NOT NULL,
product_line VARCHAR(100) NOT NULL,
unit_price DECIMAL(10,2) NOT NULL,
quantity INT NOT NULL,
tax_pct FLOAT(6,4) NOT NULL,
total DECIMAL(12, 4) NOT NULL,
date DATETIME NOT NULL,
time TIME NOT NULL,
payment VARCHAR(15) NOT NULL,
cogs DECIMAL(10,2) NOT NULL,
gross_margin_pct FLOAT(11,9),
gross_income DECIMAL(12, 4),
rating FLOAT(2, 1)
);
SHOW COLUMNS FROM sales;
SELECT * FROM walmartsales.sales;
-- --------------------------------------------------------------------------------------------
-- ---------------------------Feature Enginerring ---------------------------------------------
-- ----- TIME_OF_DAY --------
SELECT
time,
(CASE
WHEN time BETWEEN "00:00:00" AND "12:00:00" THEN "Morning"
WHEN time BETWEEN "12:01:00" AND "16:00:00" THEN "Afternoon"
ELSE "Evening"
END
) as time_of_date
FROM sales;
ALTER TABLE sales ADD COLUMN time_of_day VARCHAR(20);
UPDATE sales SET time_of_day =
(CASE
WHEN time BETWEEN "00:00:00" AND "12:00:00" THEN "Morning"
WHEN time BETWEEN "12:01:00" AND "16:00:00" THEN "Afternoon"
ELSE "Evening"
END);
-- ------ day_name ------
SELECT
date,
DAYNAME(date) as day_name
FROM sales;
ALTER TABLE sales ADD COLUMN day_name VARCHAR(10);
UPDATE sales SET day_name = DAYNAME(date);
-- ------- month_name ------
SELECT
date,
MONTHNAME(date) as day_name
FROM sales;
ALTER TABLE sales ADD COLUMN month_name VARCHAR(10);
UPDATE sales SET month_name = MONTHNAME(date);
-- --------------------------------------------------------------------------------------------
-- --------------------------------------------------------------------------------------------
-- ------------------------------ general questions for EDA -----------------------------------
-- How many unique cities does the data have?
SELECT
DISTINCT city
FROM sales;
-- In which cities are the branches located?
SELECT
DISTINCT city,
branch
FROM sales
ORDER BY branch;
-- --------------------------------------------------------------------------------------------
-- ------------------------------ product questions for EDA -----------------------------------
-- How many unique product lines does the data have?
SELECT
COUNT(DISTINCT product_line)
FROM sales;
-- What is the most common payment method?
SELECT
payment,
COUNT(payment) as payment_cnt
FROM sales
GROUP BY payment
ORDER BY payment_cnt DESC;
-- What is the most selling product line?
SELECT
product_line,
COUNT(product_line) as product_cnt
FROM sales
GROUP BY product_line
ORDER BY product_cnt DESC;
-- What is the total revenue by month?
SELECT
month_name,
SUM(total) as total_rev
FROM sales
GROUP BY month_name
ORDER BY total_rev DESC;
-- What month had the largest COGS?
SELECT
month_name as months,
SUM(cogs) as cogs_sum
FROM sales
GROUP BY months
ORDER BY cogs_sum DESC;
-- What product line had the largest revenue?
SELECT
product_line,
SUM(total) as revenue
FROM sales
GROUP BY product_line
ORDER BY revenue DESC;
-- What is the city with the largest revenue?
SELECT
city,
branch,
SUM(total) as revenue
FROM sales
GROUP BY city, branch
ORDER BY revenue DESC;
-- What product line had the largest VAT (Value Added Tax)? -> AVG of PCT for comparing proportions
SELECT
product_line,
AVG(tax_pct) as avg_VAT
FROM sales
GROUP BY product_line
ORDER BY avg_VAT DESC;
-- Fetch each product line and add a column to those product line showing "Good", "Bad". Good if its greater than average sales
-- Which branch sold more products than average product sold?
SELECT
branch,
SUM(quantity) as qty
FROM sales
GROUP BY branch
HAVING SUM(quantity) > (SELECT AVG(quantity) FROM sales);
-- What is the most common product line by gender?
SELECT
gender,
product_line,
count(gender) as cnt_gender
FROM sales
GROUP BY gender, product_line
ORDER BY cnt_gender DESC;
-- What is the average rating of each product line?
SELECT
product_line,
ROUND(AVG(rating),2) as avg_rating
FROM sales
GROUP BY product_line
ORDER BY avg_rating DESC;
-- --------------------------------------------------------------------------------------
-- ---------------------------------- Sales Questions -----------------------------------
-- Number of sales made in each time of the day per weekday
SELECT
day_name,
time_of_day,
COUNT(*) AS number_of_sales
FROM sales
WHERE day_name = "Sunday"
GROUP BY day_name, time_of_day
ORDER BY number_of_sales;
-- ----------or?! -----------
SELECT day_name, time_of_day, COUNT(*) AS number_of_sales
FROM sales
WHERE day_name = "Saturday"
GROUP BY day_name, time_of_day
UNION ALL
SELECT day_name, time_of_day, COUNT(*) AS number_of_sales
FROM sales
WHERE day_name = "Sunday"
GROUP BY day_name, time_of_day
UNION ALL
SELECT day_name, time_of_day, COUNT(*) AS number_of_sales
FROM sales
WHERE day_name = "Monday"
GROUP BY day_name, time_of_day
UNION ALL
SELECT day_name, time_of_day, COUNT(*) AS number_of_sales
FROM sales
WHERE day_name = "Tuesday"
GROUP BY day_name, time_of_day
UNION ALL
SELECT day_name, time_of_day, COUNT(*) AS number_of_sales
FROM sales
WHERE day_name = "Wednesday"
GROUP BY day_name, time_of_day
UNION ALL
SELECT day_name, time_of_day, COUNT(*) AS number_of_sales
FROM sales
WHERE day_name = "Thursday"
GROUP BY day_name, time_of_day
UNION ALL
SELECT day_name, time_of_day, COUNT(*) AS number_of_sales
FROM sales
WHERE day_name = "Friday"
GROUP BY day_name, time_of_day
ORDER BY day_name, time_of_day;
-- Which of the customer types brings the most revenue?
SELECT
customer_type,
SUM(total) as revenue
FROM sales
GROUP BY customer_type
ORDER BY revenue DESC;
-- Which city has the largest tax percent/ VAT (Value Added Tax)?
SELECT
city,
AVG(tax_pct) as largest_VAT
FROM sales
GROUP BY city
ORDER BY largest_VAT DESC;
-- Which customer type pays the most in VAT?
SELECT
customer_type,
AVG(tax_pct) as largest_VAT
FROM sales
GROUP BY customer_type
ORDER BY largest_VAT DESC;
-- -----------------------------------------------------------------------------
-- -----------------------Customer questtions ----------------------------------
-- How many unique customer types does the data have? -> 2 types: Member, Normal
SELECT DISTINCT customer_type
FROM sales;
-- How many unique payment methods does the data have? -> 3 methods: credit, Ewallet, Cash
SELECT DISTINCT payment
FROM sales;
-- What is the most common customer type? -> Member customers
SELECT
customer_type,
COUNT(customer_type) as cust_cnt
FROM sales
GROUP BY customer_type
ORDER BY cust_cnt DESC;
-- Which customer type buys the most? -> Normal type
SELECT
customer_type,
COUNT(*) as cust_cnt
FROM sales
GROUP BY customer_type;
-- What is the gender of most of the customers?
SELECT
gender,
COUNT(*) as cnt
FROM sales
GROUP BY gender
ORDER BY cnt DESC;
-- What is the gender distribution per branch?
SELECT
branch,
gender,
COUNT(*) as cnt
FROM sales
GROUP BY branch, gender
ORDER BY branch ASC;
-- Which time of the day do customers give most ratings?
SELECT
time_of_day,
AVG(rating) as rating_avg
FROM sales
GROUP BY time_of_day
ORDER BY rating_avg DESC;
-- Which time of the day do customers give most ratings per branch?
SELECT
branch,
time_of_day,
AVG(rating) as rating_avg
FROM sales
GROUP BY branch,time_of_day
ORDER BY branch ASC, rating_avg DESC;
-- Which day of the week has the best avg ratings?
SELECT
day_name,
AVG(rating) as rating_avg
FROM sales
GROUP BY day_name
ORDER BY rating_avg DESC;
-- Which day of the week has the best average ratings per branch?
SELECT
branch,
day_name,
AVG(rating) as rating_avg
FROM sales
GROUP BY branch, day_name
ORDER BY branch ASC,rating_avg DESC;