Observed analysis
-
Looking at the scatter plot, you can see that urban cities have about eighty percent more drivers than rural cities. This indicates that competition is high in urban cities, which suggests that fare prices in urban cities are more than half the price of those in rural cities; it also indicates that urban cities have a higher population than suburban and rural cities.
-
According to the pie charts, urban cities' share of total fares, total drivers, and total rides is higher than that of rural and suburban cities by over half. This indicates that the demand for Uber drivers is higher in urban cities. One can quickly conclude that people would rather not drive themselves in a city environment.
-
According to all three charts, rural cities use little to no Uber services. Given the geography of rural cities, one can conclude that the majority of their population owns a car.
%matplotlib notebook
# Import Dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Locations of the two raw CSV inputs: city metadata and individual rides
pycity_file = "raw_data/city_data.csv"
pyride_file = "raw_data/ride_data.csv"
# Load each CSV into its own DataFrame
pyride_df = pd.read_csv(pyride_file)
pycity_df = pd.read_csv(pycity_file)
# Disabled step, kept for reference: de-duplicating the city table on 'city'
# pycity_df = pycity_df.drop_duplicates('city')
# cities = pycity_df['city']
# cities
# Attach the city columns to every ride; no key is passed, so pandas joins on
# the column(s) the two frames have in common
merged_df = pd.merge(pycity_df, pyride_df)
# Bare expression so the notebook renders the merged table
merged_df
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
city | driver_count | type | date | fare | ride_id | |
---|---|---|---|---|---|---|
0 | Richardfort | 38 | Urban | 2018-02-24 08:40:38 | 13.93 | 5628545007794 |
1 | Richardfort | 38 | Urban | 2018-02-13 12:46:07 | 14.00 | 910050116494 |
2 | Richardfort | 38 | Urban | 2018-02-16 13:52:19 | 17.92 | 820639054416 |
3 | Richardfort | 38 | Urban | 2018-02-01 20:18:28 | 10.26 | 9554935945413 |
4 | Richardfort | 38 | Urban | 2018-04-17 02:26:37 | 23.00 | 720020655850 |
5 | Richardfort | 38 | Urban | 2018-04-21 03:44:04 | 9.54 | 3698147103219 |
6 | Richardfort | 38 | Urban | 2018-02-03 00:14:26 | 29.04 | 4982665519010 |
7 | Richardfort | 38 | Urban | 2018-02-08 15:50:12 | 16.55 | 2270463070874 |
8 | Richardfort | 38 | Urban | 2018-04-03 15:07:34 | 40.77 | 9496210735824 |
9 | Richardfort | 38 | Urban | 2018-02-19 14:09:20 | 27.11 | 8690324801449 |
10 | Richardfort | 38 | Urban | 2018-04-22 03:06:56 | 39.74 | 4081388893008 |
11 | Richardfort | 38 | Urban | 2018-01-31 14:51:01 | 43.92 | 4066949857460 |
12 | Richardfort | 38 | Urban | 2018-03-04 22:13:57 | 14.83 | 9474112834050 |
13 | Richardfort | 38 | Urban | 2018-03-28 19:33:00 | 7.75 | 5539056178883 |
14 | Richardfort | 38 | Urban | 2018-02-14 03:27:28 | 44.11 | 2503858662225 |
15 | Richardfort | 38 | Urban | 2018-04-27 11:59:25 | 30.31 | 6106446829435 |
16 | Richardfort | 38 | Urban | 2018-03-03 11:57:24 | 11.42 | 2916078036620 |
17 | Richardfort | 38 | Urban | 2018-01-13 10:08:54 | 25.81 | 9110913538598 |
18 | Richardfort | 38 | Urban | 2018-03-06 12:53:05 | 8.28 | 5127600643309 |
19 | Richardfort | 38 | Urban | 2018-02-05 16:12:04 | 42.22 | 4051093040264 |
20 | Richardfort | 38 | Urban | 2018-04-04 15:04:56 | 16.35 | 6077906760851 |
21 | Richardfort | 38 | Urban | 2018-03-05 16:00:59 | 4.85 | 3291539624738 |
22 | Richardfort | 38 | Urban | 2018-04-14 11:56:28 | 43.01 | 7636011510547 |
23 | Richardfort | 38 | Urban | 2018-01-21 23:20:53 | 10.91 | 3567611375530 |
24 | Richardfort | 38 | Urban | 2018-03-16 23:00:51 | 21.39 | 237473464569 |
25 | Richardfort | 38 | Urban | 2018-03-22 04:41:04 | 16.35 | 5934239154720 |
26 | Richardfort | 38 | Urban | 2018-01-03 01:06:17 | 5.32 | 3268844473610 |
27 | Richardfort | 38 | Urban | 2018-03-29 15:16:01 | 37.76 | 4802688422688 |
28 | Williamsstad | 59 | Urban | 2018-01-23 17:51:24 | 19.62 | 125986195523 |
29 | Williamsstad | 59 | Urban | 2018-03-29 21:43:04 | 35.58 | 4747780858464 |
... | ... | ... | ... | ... | ... | ... |
2345 | Bradshawfurt | 7 | Rural | 2018-03-24 18:53:18 | 57.51 | 2301998876294 |
2346 | Bradshawfurt | 7 | Rural | 2018-03-24 07:34:03 | 19.89 | 7867402022145 |
2347 | Bradshawfurt | 7 | Rural | 2018-02-18 22:21:26 | 34.32 | 8881996813087 |
2348 | Bradshawfurt | 7 | Rural | 2018-04-08 13:46:03 | 55.19 | 1742954306812 |
2349 | Bradshawfurt | 7 | Rural | 2018-04-07 21:12:49 | 49.70 | 5088814323688 |
2350 | Bradshawfurt | 7 | Rural | 2018-02-23 12:00:59 | 37.05 | 2068423024643 |
2351 | Bradshawfurt | 7 | Rural | 2018-04-10 09:47:54 | 19.07 | 739305106253 |
2352 | Bradshawfurt | 7 | Rural | 2018-01-19 20:21:54 | 56.78 | 1253403506597 |
2353 | Bradshawfurt | 7 | Rural | 2018-01-30 10:55:23 | 51.39 | 1328274868072 |
2354 | New Ryantown | 2 | Rural | 2018-01-27 17:33:41 | 42.68 | 7994603753131 |
2355 | New Ryantown | 2 | Rural | 2018-04-18 19:43:54 | 42.97 | 230914178346 |
2356 | New Ryantown | 2 | Rural | 2018-02-04 23:54:51 | 45.70 | 3570428225530 |
2357 | New Ryantown | 2 | Rural | 2018-04-30 01:50:44 | 50.81 | 5357550405010 |
2358 | New Ryantown | 2 | Rural | 2018-04-05 21:38:18 | 50.98 | 4834855490008 |
2359 | New Ryantown | 2 | Rural | 2018-05-05 19:29:38 | 26.53 | 2302209966018 |
2360 | Randallchester | 9 | Rural | 2018-04-13 11:13:31 | 43.22 | 1076079536213 |
2361 | Randallchester | 9 | Rural | 2018-02-19 03:52:47 | 58.55 | 8004803682564 |
2362 | Randallchester | 9 | Rural | 2018-02-11 05:42:29 | 25.78 | 9010611749008 |
2363 | Randallchester | 9 | Rural | 2018-03-25 13:36:46 | 10.37 | 3216382725494 |
2364 | Randallchester | 9 | Rural | 2018-04-07 23:42:07 | 10.79 | 1615474447641 |
2365 | Jessicaport | 1 | Rural | 2018-01-01 09:45:36 | 43.69 | 2424875833354 |
2366 | Jessicaport | 1 | Rural | 2018-01-14 07:09:17 | 18.05 | 5405362355006 |
2367 | Jessicaport | 1 | Rural | 2018-04-13 16:08:11 | 39.89 | 6511242590852 |
2368 | Jessicaport | 1 | Rural | 2018-03-18 16:59:40 | 33.72 | 3046889917159 |
2369 | Jessicaport | 1 | Rural | 2018-05-01 08:14:47 | 22.44 | 3725278487786 |
2370 | Jessicaport | 1 | Rural | 2018-01-31 17:57:25 | 58.29 | 623154556195 |
2371 | South Saramouth | 7 | Rural | 2018-02-20 16:32:36 | 44.29 | 3622365199969 |
2372 | South Saramouth | 7 | Rural | 2018-01-28 15:55:33 | 31.25 | 7118046558393 |
2373 | South Saramouth | 7 | Rural | 2018-03-27 21:07:16 | 11.87 | 170351888128 |
2374 | South Saramouth | 7 | Rural | 2018-04-12 18:11:50 | 57.23 | 5081198789583 |
2375 rows × 6 columns
# Per-city summary statistics, computed separately for each city type
def _city_type_stats(rides_df, city_type):
    """Return per-city statistics for the rows of *rides_df* of one city type.

    Args:
        rides_df: merged ride/city frame with 'type', 'city', 'fare',
            'ride_id' and 'driver_count' columns.
        city_type: value of the 'type' column to keep (e.g. 'Urban').

    Returns:
        A tuple ``(subset, total_revenue, ride_count, average_fare, drivers)``.
        ``subset`` is the filtered frame; the other four are Series indexed by
        city, all in the same (sorted) city order.
    """
    subset = rides_df.loc[rides_df['type'] == city_type]
    by_city = subset.groupby('city')
    # Select the column before aggregating so only that column is processed
    total_revenue = by_city['fare'].sum()
    ride_count = by_city['ride_id'].count()
    average_fare = round(total_revenue / ride_count, 2)
    # driver_count repeats on every ride row of a city, so the first value per
    # city is enough. Going through the same groupby keeps this Series in the
    # same city order as the others, which matters to any consumer that reads
    # the values by position (such as scatter-plot marker sizes).
    drivers = by_city['driver_count'].first()
    return subset, total_revenue, ride_count, average_fare, drivers


#Urban (total revenue, total number of rides, average fare, total number of drivers)
(urban, urban_total_revenue, urban_total_number_of_rides,
 urban_average_fare, urban_total_drivers) = _city_type_stats(merged_df, 'Urban')
#suburban (total revenue, total number of rides, average fare, total number of drivers)
(suburban, suburban_total_revenue, suburban_total_number_of_rides,
 suburban_average_fare, suburban_total_drivers) = _city_type_stats(merged_df, 'Suburban')
#rural (total revenue, total number of rides, average fare, total number of drivers)
(rural, rural_total_revenue, rural_total_number_of_rides,
 rural_average_fare, rural_total_drivers) = _city_type_stats(merged_df, 'Rural')
# Bubble chart: average fare against ride count per city, one series per city
# type, with the bubble area scaled by the city's driver count.
# Each entry: (legend label, fill colour, rides per city, average fare, drivers)
bubble_series = [
    ('Urban', 'lightcoral', urban_total_number_of_rides,
     urban_average_fare, urban_total_drivers),
    ('Suburban', 'lightblue', suburban_total_number_of_rides,
     suburban_average_fare, suburban_total_drivers),
    ('Rural', 'yellow', rural_total_number_of_rides,
     rural_average_fare, rural_total_drivers),
]
for series_label, series_color, city_rides, city_avg_fare, city_drivers in bubble_series:
    # matplotlib reads marker sizes by position, not by index label, so put the
    # driver counts in the same city order as the ride counts before scaling;
    # otherwise bubbles are sized with another city's driver count.
    marker_sizes = city_drivers.reindex(city_rides.index) * 10
    plt.scatter(city_rides, city_avg_fare, marker='o',
                facecolors=series_color, edgecolors='black',
                s=marker_sizes, alpha=0.75, label=series_label)
#labels
# The ride dates in the merged data are all from 2018
plt.title("Pyber Ride Sharing Data (2018)")
plt.xlabel("Total Number of Rides (Per City)")
plt.xlim(0,36)
plt.ylabel("Average Fare ($)")
plt.ylim(15,51)
lgnd = plt.legend(scatterpoints=1)
# Give every legend marker the same size instead of inheriting bubble sizes.
# Legend.legendHandles was renamed to legend_handles in newer matplotlib and
# the old name later removed, so use whichever attribute this version has.
legend_markers = getattr(lgnd, 'legend_handles', None)
if legend_markers is None:
    legend_markers = lgnd.legendHandles
for legend_marker in legend_markers:
    legend_marker.set_sizes([50])
# The text is passed positionally: the keyword was renamed from `s` to `text`,
# and the positional form works on both old and new matplotlib.
plt.annotate('Note:\nCircle size correlates with driver count per city', xy=(0,15), xytext=(36,40))
plt.grid()
plt.show()
# Pie chart: share of total fare revenue contributed by each city type
# Fare revenue summed per city type, and summed over every ride
fare_type = merged_df.groupby('type')['fare'].sum()
fare_total = merged_df['fare'].sum()
# Fraction of the overall revenue earned by each city type
fare_percent = fare_type.div(fare_total)
# Wedge labels and wedge sizes both come from the per-type fractions
labels = fare_percent.index
sizes = fare_percent
# Styling: one colour per wedge, third wedge pulled out of the pie
explode = (0, 0, 0.1)
colors = ['yellow', 'lightblue', 'lightcoral']
# Draw the chart
plt.pie(sizes, labels=labels, colors=colors, explode=explode,
        shadow=True, startangle=140, autopct="%1.1f%%")
plt.title("% of Total Fares by City Type")
plt.show()
# Pie chart: share of all rides taken in each city type
# Number of rides by the city type
ride_type = merged_df.groupby('type')['ride_id'].count()
# total number of rides, counted on ride_id so it matches the per-type counts
total_rides = merged_df['ride_id'].count()
# Fraction of rides per city type. Deliberately left unrounded: rounding to two
# decimals here makes the fractions stop summing to 1 and skews the one-decimal
# percentages that autopct prints on the wedges.
ride_percent = ride_type / total_rides
#labels,sizes, colors, explode
labels = ride_percent.index
sizes = ride_percent
explode = (0, 0, 0.2)
colors = ['yellow', 'lightblue', 'lightcoral']
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=140)
plt.title("% of Total Rides by City Type")
plt.show()
# Pie chart: share of all drivers working in each city type
# Keep one row per distinct (city, driver_count) pair so a city's drivers are
# not added once for every ride in that city
city_ride_dup = merged_df.drop_duplicates(['city', 'driver_count'], keep='first')
#total number of drivers per city type
total_drivers = city_ride_dup.groupby('type')['driver_count'].sum()
# Labels are read from the grouped index so they always match the wedge order;
# the colours follow the yellow / lightblue / lightcoral scheme of the other charts.
plt.pie(total_drivers, explode=(0, 0, 0.1),
        colors=["yellow", "lightblue", "lightcoral"],
        autopct="%1.1f%%", labels=total_drivers.index)
plt.axis("equal")
plt.title("% Total Drivers (city type)")
plt.show()