diff --git a/taxi_dashboard.ipynb b/taxi_dashboard.ipynb
index 5e98e02..1b95b08 100644
--- a/taxi_dashboard.ipynb
+++ b/taxi_dashboard.ipynb
@@ -1258,7 +1258,20 @@
"text/html": [
"\n",
"\n",
- "\n",
"\n",
"\n",
@@ -2300,7 +2300,7 @@
{
"data": {
"text/plain": [
- "'/home/winston/turbo-telegram/data/nyc_taxi_jan15.csv'"
+ "'/home/winston/turbo-telegram/data/nyc_taxi_*'"
]
},
"execution_count": 1,
@@ -2337,7 +2337,7 @@
"# tag path to this notebook\n",
"cwd = os.getcwd() \n",
"# add taxi wildcard to complete file path of data\n",
- "path = cwd + '/' + 'data/nyc_taxi_jan15.csv'\n",
+ "path = cwd + '/' + 'data/nyc_taxi_*'\n",
"# how's it look?\n",
"path"
]
@@ -2392,106 +2392,97 @@
"
mta_tax | \n",
" tip_amount | \n",
" tolls_amount | \n",
- " improvement_surcharge | \n",
" total_amount | \n",
" \n",
" \n",
" \n",
" \n",
- " 697498 | \n",
- " 2 | \n",
- " 2015-01-31 20:46:12 | \n",
- " 2015-01-31 20:57:49 | \n",
+ " 1911983 | \n",
" 1 | \n",
- " 2.31 | \n",
- " -8.237828e+06 | \n",
- " 4.972035e+06 | \n",
+ " 2015-02-02 13:12:45 | \n",
+ " 2015-02-02 13:20:29 | \n",
+ " 1 | \n",
+ " 0.80 | \n",
+ " -8.233733e+06 | \n",
+ " 4.978858e+06 | \n",
" 1 | \n",
" N | \n",
- " -8.239360e+06 | \n",
- " 4.969136e+06 | \n",
+ " -8.232789e+06 | \n",
+ " 4.980123e+06 | \n",
" 2 | \n",
- " 10.0 | \n",
- " 0.5 | \n",
+ " 6.5 | \n",
+ " 0.0 | \n",
" 0.5 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 0.3 | \n",
- " 11.3 | \n",
+ " 7.3 | \n",
"
\n",
" \n",
- " 697499 | \n",
- " 1 | \n",
- " 2015-01-31 21:04:48 | \n",
- " 2015-01-31 21:20:53 | \n",
- " 1 | \n",
- " 3.10 | \n",
- " -8.233061e+06 | \n",
- " 4.978176e+06 | \n",
+ " 1911984 | \n",
+ " 2 | \n",
+ " 2015-02-12 21:39:55 | \n",
+ " 2015-02-12 21:47:53 | \n",
+ " 2 | \n",
+ " 1.04 | \n",
+ " -8.235069e+06 | \n",
+ " 4.976922e+06 | \n",
" 1 | \n",
" N | \n",
- " -8.236266e+06 | \n",
- " 4.972510e+06 | \n",
+ " -8.233533e+06 | \n",
+ " 4.976631e+06 | \n",
" 1 | \n",
- " 13.0 | \n",
+ " 7.0 | \n",
" 0.5 | \n",
" 0.5 | \n",
- " 1.0 | \n",
+ " 1.5 | \n",
" 0.0 | \n",
- " 0.3 | \n",
- " 15.3 | \n",
+ " 9.8 | \n",
"
\n",
" \n",
- " 697500 | \n",
- " 1 | \n",
- " 2015-01-31 20:52:51 | \n",
- " 2015-01-31 21:20:06 | \n",
+ " 1911985 | \n",
+ " 2 | \n",
+ " 2015-02-25 16:55:21 | \n",
+ " 2015-02-25 17:06:37 | \n",
" 1 | \n",
- " 6.90 | \n",
- " -8.234850e+06 | \n",
- " 4.981938e+06 | \n",
+ " 1.84 | \n",
+ " -8.234084e+06 | \n",
+ " 4.977811e+06 | \n",
" 1 | \n",
" N | \n",
- " -8.237785e+06 | \n",
- " 4.971117e+06 | \n",
- " 1 | \n",
- " 25.5 | \n",
- " 0.5 | \n",
+ " -8.231831e+06 | \n",
+ " 4.979868e+06 | \n",
+ " 2 | \n",
+ " 9.5 | \n",
+ " 1.0 | \n",
" 0.5 | \n",
- " 1.2 | \n",
" 0.0 | \n",
- " 0.3 | \n",
- " 28.0 | \n",
+ " 0.0 | \n",
+ " 11.3 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " VendorID tpep_pickup_datetime tpep_dropoff_datetime passenger_count \\\n",
- "697498 2 2015-01-31 20:46:12 2015-01-31 20:57:49 1 \n",
- "697499 1 2015-01-31 21:04:48 2015-01-31 21:20:53 1 \n",
- "697500 1 2015-01-31 20:52:51 2015-01-31 21:20:06 1 \n",
- "\n",
- " trip_distance pickup_x pickup_y RateCodeID \\\n",
- "697498 2.31 -8.237828e+06 4.972035e+06 1 \n",
- "697499 3.10 -8.233061e+06 4.978176e+06 1 \n",
- "697500 6.90 -8.234850e+06 4.981938e+06 1 \n",
- "\n",
- " store_and_fwd_flag dropoff_x dropoff_y payment_type \\\n",
- "697498 N -8.239360e+06 4.969136e+06 2 \n",
- "697499 N -8.236266e+06 4.972510e+06 1 \n",
- "697500 N -8.237785e+06 4.971117e+06 1 \n",
- "\n",
- " fare_amount extra mta_tax tip_amount tolls_amount \\\n",
- "697498 10.0 0.5 0.5 0.0 0.0 \n",
- "697499 13.0 0.5 0.5 1.0 0.0 \n",
- "697500 25.5 0.5 0.5 1.2 0.0 \n",
- "\n",
- " improvement_surcharge total_amount \n",
- "697498 0.3 11.3 \n",
- "697499 0.3 15.3 \n",
- "697500 0.3 28.0 "
+ " VendorID tpep_pickup_datetime tpep_dropoff_datetime passenger_count \\\n",
+ "1911983 1 2015-02-02 13:12:45 2015-02-02 13:20:29 1 \n",
+ "1911984 2 2015-02-12 21:39:55 2015-02-12 21:47:53 2 \n",
+ "1911985 2 2015-02-25 16:55:21 2015-02-25 17:06:37 1 \n",
+ "\n",
+ " trip_distance pickup_x pickup_y RateCodeID \\\n",
+ "1911983 0.80 -8.233733e+06 4.978858e+06 1 \n",
+ "1911984 1.04 -8.235069e+06 4.976922e+06 1 \n",
+ "1911985 1.84 -8.234084e+06 4.977811e+06 1 \n",
+ "\n",
+ " store_and_fwd_flag dropoff_x dropoff_y payment_type \\\n",
+ "1911983 N -8.232789e+06 4.980123e+06 2 \n",
+ "1911984 N -8.233533e+06 4.976631e+06 1 \n",
+ "1911985 N -8.231831e+06 4.979868e+06 2 \n",
+ "\n",
+ " fare_amount extra mta_tax tip_amount tolls_amount total_amount \n",
+ "1911983 6.5 0.0 0.5 0.0 0.0 7.3 \n",
+ "1911984 7.0 0.5 0.5 1.5 0.0 9.8 \n",
+ "1911985 9.5 1.0 0.5 0.0 0.0 11.3 "
]
},
"execution_count": 2,
@@ -2505,7 +2496,7 @@
" 'int', 'str', 'float64', 'float64', 'int', 'float64', 'float64', \n",
" 'float64', 'float64', 'float64', 'float64', 'float64']\n",
"bc.create_table('big_taxi', path, dtype=col_types, header=0)\n",
- "# what's it look like?\n",
+ "# what's it look like? \n",
"bc.sql('select * from big_taxi').tail(3)"
]
},
@@ -2524,7 +2515,6 @@
"source": [
"# build outputs (each will have its own tab)\n",
"output = widgets.Output()\n",
- "Christmas_NYC = widgets.Output()\n",
"plot_output = widgets.Output()\n",
"hv_output = widgets.Output()\n",
"hv_output2 = widgets.Output()\n",
@@ -2591,44 +2581,19 @@
"default_location = dropdown_location.value\n",
"\n",
"def common_filtering(day, month, year, n_riders, tol_cost, location):\n",
- " # sql: day, month, year, n_riders, tol_cost\n",
- " # visual dataframe: month, year, n_riders, tol_cost\n",
- " # plot: month, year, n_riders, tol_cost, location\n",
- " \n",
- " # check if input boxes are blank or not\n",
- " if n_riders != '':\n",
- " # and strip extra blank space from start & end\n",
- " n_riders = n_riders.strip()\n",
- " # check again that it's not blank\n",
- " if n_riders != '':\n",
- " # make sure there's a direction\n",
- " if n_riders[0] not in '<>=':\n",
- " # give user time to sort it\n",
- " time.sleep(4) \n",
- " # call it\n",
- " return 'number of riders input must start with one of the following:\\n >\\n <\\n =\\n >=\\n <=\\nor be blank\\n\\nfor example: > 2 or = 3'\n",
- " if tol_cost != '':\n",
- " # e.g. ' ' = '' or '> 5 ' = '> 5'\n",
- " tol_cost = tol_cost.strip()\n",
- " # check again that it's not blank\n",
- " if tol_cost != '':\n",
- " # make sure there's a direction\n",
- " if tol_cost[0] not in '<>=':\n",
- " # give user some time to figure it out\n",
- " time.sleep(4)\n",
- " # we need a direction\n",
- " return 'fare amount input must start with one of the following:\\n >\\n <\\n =\\n >=\\n <=\\nor be blank\\n\\nfor example: > 2 or = 3'\n",
- " \n",
+ " \"\"\"sql: day, month, year, n_riders, tol_cost\n",
+ " visual dataframe: month, year, n_riders, tol_cost\n",
+ " plot: month, year, n_riders, tol_cost, location\"\"\"\n",
+ " \n",
" # clear outputs (replace instead of stacking, basically)\n",
" output.clear_output()\n",
- " Christmas_NYC.clear_output()\n",
" plot_output.clear_output()\n",
" hv_output.clear_output()\n",
" hv_output2.clear_output()\n",
- " \n",
+ "\n",
" # start building our SQL query \n",
" query = 'SELECT * FROM big_taxi'\n",
- " \n",
+ "\n",
" # has user focused specific days?\n",
" if day != default_day:\n",
" # go through days selected\n",
@@ -2699,102 +2664,39 @@
" else:\n",
" # add WHERE & total fare to query \n",
" query += f' WHERE fare_amount {tol_cost}'\n",
- " \n",
+ "\n",
" # run query & deliver cuDF DataFrame of results\n",
" query = query.replace('big_taxi', f'({time_table})')\n",
" gdf = bc.sql(query).to_pandas()\n",
- " \n",
+ "\n",
" with output:\n",
" print(f'{query}\\n') # print(gdf.info())\n",
" # trim down columns to display \n",
" focused = ['passenger_count', 'trip_distance', 'VendorID', 'RateCodeID', 'payment_type', \n",
" 'store_and_fwd_flag', 'fare_amount', 'extra', 'mta_tax', 'tip_amount', \n",
- " 'tolls_amount', 'improvement_surcharge', 'total_amount', 'tpep_pickup_datetime']\n",
+ " 'tolls_amount', 'total_amount', 'tpep_pickup_datetime']\n",
" left_out = 'tpep_dropoff_datetime, pickup_x, pickup_y, dropoff_x, dropoff_y'\n",
" print(f'DATAFRAME SAMPLE\\nexcluding columns: {left_out}')\n",
" display(gdf[focused].sample(21))\n",
- " \n",
- " with Christmas_NYC:\n",
- " map_tiles = EsriImagery().opts(alpha=0.5, width=900, height=480, bgcolor='red')\n",
- " # check viability of graph\n",
- " if len(gdf) >= 5000:\n",
- " if dropdown_location.value == 'Pickup Location':\n",
- " points = hv.Points(gdf, ['pickup_x', 'pickup_y'])\n",
- " else:\n",
- " points = hv.Points(gdf, ['dropoff_x', 'dropoff_y'])\n",
- " taxi_trips = datashade(points, x_sampling=1, y_sampling=1, cmap=['green', 'white'], width=900, height=480)\n",
- " display(map_tiles * taxi_trips)\n",
- " # no dataframe edge case\n",
- " elif len(gdf) == 0:\n",
- " print(f'NO DATAFRAME TO DISPLAY\\nlen(gdf) == {len(gdf)}')\n",
- " display(gdf)\n",
- " # hyper-focused edge case \n",
- " else: # in danger of unintrepretable map\n",
- " # what's going on?\n",
- " print(f'DISPLAY WARNING: len(gdf) < 5000\\n len(gdf) == {len(gdf)}')\n",
- " extras = 5000 // len(gdf) # how short are we?\n",
- " print(f'Graph will display DataFrame copied {1 + extras} times to saturate points')\n",
- " print('Actual DataFrame WILL NOT change, this DOES NOT apply to other graphs')\n",
- " # make temp dataframe (copied/saturated dataframe)\n",
- " temp_df = pd.concat([gdf] + [gdf.copy() for i in range(1 + extras)], axis=0)\n",
- " # and then we call the kdeplot method of seaborn by passing the number of visits:\n",
- " if dropdown_location.value == 'Pickup Location':\n",
- " points = hv.Points(temp_df, ['pickup_x', 'pickup_y'])\n",
- " else:\n",
- " points = hv.Points(temp_df, ['dropoff_x', 'dropoff_y'])\n",
- " taxi_trips = datashade(points, x_sampling=1, y_sampling=1, cmap=fire, width=900, height=480)\n",
- " # forget temp\n",
- " temp_df = ''\n",
- " display(map_tiles * taxi_trips)\n",
- " # erase temp dataframe\n",
- " temp_df = '' \n",
- " # display NYC coord map \n",
- " display(tf.set_background(tf.shade(agg, cmap=fire), \"black\")) \n",
+ " # nyc map \n",
" with plot_output:\n",
+ " # determine pickup location \n",
+ " if dropdown_location.value == 'Pickup Location':\n",
+ " points = hv.Points(gdf, ['pickup_x', 'pickup_y'])\n",
+ " else:\n",
+ " points = hv.Points(gdf, ['dropoff_x', 'dropoff_y'])\n",
+ " # graph it\n",
+ " taxi_trips = datashade(points, x_sampling=1, y_sampling=1, cmap=fire, width=900, height=480)\n",
+ " # overlay NYC map & display the graph\n",
" map_tiles = EsriImagery().opts(alpha=0.5, width=900, height=480, bgcolor='black')\n",
- " # check viability of graph\n",
- " if len(gdf) >= 5000:\n",
- " # determine pickup location \n",
- " if dropdown_location.value == 'Pickup Location':\n",
- " points = hv.Points(gdf, ['pickup_x', 'pickup_y'])\n",
- " else:\n",
- " points = hv.Points(gdf, ['dropoff_x', 'dropoff_y'])\n",
- " # graph it\n",
- " taxi_trips = datashade(points, x_sampling=1, y_sampling=1, cmap=fire, width=900, height=480)\n",
- " display(map_tiles * taxi_trips)\n",
- " # no dataframe edge case\n",
- " elif len(gdf) == 0:\n",
- " print(f'NO DATAFRAME TO DISPLAY\\nlen(gdf) == {len(gdf)}')\n",
- " display(gdf)\n",
- " # hyper-focused edge case \n",
- " else: # in danger of unintrepretable map\n",
- " # what's going on?\n",
- " print(f'DISPLAY WARNING: len(gdf) < 5000\\n len(gdf) == {len(gdf)}')\n",
- " extras = 5000 // len(gdf) # how short are we?\n",
- " print(f'Graph will display DataFrame copied {1 + extras} times to saturate points')\n",
- " print('Actual DataFrame WILL NOT change, this DOES NOT apply to other graphs')\n",
- " # make temp dataframe (copied/saturated dataframe)\n",
- " temp_df = pd.concat([gdf] + [gdf.copy() for i in range(1 + extras)], axis=0)\n",
- " # and then we call the kdeplot method of seaborn by passing the number of visits:\n",
- " if dropdown_location.value == 'Pickup Location':\n",
- " points = hv.Points(temp_df, ['pickup_x', 'pickup_y'])\n",
- " else:\n",
- " points = hv.Points(temp_df, ['dropoff_x', 'dropoff_y'])\n",
- " # graph it\n",
- " taxi_trips = datashade(points, x_sampling=1, y_sampling=1, cmap=fire, width=900, height=480)\n",
- " # forget temp\n",
- " temp_df = ''\n",
- " display(map_tiles * taxi_trips)\n",
- " # erase temp dataframe\n",
- " temp_df = '' \n",
- " # display NYC coord map \n",
- " display(tf.set_background(tf.shade(agg, cmap=fire), \"black\")) \n",
+ " display(map_tiles * taxi_trips)\n",
" # tip amount vs number of riders\n",
" with hv_output:\n",
" scatter = hv.Scatter(gdf, 'passenger_count', 'tip_amount')\n",
- " ranged = scatter.redim.range(tip_amount=(0, 30), passenger_count=(-0.5, 6.5))\n",
+ " ranged = scatter.redim.range(tip_amount=(0, 60), passenger_count=(-0.5, 6.5))\n",
" shaded = hd.spread(hd.datashade(ranged, x_sampling=0.15))\n",
" labelled = shaded.redim.label(passenger_count=\"Passengers\", tip_amount=\"Tip ($)\")\n",
+ " # let's see\n",
" display(labelled)\n",
" # trip distance vs tip amount\n",
" with hv_output2:\n",
@@ -2802,27 +2704,66 @@
" ranged = scatter.redim.range(trip_distance=(0, 20), tip_amount=(0, 20))\n",
" shaded = hd.spread(hd.datashade(ranged))\n",
" labelled = shaded.redim.label(trip_distance=\"Trip Distance\", tip_amount=\"Tip ($)\")\n",
+ " # let's see\n",
" display(labelled)\n",
" \n",
- "# The existing event handlers need to be adjusted \n",
+ " \n",
"def dropdown_day_eventhandler(change):\n",
" common_filtering(change.new, dropdown_month.value, dropdown_year.value, dropdown_riders.value, \n",
" dropdown_fare.value, dropdown_location.value)\n",
+ " \n",
"def dropdown_month_eventhandler(change):\n",
" common_filtering(dropdown_day.value, change.new, dropdown_year.value, dropdown_riders.value, \n",
" dropdown_fare.value, dropdown_location.value)\n",
+ " \n",
"def dropdown_year_eventhandler(change):\n",
" common_filtering(dropdown_day.value, dropdown_month.value, change.new, dropdown_riders.value, \n",
" dropdown_fare.value, dropdown_location.value)\n",
+ " \n",
"def dropdown_riders_eventhandler(change):\n",
- " common_filtering(dropdown_day.value, dropdown_month.value, dropdown_year.value, change.new.replace('\"','').replace(\"'\",''), \n",
- " dropdown_fare.value, dropdown_location.value) \n",
+ "    \"\"\"Re-run the dashboard query when the riders box changes.\n",
+ "\n",
+ "    Nothing runs unless the riders and fare boxes are each blank or a complete\n",
+ "    comparison such as '> 2' or '<= 3'.\n",
+ "    \"\"\"\n",
+ "    import re  # local import keeps the handler self-contained\n",
+ "    # remove quote characters before trimming, so '\"> 2 \"' becomes '> 2'\n",
+ "    def tidy(text):\n",
+ "        return text.replace('\"','').replace(\"'\",'').strip()\n",
+ "    # blank, or one operator followed by one number and nothing else; this also\n",
+ "    # turns away half-typed input ('>', '> ') and trailing SQL ('> 1 or 1=1')\n",
+ "    def ready(text):\n",
+ "        return text == '' or re.fullmatch('(<=|>=|<>|<|>|=) *-?[0-9]*[.]?[0-9]+', text) is not None\n",
+ "    # the fare box is checked too: its raw text goes into the same query\n",
+ "    riders, fare = tidy(change.new), tidy(dropdown_fare.value)\n",
+ "    if ready(riders) and ready(fare):\n",
+ "        common_filtering(dropdown_day.value, dropdown_month.value, dropdown_year.value, \n",
+ "                         riders, fare, dropdown_location.value) \n",
+ " \n",
"def dropdown_fare_eventhandler(change):\n",
- " common_filtering(dropdown_day.value, dropdown_month.value, dropdown_year.value, dropdown_riders.value, \n",
- " change.new.replace('\"','').replace(\"'\",''), dropdown_location.value) \n",
+ "    \"\"\"Re-run the dashboard query when the fare box changes.\n",
+ "\n",
+ "    Nothing runs unless the fare and riders boxes are each blank or a complete\n",
+ "    comparison such as '> 5' or '<= 20'.\n",
+ "    \"\"\"\n",
+ "    import re  # local import keeps the handler self-contained\n",
+ "    # remove quote characters before trimming, so '\"> 5 \"' becomes '> 5'\n",
+ "    def tidy(text):\n",
+ "        return text.replace('\"','').replace(\"'\",'').strip()\n",
+ "    # blank, or one operator followed by one number and nothing else; this also\n",
+ "    # turns away half-typed input ('>', '> ') and trailing SQL ('> 1 or 1=1')\n",
+ "    def ready(text):\n",
+ "        return text == '' or re.fullmatch('(<=|>=|<>|<|>|=) *-?[0-9]*[.]?[0-9]+', text) is not None\n",
+ "    # the riders box is checked too: its raw text goes into the same query\n",
+ "    fare, riders = tidy(change.new), tidy(dropdown_riders.value)\n",
+ "    if ready(fare) and ready(riders):\n",
+ "        common_filtering(dropdown_day.value, dropdown_month.value, dropdown_year.value, \n",
+ "                         riders, fare, dropdown_location.value) \n",
+ " \n",
"def dropdown_location_eventhandler(change):\n",
" common_filtering(dropdown_day.value, dropdown_month.value, dropdown_year.value, dropdown_riders.value, \n",
" dropdown_fare.value, change.new) \n",
+ " \n",
"\n",
"# We bind the handlers to the dropdowns, and that’s it!\n",
"dropdown_day.observe(dropdown_day_eventhandler, names='value')\n",
@@ -2832,6 +2773,7 @@
"dropdown_fare.observe(dropdown_fare_eventhandler, names='value')\n",
"dropdown_location.observe(dropdown_location_eventhandler, names='value')\n",
"\n",
+ "\n",
"def input_dashboard():\n",
" # The HBox will add widgets to it one at a time from left-to-right:\n",
" input_widgets = widgets.HBox([dropdown_day, dropdown_month, dropdown_year])\n",
@@ -2842,12 +2784,11 @@
" \n",
"def display_dashboard():\n",
" # dashboard outputs\n",
- " tab = widgets.Tab([output, Christmas_NYC, plot_output, hv_output, hv_output2])\n",
+ " tab = widgets.Tab([output, plot_output, hv_output, hv_output2])\n",
" tab.set_title(0, 'Dataset Exploration')\n",
- " tab.set_title(1, 'Christmas Map')\n",
- " tab.set_title(2, 'Coordinate Map')\n",
- " tab.set_title(3, 'Tips vs # Riders')\n",
- " tab.set_title(4, 'Tips vs Distance')\n",
+ " tab.set_title(1, 'Coordinate Map')\n",
+ " tab.set_title(2, 'Tips vs # Riders')\n",
+ " tab.set_title(3, 'Tips vs Distance')\n",
" # display outputs \n",
" display(tab)"
]
@@ -2867,7 +2808,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "8203f2502706466ba1023f6167b80fcf",
+ "model_id": "542e1862ca0144549585a3ff55d35e62",
"version_major": 2,
"version_minor": 0
},
@@ -2881,7 +2822,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "283914e92b394ef989eec7e905e05748",
+ "model_id": "c55575a743dc4142be59f35c368249c1",
"version_major": 2,
"version_minor": 0
},
@@ -2894,6 +2835,7 @@
}
],
"source": [
+ "# TODO: changing one widget re-runs the query with the riders/fare text boxes' raw values, even when one is half-typed\n",
"input_dashboard()"
]
},
@@ -2912,12 +2854,12 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "c202eeed1770411ea5e93e39691f064e",
+ "model_id": "5539e063239841b49dc3245e86b84b1b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "Tab(children=(Output(), Output(), Output(), Output(), Output()), _titles={'0': 'Dataset Exploration', '1': 'Ch…"
+ "Tab(children=(Output(), Output(), Output(), Output()), _titles={'0': 'Dataset Exploration', '1': 'Coordinate M…"
]
},
"metadata": {},
@@ -2945,7 +2887,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.7.6"
}
},
"nbformat": 4,