Skip to content

Commit

Permalink
Merge pull request #145 from iantei/Introduce_Inferred_Labels_for_Sta…
Browse files Browse the repository at this point in the history
…cked_Bar_Charts

Introduce Inferred Labels for Stacked Bar Charts
  • Loading branch information
shankari authored Sep 22, 2024
2 parents fc5afdd + 1a5abfb commit 88180dc
Show file tree
Hide file tree
Showing 3 changed files with 263 additions and 56 deletions.
102 changes: 78 additions & 24 deletions viz_scripts/generic_metrics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,31 @@
" sensed_algo_prefix)"
]
},
{
"cell_type": "markdown",
"id": "325e5eda",
"metadata": {},
"source": [
"## Collect Data from Database for Inferred Metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c26ff5f5",
"metadata": {},
"outputs": [],
"source": [
"expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = await scaffolding.load_viz_notebook_inferred_data(year,\n",
" month,\n",
" program,\n",
" study_type,\n",
" dynamic_labels,\n",
" dic_re,\n",
" dic_pur=dic_pur,\n",
" include_test_users=include_test_users)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -172,9 +197,14 @@
"labeled_match = re.match(r'Based on ([0-9]+) confirmed trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', quality_text)\n",
"# labeled_match\n",
"stacked_bar_quality_text_labeled = f\"{labeled_match.group(1)} trips {labeled_match.group(7)}\\n from {labeled_match.group(2)} {labeled_match.group(3)}\"\n",
"\n",
"sensed_match = re.match(r'Based on ([0-9]+) trips from ([0-9]+) (users|testers and participants)', quality_text_sensed)\n",
"stacked_bar_quality_text_sensed = f\"{sensed_match.group(1)} trips (100%)\\n from {sensed_match.group(2)} {sensed_match.group(3)}\"\n",
"stacked_bar_quality_text_labeled, stacked_bar_quality_text_sensed"
"\n",
"inferred_match = re.match(r'Based on ([0-9]+) confirmed trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', quality_text_inferred)\n",
"stacked_bar_quality_text_inferred = f\"{inferred_match.group(1)} trips {inferred_match.group(7)}\\n from {inferred_match.group(2)} {inferred_match.group(3)}\"\n",
"\n",
"stacked_bar_quality_text_labeled, stacked_bar_quality_text_sensed, stacked_bar_quality_text_inferred"
]
},
{
Expand Down Expand Up @@ -204,15 +234,16 @@
"plot_title_no_quality= \"Number of trips for each mode\"\n",
"\n",
"try:\n",
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
" fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n",
" # We will have text results corresponding to the axes for simplicity and consistency\n",
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
" \n",
" plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
" \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" \n",
" \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
" plt.clf()\n",
Expand Down Expand Up @@ -254,13 +285,23 @@
"\n",
" expanded_ct_commute = expanded_ct.query(trip_purpose_query)\n",
" commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct_commute, \"commute\", include_test_users) if not expanded_ct.empty else \"\"\n",
" plot_title = plot_title_no_quality + \"\\n\" + commute_quality_text\n",
" \n",
" expanded_ct_inferred_commute = expanded_ct_inferred.query(trip_purpose_query)\n",
" commute_quality_text_inferred = scaffolding.get_quality_text(expanded_ct_inferred, expanded_ct_inferred_commute, \"commute\", include_test_users) if not expanded_ct_inferred.empty else \"\"\n",
" plot_title = plot_title_no_quality\n",
"\n",
" commute_labeled_match = re.match(r'Based on ([0-9]+) confirmed commute trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total confirmed trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', commute_quality_text)\n",
" stacked_bar_quality_text_commute_labeled = f\"{commute_labeled_match.group(1)} trips {commute_labeled_match.group(7)}\\n from {commute_labeled_match.group(2)} {commute_labeled_match.group(3)}\"\n",
"\n",
" commute_inferred_match = re.match(r'Based on ([0-9]+) confirmed commute trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total confirmed trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', commute_quality_text_inferred)\n",
" stacked_bar_quality_text_commute_inferred = f\"{commute_inferred_match.group(1)} trips {commute_inferred_match.group(7)}\\n from {commute_inferred_match.group(2)} {commute_inferred_match.group(3)}\"\n",
"\n",
" # Plot entries\n",
" fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True) \n",
" text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True) \n",
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
" plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_mode, debug_df, values_to_translations)\n",
" \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n (Confirmed trips)\", ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
" plt.clf()\n",
Expand Down Expand Up @@ -290,10 +331,12 @@
"plot_title_no_quality=\"Number of trips for each purpose\"\n",
"file_name= f\"ntrips_purpose{file_suffix}\"\n",
"try:\n",
" fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n",
" text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n",
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
" plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax, text_results, colors_purpose, debug_df, value_to_translations_purpose)\n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
" plt.clf()\n",
Expand Down Expand Up @@ -337,17 +380,22 @@
" ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n",
" ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n",
" expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)] if \"mode_confirm_w_other\" in expanded_ct.columns else None\n",
" expanded_ct_inferred_u80 = expanded_ct_inferred.loc[(expanded_ct_inferred['distance'] <= cutoff)] if \"mode_confirm_w_other\" in expanded_ct_inferred.columns else None\n",
" expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]\n",
"\n",
" sensed_u80_quality_text = f\"{len(expanded_ct_sensed_u80)} trips ({round(len(expanded_ct_sensed_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_sensed_u80)} {sensed_match.group(3)}\"\n",
" labeled_u80_quality_text = f\"{len(expanded_ct_u80)} trips ({round(len(expanded_ct_u80)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(expanded_ct_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n",
" \n",
" inferred_u80_quality_text = f\"{len(expanded_ct_inferred_u80)} trips ({round(len(expanded_ct_inferred_u80)/len(expanded_ct_inferred)*100)}% of all inferred,\\n{round(len(expanded_ct_inferred_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_inferred_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct_inferred.columns else \"0 inferred trips\"\n",
"\n",
" # Plot entries\n",
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
" fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n",
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
" plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
" # we can have an missing attribute error during the pre-procssing, in which case we should show the missing plot\n",
Expand Down Expand Up @@ -381,13 +429,15 @@
"file_name =f\"total_trip_length{file_suffix}\"\n",
"\n",
"try:\n",
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
" fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n",
" \n",
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
" text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n",
" plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
" plt.clf()\n",
Expand Down Expand Up @@ -421,16 +471,20 @@
" ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n",
" ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n",
" labeled_land_trips_df = expanded_ct[expanded_ct['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct.columns else None\n",
" inferred_land_trips_df = expanded_ct_inferred[expanded_ct_inferred['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct_inferred.columns else None\n",
" sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n",
" \n",
" sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n",
" labeled_land_quality_text = f\"{len(labeled_land_trips_df)} trips ({round(len(labeled_land_trips_df)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(labeled_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(labeled_land_trips_df)} {sensed_match.group(3)}\" if \"mode_confirm_w_other\" in expanded_ct.columns else \"0 labeled trips\"\n",
"\n",
" fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n",
" labeled_land_quality_text = f\"{len(labeled_land_trips_df)} trips ({round(len(labeled_land_trips_df)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(labeled_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(labeled_land_trips_df)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n",
" inferred_land_quality_text = f\"{len(inferred_land_trips_df)} trips ({round(len(inferred_land_trips_df)/len(expanded_ct_inferred)*100)}% of all inferred,\\n{round(len(inferred_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(inferred_land_trips_df)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct_inferred.columns else \"0 inferred trips\"\n",
" \n",
" fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n",
" plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n",
" \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
" plt.clf()\n",
Expand Down
Loading

0 comments on commit 88180dc

Please sign in to comment.