diff --git a/viz_scripts/energy_calculations.ipynb b/viz_scripts/energy_calculations.ipynb index 4095929..f287e8f 100644 --- a/viz_scripts/energy_calculations.ipynb +++ b/viz_scripts/energy_calculations.ipynb @@ -106,7 +106,7 @@ }, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index f93de0d..0e1f393 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -90,7 +90,8 @@ "metadata": {}, "outputs": [], "source": [ - "colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)" + "colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels(dynamic_labels)\n", + "values_to_translations, value_to_translations_purpose, values_to_translations_replaced = await scaffolding.translate_values_to_labels(dynamic_labels)" ] }, { @@ -108,7 +109,7 @@ "metadata": {}, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", @@ -207,8 +208,8 @@ " # We will have text results corresponding to the axes for simplicity and consistency\n", " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " \n", - " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", - " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", + " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " \n", @@ -258,8 +259,8 @@ " # Plot entries\n", " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True) \n", " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", - " plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_mode, debug_df, values_to_translations)\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -291,8 +292,8 @@ "try:\n", " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", - " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax, text_results, colors_purpose, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax, text_results, colors_purpose, debug_df, value_to_translations_purpose)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -335,7 +336,7 @@ "\n", " ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n", " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n", - " expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct.columns else None\n", + " expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)] if \"mode_confirm_w_other\" in expanded_ct.columns else None\n", " expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]\n", " sensed_u80_quality_text = f\"{len(expanded_ct_sensed_u80)} trips ({round(len(expanded_ct_sensed_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_sensed_u80)} {sensed_match.group(3)}\"\n", " labeled_u80_quality_text = f\"{len(expanded_ct_u80)} trips ({round(len(expanded_ct_u80)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(expanded_ct_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n", @@ -343,8 +344,8 @@ " # Plot entries\n", " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", - " plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", @@ -383,8 +384,8 @@ " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", " \n", " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", - " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", @@ -419,15 +420,15 @@ "try:\n", " ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n", " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n", - " labeled_land_trips_df = expanded_ct[expanded_ct['mode_confirm'] != \"air\"] if \"mode_confirm\" in expanded_ct.columns else None\n", + " labeled_land_trips_df = expanded_ct[expanded_ct['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct.columns else None\n", " sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n", " \n", " sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n", - " labeled_land_quality_text = f\"{len(labeled_land_trips_df)} trips ({round(len(labeled_land_trips_df)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(labeled_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(labeled_land_trips_df)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n", + " labeled_land_quality_text = f\"{len(labeled_land_trips_df)} trips ({round(len(labeled_land_trips_df)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(labeled_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(labeled_land_trips_df)} {sensed_match.group(3)}\" if \"mode_confirm_w_other\" in expanded_ct.columns else \"0 labeled trips\"\n", "\n", " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", - " plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", diff --git a/viz_scripts/generic_metrics_sensed.ipynb b/viz_scripts/generic_metrics_sensed.ipynb index 705c32f..dca74d2 100644 --- a/viz_scripts/generic_metrics_sensed.ipynb +++ b/viz_scripts/generic_metrics_sensed.ipynb @@ -96,8 +96,8 @@ " expanded_ct[\"primary_mode\"] = expanded_ct.ble_sensed_summary.apply(lambda md: max(md[\"distance\"], key=md[\"distance\"].get))\n", " unique_keys = expanded_ct.groupby(\"primary_mode\").agg({distance_col: \"count\"}).index\n", " print(unique_keys)\n", - " colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels({}, dict(zip(unique_keys, unique_keys)), {})\n", - " colors_sensed = colors_mode\n", + " colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels(unique_keys) #Extract ble color mapping\n", + " colors_sensed = colors_ble\n", "except ValueError as e:\n", " print(\"Got ValueError \", e)" ] diff --git a/viz_scripts/generic_timeseries.ipynb b/viz_scripts/generic_timeseries.ipynb index 189afdf..63ef911 100644 --- a/viz_scripts/generic_timeseries.ipynb +++ b/viz_scripts/generic_timeseries.ipynb @@ -87,7 +87,7 @@ }, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb index 0e2e63d..dc62074 100644 --- a/viz_scripts/mode_specific_metrics.ipynb +++ b/viz_scripts/mode_specific_metrics.ipynb @@ -103,7 +103,8 @@ "metadata": {}, "outputs": [], "source": [ - "colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)" + "colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels(dynamic_labels)\n", + "values_to_translations, value_to_translations_purpose, value_to_translations_replaced = await scaffolding.translate_values_to_labels(dynamic_labels)" ] }, { @@ -121,7 +122,7 @@ "metadata": {}, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", @@ -193,8 +194,8 @@ "try:\n", " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", - " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", - " f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_purpose, debug_df)\n", + " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", + " f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_purpose, debug_df, value_to_translations_purpose)\n", " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", @@ -228,8 +229,8 @@ "try:\n", " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", - " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n (Trip distance)\", ax, text_results, colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n (Trip distance)\", ax, text_results, colors_replaced, debug_df, value_to_translations_replaced)\n", " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", @@ -263,8 +264,8 @@ "try:\n", " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", - " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_replaced, debug_df, value_to_translations_replaced)\n", " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", diff --git a/viz_scripts/mode_specific_timeseries.ipynb b/viz_scripts/mode_specific_timeseries.ipynb index d696794..58ee56d 100644 --- a/viz_scripts/mode_specific_timeseries.ipynb +++ b/viz_scripts/mode_specific_timeseries.ipynb @@ -96,7 +96,7 @@ "metadata": {}, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py index 9879ba8..34e26bc 100644 --- a/viz_scripts/plots.py +++ b/viz_scripts/plots.py @@ -63,16 +63,16 @@ def merge_small_entries(labels, values): else: #assuming labeled or inferred # This part if a bit tricky # We could have already had a non-zero other, and it could be small or large - if "Other" not in v2l_df.index: + if "other" not in v2l_df.index: # zero other will end up with misc_count if misc_count.vals > 0: - v2l_df.loc["Other"] = misc_count - elif "Other" in small_chunk.index: + v2l_df.loc["other"] = misc_count + elif "other" in small_chunk.index: # non-zero small other will already be in misc_count - v2l_df.loc["Other"] = misc_count + v2l_df.loc["other"] = misc_count else: # non-zero large other, will not already be in misc_count - v2l_df.loc["Other"] = v2l_df.loc["Other"] + misc_count + v2l_df.loc["other"] = v2l_df.loc["other"] + misc_count disp.display(v2l_df) @@ -111,7 +111,7 @@ def plot_and_text_error(e, ax, file_name): return alt_text, alt_html # Creates/ Appends single bar to the 100% Stacked Bar Chart -def plot_and_text_stacked_bar_chart(df, agg_fcn, bar_label, ax, text_result, colors, debug_df): +def plot_and_text_stacked_bar_chart(df, agg_fcn, bar_label, ax, text_result, colors, debug_df, values_to_translations={}): """ Inputs: df = Data frame corresponding to the bar in a stacked bar chart. It is expected to have three columns, which represent the 'label', 'value' @@ -145,7 +145,7 @@ def plot_and_text_stacked_bar_chart(df, agg_fcn, bar_label, ax, text_result, col mode_prop = long['Proportion'] mode_count = long['Value'] vals_str = [f'{y:.1f} %\n({x:.0f})' if y > 4 else '' for x, y in zip(mode_count, mode_prop)] - bar = ax.barh(y=bar_label, width=mode_prop, height=bar_height, left=bar_width, label=label, color=colors[label]) + bar = ax.barh(y=bar_label, width=mode_prop, height=bar_height, left=bar_width, label=values_to_translations.get(label, label), color=colors[label]) ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90, fontsize=16) bar_width = [total + val for total, val in zip(bar_width, mode_prop)] else: @@ -158,9 +158,10 @@ def plot_and_text_stacked_bar_chart(df, agg_fcn, bar_label, ax, text_result, col # Fix for the error: RuntimeError("Unknown return type"), adding the below line to address as mentioned here https://github.com/matplotlib/matplotlib/issues/25625/ ax.set_xlim(right=ax.get_xlim()[1] + 1.0, auto=True) - text_result[0], text_result[1] = store_alt_text_and_html_stacked_bar_chart(df_all_entries, bar_label) + text_result[0], text_result[1] = store_alt_text_and_html_stacked_bar_chart(df_all_entries, bar_label, values_to_translations) print("After populating, %s" % text_result) except Exception as e: + print(e) # tb.print_exception(type(e), e, e.__traceback__) #ax.set_title("Insufficient data", loc="center") ax.set_ylabel(bar_label) @@ -456,7 +457,7 @@ def access_alt_html(html_content, chart_name): return html_content # Appends bar information into into the alt_html -def store_alt_text_and_html_stacked_bar_chart(df, var_name): +def store_alt_text_and_html_stacked_bar_chart(df, var_name, values_to_translations): """ Inputs: df = dataframe combining columns as Trip Type, Label, Value, Proportion chart_name = name of the chart @@ -464,12 +465,12 @@ def store_alt_text_and_html_stacked_bar_chart(df, var_name): # Generate alt text file alt_text = f"\nStacked Bar of: {var_name}\n" for i in range(len(df)): - alt_text += f"{df['Label'].iloc[i]} is {df['Value'].iloc[i]}({df['Proportion'].iloc[i]}%).\n" + alt_text += f"{values_to_translations.get(df['Label'].iloc[i], df['Label'].iloc[i])} is {df['Value'].iloc[i]}({df['Proportion'].iloc[i]}%).\n" # Generate html table alt_html = "\n" for i in range(len(df)): - alt_html += f"
Trip Type: {var_name}