diff --git a/viz_scripts/energy_calculations.ipynb b/viz_scripts/energy_calculations.ipynb index 4095929..f287e8f 100644 --- a/viz_scripts/energy_calculations.ipynb +++ b/viz_scripts/energy_calculations.ipynb @@ -106,7 +106,7 @@ }, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index f93de0d..0e1f393 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -90,7 +90,8 @@ "metadata": {}, "outputs": [], "source": [ - "colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)" + "colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels(dynamic_labels)\n", + "values_to_translations, value_to_translations_purpose, values_to_translations_replaced = await scaffolding.translate_values_to_labels(dynamic_labels)" ] }, { @@ -108,7 +109,7 @@ "metadata": {}, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", @@ -207,8 +208,8 @@ " # We will have text results corresponding to the axes for simplicity and consistency\n", " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " \n", - " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", - " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", + " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " \n", @@ -258,8 +259,8 @@ " # Plot entries\n", " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True) \n", " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", - " plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_mode, debug_df, values_to_translations)\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -291,8 +292,8 @@ "try:\n", " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", - " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax, text_results, colors_purpose, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax, text_results, colors_purpose, debug_df, value_to_translations_purpose)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -335,7 +336,7 @@ "\n", " ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n", " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n", - " expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct.columns else None\n", + " expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)] if \"mode_confirm_w_other\" in expanded_ct.columns else None\n", " expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]\n", " sensed_u80_quality_text = f\"{len(expanded_ct_sensed_u80)} trips ({round(len(expanded_ct_sensed_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_sensed_u80)} {sensed_match.group(3)}\"\n", " labeled_u80_quality_text = f\"{len(expanded_ct_u80)} trips ({round(len(expanded_ct_u80)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(expanded_ct_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n", @@ -343,8 +344,8 @@ " # Plot entries\n", " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", - " plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", @@ -383,8 +384,8 @@ " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", " \n", " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", - " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", @@ -419,15 +420,15 @@ "try:\n", " ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n", " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n", - " labeled_land_trips_df = expanded_ct[expanded_ct['mode_confirm'] != \"air\"] if \"mode_confirm\" in expanded_ct.columns else None\n", + " labeled_land_trips_df = expanded_ct[expanded_ct['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct.columns else None\n", " sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n", " \n", " sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n", - " labeled_land_quality_text = f\"{len(labeled_land_trips_df)} trips ({round(len(labeled_land_trips_df)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(labeled_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(labeled_land_trips_df)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n", + " labeled_land_quality_text = f\"{len(labeled_land_trips_df)} trips ({round(len(labeled_land_trips_df)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(labeled_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(labeled_land_trips_df)} {sensed_match.group(3)}\" if \"mode_confirm_w_other\" in expanded_ct.columns else \"0 labeled trips\"\n", "\n", " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", - " plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n", " plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", diff --git a/viz_scripts/generic_metrics_sensed.ipynb b/viz_scripts/generic_metrics_sensed.ipynb index 705c32f..dca74d2 100644 --- a/viz_scripts/generic_metrics_sensed.ipynb +++ b/viz_scripts/generic_metrics_sensed.ipynb @@ -96,8 +96,8 @@ " expanded_ct[\"primary_mode\"] = expanded_ct.ble_sensed_summary.apply(lambda md: max(md[\"distance\"], key=md[\"distance\"].get))\n", " unique_keys = expanded_ct.groupby(\"primary_mode\").agg({distance_col: \"count\"}).index\n", " print(unique_keys)\n", - " colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels({}, dict(zip(unique_keys, unique_keys)), {})\n", - " colors_sensed = colors_mode\n", + " colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels(unique_keys) #Extract ble color mapping\n", + " colors_sensed = colors_ble\n", "except ValueError as e:\n", " print(\"Got ValueError \", e)" ] diff --git a/viz_scripts/generic_timeseries.ipynb b/viz_scripts/generic_timeseries.ipynb index 189afdf..63ef911 100644 --- a/viz_scripts/generic_timeseries.ipynb +++ b/viz_scripts/generic_timeseries.ipynb @@ -87,7 +87,7 @@ }, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb index 0e2e63d..dc62074 100644 --- a/viz_scripts/mode_specific_metrics.ipynb +++ b/viz_scripts/mode_specific_metrics.ipynb @@ -103,7 +103,8 @@ "metadata": {}, "outputs": [], "source": [ - "colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels(dynamic_labels, dic_re, dic_pur)" + "colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels(dynamic_labels)\n", + "values_to_translations, value_to_translations_purpose, value_to_translations_replaced = await scaffolding.translate_values_to_labels(dynamic_labels)" ] }, { @@ -121,7 +122,7 @@ "metadata": {}, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", @@ -193,8 +194,8 @@ "try:\n", " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", - " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", - " f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_purpose, debug_df)\n", + " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", + " f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_purpose, debug_df, value_to_translations_purpose)\n", " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", @@ -228,8 +229,8 @@ "try:\n", " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", - " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n (Trip distance)\", ax, text_results, colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", + " \"Labeled by user\\n (Trip distance)\", ax, text_results, colors_replaced, debug_df, value_to_translations_replaced)\n", " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", @@ -263,8 +264,8 @@ "try:\n", " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", - " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_replaced, debug_df, value_to_translations_replaced)\n", " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", diff --git a/viz_scripts/mode_specific_timeseries.ipynb b/viz_scripts/mode_specific_timeseries.ipynb index d696794..58ee56d 100644 --- a/viz_scripts/mode_specific_timeseries.ipynb +++ b/viz_scripts/mode_specific_timeseries.ipynb @@ -96,7 +96,7 @@ "metadata": {}, "outputs": [], "source": [ - "expanded_ct, file_suffix, quality_text, debug_df = scaffolding.load_viz_notebook_data(year,\n", + "expanded_ct, file_suffix, quality_text, debug_df = await scaffolding.load_viz_notebook_data(year,\n", " month,\n", " program,\n", " study_type,\n", diff --git a/viz_scripts/plots.py b/viz_scripts/plots.py index 9879ba8..34e26bc 100644 --- a/viz_scripts/plots.py +++ b/viz_scripts/plots.py @@ -63,16 +63,16 @@ def merge_small_entries(labels, values): else: #assuming labeled or inferred # This part if a bit tricky # We could have already had a non-zero other, and it could be small or large - if "Other" not in v2l_df.index: + if "other" not in v2l_df.index: # zero other will end up with misc_count if misc_count.vals > 0: - v2l_df.loc["Other"] = misc_count - elif "Other" in small_chunk.index: + v2l_df.loc["other"] = misc_count + elif "other" in small_chunk.index: # non-zero small other will already be in misc_count - v2l_df.loc["Other"] = misc_count + v2l_df.loc["other"] = misc_count else: # non-zero large other, will not already be in misc_count - v2l_df.loc["Other"] = v2l_df.loc["Other"] + misc_count + v2l_df.loc["other"] = v2l_df.loc["other"] + misc_count disp.display(v2l_df) @@ -111,7 +111,7 @@ def plot_and_text_error(e, ax, file_name): return alt_text, alt_html # Creates/ Appends single bar to the 100% Stacked Bar Chart -def plot_and_text_stacked_bar_chart(df, agg_fcn, bar_label, ax, text_result, colors, debug_df): +def plot_and_text_stacked_bar_chart(df, agg_fcn, bar_label, ax, text_result, colors, debug_df, values_to_translations={}): """ Inputs: df = Data frame corresponding to the bar in a stacked bar chart. It is expected to have three columns, which represent the 'label', 'value' @@ -145,7 +145,7 @@ def plot_and_text_stacked_bar_chart(df, agg_fcn, bar_label, ax, text_result, col mode_prop = long['Proportion'] mode_count = long['Value'] vals_str = [f'{y:.1f} %\n({x:.0f})' if y > 4 else '' for x, y in zip(mode_count, mode_prop)] - bar = ax.barh(y=bar_label, width=mode_prop, height=bar_height, left=bar_width, label=label, color=colors[label]) + bar = ax.barh(y=bar_label, width=mode_prop, height=bar_height, left=bar_width, label=values_to_translations.get(label, label), color=colors[label]) ax.bar_label(bar, label_type='center', labels=vals_str, rotation=90, fontsize=16) bar_width = [total + val for total, val in zip(bar_width, mode_prop)] else: @@ -158,9 +158,10 @@ def plot_and_text_stacked_bar_chart(df, agg_fcn, bar_label, ax, text_result, col # Fix for the error: RuntimeError("Unknown return type"), adding the below line to address as mentioned here https://github.com/matplotlib/matplotlib/issues/25625/ ax.set_xlim(right=ax.get_xlim()[1] + 1.0, auto=True) - text_result[0], text_result[1] = store_alt_text_and_html_stacked_bar_chart(df_all_entries, bar_label) + text_result[0], text_result[1] = store_alt_text_and_html_stacked_bar_chart(df_all_entries, bar_label, values_to_translations) print("After populating, %s" % text_result) except Exception as e: + print(e) # tb.print_exception(type(e), e, e.__traceback__) #ax.set_title("Insufficient data", loc="center") ax.set_ylabel(bar_label) @@ -456,7 +457,7 @@ def access_alt_html(html_content, chart_name): return html_content # Appends bar information into into the alt_html -def store_alt_text_and_html_stacked_bar_chart(df, var_name): +def store_alt_text_and_html_stacked_bar_chart(df, var_name, values_to_translations): """ Inputs: df = dataframe combining columns as Trip Type, Label, Value, Proportion chart_name = name of the chart @@ -464,12 +465,12 @@ def store_alt_text_and_html_stacked_bar_chart(df, var_name): # Generate alt text file alt_text = f"\nStacked Bar of: {var_name}\n" for i in range(len(df)): - alt_text += f"{df['Label'].iloc[i]} is {df['Value'].iloc[i]}({df['Proportion'].iloc[i]}%).\n" + alt_text += f"{values_to_translations.get(df['Label'].iloc[i], df['Label'].iloc[i])} is {df['Value'].iloc[i]}({df['Proportion'].iloc[i]}%).\n" # Generate html table alt_html = "\n" for i in range(len(df)): - alt_html += f"{df['Label'].iloc[i]}{df['Value'].iloc[i]}{df['Proportion'].iloc[i]}%" + alt_html += f"{values_to_translations.get(df['Label'].iloc[i], df['Label'].iloc[i])}{df['Value'].iloc[i]}{df['Proportion'].iloc[i]}%" html_content = f"""

Trip Type: {var_name}

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 7eec6fd..2c54bf6 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -4,10 +4,13 @@ import sys from collections import defaultdict from collections import OrderedDict +import difflib import emission.storage.timeseries.abstract_timeseries as esta import emission.storage.timeseries.tcquery as esttc import emission.core.wrapper.localdate as ecwl +import emcommon.diary.base_modes as emcdb +import emcommon.util as emcu # Module for pretty-printing outputs (e.g. head) to help users # understand what is going on @@ -109,7 +112,7 @@ def expand_userinputs(labeled_ct): unique_users = lambda df: len(df.user_id.unique()) if "user_id" in df.columns else 0 trip_label_count = lambda s, df: len(df[s].dropna()) if s in df.columns else 0 -def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False): +async def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False): """ Inputs: year/month/program/study_type = parameters from the visualization notebook dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose @@ -128,6 +131,13 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic if "distance" in expanded_ct.columns: unit_conversions(expanded_ct) + # Select the labels from dynamic_labels is available, + # else get it from emcommon/resources/label-options.default.json + if (len(dynamic_labels)): + labels = dynamic_labels + else: + labels = await emcu.read_json_resource("label-options.default.json") + # Map new mode labels with translations dictionary from dynamic_labels # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 if "mode_confirm" in expanded_ct.columns: @@ -136,6 +146,9 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic expanded_ct['Mode_confirm'] = expanded_ct['mode_confirm'].map(dic_mode_mapping) else: expanded_ct['Mode_confirm'] = expanded_ct['mode_confirm'].map(dic_re) + # If the 'mode_confirm' is not available as the list of keys in the dynamic_labels or label_options.default.json, then, we should transform it as 'other' + mode_values = [item['value'] for item in labels['MODE']] + expanded_ct['mode_confirm_w_other'] = expanded_ct['mode_confirm'].apply(lambda mode: 'other' if mode not in mode_values else mode) if study_type == 'program': # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 if 'replaced_mode' in expanded_ct.columns: @@ -144,6 +157,8 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic expanded_ct['Replaced_mode'] = expanded_ct['replaced_mode'].map(dic_replaced_mapping) else: expanded_ct['Replaced_mode'] = expanded_ct['replaced_mode'].map(dic_re) + replaced_modes = [item['value'] for item in labels['REPLACED_MODE']] + expanded_ct['replaced_mode_w_other'] = expanded_ct['replaced_mode'].apply(lambda mode: 'other' if mode not in replaced_modes else mode) else: print("This is a program, but no replaced modes found. Likely cold start case. Ignoring replaced mode mapping") else: @@ -153,10 +168,12 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 if dic_pur is not None and "purpose_confirm" in expanded_ct.columns: if (len(dynamic_labels)): - dic_purpose_mapping = mapping_labels(dynamic_labels, "PURPOSE") - expanded_ct['Trip_purpose'] = expanded_ct['purpose_confirm'].map(dic_purpose_mapping) + dic_purpose_mapping = mapping_labels(dynamic_labels, "PURPOSE") + expanded_ct['Trip_purpose'] = expanded_ct['purpose_confirm'].map(dic_purpose_mapping) else: expanded_ct['Trip_purpose'] = expanded_ct['purpose_confirm'].map(dic_pur) + purpose_values = [item['value'] for item in labels['PURPOSE']] + expanded_ct['purpose_confirm_w_other'] = expanded_ct['purpose_confirm'].apply(lambda value: 'other' if value not in purpose_values else value) # Document data quality file_suffix = get_file_suffix(year, month, program) @@ -191,28 +208,63 @@ def translate_labels(labels): translation = translations.get(value) translation_mapping[value] = translation return defaultdict(lambda: 'Other', translation_mapping) - dic_mapping = translate_labels(dynamic_labels[label_type]) + dic_mapping = translate_labels(dynamic_labels.get(label_type, '')) return dic_mapping # Function: Maps "MODE", "PURPOSE", and "REPLACED_MODE" to colors. -# Input: dynamic_labels, dic_re, and dic_pur +# Input: dynamic_labels # Output: Dictionary mapping between color with mode/purpose/sensed -def mapping_color_labels(dynamic_labels, dic_re, dic_pur): +async def mapping_color_labels(dynamic_labels = {}, unique_keys = []): + # Load default options from e-mission-common + labels = await emcu.read_json_resource("label-options.default.json") sensed_values = ["WALKING", "BICYCLING", "IN_VEHICLE", "AIR_OR_HSR", "UNKNOWN", "OTHER", "INVALID"] - if len(dynamic_labels) > 0: - mode_values = list(mapping_labels(dynamic_labels, "MODE").values()) if "MODE" in dynamic_labels else [] - replaced_mode_values = list(mapping_labels(dynamic_labels, "REPLACED_MODE").values()) if "REPLACED_MODE" in dynamic_labels else [] - purpose_values = list(mapping_labels(dynamic_labels, "PURPOSE").values()) + ['Other'] if "PURPOSE" in dynamic_labels else [] - combined_mode_values = mode_values + replaced_mode_values + ['Other'] - else: - combined_mode_values = (list(OrderedDict.fromkeys(dic_re.values())) + ['Other']) - purpose_values = list(OrderedDict.fromkeys(dic_pur.values())) - colors_mode = dict(zip(combined_mode_values, plt.cm.tab20.colors[:len(combined_mode_values)])) + # If dynamic_labels are provided, then we will use the dynamic labels for mapping + if len(dynamic_labels) > 0: + labels = dynamic_labels + + # Load base mode values and purpose values + mode_values = [mode["value"] for mode in labels["MODE"]] if "MODE" in labels else [] + purpose_values = [mode["value"] for mode in labels["PURPOSE"]] if "PURPOSE" in labels else [] + replaced_values = [mode["value"] for mode in labels["REPLACED_MODE"]] if "REPLACED_MODE" in labels else [] + + # Mapping between mode values and base_mode OR baseMode (backwards compatibility) + value_to_basemode = {mode["value"]: mode.get("base_mode", mode.get("baseMode", "UNKNOWN")) for mode in labels["MODE"]} + # Assign colors to mode, replaced, purpose, and sensed values + colors_mode = emcdb.dedupe_colors([ + [mode, emcdb.BASE_MODES[value_to_basemode.get(mode, "UNKNOWN")]['color']] + for mode in set(mode_values) + ], adjustment_range=[1,1.8]) + colors_replaced = emcdb.dedupe_colors([ + [mode, emcdb.BASE_MODES[value_to_basemode.get(mode, "UNKNOWN")]['color']] + for mode in set(replaced_values) + ], adjustment_range=[1,1.8]) colors_purpose = dict(zip(purpose_values, plt.cm.tab20.colors[:len(purpose_values)])) - colors_sensed = dict(zip(sensed_values, plt.cm.tab20.colors[:len(sensed_values)])) - - return colors_mode, colors_purpose, colors_sensed + colors_sensed = emcdb.dedupe_colors([ + [label, emcdb.BASE_MODES[label.upper()]['color'] if label.upper() != 'INVALID' else emcdb.BASE_MODES['UNKNOWN']['color']] + for label in sensed_values + ], adjustment_range=[1,1.8]) + colors_ble = emcdb.dedupe_colors([ + [label, emcdb.BASE_MODES[label]['color']] + for label in set(unique_keys) + ], adjustment_range=[1,1.8]) + return colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble + +async def translate_values_to_labels(dynamic_labels, language="en"): + # Load default options from e-mission-common + labels = await emcu.read_json_resource("label-options.default.json") + + # If dynamic_labels are provided, then we will use the dynamic labels for mapping + if len(dynamic_labels) > 0: + labels = dynamic_labels + # Mapping between values and translations for display on plots (for Mode) + values_to_translations_mode = mapping_labels(labels, "MODE") + # Mapping between values and translations for display on plots (for Purpose) + values_to_translations_purpose = mapping_labels(labels, "PURPOSE") + # Mapping between values and translations for display on plots (for Replaced mode) + values_to_translations_replaced = mapping_labels(labels, "REPLACED_MODE") + + return values_to_translations_mode, values_to_translations_purpose, values_to_translations_replaced # Function: Maps survey answers to colors. # Input: dictionary of raw and translated survey answers diff --git a/viz_scripts/survey_metrics.ipynb b/viz_scripts/survey_metrics.ipynb index a7d395d..26fbf5f 100644 --- a/viz_scripts/survey_metrics.ipynb +++ b/viz_scripts/survey_metrics.ipynb @@ -61,7 +61,7 @@ "label_units, short_label, label_units_lower, distance_col, weight_unit = scaffolding.get_units(use_imperial)\n", "\n", "# get color mappings\n", - "colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels({}, {}, {}) #just need sensed" + "colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels() #just need sensed" ] }, { @@ -108,13 +108,16 @@ "outputs": [], "source": [ "#if fleet, replace primary_mode with primary_ble_sensed mode\n", - "if bluetooth_only and 'ble_sensed_summary' in expanded_ct_sensed.columns:\n", - " expanded_ct_sensed = expanded_ct_sensed[expanded_ct_sensed['ble_sensed_summary'].notna()]\n", - " expanded_ct_sensed[\"primary_mode\"] = expanded_ct_sensed.ble_sensed_summary.apply(lambda md: max(md[\"distance\"], key=md[\"distance\"].get))\n", - " unique_keys = expanded_ct_sensed.groupby(\"primary_mode\").agg({distance_col: \"count\"}).index\n", - " print(unique_keys)\n", - " colors_mode, colors_purpose, colors_sensed = scaffolding.mapping_color_labels({}, dict(zip(unique_keys, unique_keys)), {})\n", - " colors_sensed = colors_mode" + "try:\n", + " if bluetooth_only and 'ble_sensed_summary' in expanded_ct_sensed.columns:\n", + " expanded_ct_sensed = expanded_ct_sensed[expanded_ct_sensed['ble_sensed_summary'].notna()]\n", + " expanded_ct_sensed[\"primary_mode\"] = expanded_ct_sensed.ble_sensed_summary.apply(lambda md: max(md[\"distance\"], key=md[\"distance\"].get))\n", + " unique_keys = expanded_ct_sensed.groupby(\"primary_mode\").agg({distance_col: \"count\"}).index\n", + " print(unique_keys)\n", + " colors_mode, colors_replaced, colors_purpose, colors_sensed, colors_ble = await scaffolding.mapping_color_labels(unique_keys)\n", + " colors_sensed = colors_ble\n", + "except ValueError as e:\n", + " print(\"Got ValueError \", e)" ] }, {