e-mission · shankari · Jan 6, 2025 · Oct 11, 2024 · Oct 11, 2024 · Oct 11, 2024
diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb
@@ -68,7 +68,7 @@
     "if survey_info.get('trip-labels', None) == 'ENKETO':\n",
     "    ipython = get_ipython()\n",
     "    ipython._showtraceback = scaffolding.no_traceback_handler\n",
-    "    raise Exception(\"The plots in this notebook are not relecant for ENKETO trip-labels\")"
+    "    raise Exception(\"The plots in this notebook are not relevant for ENKETO trip-labels\")"
    ]
   },
   {
@@ -121,7 +121,8 @@
     "                                                                            program,\n",
     "                                                                            study_type,\n",
     "                                                                            dynamic_labels,\n",
-    "                                                                            include_test_users=include_test_users)"
+    "                                                                            include_test_users=include_test_users,\n",
+    "                                                                            add_footprint=True)"
    ]
   },
   {
@@ -166,7 +167,8 @@
     "                                                                            program,\n",
     "                                                                            study_type,\n",
     "                                                                            dynamic_labels,\n",
-    "                                                                            include_test_users=include_test_users)"
+    "                                                                            include_test_users=include_test_users,\n",
+    "                                                                            add_footprint=True)"
    ]
   },
   {
@@ -244,7 +246,7 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
-    "                                    \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
+    "                                    \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
     "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
@@ -297,7 +299,7 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_commute_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
+    "                                    \"Inferred from prior labels\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -332,7 +334,7 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
+    "                                    \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
     "    plt.clf()\n",
@@ -431,7 +433,7 @@
     "    plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
+    "                                    \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
     "    plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",
@@ -466,8 +468,8 @@
     "try:\n",
     "    ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n",
     "    ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n",
-    "    labeled_land_trips_df = expanded_ct[expanded_ct['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct.columns else None\n",
-    "    inferred_land_trips_df = expanded_ct_inferred[expanded_ct_inferred['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct_inferred.columns else None\n",
+    "    labeled_land_trips_df = expanded_ct[expanded_ct['base_mode'] != \"AIR\"] if {\"mode_confirm_w_other\", \"base_mode\"}.issubset(expanded_ct.columns) else None  # filter reads base_mode, plot groups by mode_confirm_w_other: require both\n",
+    "    inferred_land_trips_df = expanded_ct_inferred[expanded_ct_inferred['base_mode'] != \"AIR\"] if {\"mode_confirm_w_other\", \"base_mode\"}.issubset(expanded_ct_inferred.columns) else None\n",
     "    sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n",
     "    \n",
     "    sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n",
@@ -478,7 +480,7 @@
     "    plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n\"+labeled_land_quality_text,  ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
     "    plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Labeled and Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
+    "                                    \"Inferred from prior labels\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
     "    plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
     "    set_title_and_save(fig, text_results, plot_title_no_quality, file_name)    \n",

diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb
@@ -264,7 +264,7 @@
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
     "                                    f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)\n",
     "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
-    "                                    f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred, value_to_translations_purpose)\n",
+    "                                    f\"Inferred `{mode_of_interest}` from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred, value_to_translations_purpose)\n",
     "    plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
@@ -301,7 +301,7 @@
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    \"Labeled by user\\n (Trip distance)\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_replaced, debug_df, value_to_translations_replaced)\n",
     "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    \"Labeled and Inferred by OpenPATH\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
+    "                                    \"Inferred from prior labels\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
     "    plot_title = plot_title_no_quality\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
@@ -338,7 +338,7 @@
     "    plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
     "                                    f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_replaced, debug_df, value_to_translations_replaced)\n",
     "    plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
-    "                                    f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
+    "                                    f\"Inferred `{mode_of_interest}` from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
     "    plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
     "    set_title_and_save(fig, text_results, plot_title, file_name)\n",
     "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",

diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py
@@ -5,6 +5,7 @@
 from collections import defaultdict
 from collections import OrderedDict
 import difflib
+import logging
 
 import emission.storage.timeseries.abstract_timeseries as esta
 import emission.storage.timeseries.tcquery as esttc
@@ -59,6 +60,7 @@ async def add_base_mode_footprint(trip_list):
     labels = await emcu.read_json_resource("label-options.default.json")
     value_to_basemode = {mode["value"]: mode.get("base_mode", mode.get("baseMode", "UNKNOWN")) for mode in labels["MODE"]}
 
+    counter_trip_error = 0
     for trip in trip_list:
         #format so emffc can get id for metadata
         trip['data']['_id'] = trip['_id']
@@ -75,13 +77,14 @@ async def add_base_mode_footprint(trip_list):
                     trip['data']['replaced_base_mode'] = "UNKNOWN"
                     trip['data']['replaced_mode_footprint'] = {}
 
-            except:
-                print("hit exception")
+            except Exception as e:
+                counter_trip_error = counter_trip_error + 1
+                logging.debug(f"The exception is : {e} for the trip - {trip['data']['_id']}")
                 trip['data']['base_mode'] = "UNKNOWN"
                 trip['data']['replaced_base_mode'] = "UNKNOWN"
                 trip['data']['mode_confirm_footprint'] = {}
                 trip['data']['replaced_mode_footprint'] = {}
-            
+    logging.debug(f"There are {counter_trip_error} trip errors")
     return trip_list
 
 async def load_all_confirmed_trips(tq, add_footprint):
@@ -254,7 +257,7 @@ async def map_trip_data(expanded_trip_df, study_type, dynamic_labels):
 
     return expanded_trip_df
 
-async def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, include_test_users=False):
+async def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, include_test_users=False, add_footprint=False):
     """ Inputs:
     year/month/program/study_type = parameters from the visualization notebook
     dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose
@@ -263,7 +266,7 @@ async def load_viz_notebook_inferred_data(year, month, program, study_type, dyna
     """
     # Access database
     tq = get_time_query(year, month)
-    participant_ct_df = await load_all_participant_trips(program, tq, include_test_users)
+    participant_ct_df = await load_all_participant_trips(program, tq, include_test_users, add_footprint)
     inferred_ct = filter_inferred_trips(participant_ct_df)
     expanded_it = expand_inferredlabels(inferred_ct)
     expanded_it = await map_trip_data(expanded_it, study_type, dynamic_labels)
@@ -501,29 +504,21 @@ def unit_conversions(df):
     df['distance_miles']= df["distance"]*0.00062 #meters to miles
     df['distance_kms'] = df["distance"] / 1000 #meters to kms
 
-def extract_kwh(footprint_dict):
-    if 'kwh' in footprint_dict.keys():
-        return footprint_dict['kwh']
-    else:
-        print("missing kwh", footprint_dict)
-        return np.nan 
-
-def extract_co2(footprint_dict):
-    if 'kg_co2' in footprint_dict.keys():
-        return footprint_dict['kg_co2']
+def extract_footprint(footprint_dict, footprint_key):
+    if footprint_key in footprint_dict:  # membership on the dict itself; falls through to np.nan when the key is absent
+        return footprint_dict[footprint_key]
     else:
-        print("missing co2", footprint_dict)
         return np.nan
 
 def unpack_energy_emissions(expanded_ct):
-    expanded_ct['Mode_confirm_kg_CO2'] = expanded_ct['mode_confirm_footprint'].apply(extract_co2)
+    expanded_ct['Mode_confirm_kg_CO2'] = expanded_ct['mode_confirm_footprint'].apply(extract_footprint, footprint_key='kg_co2')
     expanded_ct['Mode_confirm_lb_CO2'] = kg_to_lb(expanded_ct['Mode_confirm_kg_CO2'])
-    expanded_ct['Replaced_mode_kg_CO2'] = expanded_ct['replaced_mode_footprint'].apply(extract_co2)
+    expanded_ct['Replaced_mode_kg_CO2'] = expanded_ct['replaced_mode_footprint'].apply(extract_footprint, footprint_key='kg_co2')
     expanded_ct['Replaced_mode_lb_CO2'] = kg_to_lb(expanded_ct['Replaced_mode_kg_CO2'])
     CO2_impact(expanded_ct)
 
-    expanded_ct['Replaced_mode_EI(kWH)'] = expanded_ct['replaced_mode_footprint'].apply(extract_kwh)
-    expanded_ct['Mode_confirm_EI(kWH)'] = expanded_ct['mode_confirm_footprint'].apply(extract_kwh)
+    expanded_ct['Replaced_mode_EI(kWH)'] = expanded_ct['replaced_mode_footprint'].apply(extract_footprint, footprint_key='kwh')
+    expanded_ct['Mode_confirm_EI(kWH)'] = expanded_ct['mode_confirm_footprint'].apply(extract_footprint, footprint_key='kwh')
     energy_impact(expanded_ct)
 
     return expanded_ct