Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor footprint changes #167

Merged
merged 9 commits into from
Jan 6, 2025
22 changes: 12 additions & 10 deletions viz_scripts/generic_metrics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"if survey_info.get('trip-labels', None) == 'ENKETO':\n",
" ipython = get_ipython()\n",
" ipython._showtraceback = scaffolding.no_traceback_handler\n",
" raise Exception(\"The plots in this notebook are not relecant for ENKETO trip-labels\")"
" raise Exception(\"The plots in this notebook are not relevant for ENKETO trip-labels\")"
]
},
{
Expand Down Expand Up @@ -121,7 +121,8 @@
" program,\n",
" study_type,\n",
" dynamic_labels,\n",
" include_test_users=include_test_users)"
" include_test_users=include_test_users,\n",
" add_footprint=True)"
]
},
{
Expand Down Expand Up @@ -166,7 +167,8 @@
" program,\n",
" study_type,\n",
" dynamic_labels,\n",
" include_test_users=include_test_users)"
" include_test_users=include_test_users,\n",
" add_footprint=True)"
]
},
{
Expand Down Expand Up @@ -244,7 +246,7 @@
" plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good text change: it is more concise and clearer.

" plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n",
" \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
Expand Down Expand Up @@ -297,7 +299,7 @@
" plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_commute_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred_commute, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" \"Inferred from prior labels\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
" plt.clf()\n",
Expand Down Expand Up @@ -332,7 +334,7 @@
" plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
" \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
" plt.clf()\n",
Expand Down Expand Up @@ -431,7 +433,7 @@
" plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" \"Inferred from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n",
Expand Down Expand Up @@ -466,8 +468,8 @@
"try:\n",
" ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n",
" ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n",
" labeled_land_trips_df = expanded_ct[expanded_ct['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct.columns else None\n",
" inferred_land_trips_df = expanded_ct_inferred[expanded_ct_inferred['mode_confirm_w_other'] != \"air\"] if \"mode_confirm_w_other\" in expanded_ct_inferred.columns else None\n",
" labeled_land_trips_df = expanded_ct[expanded_ct['base_mode'] != \"AIR\"] if \"mode_confirm_w_other\" in expanded_ct.columns else None\n",
" inferred_land_trips_df = expanded_ct_inferred[expanded_ct_inferred['base_mode'] != \"AIR\"] if \"mode_confirm_w_other\" in expanded_ct_inferred.columns else None\n",
iantei marked this conversation as resolved.
Show resolved Hide resolved
" sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n",
" \n",
" sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n",
Expand All @@ -478,7 +480,7 @@
" plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"mode_confirm_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" \"Inferred from prior labels\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred, values_to_translations)\n",
" plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n",
" set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n",
Expand Down
6 changes: 3 additions & 3 deletions viz_scripts/mode_specific_metrics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@
" plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
" f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)\n",
" plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n",
" f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred, value_to_translations_purpose)\n",
" f\"Inferred `{mode_of_interest}` from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred, value_to_translations_purpose)\n",
" plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
Expand Down Expand Up @@ -301,7 +301,7 @@
" plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled by user\\n (Trip distance)\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_replaced, debug_df, value_to_translations_replaced)\n",
" plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n",
" \"Labeled and Inferred by OpenPATH\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
" \"Inferred from prior labels\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
" plot_title = plot_title_no_quality\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
Expand Down Expand Up @@ -338,7 +338,7 @@
" plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_replaced, debug_df, value_to_translations_replaced)\n",
" plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"replaced_mode_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n",
" f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
" f\"Inferred `{mode_of_interest}` from prior labels\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_replaced, debug_df_inferred, value_to_translations_replaced)\n",
" plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n",
" set_title_and_save(fig, text_results, plot_title, file_name)\n",
"except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",
Expand Down
35 changes: 15 additions & 20 deletions viz_scripts/scaffolding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from collections import defaultdict
from collections import OrderedDict
import difflib
import logging

import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.timeseries.tcquery as esttc
Expand Down Expand Up @@ -59,6 +60,7 @@ async def add_base_mode_footprint(trip_list):
labels = await emcu.read_json_resource("label-options.default.json")
value_to_basemode = {mode["value"]: mode.get("base_mode", mode.get("baseMode", "UNKNOWN")) for mode in labels["MODE"]}

counter_trip_error = 0
for trip in trip_list:
#format so emffc can get id for metadata
trip['data']['_id'] = trip['_id']
Expand All @@ -75,13 +77,14 @@ async def add_base_mode_footprint(trip_list):
trip['data']['replaced_base_mode'] = "UNKNOWN"
trip['data']['replaced_mode_footprint'] = {}

except:
print("hit exception")
except Exception as e:
counter_trip_error = counter_trip_error + 1
logging.debug(f"The exception is : {e} for the trip - {trip['data']['_id']}")
iantei marked this conversation as resolved.
Show resolved Hide resolved
trip['data']['base_mode'] = "UNKNOWN"
trip['data']['replaced_base_mode'] = "UNKNOWN"
trip['data']['mode_confirm_footprint'] = {}
trip['data']['replaced_mode_footprint'] = {}
logging.debug(f"There are {counter_trip_error} trip errors")
Copy link
Contributor

@shankari shankari Jan 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. As a future cleanup, I think we should report these errors in some place other than logs @Abby-Wheelis @iantei so we don't have to scrape logs to figure out if there is something we need to fix.

Maybe in the server stats, or even displayed in the generated chart if we can figure out a way to do so without confusing people too much.

return trip_list

async def load_all_confirmed_trips(tq, add_footprint):
Expand Down Expand Up @@ -254,7 +257,7 @@ async def map_trip_data(expanded_trip_df, study_type, dynamic_labels):

return expanded_trip_df

async def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, include_test_users=False):
async def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, include_test_users=False, add_footprint=False):
shankari marked this conversation as resolved.
Show resolved Hide resolved
""" Inputs:
year/month/program/study_type = parameters from the visualization notebook
dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose
Expand All @@ -263,7 +266,7 @@ async def load_viz_notebook_inferred_data(year, month, program, study_type, dyna
"""
# Access database
tq = get_time_query(year, month)
participant_ct_df = await load_all_participant_trips(program, tq, include_test_users)
participant_ct_df = await load_all_participant_trips(program, tq, include_test_users, add_footprint)
inferred_ct = filter_inferred_trips(participant_ct_df)
expanded_it = expand_inferredlabels(inferred_ct)
expanded_it = await map_trip_data(expanded_it, study_type, dynamic_labels)
Expand Down Expand Up @@ -501,29 +504,21 @@ def unit_conversions(df):
df['distance_miles']= df["distance"]*0.00062 #meters to miles
df['distance_kms'] = df["distance"] / 1000 #meters to kms

def extract_kwh(footprint_dict):
if 'kwh' in footprint_dict.keys():
return footprint_dict['kwh']
else:
print("missing kwh", footprint_dict)
return np.nan

def extract_co2(footprint_dict):
if 'kg_co2' in footprint_dict.keys():
return footprint_dict['kg_co2']
def extract_footprint(footprint_dict, footprint_key):
if footprint_key in footprint_dict.keys():
return footprint_dict[footprint_key]
shankari marked this conversation as resolved.
Show resolved Hide resolved
else:
print("missing co2", footprint_dict)
return np.nan

def unpack_energy_emissions(expanded_ct):
expanded_ct['Mode_confirm_kg_CO2'] = expanded_ct['mode_confirm_footprint'].apply(extract_co2)
expanded_ct['Mode_confirm_kg_CO2'] = expanded_ct['mode_confirm_footprint'].apply(extract_footprint, footprint_key='kg_co2')
expanded_ct['Mode_confirm_lb_CO2'] = kg_to_lb(expanded_ct['Mode_confirm_kg_CO2'])
expanded_ct['Replaced_mode_kg_CO2'] = expanded_ct['replaced_mode_footprint'].apply(extract_co2)
expanded_ct['Replaced_mode_kg_CO2'] = expanded_ct['replaced_mode_footprint'].apply(extract_footprint, footprint_key='kg_co2')
expanded_ct['Replaced_mode_lb_CO2'] = kg_to_lb(expanded_ct['Replaced_mode_kg_CO2'])
CO2_impact(expanded_ct)

expanded_ct['Replaced_mode_EI(kWH)'] = expanded_ct['replaced_mode_footprint'].apply(extract_kwh)
expanded_ct['Mode_confirm_EI(kWH)'] = expanded_ct['mode_confirm_footprint'].apply(extract_kwh)
expanded_ct['Replaced_mode_EI(kWH)'] = expanded_ct['replaced_mode_footprint'].apply(extract_footprint, footprint_key='kwh')
expanded_ct['Mode_confirm_EI(kWH)'] = expanded_ct['mode_confirm_footprint'].apply(extract_footprint, footprint_key='kwh')
energy_impact(expanded_ct)

return expanded_ct
Expand Down