From 3ec13b75f2f96cb0ee1c0d9167a3dc27503eca26 Mon Sep 17 00:00:00 2001 From: iantei Date: Fri, 16 Aug 2024 12:12:23 -0700 Subject: [PATCH 01/22] 1. Introduce load_viz_notebook_inferred_data(), filter_inferred_trips() and expand_inferredlabels() for processsing, filtering and expanding inferred labels. 2. map_trip_data() to extract the mapping functionality. --- viz_scripts/scaffolding.py | 99 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index e2abc57..e7e73a4 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -78,6 +78,15 @@ def filter_labeled_trips(mixed_trip_df): disp.display(labeled_ct.head()) return labeled_ct +def filter_inferred_trips(mixed_trip_df): + # CASE 1 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 + if len(mixed_trip_df) == 0: + return mixed_trip_df + inferred_ct = mixed_trip_df[mixed_trip_df['inferred_labels'].apply(lambda x: bool(x))] + print("After filtering, found %s inferred trips" % len(inferred_ct)) + disp.display(inferred_ct.head()) + return inferred_ct + def expand_userinputs(labeled_ct): ''' param: labeled_ct: a dataframe of confirmed trips, some of which have labels @@ -105,6 +114,27 @@ def expand_userinputs(labeled_ct): disp.display(expanded_ct.head()) return expanded_ct +def expand_inferredlabels(inferred_ct): + if len(inferred_ct) == 0: + return inferred_ct + + max_labels_list = [] + max_p_list = [] + + for item in inferred_ct.inferred_labels: + max_entry = max(item, key=lambda x: x['p']) + max_labels_list.append(max_entry['labels']) + max_p_list.append(max_entry['p']) + + inferred_only_labels = pd.DataFrame(max_labels_list, index=inferred_ct.index) + disp.display(inferred_only_labels) + inferred_only_p = pd.DataFrame(max_p_list, index=inferred_ct.index, columns=['p']) + disp.display(inferred_only_p) + expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels, 
inferred_only_p], axis=1) + expanded_inferred_ct.reset_index(drop=True, inplace=True) + disp.display(expanded_inferred_ct.head()) + return expanded_inferred_ct + # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 unique_users = lambda df: len(df.user_id.unique()) if "user_id" in df.columns else 0 trip_label_count = lambda s, df: len(df[s].dropna()) if s in df.columns else 0 @@ -176,6 +206,75 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic return expanded_ct, file_suffix, quality_text, debug_df +def map_trip_data(df, study_type, dynamic_labels, dic_re, dic_pur): + # Change meters to miles + # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 + if "distance" in df.columns: + unit_conversions(df) + + # Map new mode labels with translations dictionary from dynamic_labels + # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 + if "mode_confirm" in df.columns: + if (len(dynamic_labels)): + dic_mode_mapping = mapping_labels(dynamic_labels, "MODE") + df['Mode_confirm'] = df['mode_confirm'].map(dic_mode_mapping) + else: + df['Mode_confirm'] = df['mode_confirm'].map(dic_re) + if study_type == 'program': + # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 + if 'replaced_mode' in df.columns: + if (len(dynamic_labels)): + dic_replaced_mapping = mapping_labels(dynamic_labels, "REPLACED_MODE") + df['Replaced_mode'] = df['replaced_mode'].map(dic_replaced_mapping) + else: + df['Replaced_mode'] = df['replaced_mode'].map(dic_re) + else: + print("This is a program, but no replaced modes found. Likely cold start case. 
Ignoring replaced mode mapping") + else: + print("This is a study, not expecting any replaced modes.") + + # Trip purpose mapping + # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 + if dic_pur is not None and "purpose_confirm" in df.columns: + if (len(dynamic_labels)): + dic_purpose_mapping = mapping_labels(dynamic_labels, "PURPOSE") + df['Trip_purpose'] = df['purpose_confirm'].map(dic_purpose_mapping) + else: + df['Trip_purpose'] = df['purpose_confirm'].map(dic_pur) + return df + +def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False): + """ Inputs: + year/month/program/study_type = parameters from the visualization notebook + dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose + + Pipeline to load and process the data before use in visualization notebooks. + """ + # Access database + tq = get_time_query(year, month) + participant_ct_df = load_all_participant_trips(program, tq, include_test_users) + inferred_ct = filter_inferred_trips(participant_ct_df) + expanded_it = expand_inferredlabels(inferred_ct) + expanded_it = map_trip_data(expanded_it, study_type, dynamic_labels, dic_re, dic_pur) + + # Document data quality + file_suffix = get_file_suffix(year, month, program) + quality_text = get_quality_text(participant_ct_df, expanded_it, None, include_test_users) + + debug_df = pd.DataFrame.from_dict({ + "year": year, + "month": month, + "Registered_participants": len(get_participant_uuids(program, include_test_users)), + "Participants_with_at_least_one_trip": unique_users(participant_ct_df), + "Participant_with_at_least_one_inferred_trip": unique_users(inferred_ct), + "Trips_with_at_least_one_inferred_label": len(inferred_ct), + "Trips_with_mode_confirm_inferred_label": trip_label_count("Mode_confirm", expanded_it), + "Trips_with_trip_purpose_inferred_label": trip_label_count("Trip_purpose", expanded_it) + }, 
+ orient='index', columns=["value"]) + + return expanded_it, file_suffix, quality_text, debug_df + # Function to map the "MODE", "REPLACED_MODE", "PURPOSE" to respective en-translations # Input: dynamic_labels, label_type: MODE, REPLACED_MODE, PURPOSE # Return: Dictionary mapping between the label type and its english translation. From 9897c76ca43127d526ebd2a97faa061f45dd4155 Mon Sep 17 00:00:00 2001 From: iantei Date: Fri, 16 Aug 2024 12:37:47 -0700 Subject: [PATCH 02/22] Utilize map_trip_data() for common trip mapping functionality in load_viz_notebook_data() for refactor. --- viz_scripts/scaffolding.py | 36 +----------------------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index e7e73a4..ef2200d 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -152,41 +152,7 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic labeled_ct = filter_labeled_trips(participant_ct_df) expanded_ct = expand_userinputs(labeled_ct) expanded_ct = data_quality_check(expanded_ct) - - # Change meters to miles - # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if "distance" in expanded_ct.columns: - unit_conversions(expanded_ct) - - # Map new mode labels with translations dictionary from dynamic_labels - # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if "mode_confirm" in expanded_ct.columns: - if (len(dynamic_labels)): - dic_mode_mapping = mapping_labels(dynamic_labels, "MODE") - expanded_ct['Mode_confirm'] = expanded_ct['mode_confirm'].map(dic_mode_mapping) - else: - expanded_ct['Mode_confirm'] = expanded_ct['mode_confirm'].map(dic_re) - if study_type == 'program': - # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if 'replaced_mode' in expanded_ct.columns: - if (len(dynamic_labels)): - dic_replaced_mapping 
= mapping_labels(dynamic_labels, "REPLACED_MODE") - expanded_ct['Replaced_mode'] = expanded_ct['replaced_mode'].map(dic_replaced_mapping) - else: - expanded_ct['Replaced_mode'] = expanded_ct['replaced_mode'].map(dic_re) - else: - print("This is a program, but no replaced modes found. Likely cold start case. Ignoring replaced mode mapping") - else: - print("This is a study, not expecting any replaced modes.") - - # Trip purpose mapping - # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if dic_pur is not None and "purpose_confirm" in expanded_ct.columns: - if (len(dynamic_labels)): - dic_purpose_mapping = mapping_labels(dynamic_labels, "PURPOSE") - expanded_ct['Trip_purpose'] = expanded_ct['purpose_confirm'].map(dic_purpose_mapping) - else: - expanded_ct['Trip_purpose'] = expanded_ct['purpose_confirm'].map(dic_pur) + expanded_ct = map_trip_data(expanded_ct, study_type, dynamic_labels, dic_re, dic_pur) # Document data quality file_suffix = get_file_suffix(year, month, program) From b4d704b9281139e9be6c75d83a981e6935df583f Mon Sep 17 00:00:00 2001 From: iantei Date: Sun, 18 Aug 2024 13:41:43 -0700 Subject: [PATCH 03/22] Add load_viz_notebook_inferred_data() function for inferred metrics, and incorporate inferred label for Distribution of modes. 
--- viz_scripts/generic_metrics.ipynb | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index 6ced8fc..21bee81 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -140,6 +140,23 @@ " sensed_algo_prefix)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c26ff5f5", + "metadata": {}, + "outputs": [], + "source": [ + "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = scaffolding.load_viz_notebook_inferred_data(year,\n", + " month,\n", + " program,\n", + " study_type,\n", + " dynamic_labels,\n", + " dic_re,\n", + " dic_pur=dic_pur,\n", + " include_test_users=include_test_users)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -171,9 +188,13 @@ "labeled_match = re.match(r'Based on ([0-9]+) confirmed trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', quality_text)\n", "# labeled_match\n", "stacked_bar_quality_text_labeled = f\"{labeled_match.group(1)} trips {labeled_match.group(7)}\\n from {labeled_match.group(2)} {labeled_match.group(3)}\"\n", + "\n", + "inferred_match = re.match(r'Based on ([0-9]+) confirmed trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', quality_text_inferred)\n", + "stacked_bar_quality_text_inferred = f\"{inferred_match.group(1)} trips {inferred_match.group(7)}\\n from {inferred_match.group(2)} {inferred_match.group(3)}\"\n", + "\n", "sensed_match = re.match(r'Based on ([0-9]+) trips from ([0-9]+) (users|testers and participants)', quality_text_sensed)\n", "stacked_bar_quality_text_sensed = f\"{sensed_match.group(1)} trips (100%)\\n from {sensed_match.group(2)} {sensed_match.group(3)}\"\n", - "stacked_bar_quality_text_labeled, 
stacked_bar_quality_text_sensed" + "stacked_bar_quality_text_labeled, stacked_bar_quality_text_sensed, stacked_bar_quality_text_inferred" ] }, { @@ -203,14 +224,16 @@ "plot_title_no_quality= \"Number of trips for each mode\"\n", "\n", "try:\n", - " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", + " fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n", " # We will have text results corresponding to the axes for simplicity and consistency\n", - " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", + " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " \n", " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", + " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, debug_df_inferred)\n", " \n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", From add0e507307f86fabc97b61fce71a86cee8ff3ea Mon Sep 17 00:00:00 2001 From: iantei Date: Mon, 19 Aug 2024 10:13:56 -0700 Subject: [PATCH 04/22] 1. 
Introduce markdown for collecting data from database for Inferred Metrics 2. Update quality_text, fig, ax, text_results and introduce new plot_and_text_stacked_bar_chart() for all Stacked Bar Charts to represent inferred labels bar in generic_metrics notebook --- viz_scripts/generic_metrics.ipynb | 53 +++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index 21bee81..5302422 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -140,6 +140,14 @@ " sensed_algo_prefix)" ] }, + { + "cell_type": "markdown", + "id": "325e5eda", + "metadata": {}, + "source": [ + "## Collect Data from Database for Inferred Metrics" + ] + }, { "cell_type": "code", "execution_count": null, @@ -189,11 +197,12 @@ "# labeled_match\n", "stacked_bar_quality_text_labeled = f\"{labeled_match.group(1)} trips {labeled_match.group(7)}\\n from {labeled_match.group(2)} {labeled_match.group(3)}\"\n", "\n", + "sensed_match = re.match(r'Based on ([0-9]+) trips from ([0-9]+) (users|testers and participants)', quality_text_sensed)\n", + "stacked_bar_quality_text_sensed = f\"{sensed_match.group(1)} trips (100%)\\n from {sensed_match.group(2)} {sensed_match.group(3)}\"\n", + "\n", "inferred_match = re.match(r'Based on ([0-9]+) confirmed trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', quality_text_inferred)\n", "stacked_bar_quality_text_inferred = f\"{inferred_match.group(1)} trips {inferred_match.group(7)}\\n from {inferred_match.group(2)} {inferred_match.group(3)}\"\n", "\n", - "sensed_match = re.match(r'Based on ([0-9]+) trips from ([0-9]+) (users|testers and participants)', quality_text_sensed)\n", - "stacked_bar_quality_text_sensed = f\"{sensed_match.group(1)} trips (100%)\\n from {sensed_match.group(2)} {sensed_match.group(3)}\"\n", 
"stacked_bar_quality_text_labeled, stacked_bar_quality_text_sensed, stacked_bar_quality_text_inferred" ] }, @@ -276,13 +285,17 @@ "\n", " expanded_ct_commute = expanded_ct.query(trip_purpose_query)\n", " commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct_commute, \"commute\", include_test_users) if not expanded_ct.empty else \"\"\n", + " expanded_ct_inferred_commute = expanded_ct_inferred.query(trip_purpose_query)\n", + " commute_quality_text_inferred = scaffolding.get_quality_text(expanded_ct_inferred, expanded_ct_inferred_commute, \"commute\", include_test_users) if not expanded_ct_inferred.empty else \"\"\n", " plot_title = plot_title_no_quality + \"\\n\" + commute_quality_text\n", " \n", " # Plot entries\n", - " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True) \n", - " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", + " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True) \n", + " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n (Confirmed trips)\", ax, text_results, colors_mode, debug_df)\n", + " \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_inferred_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " \"Inferred by OpenPATH\\n (Confirmed trips)\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -312,10 +325,12 @@ "plot_title_no_quality=\"Number of trips for each 
purpose\"\n", "file_name= f\"ntrips_purpose{file_suffix}\"\n", "try:\n", - " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", - " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", + " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", + " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax, text_results, colors_purpose, debug_df)\n", + " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_purpose, debug_df)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -359,17 +374,21 @@ " ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n", " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n", " expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct.columns else None\n", + " expanded_ct_inferred_u80 = expanded_ct_inferred.loc[(expanded_ct_inferred['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct_inferred.columns else None\n", " expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]\n", " sensed_u80_quality_text = 
f\"{len(expanded_ct_sensed_u80)} trips ({round(len(expanded_ct_sensed_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_sensed_u80)} {sensed_match.group(3)}\"\n", " labeled_u80_quality_text = f\"{len(expanded_ct_u80)} trips ({round(len(expanded_ct_u80)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(expanded_ct_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n", + " inferred_u80_quality_text = f\"{len(expanded_ct_inferred_u80)} trips ({round(len(expanded_ct_inferred_u80)/len(expanded_ct_inferred)*100)}% of all inferred,\\n{round(len(expanded_ct_inferred_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_inferred_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct_inferred.columns else \"0 inferred trips\"\n", " \n", " # Plot entries\n", - " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", - " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", + " fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n", + " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, 
debug_df_sensed)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_inferred_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " \"Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " # we can have an missing attribute error during the pre-procssing, in which case we should show the missing plot\n", @@ -403,13 +422,15 @@ "file_name =f\"total_trip_length{file_suffix}\"\n", "\n", "try:\n", - " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", + " fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n", " \n", - " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", + " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", + " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, 
debug_df_inferred)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -443,16 +464,20 @@ " ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n", " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n", " labeled_land_trips_df = expanded_ct[expanded_ct['Mode_confirm'] != \"Airplane\"] if \"Mode_confirm\" in expanded_ct.columns else None\n", + " inferred_land_trips_df = expanded_ct_inferred[expanded_ct_inferred['Mode_confirm'] != \"Airplane\"] if \"Mode_confirm\" in expanded_ct_inferred.columns else None\n", " sensed_land_trips_df = expanded_ct_sensed[expanded_ct_sensed['primary_mode'] != \"AIR_OR_HSR\"]\n", " \n", " sensed_land_quality_text = f\"{len(sensed_land_trips_df)} trips ({round(len(sensed_land_trips_df)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(sensed_land_trips_df)} {sensed_match.group(3)}\"\n", " labeled_land_quality_text = f\"{len(labeled_land_trips_df)} trips ({round(len(labeled_land_trips_df)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(labeled_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(labeled_land_trips_df)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n", - "\n", - " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", + " inferred_land_quality_text = f\"{len(inferred_land_trips_df)} trips ({round(len(inferred_land_trips_df)/len(expanded_ct_inferred)*100)}% of all inferred,\\n{round(len(inferred_land_trips_df)/len(expanded_ct_sensed)*100)}%) of all trips)\\nfrom {scaffolding.unique_users(inferred_land_trips_df)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct_inferred.columns else \"0 inferred trips\"\n", 
+ " \n", + " fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n", " plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", + " plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", + " \"Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", From 5d0546b569c44cdca1fdb079ae75a1b2d5ff5771 Mon Sep 17 00:00:00 2001 From: iantei Date: Mon, 19 Aug 2024 11:05:54 -0700 Subject: [PATCH 05/22] 1. Add load_viz_notebook_inferred_data() to collect data from db 2. Add query for mode_of_interest for inferred labels 3. Update fig, ax, text_results, plot_and_text_stacked_bar_chart() for all Stacked Bar Charts. 
--- viz_scripts/mode_specific_metrics.ipynb | 81 +++++++++++++++++++++---- 1 file changed, 70 insertions(+), 11 deletions(-) diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb index 0e2e63d..e2fbb76 100644 --- a/viz_scripts/mode_specific_metrics.ipynb +++ b/viz_scripts/mode_specific_metrics.ipynb @@ -131,6 +131,31 @@ " include_test_users=include_test_users)" ] }, + { + "cell_type": "markdown", + "id": "dbc2bb22", + "metadata": {}, + "source": [ + "## Collect Data From Database for Inferred Metrics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8bd1755", + "metadata": {}, + "outputs": [], + "source": [ + "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = scaffolding.load_viz_notebook_inferred_data(year,\n", + " month,\n", + " program,\n", + " study_type,\n", + " dynamic_labels,\n", + " dic_re,\n", + " dic_pur=dic_pur,\n", + " include_test_users=include_test_users)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -172,6 +197,34 @@ "quality_text = scaffolding.get_quality_text(expanded_ct, data_eb, mode_of_interest, include_test_users)" ] }, + { + "cell_type": "markdown", + "id": "832337a6", + "metadata": {}, + "source": [ + "## Metrics for Specific Inferred Mode\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bed648bc", + "metadata": {}, + "outputs": [], + "source": [ + "data_eb_inferred = expanded_ct_inferred.query(f\"mode_confirm == '{mode_of_interest}'\") if \"mode_confirm\" in expanded_ct_inferred.columns else expanded_ct_inferred" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54fcaff2", + "metadata": {}, + "outputs": [], + "source": [ + "quality_text_inferred = scaffolding.get_quality_text(expanded_ct_inferred, data_eb_inferred, mode_of_interest, include_test_users)" + ] + }, { "cell_type": "markdown", "id": "loaded-expert", @@ -191,10 +244,12 @@ "file_name= 
f\"ntrips_{mode_of_interest}_purpose{file_suffix}\"\n", "\n", "try:\n", - " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", - " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", + " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", + " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", - " f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_purpose, debug_df)\n", + " f\"Labeled `{mode_of_interest}` by user\", ax[0], text_results[0], colors_purpose, debug_df)\n", + " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", + " f\"Inferred `{mode_of_interest}` by OpenPATH\", ax[1], text_results[1], colors_purpose, debug_df_inferred)\n", " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", @@ -226,12 +281,14 @@ "file_name = f\"total_trip_length_{mode_of_interest}_replaced_mode{file_suffix}\"\n", "\n", "try:\n", - " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", - " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", + " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", + " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n (Trip distance)\", ax, 
text_results, colors_mode, debug_df)\n", - " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", - " set_title_and_save(fig, text_results, plot_title, file_name)\n", + " \"Labeled by user\\n (Trip distance)\", ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", + " \"Inferred by OpenPATH\\n (Trip distance)\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n", + "# plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", + "# set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n", @@ -261,10 +318,12 @@ "file_name = f'ntrips_{mode_of_interest}_total{file_suffix}'\n", "\n", "try:\n", - " fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,2*1), sharex=True)\n", - " text_results = [\"Unmodified Alt Text\", \"Unmodified HTML\"]\n", + " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", + " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " f\"Labeled `{mode_of_interest}` by user\", ax, text_results, colors_mode, debug_df)\n", + " f\"Labeled `{mode_of_interest}` by user\", ax[0], text_results[0], colors_mode, debug_df)\n", + " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " f\"Inferred `{mode_of_interest}` by OpenPATH\", ax[1], text_results[1], colors_mode, 
debug_df_inferred)\n", " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", From 9998bee96683ac63bd7acc16ce2430bfcf25a136 Mon Sep 17 00:00:00 2001 From: iantei Date: Mon, 19 Aug 2024 11:07:09 -0700 Subject: [PATCH 06/22] Uncomment plot_title() and set_title_save() for total trip length in mode_specific_metrics notebook --- viz_scripts/mode_specific_metrics.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb index e2fbb76..725d211 100644 --- a/viz_scripts/mode_specific_metrics.ipynb +++ b/viz_scripts/mode_specific_metrics.ipynb @@ -287,8 +287,8 @@ " \"Labeled by user\\n (Trip distance)\", ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Inferred by OpenPATH\\n (Trip distance)\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n", - "# plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", - "# set_title_and_save(fig, text_results, plot_title, file_name)\n", + " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", + " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", " generate_missing_plot(plot_title_no_quality, debug_df, file_name)\n", From 043771078c5c5e2d14b6daf043c9fe91d0f36907 Mon Sep 17 00:00:00 2001 From: iantei Date: Mon, 19 Aug 2024 15:45:19 -0700 Subject: [PATCH 07/22] 1. Add commute_labeled/inferred_match regex, stacked_bar_quality_text_ 2. 
Update plot_and_text_stacked_bar_chart() for Distribution of modes in commute trips --- viz_scripts/generic_metrics.ipynb | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index 5302422..f596015 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -287,15 +287,21 @@ " commute_quality_text = scaffolding.get_quality_text(expanded_ct, expanded_ct_commute, \"commute\", include_test_users) if not expanded_ct.empty else \"\"\n", " expanded_ct_inferred_commute = expanded_ct_inferred.query(trip_purpose_query)\n", " commute_quality_text_inferred = scaffolding.get_quality_text(expanded_ct_inferred, expanded_ct_inferred_commute, \"commute\", include_test_users) if not expanded_ct_inferred.empty else \"\"\n", - " plot_title = plot_title_no_quality + \"\\n\" + commute_quality_text\n", - " \n", + " plot_title = plot_title_no_quality\n", + "\n", + " commute_labeled_match = re.match(r'Based on ([0-9]+) confirmed commute trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total confirmed trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', commute_quality_text)\n", + " stacked_bar_quality_text_commute_labeled = f\"{commute_labeled_match.group(1)} trips {commute_labeled_match.group(7)}\\n from {commute_labeled_match.group(2)} {commute_labeled_match.group(3)}\"\n", + "\n", + " commute_inferred_match = re.match(r'Based on ([0-9]+) confirmed commute trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total confirmed trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))', commute_quality_text_inferred)\n", + " stacked_bar_quality_text_commute_inferred = f\"{commute_inferred_match.group(1)} trips {commute_inferred_match.group(7)}\\n from {commute_inferred_match.group(2)} {commute_inferred_match.group(3)}\"\n", + "\n", " # Plot entries\n", " fig, ax = plt.subplots(nrows=2, 
ncols=1, figsize=(15,2*2), sharex=True) \n", " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n (Confirmed trips)\", ax[0], text_results[0], colors_mode, debug_df)\n", + " \"Labeled by user\\n\"+stacked_bar_quality_text_commute_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Inferred by OpenPATH\\n (Confirmed trips)\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n", + " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", From d098c18bfd726b31b6ea68c7394b55f265d1900e Mon Sep 17 00:00:00 2001 From: iantei Date: Mon, 19 Aug 2024 16:21:15 -0700 Subject: [PATCH 08/22] 1. Introduce regex to extract labeled_match and inferred_match 2. Use stacked_bar_quality_text and stacked_bar_quality_text_inferred with plot_and_text_stacked_bar_chart() 3. 
Adjust plot_title to plot_title_no_quality --- viz_scripts/mode_specific_metrics.ipynb | 44 ++++++++++++++++++++----- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb index 725d211..ce535a1 100644 --- a/viz_scripts/mode_specific_metrics.ipynb +++ b/viz_scripts/mode_specific_metrics.ipynb @@ -225,6 +225,32 @@ "quality_text_inferred = scaffolding.get_quality_text(expanded_ct_inferred, data_eb_inferred, mode_of_interest, include_test_users)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "044773bc", + "metadata": {}, + "outputs": [], + "source": [ + "quality_text, quality_text_inferred" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "139b4060", + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "labeled_match = re.match(r'Based on ([0-9]+) confirmed {} trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total confirmed trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))'.format(mode_of_interest), quality_text)\n", + "stacked_bar_quality_text = f\"{labeled_match.group(1)} trips {labeled_match.group(7)}\\n from {labeled_match.group(2)} {labeled_match.group(3)}\"\n", + "inferred_match =re.match(r'Based on ([0-9]+) confirmed {} trips from ([0-9]+) (users|testers and participants)\\nof ([0-9]+) total confirmed trips from ([0-9]+) (users|testers and participants) (\\(([0-9.]+|nan)%\\))'.format(mode_of_interest), quality_text_inferred)\n", + "stacked_bar_quality_text_inferred = f\"{inferred_match.group(1)} trips {inferred_match.group(7)}\\n from {inferred_match.group(2)} {inferred_match.group(3)}\"\n", + "\n", + "stacked_bar_quality_text, stacked_bar_quality_text_inferred" + ] + }, { "cell_type": "markdown", "id": "loaded-expert", @@ -247,10 +273,10 @@ " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", " text_results = [[\"Unmodified Alt Text\", 
\"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", - " f\"Labeled `{mode_of_interest}` by user\", ax[0], text_results[0], colors_purpose, debug_df)\n", + " f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_purpose, debug_df)\n", " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", - " f\"Inferred `{mode_of_interest}` by OpenPATH\", ax[1], text_results[1], colors_purpose, debug_df_inferred)\n", - " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", + " f\"Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n", + " plot_title = plot_title_no_quality\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -284,10 +310,10 @@ " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Labeled by user\\n (Trip distance)\", ax[0], text_results[0], colors_mode, debug_df)\n", + " \"Labeled by user\\n (Trip distance)\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Inferred by 
OpenPATH\\n (Trip distance)\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n", - " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", + " \"Inferred by OpenPATH\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", + " plot_title = plot_title_no_quality\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -321,10 +347,10 @@ " fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15,2*2), sharex=True)\n", " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " f\"Labeled `{mode_of_interest}` by user\", ax[0], text_results[0], colors_mode, debug_df)\n", + " f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " f\"Inferred `{mode_of_interest}` by OpenPATH\", ax[1], text_results[1], colors_mode, debug_df_inferred)\n", - " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", + " f\"Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", + " plot_title = plot_title_no_quality\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", From 704450abb4b830b243632960f7515d9e53d74e29 Mon Sep 17 00:00:00 2001 From: iantei Date: Mon, 19 Aug 2024 
16:49:38 -0700 Subject: [PATCH 09/22] Update expanded_ct_inferred_u80 to use expanded_ct_inferred instead of expanded_ct --- viz_scripts/generic_metrics.ipynb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index f596015..42f1397 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -380,12 +380,13 @@ " ## We do an existence check for the labeled df because we want to display the sensed value even if we don't have the labeled value\n", " ## but we don't need to have an existence check for sensed because in that case we will have no data to display\n", " expanded_ct_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct.columns else None\n", - " expanded_ct_inferred_u80 = expanded_ct.loc[(expanded_ct['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct.columns else None\n", + " expanded_ct_inferred_u80 = expanded_ct_inferred.loc[(expanded_ct_inferred['distance'] <= cutoff)] if \"Mode_confirm\" in expanded_ct_inferred.columns else None\n", " expanded_ct_sensed_u80 = expanded_ct_sensed.loc[(expanded_ct_sensed['distance'] <= cutoff)]\n", + "\n", " sensed_u80_quality_text = f\"{len(expanded_ct_sensed_u80)} trips ({round(len(expanded_ct_sensed_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_sensed_u80)} {sensed_match.group(3)}\"\n", " labeled_u80_quality_text = f\"{len(expanded_ct_u80)} trips ({round(len(expanded_ct_u80)/len(expanded_ct)*100)}% of all labeled,\\n{round(len(expanded_ct_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct.columns else \"0 labeled trips\"\n", " inferred_u80_quality_text = f\"{len(expanded_ct_inferred_u80)} trips ({round(len(expanded_ct_inferred_u80)/len(expanded_ct_inferred)*100)}% of all 
inferred,\\n{round(len(expanded_ct_inferred_u80)/len(expanded_ct_sensed)*100)}% of all trips)\\nfrom {scaffolding.unique_users(expanded_ct_inferred_u80)} {sensed_match.group(3)}\" if \"Mode_confirm\" in expanded_ct_inferred.columns else \"0 inferred trips\"\n", - " \n", + "\n", " # Plot entries\n", " fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n", " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", From 49b2da455f47625505c57d38ef27687e0029d11e Mon Sep 17 00:00:00 2001 From: iantei Date: Wed, 21 Aug 2024 09:54:38 -0700 Subject: [PATCH 10/22] Update in map_trip_data() param name from df to expanded_trip_df --- viz_scripts/scaffolding.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index ef2200d..aa2735c 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -172,28 +172,28 @@ def load_viz_notebook_data(year, month, program, study_type, dynamic_labels, dic return expanded_ct, file_suffix, quality_text, debug_df -def map_trip_data(df, study_type, dynamic_labels, dic_re, dic_pur): +def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur): # Change meters to miles # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if "distance" in df.columns: - unit_conversions(df) + if "distance" in expanded_trip_df.columns: + unit_conversions(expanded_trip_df) # Map new mode labels with translations dictionary from dynamic_labels # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if "mode_confirm" in df.columns: + if "mode_confirm" in expanded_trip_df.columns: if (len(dynamic_labels)): dic_mode_mapping = mapping_labels(dynamic_labels, "MODE") - df['Mode_confirm'] = df['mode_confirm'].map(dic_mode_mapping) + 
expanded_trip_df['Mode_confirm'] = expanded_trip_df['mode_confirm'].map(dic_mode_mapping) else: - df['Mode_confirm'] = df['mode_confirm'].map(dic_re) + expanded_trip_df['Mode_confirm'] = expanded_trip_df['mode_confirm'].map(dic_re) if study_type == 'program': # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if 'replaced_mode' in df.columns: + if 'replaced_mode' in expanded_trip_df.columns: if (len(dynamic_labels)): dic_replaced_mapping = mapping_labels(dynamic_labels, "REPLACED_MODE") - df['Replaced_mode'] = df['replaced_mode'].map(dic_replaced_mapping) + expanded_trip_df['Replaced_mode'] = expanded_trip_df['replaced_mode'].map(dic_replaced_mapping) else: - df['Replaced_mode'] = df['replaced_mode'].map(dic_re) + expanded_trip_df['Replaced_mode'] = expanded_trip_df['replaced_mode'].map(dic_re) else: print("This is a program, but no replaced modes found. Likely cold start case. Ignoring replaced mode mapping") else: @@ -201,13 +201,13 @@ def map_trip_data(df, study_type, dynamic_labels, dic_re, dic_pur): # Trip purpose mapping # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 - if dic_pur is not None and "purpose_confirm" in df.columns: + if dic_pur is not None and "purpose_confirm" in expanded_trip_df.columns: if (len(dynamic_labels)): dic_purpose_mapping = mapping_labels(dynamic_labels, "PURPOSE") - df['Trip_purpose'] = df['purpose_confirm'].map(dic_purpose_mapping) + expanded_trip_df['Trip_purpose'] = expanded_trip_df['purpose_confirm'].map(dic_purpose_mapping) else: - df['Trip_purpose'] = df['purpose_confirm'].map(dic_pur) - return df + expanded_trip_df['Trip_purpose'] = expanded_trip_df['purpose_confirm'].map(dic_pur) + return expanded_trip_df def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False): """ Inputs: From 62c3af07c3e1fa4c4d9f5b21f781b5f3998775f2 Mon Sep 17 00:00:00 2001 From: iantei 
Date: Fri, 13 Sep 2024 10:24:29 -0700 Subject: [PATCH 11/22] Re-order Inferred Trip Stacked Charts above Sensed. --- viz_scripts/generic_metrics.ipynb | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index 42f1397..e33db0d 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -239,11 +239,10 @@ " \n", " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", - " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", - " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, debug_df_inferred)\n", - " \n", + " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", + " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -392,10 +391,10 @@ " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified 
HTML\"]]\n", " plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", - " plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", + " \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " # we can have an missing attribute error during the pre-procssing, in which case we should show the missing plot\n", @@ -434,10 +433,10 @@ " text_results = [[\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"], [\"Unmodified Alt Text\", \"Unmodified HTML\"]]\n", " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", - " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: 
df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, debug_df_inferred)\n", + " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", + " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -481,10 +480,10 @@ " fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(15,3*2), sharex=True)\n", " plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", - " plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n", + " 
plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", + " \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", From 007f9bf1e2c87542ea728fcf5c881ee1ea23d73b Mon Sep 17 00:00:00 2001 From: iantei Date: Fri, 13 Sep 2024 10:52:54 -0700 Subject: [PATCH 12/22] Update the index of axis and text_results for sensed and labeled trip stacked bars. --- viz_scripts/generic_metrics.ipynb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index e33db0d..198321e 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -240,9 +240,9 @@ " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", - " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, debug_df_inferred)\n", + " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", - " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], 
colors_sensed, debug_df_sensed)\n", + " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -392,9 +392,9 @@ " plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n", + " \"Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", + " \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " # we can have an missing attribute error during the pre-procssing, in which case we should show the missing plot\n", @@ -434,9 +434,9 @@ " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], 
colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[2], text_results[2], colors_mode, debug_df_inferred)\n", + " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", + " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -481,9 +481,9 @@ " plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[2], text_results[2], colors_mode, debug_df_inferred)\n", + " \"Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, 
ax[1], text_results[1], colors_sensed, debug_df_sensed)\n", + " \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", From 260e8ad11909b44bf52ab5fd5369a1452ee37e91 Mon Sep 17 00:00:00 2001 From: iantei Date: Mon, 16 Sep 2024 22:22:22 -0700 Subject: [PATCH 13/22] Update expand_inferredlabels(). Iterate over the inferred_ct to see if there is user_input or not. If there is user_input, chose it over inferred_labels. --- viz_scripts/scaffolding.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index aa2735c..11689ad 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -82,6 +82,7 @@ def filter_inferred_trips(mixed_trip_df): # CASE 1 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 if len(mixed_trip_df) == 0: return mixed_trip_df + # Identify which trips contains inferred_labels inferred_ct = mixed_trip_df[mixed_trip_df['inferred_labels'].apply(lambda x: bool(x))] print("After filtering, found %s inferred trips" % len(inferred_ct)) disp.display(inferred_ct.head()) @@ -119,18 +120,18 @@ def expand_inferredlabels(inferred_ct): return inferred_ct max_labels_list = [] - max_p_list = [] - - for item in inferred_ct.inferred_labels: - max_entry = max(item, key=lambda x: x['p']) - max_labels_list.append(max_entry['labels']) - max_p_list.append(max_entry['p']) + for _, row in inferred_ct.iterrows(): + # In the trip, prioritize availabilty of user_input over inferred_labels for label selection + if row.user_input == {}: + # Extract the label which has highest "p" value + max_entry = max(row.inferred_labels, key=lambda x: x['p']) + max_labels_list.append(max_entry['labels']) + else: + 
max_labels_list.append(row.user_input) inferred_only_labels = pd.DataFrame(max_labels_list, index=inferred_ct.index) - disp.display(inferred_only_labels) - inferred_only_p = pd.DataFrame(max_p_list, index=inferred_ct.index, columns=['p']) - disp.display(inferred_only_p) - expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels, inferred_only_p], axis=1) + disp.display(inferred_only_labels.head()) + expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels], axis=1) expanded_inferred_ct.reset_index(drop=True, inplace=True) disp.display(expanded_inferred_ct.head()) return expanded_inferred_ct From 3c33e7585321809bf7de8640cf7be9cae6dbbbec Mon Sep 17 00:00:00 2001 From: iantei Date: Mon, 16 Sep 2024 23:00:06 -0700 Subject: [PATCH 14/22] Filter for inferred trip bar - it should have either user_input or inferred_labels --- viz_scripts/scaffolding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 11689ad..51d29bc 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -82,8 +82,8 @@ def filter_inferred_trips(mixed_trip_df): # CASE 1 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 if len(mixed_trip_df) == 0: return mixed_trip_df - # Identify which trips contains inferred_labels - inferred_ct = mixed_trip_df[mixed_trip_df['inferred_labels'].apply(lambda x: bool(x))] + # Identify trips which has either inferred_labels or has user_input + inferred_ct = mixed_trip_df[(mixed_trip_df['inferred_labels'].apply(lambda x: bool(x))) | (mixed_trip_df.user_input != {})] print("After filtering, found %s inferred trips" % len(inferred_ct)) disp.display(inferred_ct.head()) return inferred_ct From 72fcb205c6e4946e227a9cd264952774013096e4 Mon Sep 17 00:00:00 2001 From: iantei Date: Wed, 18 Sep 2024 10:12:10 -0700 Subject: [PATCH 15/22] Use confidence_threshold to filter labels from inferred_labels. 
--- viz_scripts/scaffolding.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 51d29bc..ec1e423 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -125,7 +125,10 @@ def expand_inferredlabels(inferred_ct): if row.user_input == {}: # Extract the label which has highest "p" value max_entry = max(row.inferred_labels, key=lambda x: x['p']) - max_labels_list.append(max_entry['labels']) + if (max_entry['p'] > row.confidence_threshold): + max_labels_list.append(max_entry['labels']) + else: + max_labels_list.append({}) else: max_labels_list.append(row.user_input) From 3d718b5cadd15ac2c37d71db71161a415e856844 Mon Sep 17 00:00:00 2001 From: iantei Date: Wed, 18 Sep 2024 10:36:49 -0700 Subject: [PATCH 16/22] Update bar_label for inferred bars from Inferred by OpenPATH ... to Labeled and Inferred by OpenPATH ... --- viz_scripts/generic_metrics.ipynb | 12 ++++++------ viz_scripts/mode_specific_metrics.ipynb | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index 198321e..3e3dbcd 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -240,7 +240,7 @@ " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: (df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", - " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", + " \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, 
debug_df_inferred)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: (df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False)), \n", " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", @@ -300,7 +300,7 @@ " plot_and_text_stacked_bar_chart(expanded_ct_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_commute_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred_commute, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", + " \"Labeled and Inferred by OpenPATH\\n\"+stacked_bar_quality_text_commute_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -335,7 +335,7 @@ " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_purpose, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n", + " \"Labeled and Inferred by 
OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", " plt.clf()\n", @@ -392,7 +392,7 @@ " plot_and_text_stacked_bar_chart(expanded_ct_u80, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+labeled_u80_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " \"Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", + " \"Labeled and Inferred by OpenPATH\\n\"+inferred_u80_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed_u80, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+sensed_u80_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name)\n", @@ -434,7 +434,7 @@ " plot_and_text_stacked_bar_chart(expanded_ct, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+stacked_bar_quality_text_labeled, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(expanded_ct_inferred, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", + " \"Labeled and 
Inferred by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " plot_and_text_stacked_bar_chart(expanded_ct_sensed, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+stacked_bar_quality_text_sensed, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", @@ -481,7 +481,7 @@ " plot_and_text_stacked_bar_chart(labeled_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n\"+labeled_land_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(inferred_land_trips_df, lambda df: df.groupby(\"Mode_confirm\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", + " \"Labeled and Inferred by OpenPATH\\n\"+inferred_land_quality_text, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " plot_and_text_stacked_bar_chart(sensed_land_trips_df, lambda df: df.groupby(\"primary_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Sensed by OpenPATH\\n\"+sensed_land_quality_text, ax[2], text_results[2], colors_sensed, debug_df_sensed)\n", " set_title_and_save(fig, text_results, plot_title_no_quality, file_name) \n", diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb index ce535a1..101e8c9 100644 --- a/viz_scripts/mode_specific_metrics.ipynb +++ b/viz_scripts/mode_specific_metrics.ipynb @@ -275,7 +275,7 @@ " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", " 
f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_purpose, debug_df)\n", " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Trip_purpose\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", - " f\"Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n", + " f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred)\n", " plot_title = plot_title_no_quality\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", @@ -312,7 +312,7 @@ " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", " \"Labeled by user\\n (Trip distance)\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'sum'}).sort_values(by=distance_col, ascending=False), \n", - " \"Inferred by OpenPATH\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", + " \"Labeled and Inferred by OpenPATH\\n (Trip distance)\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " plot_title = plot_title_no_quality\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", @@ -349,7 +349,7 @@ " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", " f\"Labeled `{mode_of_interest}` by 
user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_mode, debug_df)\n", " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"Replaced_mode\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False), \n", - " f\"Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", + " f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_mode, debug_df_inferred)\n", " plot_title = plot_title_no_quality\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n", From 83e259bd2bcb8c5dbe19e58d5e657c453866c9ce Mon Sep 17 00:00:00 2001 From: iantei Date: Fri, 20 Sep 2024 23:00:03 -0700 Subject: [PATCH 17/22] In case there is no user_input, and confidence_threshold is not met, append the labels_list with dict - uncertain for all labels. Later filter it out from the dataframe. 
--- viz_scripts/scaffolding.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 03c3f8e..a6c7333 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -128,7 +128,7 @@ def expand_inferredlabels(inferred_ct): if (max_entry['p'] > row.confidence_threshold): max_labels_list.append(max_entry['labels']) else: - max_labels_list.append({}) + max_labels_list.append({'mode_confirm':'uncertain', 'purpose_confirm':'uncertain', 'replaced_mode':'uncertain'}) else: max_labels_list.append(row.user_input) @@ -136,6 +136,8 @@ def expand_inferredlabels(inferred_ct): disp.display(inferred_only_labels.head()) expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels], axis=1) expanded_inferred_ct.reset_index(drop=True, inplace=True) + # Filter out the dataframe in which mode_confirm, purpose_confirm and replaced_mode is uncertain + expanded_inferred_ct = expanded_inferred_ct[(expanded_inferred_ct['mode_confirm'] != 'uncertain') & (expanded_inferred_ct['purpose_confirm'] != 'uncertain') & (expanded_inferred_ct['replaced_mode'] != 'uncertain')] disp.display(expanded_inferred_ct.head()) return expanded_inferred_ct From 9905ca7e9c7aaac92d19c9ed368630a2b0beaa65 Mon Sep 17 00:00:00 2001 From: iantei Date: Fri, 20 Sep 2024 23:42:35 -0700 Subject: [PATCH 18/22] Replace use of iterrows() over the pandas DataFrame with the df.apply() method. Remove reset_index on expanded_inferred_ct dataframe.
--- viz_scripts/scaffolding.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index a6c7333..8917e7d 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -119,23 +119,19 @@ def expand_inferredlabels(inferred_ct): if len(inferred_ct) == 0: return inferred_ct - max_labels_list = [] - for _, row in inferred_ct.iterrows(): - # In the trip, prioritize availabilty of user_input over inferred_labels for label selection - if row.user_input == {}: - # Extract the label which has highest "p" value - max_entry = max(row.inferred_labels, key=lambda x: x['p']) - if (max_entry['p'] > row.confidence_threshold): - max_labels_list.append(max_entry['labels']) - else: - max_labels_list.append({'mode_confirm':'uncertain', 'purpose_confirm':'uncertain', 'replaced_mode':'uncertain'}) - else: - max_labels_list.append(row.user_input) - - inferred_only_labels = pd.DataFrame(max_labels_list, index=inferred_ct.index) + def _select_max_label(row): + if row['user_input']: + return row['user_input'] + max_entry = max(row['inferred_labels'], key=lambda x: x['p']) + return max_entry['labels'] if max_entry['p'] > row['confidence_threshold'] else { + 'mode_confirm': 'uncertain', + 'purpose_confirm': 'uncertain', + 'replaced_mode': 'uncertain' + } + + inferred_only_labels = inferred_ct.apply(_select_max_label, axis=1).apply(pd.Series) disp.display(inferred_only_labels.head()) expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels], axis=1) - expanded_inferred_ct.reset_index(drop=True, inplace=True) # Filter out the dataframe in which mode_confirm, purpose_confirm and replaced_mode is uncertain expanded_inferred_ct = expanded_inferred_ct[(expanded_inferred_ct['mode_confirm'] != 'uncertain') & (expanded_inferred_ct['purpose_confirm'] != 'uncertain') & (expanded_inferred_ct['replaced_mode'] != 'uncertain')] disp.display(expanded_inferred_ct.head()) From 
a983a5b19b283728b8f4e32ef083cd5429f0f0b0 Mon Sep 17 00:00:00 2001 From: iantei Date: Fri, 20 Sep 2024 23:50:59 -0700 Subject: [PATCH 19/22] Update expand_inferredlabels() to expand_labeled_inferredlabels(), and update the variable names to add prefix of labeled. We display both labeled and inferred labels altogether for inferred bars in stacked bar charts. --- viz_scripts/scaffolding.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 8917e7d..96cf19e 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -115,9 +115,9 @@ def expand_userinputs(labeled_ct): disp.display(expanded_ct.head()) return expanded_ct -def expand_inferredlabels(inferred_ct): - if len(inferred_ct) == 0: - return inferred_ct +def expand_inferredlabels(labeled_inferred_ct): + if len(labeled_inferred_ct) == 0: + return labeled_inferred_ct def _select_max_label(row): if row['user_input']: @@ -129,13 +129,13 @@ def _select_max_label(row): 'replaced_mode': 'uncertain' } - inferred_only_labels = inferred_ct.apply(_select_max_label, axis=1).apply(pd.Series) - disp.display(inferred_only_labels.head()) - expanded_inferred_ct = pd.concat([inferred_ct, inferred_only_labels], axis=1) + labeled_inferred_labels = labeled_inferred_ct.apply(_select_max_label, axis=1).apply(pd.Series) + disp.display(labeled_inferred_labels.head()) + expanded_labeled_inferred_ct = pd.concat([labeled_inferred_ct, labeled_inferred_labels], axis=1) # Filter out the dataframe in which mode_confirm, purpose_confirm and replaced_mode is uncertain - expanded_inferred_ct = expanded_inferred_ct[(expanded_inferred_ct['mode_confirm'] != 'uncertain') & (expanded_inferred_ct['purpose_confirm'] != 'uncertain') & (expanded_inferred_ct['replaced_mode'] != 'uncertain')] - disp.display(expanded_inferred_ct.head()) - return expanded_inferred_ct + expanded_labeled_inferred_ct = 
expanded_labeled_inferred_ct[(expanded_labeled_inferred_ct['mode_confirm'] != 'uncertain') & (expanded_labeled_inferred_ct['purpose_confirm'] != 'uncertain') & (expanded_labeled_inferred_ct['replaced_mode'] != 'uncertain')] + disp.display(expanded_labeled_inferred_ct.head()) + return expanded_labeled_inferred_ct # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 unique_users = lambda df: len(df.user_id.unique()) if "user_id" in df.columns else 0 From acf5da6786425b1d6c9b6919ba846909ab8a3443 Mon Sep 17 00:00:00 2001 From: iantei Date: Sat, 21 Sep 2024 08:29:53 -0700 Subject: [PATCH 20/22] Fix merge with main - Introduce read_json_resource function. Introduce *_w_other cols for the dataframe. --- viz_scripts/scaffolding.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 650b41f..1e2bae8 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -183,6 +183,13 @@ def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur) if "distance" in expanded_trip_df.columns: unit_conversions(expanded_trip_df) + # Select the labels from dynamic_labels is available, + # else get it from emcommon/resources/label-options.default.json + if (len(dynamic_labels)): + labels = dynamic_labels + else: + labels = await emcu.read_json_resource("label-options.default.json") + # Map new mode labels with translations dictionary from dynamic_labels # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 if "mode_confirm" in expanded_trip_df.columns: @@ -191,6 +198,9 @@ def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur) expanded_trip_df['Mode_confirm'] = expanded_trip_df['mode_confirm'].map(dic_mode_mapping) else: expanded_trip_df['Mode_confirm'] = expanded_trip_df['mode_confirm'].map(dic_re) + # If the 'mode_confirm' is not available as the list of keys in the 
dynamic_labels or label_options.default.json, then, we should transform it as 'other' + mode_values = [item['value'] for item in labels['MODE']] + expanded_trip_df['mode_confirm_w_other'] = expanded_trip_df['mode_confirm'].apply(lambda mode: 'other' if mode not in mode_values else mode) if study_type == 'program': # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 if 'replaced_mode' in expanded_trip_df.columns: @@ -199,6 +209,8 @@ def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur) expanded_trip_df['Replaced_mode'] = expanded_trip_df['replaced_mode'].map(dic_replaced_mapping) else: expanded_trip_df['Replaced_mode'] = expanded_trip_df['replaced_mode'].map(dic_re) + replaced_modes = [item['value'] for item in labels['REPLACED_MODE']] + expanded_trip_df['replaced_mode_w_other'] = expanded_trip_df['replaced_mode'].apply(lambda mode: 'other' if mode not in replaced_modes else mode) else: print("This is a program, but no replaced modes found. Likely cold start case. Ignoring replaced mode mapping") else: @@ -212,6 +224,9 @@ def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur) expanded_trip_df['Trip_purpose'] = expanded_trip_df['purpose_confirm'].map(dic_purpose_mapping) else: expanded_trip_df['Trip_purpose'] = expanded_trip_df['purpose_confirm'].map(dic_pur) + purpose_values = [item['value'] for item in labels['PURPOSE']] + expanded_trip_df['purpose_confirm_w_other'] = expanded_trip_df['purpose_confirm'].apply(lambda value: 'other' if value not in purpose_values else value) + return expanded_trip_df def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False): From 4d8c403d1c121cba4c0751a05bccbb59f7921151 Mon Sep 17 00:00:00 2001 From: iantei Date: Sat, 21 Sep 2024 08:37:38 -0700 Subject: [PATCH 21/22] Update load_viz_notebook_inferred_data() to be async, and call it as await from notebook. 
Update the map_trip_data() to be async, and call to it as await in scaffolding.py --- viz_scripts/generic_metrics.ipynb | 2 +- viz_scripts/mode_specific_metrics.ipynb | 2 +- viz_scripts/scaffolding.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/viz_scripts/generic_metrics.ipynb b/viz_scripts/generic_metrics.ipynb index 32e81bf..656601c 100644 --- a/viz_scripts/generic_metrics.ipynb +++ b/viz_scripts/generic_metrics.ipynb @@ -156,7 +156,7 @@ "metadata": {}, "outputs": [], "source": [ - "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = scaffolding.load_viz_notebook_inferred_data(year,\n", + "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = await scaffolding.load_viz_notebook_inferred_data(year,\n", " month,\n", " program,\n", " study_type,\n", diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb index be9e8e7..0913650 100644 --- a/viz_scripts/mode_specific_metrics.ipynb +++ b/viz_scripts/mode_specific_metrics.ipynb @@ -147,7 +147,7 @@ "metadata": {}, "outputs": [], "source": [ - "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = scaffolding.load_viz_notebook_inferred_data(year,\n", + "expanded_ct_inferred, file_suffix_inferred, quality_text_inferred, debug_df_inferred = await scaffolding.load_viz_notebook_inferred_data(year,\n", " month,\n", " program,\n", " study_type,\n", diff --git a/viz_scripts/scaffolding.py b/viz_scripts/scaffolding.py index 1e2bae8..880c819 100644 --- a/viz_scripts/scaffolding.py +++ b/viz_scripts/scaffolding.py @@ -157,7 +157,7 @@ async def load_viz_notebook_data(year, month, program, study_type, dynamic_label labeled_ct = filter_labeled_trips(participant_ct_df) expanded_ct = expand_userinputs(labeled_ct) expanded_ct = data_quality_check(expanded_ct) - expanded_ct = map_trip_data(expanded_ct, study_type, dynamic_labels, dic_re, dic_pur) + expanded_ct = await 
map_trip_data(expanded_ct, study_type, dynamic_labels, dic_re, dic_pur) # Document data quality file_suffix = get_file_suffix(year, month, program) @@ -177,7 +177,7 @@ async def load_viz_notebook_data(year, month, program, study_type, dynamic_label return expanded_ct, file_suffix, quality_text, debug_df -def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur): +async def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur): # Change meters to miles # CASE 2 of https://github.com/e-mission/em-public-dashboard/issues/69#issuecomment-1256835867 if "distance" in expanded_trip_df.columns: @@ -229,7 +229,7 @@ def map_trip_data(expanded_trip_df, study_type, dynamic_labels, dic_re, dic_pur) return expanded_trip_df -def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False): +async def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_labels, dic_re, dic_pur=None, include_test_users=False): """ Inputs: year/month/program/study_type = parameters from the visualization notebook dic_* = label mappings; if dic_pur is included it will be used to recode trip purpose @@ -241,7 +241,7 @@ def load_viz_notebook_inferred_data(year, month, program, study_type, dynamic_la participant_ct_df = load_all_participant_trips(program, tq, include_test_users) inferred_ct = filter_inferred_trips(participant_ct_df) expanded_it = expand_inferredlabels(inferred_ct) - expanded_it = map_trip_data(expanded_it, study_type, dynamic_labels, dic_re, dic_pur) + expanded_it = await map_trip_data(expanded_it, study_type, dynamic_labels, dic_re, dic_pur) # Document data quality file_suffix = get_file_suffix(year, month, program) From 1a5abfbb0a5448e324efc55254340074ce419a5b Mon Sep 17 00:00:00 2001 From: iantei Date: Sat, 21 Sep 2024 08:38:44 -0700 Subject: [PATCH 22/22] Fix typo: debug_df_inferred from debug_df_inferre --- viz_scripts/mode_specific_metrics.ipynb
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viz_scripts/mode_specific_metrics.ipynb b/viz_scripts/mode_specific_metrics.ipynb index 0913650..a1d362e 100644 --- a/viz_scripts/mode_specific_metrics.ipynb +++ b/viz_scripts/mode_specific_metrics.ipynb @@ -276,7 +276,7 @@ " plot_and_text_stacked_bar_chart(data_eb, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", " f\"Labeled `{mode_of_interest}` by user\\n\"+stacked_bar_quality_text, ax[0], text_results[0], colors_purpose, debug_df, value_to_translations_purpose)\n", " plot_and_text_stacked_bar_chart(data_eb_inferred, lambda df: df.groupby(\"purpose_confirm_w_other\").agg({distance_col: 'count'}).sort_values(by=distance_col, ascending=False),\n", - " f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferre,value_to_translations_purpose)\n", + " f\"Labeled and Inferred `{mode_of_interest}` by OpenPATH\\n\"+stacked_bar_quality_text_inferred, ax[1], text_results[1], colors_purpose, debug_df_inferred, value_to_translations_purpose)\n", " plot_title = plot_title_no_quality + \"\\n\" + f\"For {mode_of_interest}: \" + quality_text\n", " set_title_and_save(fig, text_results, plot_title, file_name)\n", "except (AttributeError, KeyError, pd.errors.UndefinedVariableError) as e:\n",