Skip to content

Commit

Permalink
updated notebooks; added a few features (axes cosmetics) to analysis p…
Browse files Browse the repository at this point in the history
…lots
  • Loading branch information
visionjo committed Feb 7, 2020
1 parent f4c1ce7 commit bd3475d
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 32 deletions.
1 change: 0 additions & 1 deletion code/experiments/find_best_thresholds.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
add_package_path()

dir_data = "../../data/bfw-data/"
dir_features = f"{dir_data}features/senet50/"
f_datatable = f"{dir_data}bfw-v0.1.5-datatable.pkl"

data = pd.read_pickle(f_datatable)
Expand Down
48 changes: 17 additions & 31 deletions code/facebias/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,7 @@ def overlapped_score_distribution(data, log_scale=False, save_figure_path=None):
color_legend = plt.legend(fontsize=fontsize)
solid_line = Line2D([0], [0], color="black", linestyle="-")
dash_line = Line2D([0], [0], color="black", linestyle="--")
plt.legend([solid_line, dash_line], ["intra", "inter"], fontsize=fontsize,
loc=2)
plt.legend([solid_line, dash_line], ["intra", "inter"], fontsize=fontsize, loc=2)
plt.gca().add_artist(color_legend)

# handle log scale
Expand Down Expand Up @@ -231,8 +230,7 @@ def overlapped_score_distribution(data, log_scale=False, save_figure_path=None):
color_legend = plt.legend(fontsize=fontsize)
solid_line = Line2D([0], [0], color="black", linestyle="-")
dash_line = Line2D([0], [0], color="black", linestyle="--")
plt.legend([solid_line, dash_line], ["intra", "inter"], fontsize=fontsize,
loc=2)
plt.legend([solid_line, dash_line], ["intra", "inter"], fontsize=fontsize, loc=2)
plt.gca().add_artist(color_legend)

# handle log scale
Expand Down Expand Up @@ -377,13 +375,10 @@ def confusion_matrix(im_paths, dir_embeddings, save_figure_path=None):
"""
data = pd.read_csv(im_paths)
image_list = data["path"].to_list()
feature = load_features_from_image_list(
image_list, dir_embeddings, ext_feat="npy"
)
feature = load_features_from_image_list(image_list, dir_embeddings, ext_feat="npy")
data = get_attribute_gender_ethnicity(data, "path")
data["id"] = (
data["path"].apply(lambda x: "/".join(x.split("/")[:-1])).astype(
"category")
data["path"].apply(lambda x: "/".join(x.split("/")[:-1])).astype("category")
)
score_matrix = cosine_similarity(
data["path"].apply(lambda x: feature[x][0]).to_list()
Expand All @@ -402,21 +397,20 @@ def confusion_matrix(im_paths, dir_embeddings, save_figure_path=None):
confusion_npy[np.isnan(confusion_npy)] = 0
confusion_npy = confusion_npy.reshape((8, -1))
all_subgroup = data["a"].unique()
confusion_df = pd.DataFrame(confusion_npy, index=all_subgroup,
columns=all_subgroup)
confusion_df = pd.DataFrame(confusion_npy, index=all_subgroup, columns=all_subgroup)

n_samples_per_subgroup = data["a"].count() / len(all_subgroup)
confusion_percent_error_df = (confusion_df / n_samples_per_subgroup) * 100
plot_confusion_matrix(confusion_percent_error_df, save_figure_path)


def create_bias_analysis_plots(
im_pair_paths,
im_paths,
dir_embeddings,
data=None,
save_data=None,
dir_output="results",
im_pair_paths,
im_paths,
dir_embeddings,
data=None,
save_data=None,
dir_output="results",
):
"""
Using image pairs from 'image_pair_path', plot the following three plots.
Expand Down Expand Up @@ -467,9 +461,7 @@ def create_bias_analysis_plots(
im_pair_paths, dir_embeddings
)
if save_data is not None:
Path(os.path.dirname(save_data)).mkdir(
parents=True, exist_ok=True
)
Path(os.path.dirname(save_data)).mkdir(parents=True, exist_ok=True)
with open(save_data, "wb") as f:
pk.dump(data_pair_df, f)

Expand All @@ -489,8 +481,7 @@ def create_bias_analysis_plots(
data_pair_df, log_scale=False, save_figure_path=over_dist_path
)

log_over_dist_path = join(dir_output,
"overlapped_log_scale_score_dist.png")
log_over_dist_path = join(dir_output, "overlapped_log_scale_score_dist.png")
print(
f"producing overlapped score distribution plot on log scale. "
f"result will be saved to {log_over_dist_path}"
Expand All @@ -517,8 +508,7 @@ def create_bias_analysis_plots(
f"{det_gender_path}"
)
det_plot(
data_pair_df, "g1", "DET Curve Per Gender",
save_figure_path=det_gender_path
data_pair_df, "g1", "DET Curve Per Gender", save_figure_path=det_gender_path
)

det_ethnicity_path = join(dir_output, "det_ethnicity.png")
Expand All @@ -538,14 +528,10 @@ def create_bias_analysis_plots(
f"producing confusion matrix plot. result will be saved to "
f"{confusion_matrix_path}"
)
confusion_matrix(
im_paths, dir_embeddings,
save_figure_path=confusion_matrix_path
)
confusion_matrix(im_paths, dir_embeddings, save_figure_path=confusion_matrix_path)


def clean_image_pair_and_image_list_csv(im_pair_paths, im_paths,
dir_embeddings):
def clean_image_pair_and_image_list_csv(im_pair_paths, im_paths, dir_embeddings):
"""
Clean image pair csv and image list csv by deleting the rows that contain a
path to an image whose embedding does not exist in embedding_dir_path
Expand All @@ -571,7 +557,7 @@ def clean_image_pair_and_image_list_csv(im_pair_paths, im_paths,
old_nrow = image_pair.shape[0]
image_pair = image_pair[
image_pair["p1"].map(check_exist) & image_pair["p2"].map(check_exist)
]
]
new_nrow = image_pair.shape[0]
print(
f"For image pair csv, {old_nrow - new_nrow} rows out of {old_nrow} rows"
Expand Down

0 comments on commit bd3475d

Please sign in to comment.