Skip to content

Commit

Permalink
updated notebooks; added a few features (axes cosmetics) to analysis p…
Browse files Browse the repository at this point in the history
…lots
  • Loading branch information
visionjo committed Feb 7, 2020
1 parent f4c1ce7 commit bd3475d
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 32 deletions.
1 change: 0 additions & 1 deletion code/experiments/find_best_thresholds.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
add_package_path()

dir_data = "../../data/bfw-data/"
dir_features = f"{dir_data}features/senet50/"
f_datatable = f"{dir_data}bfw-v0.1.5-datatable.pkl"

data = pd.read_pickle(f_datatable)
Expand Down
48 changes: 17 additions & 31 deletions code/facebias/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,7 @@ def overlapped_score_distribution(data, log_scale=False, save_figure_path=None):
color_legend = plt.legend(fontsize=fontsize)
solid_line = Line2D([0], [0], color="black", linestyle="-")
dash_line = Line2D([0], [0], color="black", linestyle="--")
plt.legend([solid_line, dash_line], ["intra", "inter"], fontsize=fontsize,
loc=2)
plt.legend([solid_line, dash_line], ["intra", "inter"], fontsize=fontsize, loc=2)
plt.gca().add_artist(color_legend)

# handle log scale
Expand Down Expand Up @@ -231,8 +230,7 @@ def overlapped_score_distribution(data, log_scale=False, save_figure_path=None):
color_legend = plt.legend(fontsize=fontsize)
solid_line = Line2D([0], [0], color="black", linestyle="-")
dash_line = Line2D([0], [0], color="black", linestyle="--")
plt.legend([solid_line, dash_line], ["intra", "inter"], fontsize=fontsize,
loc=2)
plt.legend([solid_line, dash_line], ["intra", "inter"], fontsize=fontsize, loc=2)
plt.gca().add_artist(color_legend)

# handle log scale
Expand Down Expand Up @@ -377,13 +375,10 @@ def confusion_matrix(im_paths, dir_embeddings, save_figure_path=None):
"""
data = pd.read_csv(im_paths)
image_list = data["path"].to_list()
feature = load_features_from_image_list(
image_list, dir_embeddings, ext_feat="npy"
)
feature = load_features_from_image_list(image_list, dir_embeddings, ext_feat="npy")
data = get_attribute_gender_ethnicity(data, "path")
data["id"] = (
data["path"].apply(lambda x: "/".join(x.split("/")[:-1])).astype(
"category")
data["path"].apply(lambda x: "/".join(x.split("/")[:-1])).astype("category")
)
score_matrix = cosine_similarity(
data["path"].apply(lambda x: feature[x][0]).to_list()
Expand All @@ -402,21 +397,20 @@ def confusion_matrix(im_paths, dir_embeddings, save_figure_path=None):
confusion_npy[np.isnan(confusion_npy)] = 0
confusion_npy = confusion_npy.reshape((8, -1))
all_subgroup = data["a"].unique()
confusion_df = pd.DataFrame(confusion_npy, index=all_subgroup,
columns=all_subgroup)
confusion_df = pd.DataFrame(confusion_npy, index=all_subgroup, columns=all_subgroup)

n_samples_per_subgroup = data["a"].count() / len(all_subgroup)
confusion_percent_error_df = (confusion_df / n_samples_per_subgroup) * 100
plot_confusion_matrix(confusion_percent_error_df, save_figure_path)


def create_bias_analysis_plots(
im_pair_paths,
im_paths,
dir_embeddings,
data=None,
save_data=None,
dir_output="results",
im_pair_paths,
im_paths,
dir_embeddings,
data=None,
save_data=None,
dir_output="results",
):
"""
Using image pairs from 'image_pair_path', plot the following three plots.
Expand Down Expand Up @@ -467,9 +461,7 @@ def create_bias_analysis_plots(
im_pair_paths, dir_embeddings
)
if save_data is not None:
Path(os.path.dirname(save_data)).mkdir(
parents=True, exist_ok=True
)
Path(os.path.dirname(save_data)).mkdir(parents=True, exist_ok=True)
with open(save_data, "wb") as f:
pk.dump(data_pair_df, f)

Expand All @@ -489,8 +481,7 @@ def create_bias_analysis_plots(
data_pair_df, log_scale=False, save_figure_path=over_dist_path
)

log_over_dist_path = join(dir_output,
"overlapped_log_scale_score_dist.png")
log_over_dist_path = join(dir_output, "overlapped_log_scale_score_dist.png")
print(
f"producing overlapped score distribution plot on log scale. "
f"result will be saved to {log_over_dist_path}"
Expand All @@ -517,8 +508,7 @@ def create_bias_analysis_plots(
f"{det_gender_path}"
)
det_plot(
data_pair_df, "g1", "DET Curve Per Gender",
save_figure_path=det_gender_path
data_pair_df, "g1", "DET Curve Per Gender", save_figure_path=det_gender_path
)

det_ethnicity_path = join(dir_output, "det_ethnicity.png")
Expand All @@ -538,14 +528,10 @@ def create_bias_analysis_plots(
f"producing confusion matrix plot. result will be saved to "
f"{confusion_matrix_path}"
)
confusion_matrix(
im_paths, dir_embeddings,
save_figure_path=confusion_matrix_path
)
confusion_matrix(im_paths, dir_embeddings, save_figure_path=confusion_matrix_path)


def clean_image_pair_and_image_list_csv(im_pair_paths, im_paths,
dir_embeddings):
def clean_image_pair_and_image_list_csv(im_pair_paths, im_paths, dir_embeddings):
"""
Clean image pair csv and image list csv by deleting the rows that contain a
path to an image whose embedding does not exist in embedding_dir_path
Expand All @@ -571,7 +557,7 @@ def clean_image_pair_and_image_list_csv(im_pair_paths, im_paths,
old_nrow = image_pair.shape[0]
image_pair = image_pair[
image_pair["p1"].map(check_exist) & image_pair["p2"].map(check_exist)
]
]
new_nrow = image_pair.shape[0]
print(
f"For image pair csv, {old_nrow - new_nrow} rows out of {old_nrow} rows"
Expand Down

0 comments on commit bd3475d

Please sign in to comment.