-
Notifications
You must be signed in to change notification settings - Fork 9
/
evaluation_metrics.py
83 lines (68 loc) · 3.59 KB
/
evaluation_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# -*- coding: utf-8 -*-
import numpy as np
import csv
from scipy.stats import spearmanr, kendalltau, rankdata
from collections import Counter
def evaluate_summary(predicted_summary, user_summary, eval_method):
    """ Compare the predicted summary with the user defined one(s).

    Both summaries are zero-padded to a common length before they are compared, so the predicted summary and the
    user summaries may differ in length. An F-score (in percent) is computed against every user summary and the
    per-user scores are then reduced according to `eval_method`.

    A comparison in which either the predicted summary or the user summary selects no frames at all is scored as 0
    (instead of propagating a NaN from a 0/0 division).

    :param np.ndarray predicted_summary: The generated summary from our model.
    :param np.ndarray user_summary: The user defined ground truth summaries (or summary), one row per user.
    :param str eval_method: The proposed evaluation method; either 'max' (SumMe) or 'avg' (TVSum). Any value other
        than 'max' is treated as 'avg'.
    :return: The reduced fscore based on the eval_method
    """
    user_len = user_summary.shape[1]
    max_len = max(len(predicted_summary), user_len)

    # Zero-padded copy of the predicted summary; shared by all user comparisons.
    S = np.zeros(max_len, dtype=int)
    S[:len(predicted_summary)] = predicted_summary
    pred_total = S.sum()

    f_scores = []
    for user_row in user_summary:
        G = np.zeros(max_len, dtype=int)
        G[:user_len] = user_row
        user_total = G.sum()

        # Precision or recall would be 0/0 here; an empty summary cannot match anything.
        if pred_total == 0 or user_total == 0:
            f_scores.append(0.0)
            continue

        overlap = (S & G).sum()
        precision = overlap / pred_total
        recall = overlap / user_total
        if precision + recall == 0:
            f_scores.append(0.0)
        else:
            f_scores.append(float(2 * precision * recall * 100 / (precision + recall)))

    if eval_method == 'max':
        return max(f_scores)
    return sum(f_scores) / len(f_scores)
def get_corr_coeff(pred_imp_scores, video, dataset, annot_path=None):
    """ Read users annotations (frame-level importance scores) for the `video` of the `dataset`* in use. Compare the
    multiple user annotations for the test video with the predicted frame-level importance scores of our CA-SUM for the
    same video, by computing the Spearman's rho and Kendall's tau correlation coefficients. It must be noted, that the
    calculated values are the average correlation coefficients over the multiple annotators.

    The annotation file is a tab-separated file whose first column identifies the video and whose third column holds
    the comma-separated frame-level scores of one annotator. Rows belonging to the same video are expected to be
    contiguous, and the numeric suffix of `video` (e.g. 'video_12' -> 12) is used as the 1-based position of the video
    among the distinct identifiers, in order of first appearance.

    * Applicable only for the TVSum dataset.

    :param list[float] pred_imp_scores: The predicted frame-level importance scores from our CA-SUM model.
    :param str video: The name of the test video being inferenced.
    :param str dataset: The dataset in use.
    :param str | None annot_path: Optional path of the annotation file. When omitted, the path is built from `dataset`.
    :return: A tuple containing the video-level Spearman's rho and Kendall's tau correlation coefficients.
    :raises IndexError: If the numeric suffix of `video` does not address a video of the annotation file.
    """
    if annot_path is None:
        annot_path = f".../CA-SUM/data/{dataset}/ydata-anno.tsv"

    # Read the user annotations from the file; the handle is only needed while parsing.
    with open(annot_path, newline="") as annot_file:
        annot = [row for row in csv.reader(annot_file, delimiter="\t") if row]

    video_idx = int(video.split("_")[-1])

    # Number of annotation rows per video, in order of first appearance (Counter keeps insertion order).
    rows_per_video = list(Counter(row[0] for row in annot).values())
    if not 1 <= video_idx <= len(rows_per_video):
        raise IndexError(f"video index {video_idx} is out of range for {len(rows_per_video)} annotated videos")

    # Offset by the rows of all preceding videos, so videos with differing annotation counts are handled too.
    init = sum(rows_per_video[:video_idx - 1])
    till = init + rows_per_video[video_idx - 1]

    user_scores = []
    for row in annot[init:till]:
        curr_user_score = np.array([float(num) for num in row[2].split(",")])
        curr_user_score = curr_user_score / curr_user_score.max(initial=-1)  # Normalize scores between 0 and 1
        # Keep every 15th frame score; presumably this matches the sampling of the predictions - TODO confirm.
        user_scores.append(curr_user_score[::15])

    pred_imp_scores = np.array(pred_imp_scores)
    pred_ranks = rankdata(pred_imp_scores)  # identical for every annotator, so computed once

    rho_coeff, tau_coeff = [], []
    for true_user_score in user_scores:
        curr_rho_coeff, _ = spearmanr(pred_imp_scores, true_user_score)
        curr_tau_coeff, _ = kendalltau(pred_ranks, rankdata(true_user_score))
        rho_coeff.append(curr_rho_coeff)
        tau_coeff.append(curr_tau_coeff)

    rho_coeff = np.array(rho_coeff).mean()  # mean over all user annotations
    tau_coeff = np.array(tau_coeff).mean()  # mean over all user annotations

    return rho_coeff, tau_coeff