From fde778014c7e2970a65c042523c49bff7cb2e17c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20Monlla=C3=B3?=
Date: Thu, 2 Jan 2020 12:01:59 +0800
Subject: [PATCH] MDL-67622 analytics: Fix f1score calculation

---
 moodlemlbackend/VERSION                |  2 +-
 moodlemlbackend/processor/estimator.py | 59 ++++++++++----------------
 2 files changed, 23 insertions(+), 38 deletions(-)

diff --git a/moodlemlbackend/VERSION b/moodlemlbackend/VERSION
index 276cbf9..2bf1c1c 100644
--- a/moodlemlbackend/VERSION
+++ b/moodlemlbackend/VERSION
@@ -1 +1 @@
-2.3.0
+2.3.1
diff --git a/moodlemlbackend/processor/estimator.py b/moodlemlbackend/processor/estimator.py
index 65294a7..c4892d2 100644
--- a/moodlemlbackend/processor/estimator.py
+++ b/moodlemlbackend/processor/estimator.py
@@ -20,7 +20,7 @@
 import numpy as np
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import roc_curve, auc
-from sklearn.metrics import confusion_matrix
+from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score
 from sklearn.utils import shuffle
 
 import tensorflow as tf
@@ -79,7 +79,8 @@ def __init__(self, modelid, directory):
         np.seterr(all='raise')
 
     @staticmethod
-    def warnings_to_log(message, category, filename, lineno):
+    def warnings_to_log(message, category, filename, lineno, file=None,
+                        line=None):
         """showwarnings overwritten"""
         logging.warning('%s:%s: %s:%s',
                         filename, lineno, category.__name__, message)
@@ -421,12 +422,12 @@ def rate_prediction(self, classifier, X_test, y_test):
         self.roc_curve_plot.add(fpr, tpr, 'Positives')
 
         # Calculate accuracy, sensitivity and specificity.
-        [acc, prec, rec, f1_score] = self.calculate_metrics(
-            y_test == 1, y_pred == 1)
+        [acc, prec, rec, f1score] = self.calculate_metrics(y_test, y_pred)
+
         self.accuracies.append(acc)
         self.precisions.append(prec)
         self.recalls.append(rec)
-        self.f1_scores.append(f1_score)
+        self.f1_scores.append(f1score)
 
     @staticmethod
     def get_score(classifier, X_test, y_test):
@@ -439,38 +440,22 @@ def get_score(classifier, X_test, y_test):
         return probs[range(n_samples), y_test]
 
     @staticmethod
-    def calculate_metrics(y_test_true, y_pred_true):
-        """Calculates confusion matrix metrics"""
-
-        test_p = y_test_true
-        test_n = np.invert(test_p)
-
-        pred_p = y_pred_true
-        pred_n = np.invert(pred_p)
-
-        pp = np.count_nonzero(test_p)
-        nn = np.count_nonzero(test_n)
-        tp = np.count_nonzero(test_p * pred_p)
-        tn = np.count_nonzero(test_n * pred_n)
-        fn = np.count_nonzero(test_p * pred_n)
-        fp = np.count_nonzero(test_n * pred_p)
-
-        accuracy = (tp + tn) / (pp + nn)
-        if tp != 0 or fp != 0:
-            precision = tp / (tp + fp)
-        else:
-            precision = 0
-        if tp != 0 or fn != 0:
-            recall = tp / (tp + fn)
-        else:
-            recall = 0
-
-        if precision + recall != 0:
-            f1_score = 2 * precision * recall / (precision + recall)
-        else:
-            f1_score = 0
-
-        return [accuracy, precision, recall, f1_score]
+    def calculate_metrics(y_test, y_pred):
+        """Calculates the accuracy metrics"""
+
+        accuracy = accuracy_score(y_test, y_pred)
+
+        # sklearn may call warnings.warn with an "UndefinedMetricWarning:
+        # Precision is ill-defined and being set to 0.0 in labels with
+        # no predicted samples." message on python 3.7.x. The extra
+        # 'file' and 'line' arguments that warnings_to_log() now accepts
+        # prevent that warning from raising "TypeError: warnings_to_log()
+        # takes 4 positional arguments but 6 were given".
+        precision = precision_score(y_test, y_pred, average='weighted')
+        recall = recall_score(y_test, y_pred, average='weighted')
+        f1score = f1_score(y_test, y_pred, average='weighted')
+
+        return [accuracy, precision, recall, f1score]
 
     def get_evaluation_results(self, min_score, accepted_deviation):
         """Returns the evaluation results after all iterations"""
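
Editor's note (illustrative, not part of the patch): the old calculate_metrics()
collapsed the labels to "class 1 vs everything else" (y_test == 1, y_pred == 1)
before computing the scores, while the new code scores every class and averages
the per-class results weighted by support. A minimal sketch of the difference,
using made-up labels and only the sklearn call shown in the diff:

    # Illustrative only -- NOT part of the patch; the labels are made up.
    import numpy as np
    from sklearn.metrics import f1_score

    y_test = np.array([0, 1, 2, 2, 1, 0])  # hypothetical true labels
    y_pred = np.array([0, 2, 2, 2, 1, 0])  # hypothetical predictions

    # Old behaviour: reduce everything to "is it class 1?", discarding
    # any confusion between the remaining classes.
    old_f1 = f1_score(y_test == 1, y_pred == 1)

    # New behaviour: per-class F1 averaged with class support as the
    # weight, matching calculate_metrics() after this patch.
    new_f1 = f1_score(y_test, y_pred, average='weighted')

    print(round(old_f1, 2), round(new_f1, 2))  # 0.67 0.82 on this toy data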
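
A second note, also illustrative rather than part of the patch: the wider
warnings_to_log() signature matters because Python 3 invokes the
warnings.showwarning hook with six arguments, which is what lets the
UndefinedMetricWarning above be logged instead of crashing. A self-contained
sketch of the same pattern:

    # Illustrative only -- NOT part of the patch.
    import logging
    import warnings

    def warnings_to_log(message, category, filename, lineno, file=None,
                        line=None):
        """Route warnings through the logging module."""
        logging.warning('%s:%s: %s:%s',
                        filename, lineno, category.__name__, message)

    # Python 3 calls this hook as showwarning(message, category, filename,
    # lineno, file=None, line=None); a 4-argument override raises
    # "TypeError: ... takes 4 positional arguments but 6 were given" the
    # first time anything (e.g. sklearn) emits a warning.
    warnings.showwarning = warnings_to_log

    warnings.warn('handled by warnings_to_log')  # no TypeError raised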