Merge pull request #19 from dmonllao/MDL-67040_master
MDL-67040 master
stronk7 authored Jan 15, 2020
2 parents c0860c7 + 91db1e9 commit c6bd888
Showing 3 changed files with 64 additions and 18 deletions.
moodlemlbackend/VERSION (2 changes: 1 addition & 1 deletion)
@@ -1 +1 @@
2.3.1
2.4.0
moodlemlbackend/model/tensor.py (10 changes: 9 additions & 1 deletion)
@@ -21,7 +21,10 @@ def __init__(self, n_features, n_classes, n_epoch, batch_size,
self.batch_size = batch_size
self.starter_learning_rate = starter_learning_rate
self.n_features = n_features
self.n_hidden = 10

# Based on the number of features although we need a reasonable
# minimum.
self.n_hidden = max(4, int(n_features / 3))
self.n_classes = n_classes
self.tensor_logdir = tensor_logdir
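The hidden layer is no longer a fixed 10 units; it now scales with the input width but never shrinks below 4. A standalone sketch of how that rule behaves for a few hypothetical feature counts (not values from the commit):

# Standalone sketch: behaviour of the new hidden-layer sizing rule for
# a few hypothetical feature counts.
for n_features in (3, 12, 30, 300):
    print(n_features, '->', max(4, int(n_features / 3)))
# 3 -> 4, 12 -> 4, 30 -> 10, 300 -> 100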

@@ -141,6 +144,11 @@ def build_graph(self, initial_weights=False):

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
logits=self.probs, labels=self.y_))

regularizer = (tf.nn.l2_loss(W['input-hidden']) * 0.01) + \
(tf.nn.l2_loss(W['hidden-output']) * 0.01)
loss = tf.reduce_mean(loss + regularizer)

tf.summary.scalar("loss", loss)

with tf.name_scope('accuracy'):
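The added regularizer lines follow the usual TensorFlow 1.x pattern for L2 weight decay: an l2_loss penalty on each weight matrix, scaled by a small coefficient, is added to the cross-entropy term. Below is a minimal standalone sketch of that pattern; the tensors (labels, logits, W_in, W_out) are placeholders for illustration, not the module's own variables:

import tensorflow as tf  # TF 1.x API, as used by this backend

# Placeholder tensors, only to make the sketch self-contained.
labels = tf.one_hot([0, 1, 1], depth=2)
logits = tf.random.normal([3, 2])
W_in = tf.Variable(tf.random.normal([20, 6]))
W_out = tf.Variable(tf.random.normal([6, 2]))

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels))
# L2 penalty on both weight matrices, weighted by 0.01 as in the commit.
regularizer = 0.01 * (tf.nn.l2_loss(W_in) + tf.nn.l2_loss(W_out))
loss = cross_entropy + regularizer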
moodlemlbackend/processor/estimator.py (70 changes: 54 additions & 16 deletions)
@@ -63,6 +63,8 @@ def __init__(self, modelid, directory):

# Logging.
logfile = os.path.join(self.logsdir, 'info.log')
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
logging.basicConfig(filename=logfile, level=logging.DEBUG)
warnings.showwarning = self.warnings_to_log
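The handler-removal loop added just before logging.basicConfig works around standard-library behaviour: basicConfig() is a no-op when the root logger already has handlers, so a configuration left over from a previous request or an imported library would silently win. A standalone sketch of the same idea (the log file name here is illustrative only):

import logging

# Drop any handlers already attached to the root logger; otherwise the
# basicConfig() call below would be silently ignored.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

logging.basicConfig(filename='info.log', level=logging.DEBUG)
logging.debug('this record now ends up in info.log')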

@@ -224,9 +226,32 @@ def __init__(self, modelid, directory, dataset=None):
def get_classifier(self, X, y, initial_weights=False):
"""Gets the classifier"""

n_epoch = 50
batch_size = 1000
try:
n_rows = X.shape[0]
except AttributeError:
# No X during model import.
# n_rows value does not really matter during import.
n_rows = 1

if n_rows < 1000:
batch_size = n_rows
else:
# A min batch size of 1000.
x_tenpercent = int(n_rows / 10)
batch_size = max(1000, x_tenpercent)

# We need ~10,000 iterations so that the 0.5 learning rate decreases
# to 0.01 with a decay rate of 0.96. We use 12,000 so that the
# algorithm has some time to finish the training on lr < 0.01.
starter_learning_rate = 0.5
if n_rows > batch_size:
n_epoch = int(12000 / (n_rows / batch_size))
else:
            # Less than 1000 rows (1000 is the minimum batch size we defined).
            # We don't need to iterate that many times if we have fewer than
            # 1000 records; starting at 0.5, the learning rate will get to
            # ~0.05 in 5000 epochs.
n_epoch = 5000

n_classes = self.n_classes
n_features = self.n_features
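Taken together, the new sizing logic uses the whole dataset as a single batch below 1,000 rows, otherwise the larger of 1,000 and 10% of the rows, and then picks an epoch count that keeps the total number of iterations around 12,000. A standalone sketch of that arithmetic, plus the figure behind the learning-rate comment (assuming the usual exponential-decay form lr = 0.5 * 0.96**k after k decay steps, which is what the 0.96 decay rate in the comment suggests):

import math

def batch_and_epochs(n_rows):
    # Reproduces the sizing rules above, for illustration only.
    if n_rows < 1000:
        batch_size = n_rows
    else:
        batch_size = max(1000, int(n_rows / 10))
    if n_rows > batch_size:
        n_epoch = int(12000 / (n_rows / batch_size))
    else:
        n_epoch = 5000
    return batch_size, n_epoch

print(batch_and_epochs(500))    # (500, 5000)
print(batch_and_epochs(5000))   # (1000, 2400)
print(batch_and_epochs(50000))  # (5000, 1200)

# Roughly 96 decay steps take the learning rate from 0.5 down to 0.01 at a
# 0.96 decay rate; that lines up with the ~10,000-iteration figure in the
# comment if the rate decays about every 100 iterations (an assumption here,
# since the decay schedule itself is defined elsewhere).
print(math.log(0.01 / 0.5) / math.log(0.96))  # ~95.8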
@@ -378,6 +403,7 @@ def evaluate_dataset(self, filepath, min_score=0.6,
logging.info("AUC: %.2f%%", result['auc'])
logging.info("AUC standard deviation: %.4f",
result['auc_deviation'])

logging.info("Accuracy: %.2f%%", result['accuracy'] * 100)
logging.info("Precision (predicted elements that are real): %.2f%%",
result['precision'] * 100)
@@ -400,21 +426,27 @@ def rate_prediction(self, classifier, X_test, y_test):
y_test = y_test.T[0]

if self.is_binary:
            # ROC curve calculations.
            fpr, tpr, _ = roc_curve(y_test, y_score)

            # When the amount of samples is small we can randomly end up
            # having just one class instead of examples of each, which
            # triggers a "UndefinedMetricWarning: No negative samples in
            # y_true, false positive value should be meaningless"
            # and returning NaN.
            if math.isnan(fpr[0]) or math.isnan(tpr[0]):
                return

            self.aucs.append(auc(fpr, tpr))

            # Draw it.
            self.roc_curve_plot.add(fpr, tpr, 'Positives')

            try:
                # ROC curve calculations.
                fpr, tpr, _ = roc_curve(y_test, y_score)

                # When the amount of samples is small we can randomly end up
                # having just one class instead of examples of each, which
                # triggers a "UndefinedMetricWarning: No negative samples in
                # y_true, false positive value should be meaningless"
                # and returning NaN.
                if math.isnan(fpr[0]) or math.isnan(tpr[0]):
                    return

                self.aucs.append(auc(fpr, tpr))

                # Draw it.
                self.roc_curve_plot.add(fpr, tpr, 'Positives')
            except Exception:
                # Nevermind.
                pass

# Calculate accuracy, sensitivity and specificity.
[acc, prec, rec, f1score] = self.calculate_metrics(y_test, y_pred)
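The calls the new try block protects are the standard scikit-learn ROC helpers; with very few samples, y_test can end up containing a single class, in which case roc_curve warns and yields NaNs, which is what both the NaN check and the broad except guard against. A standalone sketch with made-up labels and scores:

import numpy as np
from sklearn.metrics import roc_curve, auc

y_test = np.array([0, 0, 1, 1])            # made-up true labels
y_score = np.array([0.1, 0.4, 0.35, 0.8])  # made-up positive-class scores

fpr, tpr, _ = roc_curve(y_test, y_score)
print(auc(fpr, tpr))  # 0.75 for these values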
@@ -468,8 +500,14 @@ def get_evaluation_results(self, min_score, accepted_deviation):

result = dict()
if self.is_binary and len(self.aucs) > 0:
result['auc'] = np.mean(self.aucs)
result['auc_deviation'] = np.std(self.aucs)
try:
result['auc'] = np.mean(self.aucs)
result['auc_deviation'] = np.std(self.aucs)
except Exception:
# No worries.
result['auc'] = 0.0
result['auc_deviation'] = 1.0
pass

result['accuracy'] = avg_accuracy
result['precision'] = avg_precision
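For binary models the evaluation result then reports the mean and standard deviation of the AUCs collected across evaluation runs, falling back to a pessimistic 0.0 / 1.0 pair if the computation fails. A tiny standalone sketch with invented per-run AUCs:

import numpy as np

aucs = [0.71, 0.68, 0.74]  # hypothetical per-run AUC values
print(round(np.mean(aucs), 3), round(np.std(aucs), 3))  # 0.71 0.024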