From 6e540791005c9f91061ed78a8423e7b7bc50de6a Mon Sep 17 00:00:00 2001
From: Christoph Molnar <christoph.molnar@gmail.com>
Date: Tue, 22 May 2018 14:47:00 +0200
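Subject: [PATCH] Fix problems due to missing type = "prob" in Predictor$new calls

Pass type = "prob" to Predictor$new in the global surrogate, LIME and
Shapley chapters.

Also included in this patch:
- Shapley chapter: select the class by name (class = "Cancer") instead of
  class = 1, set sample.size = 100 in Shapley$new, and remove a
  commented-out shapley() call.
- Model-agnostic intro: fix the figure caption wording ("in the form of
  explanations").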
---
 manuscript/05.1-agnostic.Rmd                  | 2 +-
 manuscript/05.6-agnostic-global-surrogate.Rmd | 2 +-
 manuscript/05.7-agnostic-lime.Rmd             | 2 +-
 manuscript/05.8-agnostic-shapley.Rmd          | 5 ++---
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/manuscript/05.1-agnostic.Rmd b/manuscript/05.1-agnostic.Rmd
index 552e781a0..928630af8 100644
--- a/manuscript/05.1-agnostic.Rmd
+++ b/manuscript/05.1-agnostic.Rmd
@@ -29,7 +29,7 @@ Let's take a high level view on model-agnostic interpretability.
 We first abstract the world by capturing it by collecting data and abstract it further by learning the essence of the data (for the task) with a machine learning model.
 Interpretability is just another layer on top, that helps humans understand.
 
-```{r bigpicture, fig.cap="The big picture of explainable machine learning. The real world goes through many layers before it reaches the human in forms of explanations."}
+```{r bigpicture, fig.cap="The big picture of explainable machine learning. The real world goes through many layers before it reaches the human in the form of explanations."}
 knitr::include_graphics("images/big-picture.png")
 ```
 
diff --git a/manuscript/05.6-agnostic-global-surrogate.Rmd b/manuscript/05.6-agnostic-global-surrogate.Rmd
index ea16d5a2b..9375fe3a1 100644
--- a/manuscript/05.6-agnostic-global-surrogate.Rmd
+++ b/manuscript/05.6-agnostic-global-surrogate.Rmd
@@ -109,7 +109,7 @@ data(cervical)
 cervical.task = makeClassifTask(data = cervical, target = "Biopsy")
 mod.cervical = mlr::train(mlr::makeLearner(cl = 'classif.randomForest', predict.type = "prob"), cervical.task)
 
-pred.cervical = Predictor$new(mod.cervical, data = cervical[names(cervical) != "Biopsy"])
+pred.cervical = Predictor$new(mod.cervical, data = cervical[names(cervical) != "Biopsy"], type = "prob")
 tree.cervical = TreeSurrogate$new(pred.cervical, maxdepth = 2) 
 plot(tree.cervical)
 pred.tree.cervical  = predict(tree.cervical, cervical)["Cancer"]
diff --git a/manuscript/05.7-agnostic-lime.Rmd b/manuscript/05.7-agnostic-lime.Rmd
index 1c6e12599..2ee158d88 100644
--- a/manuscript/05.7-agnostic-lime.Rmd
+++ b/manuscript/05.7-agnostic-lime.Rmd
@@ -236,7 +236,7 @@ library("iml")
 library("gridExtra")
 instance_indices = c(295, 8)
 set.seed(44)
-pred = Predictor$new(model, data = bike.train.x, class = "above")
+pred = Predictor$new(model, data = bike.train.x, class = "above", type = "prob")
 lim = LocalModel$new(pred, x.interest = bike.train.x[instance_indices[1],], k = n_features_lime)
 
 a = plot(lim)
diff --git a/manuscript/05.8-agnostic-shapley.Rmd b/manuscript/05.8-agnostic-shapley.Rmd
index dfb37f3c2..f9b3dc614 100644
--- a/manuscript/05.8-agnostic-shapley.Rmd
+++ b/manuscript/05.8-agnostic-shapley.Rmd
@@ -92,7 +92,7 @@ cervical.x = cervical[names(cervical) != 'Biopsy']
 model <- caret::train(cervical.x,
                cervical$Biopsy,
                method = 'rf', ntree=ntree, maximise = FALSE)
-predictor = Predictor$new(model, class = 1, data = cervical.x)
+predictor = Predictor$new(model, class = "Cancer", data = cervical.x, type = "prob")
 
 instance_indices = 326
 x.interest = cervical.x[instance_indices,]
@@ -104,8 +104,7 @@ diff.prediction = actual.prediction - avg.prediction
 
 
 ```{r shapley-cervical-plot, fig.cap = sprintf("Feature value contributions for woman %i in the cervical cancer dataset. With a prediction of %.2f, this woman's cancer probability is %.2f above the average prediction of %.2f. The feature value that increased the probability the most is the number of diagnosed STDs. The feature contributions sum up to the difference of actual and average prediction (%.2f).", instance_indices, actual.prediction,diff.prediction, avg.prediction, diff.prediction)}
-# shapley1 = shapley(model, bike.train.x, x.interest = bike.train.x[instance_indices[1],], class = 2)
-shapley2 = Shapley$new(predictor, x.interest = x.interest)
+shapley2 = Shapley$new(predictor, x.interest = x.interest, sample.size = 100)
 plot(shapley2) + scale_y_continuous("Feature value contribution") +
   ggtitle(sprintf("Actual prediction: %.2f\nAverage prediction: %.2f\nDifference: %.2f", actual.prediction, avg.prediction, diff.prediction))
 ```