From 6e540791005c9f91061ed78a8423e7b7bc50de6a Mon Sep 17 00:00:00 2001 From: Christoph Molnar Date: Tue, 22 May 2018 14:47:00 +0200 Subject: [PATCH] fixes problems due to missing type="prob" --- manuscript/05.1-agnostic.Rmd | 2 +- manuscript/05.6-agnostic-global-surrogate.Rmd | 2 +- manuscript/05.7-agnostic-lime.Rmd | 2 +- manuscript/05.8-agnostic-shapley.Rmd | 5 ++--- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/manuscript/05.1-agnostic.Rmd b/manuscript/05.1-agnostic.Rmd index 552e781a0..928630af8 100644 --- a/manuscript/05.1-agnostic.Rmd +++ b/manuscript/05.1-agnostic.Rmd @@ -29,7 +29,7 @@ Let's take a high level view on model-agnostic interpretability. We first abstract the world by capturing it by collecting data and abstract it further by learning the essence of the data (for the task) with a machine learning model. Interpretability is just another layer on top, that helps humans understand. -```{r bigpicture, fig.cap="The big picture of explainable machine learning. The real world goes through many layers before it reaches the human in forms of explanations."} +```{r bigpicture, fig.cap="The big picture of explainable machine learning. The real world goes through many layers before it reaches the human in the form of explanations."} knitr::include_graphics("images/big-picture.png") ``` diff --git a/manuscript/05.6-agnostic-global-surrogate.Rmd b/manuscript/05.6-agnostic-global-surrogate.Rmd index ea16d5a2b..9375fe3a1 100644 --- a/manuscript/05.6-agnostic-global-surrogate.Rmd +++ b/manuscript/05.6-agnostic-global-surrogate.Rmd @@ -109,7 +109,7 @@ data(cervical) cervical.task = makeClassifTask(data = cervical, target = "Biopsy") mod.cervical = mlr::train(mlr::makeLearner(cl = 'classif.randomForest', predict.type = "prob"), cervical.task) -pred.cervical = Predictor$new(mod.cervical, data = cervical[names(cervical) != "Biopsy"]) +pred.cervical = Predictor$new(mod.cervical, data = cervical[names(cervical) != "Biopsy"], type = "prob") tree.cervical = TreeSurrogate$new(pred.cervical, maxdepth = 2) plot(tree.cervical) pred.tree.cervical = predict(tree.cervical, cervical)["Cancer"] diff --git a/manuscript/05.7-agnostic-lime.Rmd b/manuscript/05.7-agnostic-lime.Rmd index 1c6e12599..2ee158d88 100644 --- a/manuscript/05.7-agnostic-lime.Rmd +++ b/manuscript/05.7-agnostic-lime.Rmd @@ -236,7 +236,7 @@ library("iml") library("gridExtra") instance_indices = c(295, 8) set.seed(44) -pred = Predictor$new(model, data = bike.train.x, class = "above") +pred = Predictor$new(model, data = bike.train.x, class = "above", type = "prob") lim = LocalModel$new(pred, x.interest = bike.train.x[instance_indices[1],], k = n_features_lime) a = plot(lim) diff --git a/manuscript/05.8-agnostic-shapley.Rmd b/manuscript/05.8-agnostic-shapley.Rmd index dfb37f3c2..f9b3dc614 100644 --- a/manuscript/05.8-agnostic-shapley.Rmd +++ b/manuscript/05.8-agnostic-shapley.Rmd @@ -92,7 +92,7 @@ cervical.x = cervical[names(cervical) != 'Biopsy'] model <- caret::train(cervical.x, cervical$Biopsy, method = 'rf', ntree=ntree, maximise = FALSE) -predictor = Predictor$new(model, class = 1, data = cervical.x) +predictor = Predictor$new(model, class = "Cancer", data = cervical.x, type = "prob") instance_indices = 326 x.interest = cervical.x[instance_indices,] @@ -104,8 +104,7 @@ diff.prediction = actual.prediction - avg.prediction ```{r shapley-cervical-plot, fig.cap = sprintf("Feature value contributions for woman %i in the cervical cancer dataset. With a prediction of %.2f, this woman's cancer probability is %.2f above the average prediction of %.2f. The feature value that increased the probability the most is the number of diagnosed STDs. The feature contributions sum up to the difference of actual and average prediction (%.2f).", instance_indices, actual.prediction,diff.prediction, avg.prediction, diff.prediction)} -# shapley1 = shapley(model, bike.train.x, x.interest = bike.train.x[instance_indices[1],], class = 2) -shapley2 = Shapley$new(predictor, x.interest = x.interest) +shapley2 = Shapley$new(predictor, x.interest = x.interest, sample.size = 100) plot(shapley2) + scale_y_continuous("Feature value contribution") + ggtitle(sprintf("Actual prediction: %.2f\nAverage prediction: %.2f\nDifference: %.2f", actual.prediction, avg.prediction, diff.prediction)) ```