counterfactuals-with-constraint-solvers.html

<!doctype html><html lang=en-uk><head><script data-goatcounter=https://ruivieira-dev.goatcounter.com/count async src=//gc.zgo.at/count.js></script><script src=https://unpkg.com/@alpinejs/intersect@3.x.x/dist/cdn.min.js></script><script src=https://unpkg.com/alpinejs@3.x.x/dist/cdn.min.js></script><script type=module src=https://ruivieira.dev/js/deeplinks/deeplinks.js></script><link rel=preload href=https://ruivieira.dev/lib/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin=anonymous><link rel=preload href=https://ruivieira.dev/lib/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin=anonymous><link rel=preload href=https://ruivieira.dev/lib/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin=anonymous><link rel=preload href=https://ruivieira.dev/fonts/firacode/FiraCode-Regular.woff2 as=font type=font/woff2 crossorigin=anonymous><link rel=preload href=https://ruivieira.dev/fonts/vollkorn/Vollkorn-Regular.woff2 as=font type=font/woff2 crossorigin=anonymous><link rel=stylesheet href=https://ruivieira.dev/css/kbd.css type=text/css><meta charset=utf-8><meta http-equiv=X-UA-Compatible content="IE=edge"><title>Counterfactuals with Constraint Solvers · Rui Vieira</title>
<link rel=canonical href=https://ruivieira.dev/counterfactuals-with-constraint-solvers.html><meta name=viewport content="width=device-width,initial-scale=1"><meta name=robots content="all,follow"><meta name=googlebot content="index,follow,snippet,archive"><meta property="og:title" content="Counterfactuals with Constraint Solvers"><meta property="og:description" content="ScoringAn implementation on how to calculate counterfactuals with Constraint Solvers (namely OptaPlanner) is available here.
This implementation satisfies several criteria of the counterfactuals.
The penalisation score is represented with a BendableBigDecimalScore 1, having three &ldquo;hard&rdquo; levels and two &ldquo;soft&rdquo; levels.
The first hard level component, 1, penalises the score according to the distance between the prediction, $y^{\prime}$ for the currently proposed solution, $x^{\prime}$ and the original prediction $y$, that is this our $(\hat{f}(x^{\prime})-y^{\prime})^2$."><meta property="og:type" content="article"><meta property="og:url" content="https://ruivieira.dev/counterfactuals-with-constraint-solvers.html"><meta property="article:section" content="posts"><meta property="article:modified_time" content="2024-01-28T14:51:25+00:00"><meta name=twitter:card content="summary"><meta name=twitter:title content="Counterfactuals with Constraint Solvers"><meta name=twitter:description content="ScoringAn implementation on how to calculate counterfactuals with Constraint Solvers (namely OptaPlanner) is available here.
This implementation satisfies several criteria of the counterfactuals.
The penalisation score is represented with a BendableBigDecimalScore 1, having three &ldquo;hard&rdquo; levels and two &ldquo;soft&rdquo; levels.
The first hard level component, 1, penalises the score according to the distance between the prediction, $y^{\prime}$ for the currently proposed solution, $x^{\prime}$ and the original prediction $y$, that is this our $(\hat{f}(x^{\prime})-y^{\prime})^2$."><link rel=stylesheet href=https://ruivieira.dev/css/styles.css><!--[if lt IE 9]><script src=https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js></script><script src=https://oss.maxcdn.com/respond/1.4.2/respond.min.js></script><![endif]--><link rel=icon type=image/png href=https://ruivieira.dev/images/favicon.ico></head><body class="max-width mx-auto px3 ltr" x-data="{currentHeading: undefined}"><div class="content index py4"><div id=header-post><a id=menu-icon href=#><i class="fas fa-eye fa-lg"></i></a>
<a id=menu-icon-tablet href=#><i class="fas fa-eye fa-lg"></i></a>
<a id=top-icon-tablet href=# onclick='$("html, body").animate({scrollTop:0},"fast")' style=display:none aria-label="Top of Page"><i class="fas fa-chevron-up fa-lg"></i></a>
<span id=menu><span id=nav><ul><li><a href=https://ruivieira.dev/>Home</a></li><li><a href=https://ruivieira.dev/blog/>Blog</a></li><li><a href=https://ruivieira.dev/draw/>Drawings</a></li><li><a href=https://ruivieira.dev/map/>All pages</a></li><li><a href=https://ruivieira.dev/search.html>Search</a></li></ul></span><br><div id=share style=display:none></div><div id=toc><h4>Contents</h4><nav id=TableOfContents><ul><li><a href=#scoring :class="{'toc-h2':true, 'toc-highlight': currentHeading == '#scoring' }">Scoring</a></li><li><a href=#implementation :class="{'toc-h2':true, 'toc-highlight': currentHeading == '#implementation' }">Implementation</a></li><li><a href=#prediction-distance :class="{'toc-h3':true, 'toc-highlight': currentHeading == '#prediction-distance' }">Prediction distance</a></li><li><a href=#tolerance :class="{'toc-h3':true, 'toc-highlight': currentHeading == '#tolerance' }">Tolerance</a></li><li><a href=#gower-distance :class="{'toc-h3':true, 'toc-highlight': currentHeading == '#gower-distance' }">Gower distance</a></li><li><a href=#actionability-score :class="{'toc-h2':true, 'toc-highlight': currentHeading == '#actionability-score' }">Actionability score</a></li><li><a href=#confidence-score :class="{'toc-h2':true, 'toc-highlight': currentHeading == '#confidence-score' }">Confidence score</a></li><li><a href=#feature-distance :class="{'toc-h2':true, 'toc-highlight': currentHeading == '#feature-distance' }">Feature distance</a></li><li><a href=#searching :class="{'toc-h2':true, 'toc-highlight': currentHeading == '#searching' }">Searching</a></li></ul></nav><h4>Related</h4><nav><ul><li class="header-post toc"><span class=backlink-count>1</span>
<a href=https://ruivieira.dev/explainability.html>Explainability</a></li><li class="header-post toc"><span class=backlink-count>1</span>
<a href=https://ruivieira.dev/gower-distance.html>Gower distance</a></li><li class="header-post toc"><span class=backlink-count>1</span>
<a href=https://ruivieira.dev/counterfactuals.html>Counterfactuals</a></li></ul></nav></div></span></div><article class=post itemscope itemtype=http://schema.org/BlogPosting><header><h1 class=posttitle itemprop="name headline">Counterfactuals with Constraint Solvers</h1><div class=meta><div class=postdate>Updated <time datetime="2024-01-28 14:51:25 +0000 GMT" itemprop=datePublished>2024-01-28</time>
<span class=commit-hash>(<a href=https://ruivieira.dev/log/index.html#0c79fa3>0c79fa3</a>)</span></div></div></header><div class=content itemprop=articleBody><h2 id=scoring x-intersect="currentHeading = '#scoring'">Scoring</h2><p>An implementation on how to calculate <a href=https://ruivieira.dev/counterfactuals.html>counterfactuals</a> with Constraint Solvers (namely <a href>OptaPlanner</a>) is available <a href=https://github.com/kiegroup/trusty-ai-sandbox/tree/master/explainability/core/counterfactuals>here</a>.</p><p>This implementation satisfies several criteria of the <a href=https://ruivieira.dev/counterfactuals.html#desiderata>counterfactuals</a>.</p><p>The penalisation score is represented with a <code>BendableBigDecimalScore</code> <sup id=fnref:1><a href=#fn:1 class=footnote-ref role=doc-noteref>1</a></sup>, having three &ldquo;<em>hard</em>&rdquo; levels and two &ldquo;<em>soft</em>&rdquo; levels.</p><figure><img src=https://ruivieira.dev/Excalidraw/CF%20score%20structure.excalidraw.svg alt="CF score structure.excalidraw.svg" loading=lazy></figure><p>The first hard level component, <span class=point>1</span>, penalises the score according to the distance between the prediction, $y^{\prime}$ for the currently proposed solution, $x^{\prime}$ and the original prediction $y$, that is this our $(\hat{f}(x^{\prime})-y^{\prime})^2$. This corresponds to the counterfactual&rsquo;s <a href=https://ruivieira.dev/counterfactuals.html#validity>validity</a>.</p><p>The <a href=https://ruivieira.dev/counterfactuals.html#actionability>actionability</a> is score with <span class=point>2</span>. This component penalises the score according to number of <em>immutable</em> features which were changed in the counterfactual.</p><p>A confidence score component, <span class=point>3</span> is use to, optionally, impose a minimum confidence threshold to the counterfactual&rsquo;s associated prediction, $x^{\prime}$.</p><p>Finally, the <em>feature distance</em>, <span class=point>4</span>, penalises the score according to the feature distance. This is the representation of</p><p>$$
d(x, x^{\prime}).
$$</p><p>In the concrete implementation linked above, the distance, $d$, chosen is a <a href=https://ruivieira.dev/distance-metrics.html#manhattan-distance-l1>Manhattan</a> (or $L^1$) distance calculated feature-wise. This also corresponds to the <a href=https://ruivieira.dev/counterfactuals.html#validity>validity property.</a></p><h2 id=implementation x-intersect="currentHeading = '#implementation'">Implementation</h2><figure><img src=https://ruivieira.dev/pages/site/images/diagrams/counterfactual_impl.png alt=../../images/diagrams/counterfactual_impl.png loading=lazy></figure><p>Entities are defined by classes such as <code>Integer</code>, <code>Categorical</code>, <code>Boolean</code> or <code>Float</code>, as shown in <span class=point>5</span>.
Each of the features, shown in <span class=point>6</span>, is created as an instance of one of these entities. For instance, <code>feature1</code> would be of type <code>Integer</code> and <code>feature2</code> would be of type <code>Categorical</code>, <em>etc.</em></p><p>The original data point, $x$ is represented by this set of features (<span class=point>6</span>).</p><p>A planning solution (<code>PlanningSolution</code>), illustrated in <span class=point>7</span> will produce candidate solutions (shown in <span class=point>8</span>)</p><p>For each solution, we propose a new set of features ($x^{\prime}$) as a counterfactual candidate. For instance, ~solution A~ in <span class=point>8</span>.</p><p>In the following section we will look at how each component is calculated. We will refer to each &ldquo;hard&rdquo; level component as $H_1, H_2$ and $H_3$ and the &ldquo;soft&rdquo; component as $S_1$. The overal score consists, then, of $S={H_1, H_2, H_3, S_1 }$</p><h3 id=prediction-distance x-intersect="currentHeading = '#prediction-distance'">Prediction distance</h3><p>The first component of the score, <span class=point>1</span> is established by sending the proposed counterfactual $x^{\prime}$, <span class=point>8</span> to a predictive model, <span class=point>9</span> and calculating the distance between the desired outcome, $y^{\prime}$ and the model&rsquo;s prediction. This is done component wise, for each feature of the output. That is, for a prediction with $N$ features, we calculate</p><p>$$
H_1=\left(\sum_i^Nf(x^{\prime}_i) - y^{\prime}_i\right)^2
$$</p><h3 id=tolerance x-intersect="currentHeading = '#tolerance'">Tolerance</h3><p>For numerical features, the above score ($H_1$) will cause the counterfactual to be invalid, unless the distance between the outcomes and proposed values is <em>exactly zero</em>.</p><p>We can solve this problem by introducing a &ldquo;tolerance&rdquo; adjustment, which allows proposed values to be accepted if they are &ldquo;close enough&rdquo; to the goal.</p><p>To make the tolerance scale-invariant and unit-less we can use a relative change and set the distance to zero, if smaller than the threshold $t$, that is</p><p>$$
d = \begin{cases}
0,&\qquad\text{if},\frac{\vert f(x&rsquo;_i)-y&rsquo;_i\vert}{\max(\vert f(x&rsquo;_i)\vert,\vert y&rsquo;_i\vert)} &lt; t \\
\vert f(x&rsquo;_i)-y&rsquo;_i\vert,&\qquad\text{otherwise}
\end{cases}
$$</p><p>and compare to the threshold $t$. As an example, for a goal $y&rsquo;_i=3$ and a threshold of $t=0.01$, around the goal we would have the distance as in the figure below:</p><figure><img src=https://ruivieira.dev/pages/site/images/cf_distance_1.png alt=../../images/cf_distance_1.png loading=lazy></figure><p>This would however fail for the edge case where $y^{\prime}_i=0$ as we can see below:</p><figure><img src=https://ruivieira.dev/pages/site/images/cf_distance_2.png alt=../../images/cf_distance_2.png loading=lazy></figure><p>To solve this, we can introduce a special case for $y^{\prime}_i=0$, such that:</p><p>$$
d_{g=0} = \begin{cases}
0,&\qquad \text{if}\ |f(x^{\prime}_i)|&lt; t\\
\lVert f(x^{\prime}_i) - y^{\prime}_i \rVert,&\qquad\text{otherwise}
\end{cases}
$$</p><p>So that we have now the desired behaviour at $y^{\prime}_i=0$:</p><figure><img src=https://ruivieira.dev/pages/site/images/cf_distance_3.png alt=../../images/cf_distance_3.png loading=lazy></figure><h3 id=gower-distance x-intersect="currentHeading = '#gower-distance'">Gower distance</h3><p>An alternative metric for the outcome distance (and mixed variables in general) is the <a href>site/Machine learning/Gower distance</a>.</p><h2 id=actionability-score x-intersect="currentHeading = '#actionability-score'">Actionability score</h2><p>For the second component, the actionability score, <span class=point>2</span>. We calculate the number of features for the protected set $\mathcal{A}$, which have a different value from the original. That is, assuming we have a certain number of protectd features $M$, such that $\mathcal{A}={A_1,A_2,\dots,A_M}$, we calculate:</p><p>$$
H_2 = \sum_{a \in \mathcal{A}} \mathbb{1}(x_a \neq x^{\prime}_a),
$$</p><h2 id=confidence-score x-intersect="currentHeading = '#confidence-score'">Confidence score</h2><p>For each feature $i$, if we have a prediction confidence, $p_i(f(x^{\prime}))$, we calculate the number of features which have a confidence below a certain predefined threshold, $P_i$. If the threshold is not defined, this component will always be zero and not influence the counterfactual selection. Assuming we have defined a threshold for all $N$ features, $P = {P_1, P_2, \dots, P_N}$ we calculate this score as</p><p>$$
H_3 = \sum_i^N \mathbb{1} \left( p_i \left( f(x^{\prime}) &lt; P_i \right) \right)
$$</p><h2 id=feature-distance x-intersect="currentHeading = '#feature-distance'">Feature distance</h2><p>Considering that each datapoint $x$ consists of different $N$ features, such that $x=\left(f_1,\dots,f_n\right)$ and that each feature might be numerical or categorical<sup id=fnref:2><a href=#fn:2 class=footnote-ref role=doc-noteref>2</a></sup>, we calculate the distance between a datapoint $x$ and a potential counterfactual $x^{\prime}$:</p><p>$$
d\left(x,x^{\prime}\right)=\sum_{i=1}^Nd^{\prime}\left(x_i,x_i^{\prime}\right)
$$
$$
d^{\prime}\left(x_i,x_i^{\prime}\right)=
\begin{cases}
\left(x_i-x_i^{\prime}\right)^2,\quad\text{if}\ x_i,x_i^{\prime}\in\mathbb{N} \lor x_i,x_i^{\prime}\in\mathbb{R}\
1-\delta_{x,x^{\prime}},\quad\text{if}\ x_i,x_i^{\prime}\ \text{categorical}
\end{cases}
$$</p><p>Since in many scenarios we might not have access to the training data, the above distance are not normalised. In the event that we do have access to training data, then we can use the <strong>standard deviation</strong> ($SD$) to normalise the features. The $SD$ can be calculated as:</p><p>$$
SD=\sqrt{\frac{1}{N}\sum_{i=1}^N\left(x_i-\bar{x}\right)^2}
$$</p><p>so that, in this case, we scale the numerical features with</p><p>$$
\bar{d}^{\prime}\left(x_i,x_i^{\prime}\right)=
\frac{\left(x_i-x_i^{\prime}\right)^2}{SD}.
$$</p><h2 id=searching x-intersect="currentHeading = '#searching'">Searching</h2><p>To search for a counterfactual, we start by specifying a search domain for each feature.
This will include:</p><ul><li>An upper and lower bounds for numerical features, respectively $\mathcal{D}_l, \mathcal{D}_u$</li><li>A set of categories for categorical features, $\mathcal{C}$</li><li>$\mathcal{B}={0,1}$ for the specific case of boolean/binary values</li></ul><p>Typically these values would be either established by someone with domain knowledge, or by values that might reflect our expectation for the actual counterfactual (for instance, an ~age~ would have realistic values).</p><p>The algorithm used for the search is <strong>Tabu search</strong><sup id=fnref:3><a href=#fn:3 class=footnote-ref role=doc-noteref>3</a></sup> (Glover, 1989).</p><figure><img src=https://ruivieira.dev/pages/site/images/diagrams/tabu_chart.png alt=../../images/diagrams/tabu_chart.png loading=lazy></figure><div class=footnotes role=doc-endnotes><hr><ol><li id=fn:1><p><a href=https://docs.optaplanner.org/8.0.0.Final/optaplanner-javadoc/org/optaplanner/core/api/score/buildin/bendablebigdecimal/BendableBigDecimalScore.html>BendableBigDecimalScore documentation</a>&#160;<a href=#fnref:1 class=footnote-backref role=doc-backlink>&#8617;&#xfe0e;</a></p></li><li id=fn:2><p>Here we are considering binary and boolean values as categorical.&#160;<a href=#fnref:2 class=footnote-backref role=doc-backlink>&#8617;&#xfe0e;</a></p></li><li id=fn:3><p>cite:glover1989tabu Glover, F. (1989). Tabu search—part i. ORSA Journal on computing, 1(3), 190–206.&#160;<a href=#fnref:3 class=footnote-backref role=doc-backlink>&#8617;&#xfe0e;</a></p></li></ol></div></div></article><div id=footer-post-container><div id=footer-post><div id=nav-footer style=display:none><ul><li><a href=https://ruivieira.dev/>Home</a></li><li><a href=https://ruivieira.dev/blog/>Blog</a></li><li><a href=https://ruivieira.dev/draw/>Drawings</a></li><li><a href=https://ruivieira.dev/map/>All pages</a></li><li><a href=https://ruivieira.dev/search.html>Search</a></li></ul></div><div id=toc-footer style=display:none><nav id=TableOfContents><ul><li><a href=#scoring>Scoring</a></li><li><a href=#implementation>Implementation</a><ul><li><a href=#prediction-distance>Prediction distance</a></li><li><a href=#tolerance>Tolerance</a></li><li><a href=#gower-distance>Gower distance</a></li></ul></li><li><a href=#actionability-score>Actionability score</a></li><li><a href=#confidence-score>Confidence score</a></li><li><a href=#feature-distance>Feature distance</a></li><li><a href=#searching>Searching</a></li></ul></nav></div><div id=share-footer style=display:none></div><div id=actions-footer><a id=menu-toggle class=icon href=# onclick='return $("#nav-footer").toggle(),!1' aria-label=Menu><i class="fas fa-bars fa-lg" aria-hidden=true></i> Menu</a>
<a id=toc-toggle class=icon href=# onclick='return $("#toc-footer").toggle(),!1' aria-label=TOC><i class="fas fa-list fa-lg" aria-hidden=true></i> TOC</a>
<a id=share-toggle class=icon href=# onclick='return $("#share-footer").toggle(),!1' aria-label=Share><i class="fas fa-share-alt fa-lg" aria-hidden=true></i> share</a>
<a id=top style=display:none class=icon href=# onclick='$("html, body").animate({scrollTop:0},"fast")' aria-label="Top of Page"><i class="fas fa-chevron-up fa-lg" aria-hidden=true></i> Top</a></div></div></div><footer id=footer><div class=footer-left>Copyright &copy; 2024 Rui Vieira</div><div class=footer-right><nav><ul><li><a href=https://ruivieira.dev/>Home</a></li><li><a href=https://ruivieira.dev/blog/>Blog</a></li><li><a href=https://ruivieira.dev/draw/>Drawings</a></li><li><a href=https://ruivieira.dev/map/>All pages</a></li><li><a href=https://ruivieira.dev/search.html>Search</a></li></ul></nav></div></footer></div></body><link rel=stylesheet href=https://ruivieira.dev/css/fa.min.css><script src=https://ruivieira.dev/js/jquery-3.6.0.min.js></script><script src=https://ruivieira.dev/js/mark.min.js></script><script src=https://ruivieira.dev/js/main.js></script><script>MathJax={tex:{inlineMath:[["$","$"],["\\(","\\)"]]},svg:{fontCache:"global"}}</script><script type=text/javascript id=MathJax-script async src=https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js></script></html>