-
Notifications
You must be signed in to change notification settings - Fork 0
/
ksummarizer.tex
316 lines (264 loc) · 11.7 KB
/
ksummarizer.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
\documentclass{article}
\input{prelim}
\title{The \K Summarizer}
\author{Runtime Verification, Inc.}
\begin{document}
\maketitle
The purpose of this document is to provide a formal specification of
the \K summarizer (\url{https://github.com/runtimeverification/erc20-verification/tree/master/ksummarize})
that is mathematically sound
and is faithful to the implementation.
We will also study the case studies
of the summarizer for semantics-based compilation (SBC),
model checking, and formal verification.
\section{Preliminaries}
Throughout this document, we assume that there is an
underlying matching logic theory $\Gamma^L$ that defines the formal semantics
of some programming language $L$.
In \Cref{fig:notations} we list the standard notations that will be used
in this document.
\begin{figure}
\fbox{\parbox{\textwidth}{
\emph{The following standard notations will be used:}
\begin{itemize}\renewcommand\labelitemi{--}
\item $\varphi$, $\psi$: an arbitrary pattern.
\item $t$: a term pattern, built from element variables and functional symbols.
\item $p$: a predicate pattern.
\item $t \land p$: a constraint term.
\item $\Gamma^L \vdash \varphi$: $\varphi$ is provable from $\Gamma^L$.
\item $\tau \equiv [\varphi_1 / x_1 , \dots , \varphi_n / x_n]$: a substitution.
\item $\varphi\tau$ or $\varphi[\varphi_1 / x_1 , \dots , \varphi_n / x_n]$:
applying the substitution to $\varphi$.
\item $\ceil{\varphi}$: the definedness pattern of $\varphi$.
\item $\onext \varphi$: ``one-path next'' of $\varphi$.
\item $\anext \varphi$: ``all-path next'' of $\varphi$, defined as
$\anext \varphi \equiv \lnot \onext \lnot \varphi$.
\item $\itSTOP$: stopped/terminal states; abbreviation for $\anext \bot$.
\item $\itNONSTOP$: non-stopped states; abbreviation for $\onext \top$.
\item $\sanext \varphi$: ``strongly all-path next'' of $\varphi$,
defined as $\sanext \varphi \equiv \anext \varphi \land \itNONSTOP$.
\item $\varphi \ToExOne \varphi'$: ``one-step one-path execution''; abbreviation for $\varphi \imp \onext \varphi'$.
\item $\varphi \ToAlOne \varphi'$: ``one-step all-path execution'';
abbreviation for $\varphi \imp \sanext \varphi'$.
\item $\varphi \ToAl \varphi'$: ``all-path execution'';
abbreviation for $\varphi \imp \mu X \ld \varphi \lor \anext X$.
\item more to be added \ldots
\end{itemize}
}}
\caption{List of Notations}
\label{fig:notations}
\end{figure}
\section{Matching and Unification}
In this section we formalize matching and unification as proving matching logic theorems.
Some definitions and results are from Arusoaie and Lucanu's paper \url{https://arxiv.org/pdf/1811.02835.pdf}.
\subsection{Substitution Patterns}
\begin{definition}
Given a substitution
\[\tau \equiv [\varphi_1 / x_1 , \dots , \varphi_n / x_n] \]
we define a corresponding \emph{substitution pattern}
\[\varphitau \equiv (x_1 = \varphi_1) \land \dots \land (x_n = \varphi_n) \]
\end{definition}
\begin{proposition}
$\Gamma^L \vdash \varphitau \imp (\psi = \psi\tau)$.
\end{proposition}
\begin{proof}
This (derived) proof rule is called \prule{Equality Elimination}.
\end{proof}
\subsection{Matching}
\begin{definition}
\label{def:matching}
Let $\varphi$ and $\psi$ be two patterns without free set variables.
We call $\varphi$ the \emph{left pattern}
and $\psi$ the \emph{right pattern}.\footnote{Usually $\varphi$ is called the ``term'' and $\psi$ the ``pattern'', but since matching logic formulas are already called patterns, we use left/right patterns to avoid confusion.}
We say that $\varphi$ \emph{matches} $\psi$
if
\[\Gamma^L \vdash \varphi \imp \exists \FV{\psi} \ld \psi \]
\end{definition}
\begin{example}
Suppose that $f$ is a function symbol.
Then $f(0,0)$ matches $f(x,y)$, because
\[\Gamma^L \vdash f(0,1) \imp \exists x \exists y \ld f(x,y)\]
\end{example}
\begin{example}
Suppose that $f$ is a function symbol.
Then $f(0,z)$ matches $f(x,y)$ because
\[\Gamma^L \vdash f(0,z) \imp \exists x \exists y \ld f(x,y)\]
Note that the above is equivalent to
\[\Gamma^L \vdash \forall z \ld \left(f(0,z) \imp \exists x \exists y \ld f(x,y)\right)\]
In other words, all variables in the left pattern are universally quantified.
\end{example}
\begin{example}
Suppose that $f$ is a function symbol.
Then $f(0, x)$ matches $f(x,y)$ because
\[\Gamma^L \vdash f(0,x) \imp \exists x \exists y \ld f(x,y)\]
Note that $x$ in the right pattern is quantified/bound.
Generally speaking, all the variables in the right pattern are quantified
and can be freely renamed.
It allows us to assume that $\varphi$ and $\psi$ have distinct free variables
without loss of generality.
\end{example}
Semantically speaking,
$\varphi$ matches $\psi$ implies that
\[\ev{\varphi}{\rho,M} \subseteq \ev{\exists \FV{\psi} \ld \psi}{\rho,M}
= \bigcup_{a_1,\dots,a_n \in M} \ev{\psi}{\rho[a_1/x_1,\dots,a_n/x_n],M}
\]
where $\FV{\psi} = \{x_1,\dots,x_n\}$.
If $\varphi$ is a unit pattern (i.e., a functional pattern),
then $\ev{\varphi}{\rho,M}$ is a unit set and there exist
$a_1,\dots,a_n \in M$ such that
\[\ev{\varphi}{\rho,M} \subseteq \ev{\psi}{\rho[a_1/x_1,\dots,a_n/x_n],M}
\]
Pattern matching gives us closed-form solutions of
these witness values $a_1,\dots,a_n$.
Formally,
\begin{definition}
Let $\varphi$ and $\psi$ be two patterns
and $\sigma$ be a substitution pattern.
We say that $\sigma$ is a \emph{solution} to the matching problem
\[\varphi \matchQ \psi\]
if
\[\Gamma^L \vdash \varphi \imp \psi\tau\]
\end{definition}
there exists a particular value $v_i$ for each $x_i \in \FV{\psi}$ such that
$\Gamma^L \vdash \varphi \imp \psi[v_1/x_1, v_2/x_2,\dots]$,
where $v_i$ depends on
\begin{definition}
We say that $\{\sigma_1,\dots,\sigma_n\}$ is a \emph{complete solution}
to the matching problem
\[\varphi_1 \matchQ \psi_1, \dots, \varphi_m \matchQ \psi_m\] if
\[\vdash \left(\bigwedge_{i=1}^m \varphi_i \subseteq \psi_i \right)
\dimp \varphi^{\tau_1} \lor \dots \lor \varphi^{\tau_n}
\]
\end{definition}
\begin{proposition}
For terms $t$ and $s$,
\Cref{def:matching} coincides with the classical definition of term matching.
\end{proposition}
\begin{proposition}
$\varphi$ matches $\psi$ if and only if
\[\vdash \left(\exists \FV{\varphi} \ld \varphi\right)
\subseteq \left(\exists \FV{\psi} \ld \psi\right)
\]
\end{proposition}
\begin{proof}
By \Cref{def:matching}.
\end{proof}
\subsection{Unification}
\begin{definition}
\label{def:unification}
Let $\varphi$ and $\psi$ be two patterns. We say that $\varphi$ \emph{unifies}
with $\psi$ if
\[\vdash \ceil{\left(\exists \FV{\varphi} \ld \varphi\right)
\land \left(\exists \FV{\psi} \ld \psi\right)}\]
We say that $\{\sigma_1,\dots,\sigma_n\}$ is a \emph{complete solution}
to the unification problem
\[\varphi_1 \unifyQ \psi_1, \dots, \varphi_m \unifyQ \psi_m\]
if
\[\vdash \left(\bigwedge_{i=1}^m \ceil{\varphi_i \land \psi_i}\right)
\dimp \varphi^{\tau_1} \lor \dots \lor \varphi^{\tau_n}
\]
\end{definition}
\begin{proposition}
For terms $t$ and $s$,
\Cref{def:unification} coincides with the classical definition of term unification.
\end{proposition}
\subsection{Modulo Theories}
\Cref{def:matching,def:unification} work with underlying theories,
in which case we obtain matching/unification modulo theories.
\section{Transition Systems and Rewriting Relations}
TBA
\section{\K Summarizer Assuming Determinism}
Throughout this section, let us assume that
the underlying formal semantics are deterministic.
Formally, it means that
$\Gamma^L \vdash \onext \varphi \imp \anext \varphi$
for all $\varphi$ of sort $\Cfg$, which is the sort of computation configurations.
We introduce the following notations:
\begin{itemize}\renewcommand\labelitemi{--}
\item $t_1 \To_1 t_2$: one-step rewriting
\item $t_1 \To_n t_2$: $n$-step rewriting
\item $t_1 \To_* t_2$: finite-step rewriting
\item $t_1 \To_+ t_2$: finite-step at-least-one-step rewriting
\end{itemize}
\begin{definition}
\label{def:KCFG}
A \K control-flow graph (abbreviated KCFG)
$G = (V, \Er, \Ea, \Es)$ is a directed graph
with three types of edges, where
\begin{itemize}
\item the vertex set $V$ is a set of patterns of sort $\Cfg$;
\item $\Er \subseteq V \times V$ is called the \emph{rewriting relation};
\item $\Ea \subseteq V \times V$ is called the \emph{abstracting relation};
\item $\Es \subseteq V \times V$ is called the \emph{splitting relation}.
\end{itemize}
We write $\varphi \tor \psi$
($\varphi \toa \psi$ and $\varphi \tos \psi$, resp.)
for the three types of edges.
\end{definition}
\Cref{def:KCFG} defines KCFGs in its most general form
as directed graphs with patterns as its nodes and
three types of edges.
The actual KCFGs that are generated by the \K summarizer
are of a more specific form, which we refer to as \emph{regular} KCFGs.
A regular KCFG has \emph{constrained terms}
of the form $t \land p$ as its nodes,
where $t$ is a term and $p$ is a predicate.
The rewriting relation $\Er$ corresponds to the executions of basic
blocks.
The abstracting relation $\Ea$ corresponds to pattern matching.
The splitting relation $\Es$ corresponds to adding
additional path conditions.
In the following we will formalize regular KCFGs that are constructed by the \K summarizer.
\subsection{Input to the \K Summarizer}
\label{sec:k-summarizer-input}
\paragraph{Formal Semantics.}
Let $S = \{ \lhs_i \To \rhs_i \mid i = 1, \dots, \abs{S} \} $ be the set of semantic rules.
Further, let $\Scut \cup \Sncut$ be a division of $S$ where
rules in $\Scut$ are called \emph{cut rules}.
Rules in $\Sncut$ are called \emph{non-cut rules}.
The left-hand sides of the cut-rules are called \emph{cut patterns}.
The set of all cut patterns is defined as
\[\Pcut = \{\lhs_i \mid \lhs_i \To \rhs_i \in \Scut\}\]
\paragraph{Initial Pattern.}
Let $\ct_0 \equiv t_0 \land p_0$ be the constrained term that represents the initial configuration
that is given to the \K summarizer for which a KCFG is constructed.
\paragraph{Target Patterns.}
Let $\Pt$ be a set of patterns called the \emph{target patterns}.
When a target pattern is reached the \K summarizer stops the construction of the KCFG.
\paragraph{Abstracting Function.}
Let $\absfun$ be the abstracting function
from constrained terms to constrained terms, with the property that
for any $\ct$ there exists a substitution $\sigma$ such that
\[\Gamma^L \vdash \ct = \absfun(\ct)\sigma\]
\subsection{Regular KCFGs}
\begin{definition}
Given the input in \Cref{sec:k-summarizer-input}
A \emph{regular}
KCFG w.r.t. $\ct_0$
\[G = (V, \ct_0, \Er, \Ea, \Es)\]
satisfies the following conditions.
\begin{enumerate}
\item $(V, \Er, \Ea, \Es)$ is a KCFG.
\item $V$ is a set of constrained terms and $\ct_0 \in V$.
\item For every $\ct \in V$, there can be only one type of outgoing edges
(rewriting, abstracting, or splitting).
\item For every $\ct_1 \tor \ct_2$, $\Gamma^L \vdash \ct_1 \To_+ \ct_2$.
Also, $\ct_2$ is the only successor node of $\ct_1$ in $G$.
\item For every $\ct_1 \toa \ct_2$, there exists a substitution $\sigma$ such that $\Gamma^L \vdash \ct_1 = \ct_2\sigma$.
We shall write $\ct_1 \toa^\sigma \ct_2$ to indicate the witness substitution $\sigma$.
Also, $\ct_2$ is the only successor node of $\ct_1$ in $G$.
\item For every $\ct_1 \tos \ct_2$, there exists
$p$ such that $\Gamma^L \vdash \ct_2 = \ct_1 \land p$.
We shall write $\ct_1 \tos^p \ct_2$ to indicate the witness predicate $p$.
\item Let $\ct \in V$ be a node that has outgoing spitting edges
and $\ct_1,\dots,\ct_n$ are all of its ($\Es$-)successors:
\[\ct\tos^{p_1} \ct_1 , \dots , \ct \tos^{p_n} \ct_n\]
Then $\Gamma^L \vdash p_1 \lor \dots \lor p_n$.
\item $(G, \Er \cup \Es)$ forms a tree.
\item There are no consecutive edges of the same type because they can be collapsed (using the \verb|remove_intermediate_covers| routine).
\end{enumerate}
We call the above regular KCFG \emph{complete} if
all the target nodes (i.e., nodes that do not have any outgoing edges)
are target patterns given in $\Pt$.
\end{definition}
\end{document}