Skip to content

Commit

Permalink
illustrate levenshtein alignments
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Feb 28, 2024
1 parent 49c1a31 commit 1b29790
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 31 deletions.
55 changes: 28 additions & 27 deletions latex/splash2024/nfa_cfg.tex
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,21 @@
\node[accepting, state, right of=30] (40) {$q_{4,0}$};
\node[accepting, state, right of=40] (50) {$q_{5,0}$};

\node[state, above of=00, shift={(-1.5cm,-0.0cm)}] (01) {$q_{0,1}$};
\node[state, above of=00, shift={(-2cm,0cm)}] (01) {$q_{0,1}$};
\node[state, right of=01] (11) {$q_{1,1}$};
\node[state, right of=11] (21) {$q_{2,1}$};
\node[accepting, state, right of=21] (31) {$q_{3,1}$};
\node[accepting, state, right of=31] (41) {$q_{4,1}$};
\node[accepting, state, right of=41] (51) {$q_{5,1}$};

\node[state, above of=01, shift={(-1.5cm,-0.0cm)}] (0j) {$q_{0,2}$};
\node[state, above of=01, shift={(-2cm,0cm)}] (0j) {$q_{0,2}$};
\node[state, right of=0j] (1j) {$q_{1,2}$};
\node[state, right of=1j] (2j) {$q_{2,2}$};
\node[state, right of=2j] (3j) {$q_{3,2}$};
\node[accepting, state, right of=3j] (4j) {$q_{4,2}$};
\node[accepting, state, right of=4j] (5j) {$q_{5,2}$};

\node[state, above of=0j, shift={(-1.5cm,0.0cm)}] (0k) {$q_{0,3}$};
\node[state, above of=0j, shift={(-2cm,0cm)}] (0k) {$q_{0,3}$};
\node[state, right of=0k] (1k) {$q_{1,3}$};
\node[state, right of=1k] (2k) {$q_{2,3}$};
\node[state, right of=2k] (3k) {$q_{3,3}$};
Expand All @@ -41,9 +41,9 @@
\draw [->] (40) edge[below] node{$\sigma_5$} (50);

\draw [->] (01) edge[below] node{$\sigma_1$} (11);
\draw [->] (11) edge[below] node{$\sigma_2$} (21);
\draw [->] (21) edge[below] node{$\sigma_3$} (31);
\draw [->] (31) edge[below] node{$\sigma_4$} (41);
\draw [->] (11) edge[below] node[shift={(-0.2cm,0cm)}]{$\sigma_2$} (21);
\draw [->] (21) edge[below] node[shift={(-0.2cm,0cm)}]{$\sigma_3$} (31);
\draw [->] (31) edge[below] node[shift={(-0.2cm,0cm)}]{$\sigma_4$} (41);
\draw [->] (41) edge[below] node{$\sigma_5$} (51);

\draw [->] (0j) edge[below] node{$\sigma_1$} (1j);
Expand All @@ -65,15 +65,16 @@
\draw [->] (40) edge[left] node{$\phantom{\cdot}$} (51);

% Super-knight arcs
\draw [->, red] (00) edge[bend left=3] node[east, shift={(-0.3cm,-0.65cm)}]{$\color{red}\sigma_3$} (3j);
\draw [->, red] (10) edge[bend left=3] node[east, shift={(-0.3cm,-0.65cm)}]{$\color{red}\sigma_4$} (4j);
\draw [->, red] (20) edge[bend left=3] node[east, shift={(-0.3cm,-0.65cm)}]{$\color{red}\sigma_5$} (5j);
\draw [->, red] (01) edge[bend left=3] node[east, shift={(-0.3cm,-0.65cm)}]{$\color{red}\sigma_3$} (3k);
\draw [->, red] (11) edge[bend left=3] node[east, shift={(-0.3cm,-0.65cm)}]{$\color{red}\sigma_4$} (4k);
\draw [->, red] (21) edge[bend left=3] node[east, shift={(-0.3cm,-0.65cm)}]{$\color{red}\sigma_5$} (5k);
\draw [->, red] (00) edge[bend right=8] node[east, shift={(-0.2cm,-0.7cm)}]{$\color{red}\sigma_3$} (3j);
\draw [->, red] (10) edge[bend right=8] node[east, shift={(-0.2cm,-0.7cm)}]{$\color{red}\sigma_4$} (4j);
\draw [->, red] (20) edge[bend right=8] node[east, shift={(-0.2cm,-0.7cm)}]{$\color{red}\sigma_5$} (5j);

\draw [->, violet] (00) edge[bend right=1] node[east, shift={(0cm,0.35cm)}]{$\color{violet}\sigma_4$} (4k);
\draw [->, violet] (10) edge[bend right=1] node[east, shift={(0cm,0.35cm)}]{$\color{violet}\sigma_5$} (5k);
\draw [->, red] (01) edge[bend left=8] node[east, shift={(-0.2cm,-0.7cm)}]{$\color{red}\sigma_3$} (3k);
\draw [->, red] (11) edge[bend left=8] node[east, shift={(-0.2cm,-0.7cm)}]{$\color{red}\sigma_4$} (4k);
\draw [->, red] (21) edge[bend left=8] node[east, shift={(-0.2cm,-0.7cm)}]{$\color{red}\sigma_5$} (5k);

\draw [->, violet] (00) edge node[east, shift={(-0.1cm,-0.8cm)}]{$\color{violet}\sigma_4$} (4k);
\draw [->, violet] (10) edge node[east, shift={(-0.1cm,-0.8cm)}]{$\color{violet}\sigma_5$} (5k);

\draw [->] (01) edge[left] node{$\phantom{\cdot}$} (1j);
\draw [->] (11) edge[left] node{$\phantom{\cdot}$} (2j);
Expand Down Expand Up @@ -108,23 +109,23 @@
\draw [->] (4j) edge[bend left=10, left] node{$\phantom{\cdot}$} (4k);
\draw [->] (5j) edge[bend left=10, left] node{$\phantom{\cdot}$} (5k);

\draw [->, blue] (00) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_2$} (21);
\draw [->, blue] (10) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_3$} (31);
\draw [->, blue] (20) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_4$} (41);
\draw [->, blue] (30) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_5$} (51);
\draw [->, blue] (00) edge[bend right=11,below] node[shift={(0.5cm,0.3cm)}]{$\color{blue}\sigma_2$} (21);
\draw [->, blue] (10) edge[bend right=11,below] node[shift={(0.5cm,0.3cm)}]{$\color{blue}\sigma_3$} (31);
\draw [->, blue] (20) edge[bend right=11,below] node[shift={(0.5cm,0.3cm)}]{$\color{blue}\sigma_4$} (41);
\draw [->, blue] (30) edge[bend right=11,below] node[shift={(0.5cm,0.3cm)}]{$\color{blue}\sigma_5$} (51);

% Blue knight arcs from the q_{i,1} row to the q_{i+2,2} row. Each arc is
% labelled with the symbol of its target column, \sigma_{i+2}, so the arc
% entering q_{5,2} must carry \sigma_5 (it was mislabelled \sigma_4).
\draw [->, blue] (01) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_2$} (2j);
\draw [->, blue] (11) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_3$} (3j);
\draw [->, blue] (21) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_4$} (4j);
\draw [->, blue] (31) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_5$} (5j);
\draw [->, blue] (01) edge[bend right=3,below] node[shift={(0.3cm,0.2cm)}]{$\color{blue}\sigma_2$} (2j);
\draw [->, blue] (11) edge[bend right=3,below] node[shift={(0.3cm,0.2cm)}]{$\color{blue}\sigma_3$} (3j);
\draw [->, blue] (21) edge[bend right=3,below] node[shift={(0.3cm,0.2cm)}]{$\color{blue}\sigma_4$} (4j);
\draw [->, blue] (31) edge[bend right=3,below] node[shift={(0.3cm,0.2cm)}]{$\color{blue}\sigma_5$} (5j);

\draw [->, blue] (0j) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_2$} (2k);
\draw [->, blue] (1j) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_3$} (3k);
\draw [->, blue] (2j) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_4$} (4k);
\draw [->, blue] (3j) edge[bend left=2,below] node[shift={(-0.7cm,-0.5cm)}]{$\color{blue}\sigma_5$} (5k);
\draw [->, blue] (0j) edge[bend left=8,below] node[shift={(-0.45cm,-0.55cm)}]{$\color{blue}\sigma_2$} (2k);
\draw [->, blue] (1j) edge[bend left=8,below] node[shift={(-0.45cm,-0.55cm)}]{$\color{blue}\sigma_3$} (3k);
\draw [->, blue] (2j) edge[bend left=8,below] node[shift={(-0.45cm,-0.55cm)}]{$\color{blue}\sigma_4$} (4k);
\draw [->, blue] (3j) edge[bend left=8,below] node[shift={(-0.45cm,-0.55cm)}]{$\color{blue}\sigma_5$} (5k);

%https://tex.stackexchange.com/a/20986/139648
\draw [decorate,decoration={brace,amplitude=10pt,raise=10pt,mirror}] (00.south west) -- (50.south east) node[midway,yshift=-3em]{\textbf{String length}};
\draw [decorate,decoration={brace,amplitude=10pt,raise=20pt}] (00.south west) -- (0k.north west) node[midway,xshift=-1.2cm,yshift=-0.6cm,rotate=-60]{\textbf{Edit distance}};
\draw [decorate,decoration={brace,amplitude=10pt,raise=20pt}] (00.south west) -- (0k.north west) node[midway,xshift=-1cm,yshift=-1cm,rotate=-54]{\textbf{Edit distance}};
\end{tikzpicture}
}
6 changes: 4 additions & 2 deletions latex/splash2024/preamble.tex
Original file line number Diff line number Diff line change
Expand Up @@ -387,9 +387,11 @@

% \duparrow: small inline TikZ glyph built from dots on a 6pt grid plus arrows
% drawn with the `-to` tip.
% NOTE(review): this span appears to interleave superseded and current lines of
% the glyph (two arrows, and a dot that is immediately painted over) -- confirm
% which lines belong to the intended definition.
\newcommand{\duparrow}{
\tikz{
% Black dot at the origin.
\fill (0pt,0pt) circle [radius = 1pt];
% White disc with the same centre and radius as the dot above.
\fill[white] (0pt,0pt) circle [radius = 1pt];
% Black dots at the bottom-right and top-left grid points.
\fill (6pt,0pt) circle [radius = 1pt];
\fill (0pt,6pt) circle [radius = 1pt];
% Vertical arrow from the origin to the top-left point.
\draw [-to] (0pt,0pt) -- (0pt,6pt);
% White disc at the top-right grid point.
\fill[white] (6pt,6pt) circle [radius = 1pt];
% Diagonal arrow from the bottom-right point up to the top-left point.
\draw [-to] (6pt,0pt) -- (0pt,6pt);
}
}

Expand Down
Binary file modified latex/splash2024/splash.pdf
Binary file not shown.
139 changes: 137 additions & 2 deletions latex/splash2024/splash.tex
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@

\begin{figure}[H]
\begin{center}
\resizebox{.9\textwidth}{!}{\input{nfa_cfg.tex}}
\input{nfa_cfg.tex}
\end{center}
\caption{Automaton recognizing Levenshtein $\Delta(\sigma: \Sigma^5, 3)$ reachability. Unlabeled arcs accept any terminal.}
\end{figure}
Expand Down Expand Up @@ -383,7 +383,142 @@
\BinaryInfC{$q_{i, j}\in F$}
\end{prooftree}

\noindent These rewrite rules can generate a large grammar whose cardinality is approximated by $|P_\cap|=|I||F| + |\delta| + |P||Q|^3$. Since it is so large, materializing the grammar directly can be expensive; instead, we can lazily enumerate its inhabitants by performing a kind of proof search.
% \substitutionExample: two-row lattice diagram for a single substitution.
% The path runs along the bottom row, takes one diagonal step up, and finishes
% along the top row. The trailing `%' guards keep the macro from emitting
% stray spaces around the picture.
\newcommand{\substitutionExample}{%
\tikz{
  % Lattice dots: six columns, 8pt apart, on the rows y = 0pt and y = 8pt.
  \foreach \x in {0,8,16,24,32,40}{
    \fill (\x pt,0pt) circle [radius = 1pt];
    \fill (\x pt,8pt) circle [radius = 1pt];
  }
  % Invisible path: reserves room for one extra dot row at y = -8pt in the
  % bounding box without painting anything. This replaces wrapping a \fill
  % in \phantom, which relied on TikZ side effects inside a discarded box.
  % Presumably this keeps the picture as tall as the three-row diagrams.
  \path (0pt,-8pt) circle [radius = 1pt];
  \draw [-to] (0pt,0pt) -- (8pt,0pt);
  \draw [-to] (8pt,0pt) -- (16pt,0pt);
  % Diagonal step: one column right, one row up.
  \draw [-to] (16pt,0pt) -- (24pt,8pt);
  \draw [-to] (24pt,8pt) -- (32pt,8pt);
  \draw [-to] (32pt,8pt) -- (40pt,8pt);
}%
}

% \insertionExample: two-row lattice diagram for a single insertion.
% Two lattice dots are erased so that the long horizontal arrows read as
% single steps, and the row change is drawn as an up-left arrow. The trailing
% `%' guards keep the macro from emitting stray spaces around the picture.
\newcommand{\insertionExample}{%
\tikz{
  % Lattice dots: six columns, 8pt apart, on the rows y = 0pt and y = 8pt.
  \foreach \x in {0,8,16,24,32,40}{
    \fill (\x pt,0pt) circle [radius = 1pt];
    \fill (\x pt,8pt) circle [radius = 1pt];
  }
  % Invisible path: reserves room for one extra dot row at y = -8pt in the
  % bounding box without painting anything. This replaces wrapping a \fill
  % in \phantom, which relied on TikZ side effects inside a discarded box.
  \path (0pt,-8pt) circle [radius = 1pt];
  % Erase two dots with slightly larger white discs (1.2pt over 1pt). They
  % must be painted before the arrows so that the arrows stay visible.
  \fill[white] (16pt,0pt) circle [radius = 1.2pt];
  \fill[white] (24pt,8pt) circle [radius = 1.2pt];
  \draw [-to] (0pt,0pt) -- (8pt,0pt);
  % Passes over the erased dot at (16pt,0pt).
  \draw [-to] (8pt,0pt) -- (24pt,0pt);
  % Up-left step onto the top row.
  \draw [-to] (24pt,0pt) -- (16pt,8pt);
  % Passes over the erased dot at (24pt,8pt).
  \draw [-to] (16pt,8pt) -- (32pt,8pt);
  \draw [-to] (32pt,8pt) -- (40pt,8pt);
}%
}

% \deletionExample: two-row lattice diagram for a single deletion.
% The path runs along the bottom row and then jumps two columns right and one
% row up in a single arrow. The trailing `%' guards keep the macro from
% emitting stray spaces around the picture.
\newcommand{\deletionExample}{%
\tikz{
  % Lattice dots: six columns, 8pt apart, on the rows y = 0pt and y = 8pt.
  \foreach \x in {0,8,16,24,32,40}{
    \fill (\x pt,0pt) circle [radius = 1pt];
    \fill (\x pt,8pt) circle [radius = 1pt];
  }
  % Invisible path: reserves room for one extra dot row at y = -8pt in the
  % bounding box without painting anything. This replaces wrapping a \fill
  % in \phantom, which relied on TikZ side effects inside a discarded box.
  \path (0pt,-8pt) circle [radius = 1pt];
  \draw [-to] (0pt,0pt) -- (8pt,0pt);
  \draw [-to] (8pt,0pt) -- (16pt,0pt);
  \draw [-to] (16pt,0pt) -- (24pt,0pt);
  % Knight-style step: two columns right, one row up.
  \draw [-to] (24pt,0pt) -- (40pt,8pt);
}%
}

% \doubleDeletionExample: three-row lattice diagram whose path starts with a
% single long jump (three columns right, two rows up) and then continues
% along the top row.
\newcommand{\doubleDeletionExample}{
\tikz{
  % Lattice dots: six columns by three rows on an 8pt grid.
  \foreach \col in {0,8,16,24,32,40}
    \foreach \row in {0,8,16}
      \fill (\col pt,\row pt) circle [radius = 1pt];
  % Path arrows, listed as start-x/start-y/end-x/end-y in pt.
  \foreach \ax/\ay/\bx/\by in {0/0/24/16, 24/16/32/16, 32/16/40/16}
    \draw [-to] (\ax pt,\ay pt) -- (\bx pt,\by pt);
}
}

% \subDelExample: three-row lattice diagram whose path takes one diagonal step
% (one column right, one row up) followed by one longer jump (two columns
% right, one row up).
\newcommand{\subDelExample}{
\tikz{
  % Lattice dots: six columns by three rows on an 8pt grid.
  \foreach \col in {0,8,16,24,32,40}
    \foreach \row in {0,8,16}
      \fill (\col pt,\row pt) circle [radius = 1pt];
  % Path arrows, listed as start-x/start-y/end-x/end-y in pt.
  \foreach \ax/\ay/\bx/\by in {0/0/8/0, 8/0/16/8, 16/8/32/16, 32/16/40/16}
    \draw [-to] (\ax pt,\ay pt) -- (\bx pt,\by pt);
}
}

% \subSubExample: three-row lattice diagram for two consecutive substitutions.
% Each substitution is a single diagonal step (one column right, one row up),
% the same shape used for the substitution step in \substitutionExample.
% The first substitution was previously drawn as a vertical arrow followed by
% a horizontal one, which added an extra arc; the path now advances exactly
% one column per arrow. The trailing `%' guards keep the macro from emitting
% stray spaces around the picture.
\newcommand{\subSubExample}{%
\tikz{
  % Lattice dots: six columns, 8pt apart, on the rows y = 0pt, 8pt and 16pt.
  \foreach \x in {0,8,16,24,32,40}{
    \fill (\x pt,0pt) circle [radius = 1pt];
    \fill (\x pt,8pt) circle [radius = 1pt];
    \fill (\x pt,16pt) circle [radius = 1pt];
  }
  \draw [-to] (0pt,0pt) -- (8pt,0pt);
  % First substitution: diagonal onto the middle row.
  \draw [-to] (8pt,0pt) -- (16pt,8pt);
  % Second substitution: diagonal onto the top row.
  \draw [-to] (16pt,8pt) -- (24pt,16pt);
  \draw [-to] (24pt,16pt) -- (32pt,16pt);
  \draw [-to] (32pt,16pt) -- (40pt,16pt);
}%
}

% \insertDeleteExample: three-row lattice diagram on seven columns. Three
% lattice dots are erased with white discs, and the path climbs one row with
% an up-left arrow and a second row with a diagonal arrow.
\newcommand{\insertDeleteExample}{
\tikz{
  % Lattice dots: seven columns by three rows on an 8pt grid.
  \foreach \col in {0,8,16,24,32,40,48}
    \foreach \row in {0,8,16}
      \fill (\col pt,\row pt) circle [radius = 1pt];
  % Erase three dots with slightly larger white discs (1.2pt over 1pt);
  % painted before the arrows so the arrows stay visible.
  \foreach \col/\row in {16/16, 8/0, 16/8}
    \fill[white] (\col pt,\row pt) circle [radius = 1.2pt];
  % Path arrows, listed as start-x/start-y/end-x/end-y in pt.
  \foreach \ax/\ay/\bx/\by in {0/0/16/0, 16/0/8/8, 8/8/24/8,
                               24/8/32/16, 32/16/40/16, 40/16/48/16}
    \draw [-to] (\ax pt,\ay pt) -- (\bx pt,\by pt);
}
}

Each arc plays a specific role. $\duparrow$ handles insertions, $\ddiagarrow$ handles substitutions, and $\knightarrow$ handles deletions of various lengths. Let us consider some illustrative examples.

\begin{table}[h!]
\begin{tabular}{ccccccc}

\texttt{f\hspace{3pt}.\hspace{3pt}\hlorange{[}\hspace{3pt}x\hspace{3pt})} &
\texttt{f\hspace{3pt}.\hspace{3pt}\phantom{(}\hspace{3pt}x\hspace{3pt})} &
\texttt{f\hspace{3pt}.\hspace{3pt}(\hspace{3pt}\hlred{x}\hspace{3pt})} &
\texttt{\hlred{.}\hspace{3pt}\hlred{+}\hspace{3pt}(\hspace{3pt}x\hspace{3pt})} &
\texttt{f\hspace{3pt}\hlorange{.}\hspace{3pt}\hlred{(}\hspace{3pt}x\hspace{3pt})} &
\texttt{[\hspace{3pt}\hlorange{,}\hspace{3pt}\hlorange{x}\hspace{3pt}y\hspace{3pt}]} &
\texttt{[\hspace{3pt}\phantom{,}\hspace{3pt},\hspace{3pt}\hlred{x}\hspace{3pt}y\hspace{3pt}]} \\

\texttt{f\hspace{3pt}.\hspace{3pt}\hlorange{(}\hspace{3pt}x\hspace{3pt})} &
\texttt{f\hspace{3pt}.\hspace{3pt}\hlgreen{(}\hspace{3pt}x\hspace{3pt})} &
\texttt{f\hspace{3pt}.\hspace{3pt}(\hspace{3pt}\phantom{x}\hspace{3pt})} &
\texttt{\phantom{f}\hspace{3pt}\phantom{.}\hspace{3pt}(\hspace{3pt}x\hspace{3pt})} &
\texttt{f\hspace{3pt}\hlorange{*}\hspace{3pt}\phantom{(}\hspace{3pt}x\hspace{3pt})} &
\texttt{[\hspace{3pt}\hlorange{x}\hspace{3pt}\hlorange{,}\hspace{3pt}y\hspace{3pt}]} &
\texttt{[\hspace{3pt}\hlgreen{x}\hspace{3pt},\hspace{3pt}\phantom{x}\hspace{3pt}y\hspace{3pt}]} \\

\substitutionExample & \insertionExample & \deletionExample & \doubleDeletionExample & \subDelExample & \subSubExample & \insertDeleteExample
\end{tabular}
\end{table}

Note that the same edit can have multiple Levenshtein alignments. $\textsc{Done}$ constructs the final states, which are all states accepting strings $\sigma'$ such that the Levenshtein distance $\Delta(\sigma, \sigma') \leq d_{\max}$.

% \noindent These rewrite rules can generate a large grammar whose cardinality is approximated by $|P_\cap|=|I||F| + |\delta| + |P||Q|^3$. Since it is so large, materializing the grammar directly can be expensive, however instead of materializing it directly, we can lazily enumerate its inhabitants by performing a kind of proof search.

\subsection{Levenshtein Bar-Hillel Specialization}

Expand Down

0 comments on commit 1b29790

Please sign in to comment.