Skip to content

Commit

Permalink
Add Nfa::to_dot implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
exellentcoin26 committed Jul 15, 2023
1 parent 0e47d39 commit 20de61d
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 19 deletions.
103 changes: 85 additions & 18 deletions src/fsm/nfa.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#![allow(unused)]

use crate::regex::{
ast::{self, Ast, ExprKind},
tokenizer::QuantifierKind,
Expand Down Expand Up @@ -34,6 +32,50 @@ impl Nfa {
/// Creates an [`NfaBuilder`] by delegating to `NfaBuilder::new`, forwarding
/// `start_state_final` unchanged.
///
/// NOTE(review): presumably `start_state_final` marks the start state as an
/// accepting state -- confirm against `NfaBuilder::new`.
fn builder(start_state_final: bool) -> NfaBuilder {
NfaBuilder::new(start_state_final)
}

/// Returns an iterator over the ids of every state whose `fin` flag is set.
///
/// The iterator borrows `self` and yields ids in the iteration order of
/// `self.states`; nothing is collected or allocated here.
fn get_final_states(&self) -> impl Iterator<Item = StateId> + '_ {

Check warning on line 36 in src/fsm/nfa.rs

View workflow job for this annotation

GitHub Actions / clippy

methods `get_final_states` and `to_dot` are never used

warning: methods `get_final_states` and `to_dot` are never used --> src/fsm/nfa.rs:36:8 | 31 | impl Nfa { | -------- methods in this implementation ... 36 | fn get_final_states(&self) -> impl Iterator<Item = StateId> + '_ { | ^^^^^^^^^^^^^^^^ ... 43 | fn to_dot(&self) -> String { | ^^^^^^
self.states
.iter()
.filter_map(|s| if s.fin { Some(s.id) } else { None })
}

/// Renders the NFA as a Graphviz DOT `digraph` named `nfa`.
///
/// The output declares the final states first with the `doublecircle` shape,
/// then switches the default node shape to `circle`, and finally emits one
/// edge statement per `(source, input, destination)` transition. Each edge is
/// labelled with the `Debug` representation of its input.
fn to_dot(&self) -> String {
    let transition_dot = self
        .states
        .iter()
        .flat_map(|state| {
            state
                .transitions
                .iter()
                .flat_map(move |(input, dest_states)| {
                    // The label lives inside a double-quoted DOT string, where a bare
                    // `"` terminates the string and backslash sequences such as `\n`
                    // are interpreted by Graphviz. Escape backslashes first so the
                    // quote escapes added afterwards are not doubled.
                    let label = format!("{:?}", input)
                        .replace('\\', "\\\\")
                        .replace('"', "\\\"");

                    dest_states.iter().map(move |dest| {
                        format!("\t{} -> {} [label = \"{}\"];\n", state.id, dest, label)
                    })
                })
        })
        .collect::<String>();

    let final_dot = format!(
        "node [shape = doublecircle]; {}",
        self.get_final_states()
            .map(|s| s.to_string())
            .collect::<Vec<String>>()
            .join(" ")
    );

    // Every source line of the template ends in `\` so that neither the source
    // newline nor the source indentation leaks into the generated text. The
    // edge statements already carry their own indentation and trailing newline.
    format!(
        "digraph nfa {{\n\
        \trankdir = LR;\n\
        \n\
        \t{}\n\
        \tnode [shape = circle];\n\
        \n\
        {}\
        }}",
        final_dot, transition_dot
    )
}
}

impl State {
Expand Down Expand Up @@ -100,11 +142,10 @@ impl NfaBuilder {
}
}

fn get_final_states(&self) -> Vec<StateId> {
fn get_final_states(&self) -> impl Iterator<Item = StateId> + '_ {
self.states
.iter()
.filter_map(|s| if s.fin { Some(s.id) } else { None })
.collect()
}

fn with_state(mut self, fin: bool) -> Self {
Expand Down Expand Up @@ -153,17 +194,13 @@ impl Compiler {
}

fn compile(mut self, expr: &ExprKind) -> Nfa {
self.expr(
expr,
self.nfa.start_state,
Some(
*self
.nfa
.get_final_states()
.first()
.expect("exected at least one final state for the NFA to start with"),
),
);
let end_state = self
.nfa
.get_final_states()
.next()
.expect("exected at least one final state for the NFA to start with");

self.expr(expr, self.nfa.start_state, Some(end_state));
self.nfa.build()
}

Expand Down Expand Up @@ -194,13 +231,43 @@ impl Compiler {
self.expr(lhs, start, end);
self.expr(rhs, start, end);
}
ExprKind::Lit(lit, quantifier) => {
ExprKind::Lit(lit, _quantifier) => {
// TODO: Decide on how to implement quantification of states. Right now I think it
// might be possible to combine quantifiers and take min/max values of the range
// values to decide the new quantifier.
todo!()

let dest_state = match end {
Some(dest) => dest,
None => self.nfa.add_state(false),
};

self.nfa.add_transition(start, dest_state, lit.clone());
}
ExprKind::Group(expr, _quantifier) => {
// TODO: Decide on how to implement quantification of expressions. A quantification
// can be expressed as a wrapped expression with a gateway state that counts how
// many times it is passed and can both go to the end state for the quantification
// wrapper and redo the expression when the quantification is still or not yet
// valid.

self.expr(expr, start, end);
}
ExprKind::Group(expr, quantifier) => todo!(),
}
}
}

#[cfg(test)]
mod foo {
    use super::Nfa;
    use crate::regex::ast::LiteralKind;

    /// Checks the DOT rendering of a two-state NFA with a single `0 -> 1`
    /// transition: the graph wrapper, the final-state declaration and the edge
    /// must all be present.
    #[test]
    fn to_dot() {
        let nfa = Nfa::builder(false)
            .with_state(true)
            .with_transition(0, 1, LiteralKind::Match('a'))
            .build();

        let dot = nfa.to_dot();

        // Kept so the rendering can be inspected with `cargo test -- --nocapture`.
        println!("{}", dot);

        assert!(
            dot.starts_with("digraph nfa {"),
            "missing digraph header in:\n{}",
            dot
        );
        assert!(
            dot.trim_end().ends_with('}'),
            "missing closing brace in:\n{}",
            dot
        );
        assert!(
            dot.contains("node [shape = doublecircle]; "),
            "missing final state declaration in:\n{}",
            dot
        );
        assert!(
            dot.contains("0 -> 1 [label = \""),
            "missing labelled edge from state 0 to state 1 in:\n{}",
            dot
        );
    }
}
2 changes: 1 addition & 1 deletion src/regex/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub(crate) enum LiteralKind {
}

/// literal that appears in a group.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub(crate) enum GroupedLiteralKind {
/// A single token (unicode character constructs can consist of multiple characters).
Match(char),

Check warning on line 53 in src/regex/ast.rs

View workflow job for this annotation

GitHub Actions / clippy

variants `Match`, `Class`, and `Range` are never constructed

warning: variants `Match`, `Class`, and `Range` are never constructed --> src/regex/ast.rs:53:5 | 51 | pub(crate) enum GroupedLiteralKind { | ------------------ variants in this enum 52 | /// A single token (unicode character constructs can consist multiple characters). 53 | Match(char), | ^^^^^ 54 | /// A shorthand for character groups (e.g., `\w`, `\D`, `.`). 55 | Class(tokenizer::ClassKind), | ^^^^^ 56 | /// A character range (e.g., `0-1`, `a-z`). 57 | Range(char, char), | ^^^^^ | = note: `GroupedLiteralKind` has derived impls for the traits `Clone` and `Debug`, but these are intentionally ignored during dead code analysis
Expand Down

0 comments on commit 20de61d

Please sign in to comment.