Skip to content

Commit

Permalink
Generate ast when parsing regex
Browse files Browse the repository at this point in the history
  • Loading branch information
exellentcoin26 committed Jul 6, 2023
1 parent 63d7dd2 commit 8f6d7e3
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 85 deletions.
32 changes: 20 additions & 12 deletions src/regex/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,36 +12,44 @@
//! character_group_item ::= CHARACTER_CLASS | character_range | CHARACTER;
//! ```

use std::collections::HashSet;

use super::tokenizer;

/// Regular expression AST (abstract syntax tree).
// NOTE(review): this span is a rendered diff without +/- markers, so the
// removed `Expr` lines and the added `ExprKind` lines appear interleaved.
#[derive(Debug)]
pub enum Expr {
#[derive(Debug, Clone)]
pub enum ExprKind {
/// Concatenation of regular expressions.
Concat(Vec<ExprKind>),
/// An empty regular expression.
Empty,
/// An alternative expression (e.g., `<expression> | <expression>`).
Alt(Box<Expr>, Box<Expr>),
/// A literal (e.g., `a`, `[^ca]`, `[a-z]`).
Lit(LiteralKind),
/// A quantified expression (e.g., `(ab[ac]){3,}`, `[0-1]*`).
Quant(Box<Expr>, Option<tokenizer::QuantifierKind>),
Alt(Box<ExprKind>, Box<ExprKind>),
/// A literal with an optional quantifier (e.g., `a`, `[^ca]`, `[a-z]`, `[0-1]*`).
Lit(LiteralKind, Option<tokenizer::QuantifierKind>),
/// A grouped expression with an optional quantifier (e.g., `([a-z] | foo)`, `(ab[ac]){3,}`).
Group(Box<ExprKind>, Option<tokenizer::QuantifierKind>),
}

/// Literal kind that appears in an expression (e.g., `b`, `[^ab]`).
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum LiteralKind {
/// A single token (Unicode character constructs can consist of multiple characters).
Match(char),
/// A shorthand for character groups (e.g., `\w`, `\D`, `.`).
Class(tokenizer::ClassKind),
/// A group of characters (e.g., `[^a-cl47i]`).
Group {
// NOTE(review): presumably true when the group starts with `^` — confirm against the parser.
negated: bool,
// NOTE(review): `kind` and `literals` look like the old and new form of the
// same field (rendered diff without +/- markers) — confirm which one is current.
kind: HashSet<GroupedLiteralKind>,
literals: Vec<GroupedLiteralKind>,
},
}

/// A literal that appears inside a character group (e.g., within `[^a-cl47i]`).
#[derive(Debug)]
#[derive(Debug, Clone, Copy)]
pub enum GroupedLiteralKind {
/// A single token (Unicode character constructs can consist of multiple characters).
Match(char),
/// A shorthand for character groups (e.g., `\w`, `\D`, `.`).
Class(tokenizer::ClassKind),
/// A character range (e.g., `0-1`, `a-z`).
Range(char, char),
}
Loading

0 comments on commit 8f6d7e3

Please sign in to comment.