Skip to content

Commit

Permalink
feat: improve alphabet definition
Browse files Browse the repository at this point in the history
  • Loading branch information
paoloose committed Oct 2, 2023
1 parent c2f1cad commit dfc6aa2
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 8 deletions.
2 changes: 1 addition & 1 deletion assignments/week1/logic-parser/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion assignments/week1/logic-parser/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "logic-parser"
version = "1.1.1"
version = "1.2.0"
description = "A simple lexer & parser for logical expressions that supports output as AST, JSON and SVG"
edition = "2021"
homepage = "https://github.com/paoloose/discrete-mathematics/tree/main/assignments/week1/logic-parser"
Expand Down
68 changes: 62 additions & 6 deletions assignments/week1/logic-parser/src/lexing/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,18 @@ pub const DEFAULT_ALPHABET: fn(char) -> bool = |c| { char::is_alphanumeric(c) ||
pub const DEFAULT_START_ALPHABET: fn(char) -> bool = |c| { char::is_alphabetic(c) || c == '_' };

impl<'a> Lexer<'a> {
/// Creates a new Lexer with the default alphabet.
///
/// This is conceptually equivalent to doing:
///
/// ```
/// use logic_parser::lexing::Lexer;
///
/// let mut lexer = Lexer::with_alphabets(
/// |c| c.is_alphanumeric() || c == '_',
/// |c| c.is_alphabetic() || c == '_'
/// );
/// ```
pub fn new() -> Self {
Lexer {
is_in_alphabet: DEFAULT_ALPHABET,
Expand All @@ -24,7 +36,32 @@ impl<'a> Lexer<'a> {
}
}

pub fn with_alphabet(alphabet: fn(char) -> bool, start_chars_alphabet: fn(char) -> bool) -> Self {
/// This allows you to define a custom alphabet for the lexer.
///
/// ```
/// use logic_parser::lexing::Lexer;
/// use logic_parser::parsing::{Parser, ASTNode};
///
/// let query = "(tag:pink || tag:anime) && (mime:image/* || mime:video/*)";
/// let mut lexer = Lexer::with_alphabets(
/// |c| c.is_alphanumeric() || c == '_' || c == ':' || c == '*' || c == '/',
/// |c| c.is_alphabetic(),
/// );
///
/// let tokens = lexer.tokenize(query).unwrap();
///
/// let mut parser = Parser::new(&tokens);
/// parser.parse().unwrap();
/// ```
///
/// # WARNING
///
/// Be aware of the following:
///
/// - Creating an alphabet such that the `start_chars_alphabet` contains `alphabet` plus
/// some other characters is **not supported** and the resulting behaviour is unspecified:
/// the lexer will probably loop forever.
///
pub fn with_alphabets(alphabet: fn(char) -> bool, start_chars_alphabet: fn(char) -> bool) -> Self {
Lexer {
is_in_alphabet: alphabet,
is_in_start_chars_alphabet: start_chars_alphabet,
Expand All @@ -33,6 +70,22 @@ impl<'a> Lexer<'a> {
}
}

/// Creates a lexer that uses the same alphabet for the start characters and the rest.
///
/// Equivalent to doing
///
/// ```
/// use logic_parser::lexing::Lexer;
///
/// let custom_alphabet = |c: char| c.is_alphanumeric() || c == '_';
///
/// let mut lexer = Lexer::with_alphabets(custom_alphabet, custom_alphabet);
/// lexer.tokenize("_puppies_").unwrap();
/// ```
pub fn with_alphabet(alphabet: fn(char) -> bool) -> Self {
Self::with_alphabets(alphabet, alphabet)
}

pub fn tokenize(&mut self, src: &'a str) -> Result<Vec<Token>> {
let mut tokens = Vec::new();
self.src = src;
Expand Down Expand Up @@ -126,10 +179,7 @@ impl<'a> Lexer<'a> {
}

/// Delegates to `take_while` with a predicate that matches tab (`'\t'`), space (`' '`)
/// and carriage return (`'\r'`); the newline character is not part of that set.
///
/// Returns the `usize` produced by `take_while` — presumably the number of characters
/// skipped; confirm against the body of `take_while`.
// NOTE(review): the body below appears to show both the removed multi-line closure and
// the added one-line closure from the diff; only one of the two forms should remain.
fn skip_whitespaces(&mut self) -> usize {
self.take_while(|c| {
let result = c == '\t' || c == ' ' || c == '\r';
result
})
self.take_while(|c| c == '\t' || c == ' ' || c == '\r')
}

fn take_while<F>(&mut self, pred: F) -> usize
Expand All @@ -147,6 +197,12 @@ impl<'a> Lexer<'a> {
}
}

impl<'a> Default for Lexer<'a> {
fn default() -> Self {
Self::new()
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -199,7 +255,7 @@ mod tests {

#[test]
fn error_is_returned_when_alphabet_doesnt_match() {
let mut lexer = Lexer::with_alphabet(
let mut lexer = Lexer::with_alphabets(
|c| ['a', 'b', 'c', '1', '2', '3'].contains(&c),
|c| ['a', 'b', 'c'].contains(&c)
);
Expand Down
14 changes: 14 additions & 0 deletions assignments/week1/logic-parser/src/parsing/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,4 +297,18 @@ mod test {
_ => unreachable!()
}
}

/// A lexer built with a custom alphabet must produce tokens the parser accepts.
#[test]
fn parsing_custom_expressions() {
    // Identifiers may contain ':', '*' and '/' (plus '_'), but must start with a letter.
    let mut custom_lexer = Lexer::with_alphabets(
        |c| c.is_alphanumeric() || matches!(c, '_' | ':' | '*' | '/'),
        |c| c.is_alphabetic(),
    );

    let input = "(tag:pink || tag:anime) && (mime:image/* || mime:video/*)";
    let token_list = custom_lexer.tokenize(input).unwrap();

    // Both lexing and parsing are expected to succeed; any error panics the test.
    let mut ast_parser = Parser::new(&token_list);
    ast_parser.parse().unwrap();
}
}

0 comments on commit dfc6aa2

Please sign in to comment.