Skip to content

Commit

Permalink
Merge pull request #7 from Danchikon/feature/prefix_tree
Browse files Browse the repository at this point in the history
2: trie base operations
  • Loading branch information
danchykon authored Sep 28, 2023
2 parents 63eeff8 + 2f1c20b commit 861075f
Show file tree
Hide file tree
Showing 16 changed files with 309 additions and 31 deletions.
33 changes: 25 additions & 8 deletions cmd/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use core::{PipelineBehaviour, Pipeline};
use engine::pipelines::{LowerCasePipelineBehaviour, StemmingPipelineBehaviour, RedundantTokensPipelineBehaviour};
use core::{trie, Pipeline, PipelineBehaviour};
use engine::pipeline::behaviours::{
LowerCasePipelineBehaviour, RedundantTokensPipelineBehaviour, StemmingPipelineBehaviour,
};
use rust_stemmers::{Algorithm, Stemmer};
use std::{collections::HashSet, vec};

fn main() {
let mut tokens = vec![
let tokens = vec![
String::from("vALUes"),
"Muck".to_string(),
"Duck".to_string(),
Expand All @@ -24,18 +26,33 @@ fn main() {
let redundant_tokens = RedundantTokensPipelineBehaviour::new(hash_set);

let behaviours: Vec<Box<dyn PipelineBehaviour>> = vec![
Box::new(lower_case),
Box::new(redundant_tokens),
Box::new(lower_case),
Box::new(redundant_tokens),
Box::new(stemming),
];
];

let pipeline = Pipeline::new(behaviours);

let result = tokens
.iter()
.map(|token| pipeline.execute(token))
.filter_map(|token| token)
.filter_map(|token| pipeline.execute(token.clone()))
.collect::<HashSet<String>>();

let mut tree = trie::Node::root();

let v = "value".to_string();
tree.insert(v);
tree.insert("var".to_string());
tree.insert("val".to_string());
tree.insert("swimming".to_string());
tree.insert("swap".to_string());

println!("var - {}", tree.exists(&"var".to_string()));
println!("vay - {}", tree.exists(&"vay".to_string()));

println!("all - {:?}", tree.values());
println!("count - {:?}", tree.count());
println!("height - {:?}", tree.height());

println!("{:?}", result);
}
6 changes: 4 additions & 2 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
mod document;
mod pipeline;
mod pipeline_behaviour;
mod token;
mod tokenizer;
mod pipeline;
mod tests;
pub mod trie;

pub use pipeline::*;
pub use document::*;
pub use pipeline::*;
pub use pipeline_behaviour::*;
pub use token::*;
pub use tokenizer::*;
12 changes: 6 additions & 6 deletions core/src/pipeline.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
use crate::PipelineBehaviour;

pub struct Pipeline {
behaviours: Vec<Box<dyn PipelineBehaviour>>
behaviours: Vec<Box<dyn PipelineBehaviour>>,
}

impl Pipeline {
pub fn new(behaviours: Vec<Box<dyn PipelineBehaviour>>) -> Self {
Pipeline { behaviours }
}

pub fn execute(&self, token: &String) -> Option<String> {
let mut result = Some(token.clone());
pub fn execute(&self, token: String) -> Option<String> {
let mut result = Some(token);

for behaviour in self.behaviours.iter() {
result = match result {
Some(value) => behaviour.execute(&value),
None => None
Some(value) => behaviour.execute(value),
None => None,
};
}

result
}
}
}
2 changes: 1 addition & 1 deletion core/src/pipeline_behaviour.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
pub trait PipelineBehaviour {
fn execute(&self, token: &String) -> Option<String>;
fn execute(&self, token: String) -> Option<String>;
}
1 change: 1 addition & 0 deletions core/src/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mod trie;
1 change: 1 addition & 0 deletions core/src/tests/trie.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mod node;
123 changes: 123 additions & 0 deletions core/src/tests/trie/node.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
use crate::trie::Node;


/// A freshly built root is a keyless, childless, non-value node that forms a
/// one-node trie.
#[test]
fn root_should_create_root_node() {
    // Act
    let root = Node::root();

    // Assert: structural flags first, then the size metrics.
    assert!(root.is_root());
    assert!(root.is_leaf());
    assert!(!root.is_value());

    let nodes = root.count();
    assert_eq!(nodes, 1);

    let levels = root.height();
    assert_eq!(levels, 1);
}

/// A node built with `Node::new` carries a key (so it is not a root), keeps the
/// requested value flag and starts without children.
#[test]
fn new_should_create_node() {
    // Act
    let created = Node::new('a', true);

    // Assert: structural flags first, then the size metrics.
    assert!(!created.is_root());
    assert!(created.is_leaf());
    assert!(created.is_value());

    let nodes = created.count();
    assert_eq!(nodes, 1);

    let levels = created.height();
    assert_eq!(levels, 1);
}

/// `count` includes the node it is called on plus every descendant.
#[test]
fn count_should_return_count_of_nodes() {
    // Arrange: the pieces of the shape  root -> { a, b -> c }.
    let mut trie = Node::root();
    let leaf_a = Node::new('a', true);
    let mut branch_b = Node::new('b', false);
    let leaf_c = Node::new('c', true);

    // Act: assemble bottom-up so `b` already owns `c` when it is attached.
    branch_b.add(leaf_c).unwrap();
    trie.add(leaf_a).unwrap();
    trie.add(branch_b).unwrap();

    // Assert: root + a + b + c.
    let total = trie.count();
    assert_eq!(total, 4);
}

/// `height` is the number of nodes on the longest path from the node downwards.
#[test]
fn height_should_return_height_of_trie() {
    // Arrange: the pieces of the shape  root -> { a, b -> c }.
    let mut trie = Node::root();
    let leaf_a = Node::new('a', true);
    let mut branch_b = Node::new('b', false);
    let leaf_c = Node::new('c', true);

    // Act: assemble bottom-up so `b` already owns `c` when it is attached.
    branch_b.add(leaf_c).unwrap();
    trie.add(leaf_a).unwrap();
    trie.add(branch_b).unwrap();

    // Assert: the longest path is root -> b -> c.
    let levels = trie.height();
    assert_eq!(levels, 3);
}

/// Attaching a keyed node as a child succeeds.
#[test]
fn add_should_return_ok_when_node_is_not_root() {
    // Arrange
    let mut parent = Node::root();
    let keyed_child = Node::new('a', true);

    // Act
    let outcome = parent.add(keyed_child);

    // Assert
    assert!(outcome.is_ok());
}

/// A root node has no key, so it cannot be attached as a child.
#[test]
fn add_should_return_err_when_node_is_root() {
    // Arrange
    let mut parent = Node::root();
    let keyless_child = Node::root();

    // Act
    let outcome = parent.add(keyless_child);

    // Assert
    assert!(outcome.is_err());
}

/// A string that was inserted is reported as present.
#[test]
fn exists_should_return_true_when_value_is_exist() {
    // Arrange
    let mut trie = Node::root();
    let word = String::from("satisfy");

    // Act: insert a copy so the original stays available for the lookup.
    trie.insert(word.clone());

    // Assert
    let found = trie.exists(&word);
    assert!(found);
}

/// A string that was never inserted is reported as absent.
#[test]
fn exists_should_return_false_when_value_is_not_exist() {
    // Arrange
    let mut trie = Node::root();
    let stored = String::from("satisfy");
    let missing = String::from("opportunity");

    // Act
    trie.insert(stored);

    // Assert
    let found = trie.exists(&missing);
    assert!(!found);
}
4 changes: 4 additions & 0 deletions core/src/trie.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
mod node;

pub use node::*;

129 changes: 129 additions & 0 deletions core/src/trie/node.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
use std::collections::HashMap;

/// A single node of a character trie (prefix tree).
///
/// The root node carries no key; every other node stores the character of the
/// edge that leads to it. A node whose `is_value` flag is set marks the end of
/// a stored string, which is spelled by the keys on the path from the root.
#[derive(Debug, Clone)]
pub struct Node {
    key: Option<char>,
    is_value: bool,
    children: HashMap<char, Node>,
}

impl Node {
    /// Creates a childless node for `key`.
    ///
    /// `is_value` states whether the path ending at this node is a stored string.
    pub fn new(key: char, is_value: bool) -> Self {
        Node {
            key: Some(key),
            is_value,
            children: HashMap::new(),
        }
    }

    /// Creates an empty root node: no key, not a value, no children.
    pub fn root() -> Self {
        Node {
            key: None,
            is_value: false,
            children: HashMap::new(),
        }
    }

    /// Returns `true` when this node has no key, i.e. it was built by [`Node::root`].
    pub fn is_root(&self) -> bool {
        self.key.is_none()
    }

    /// Returns `true` when this node has no children.
    pub fn is_leaf(&self) -> bool {
        self.children.is_empty()
    }

    /// Returns `true` when the path ending at this node is a stored string.
    pub fn is_value(&self) -> bool {
        self.is_value
    }

    /// Attaches `node` as a direct child, keyed by its character.
    ///
    /// An existing child with the same key is replaced together with its subtree.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when `node` has no key (it is a root node).
    pub fn add(&mut self, node: Self) -> Result<(), ()> {
        let key = node.key.ok_or(())?;
        self.children.insert(key, node);

        Ok(())
    }

    /// Stores `value` in the trie rooted at this node.
    ///
    /// Missing nodes are created along the path and only the final node is
    /// marked as a value. Flags of nodes that already exist on the path are left
    /// untouched, so strings stored earlier (for example a prefix of `value`)
    /// remain present. Inserting an empty string does nothing.
    pub fn insert(&mut self, value: String) {
        if value.is_empty() {
            return;
        }

        let mut current = self;

        // Walk by `char`, not by byte, so multi-byte characters are handled.
        for key in value.chars() {
            current = current
                .children
                .entry(key)
                .or_insert_with(|| Node::new(key, false));
        }

        current.is_value = true;
    }

    /// Returns the number of nodes in the subtree rooted here, this node included.
    pub fn count(&self) -> u32 {
        1 + self.children.values().map(Node::count).sum::<u32>()
    }

    /// Returns the number of nodes on the longest downward path starting here.
    ///
    /// A node without children has height 1.
    pub fn height(&self) -> u32 {
        1 + self
            .children
            .values()
            .map(Node::height)
            .max()
            .unwrap_or(0)
    }

    /// Returns the direct child stored under `key`, if there is one.
    pub fn child(&self, key: &char) -> Option<&Node> {
        self.children.get(key)
    }

    /// Collects every string stored in the subtree rooted at this node.
    ///
    /// The order of the result is unspecified because children are kept in a
    /// `HashMap`.
    pub fn values(&self) -> Vec<String> {
        let mut values = Vec::new();
        let mut prefix = String::new();

        self.values_internal(&mut prefix, &mut values);

        values
    }

    /// Depth-first helper for [`Node::values`].
    ///
    /// `prefix` holds the characters on the path to this node; it is extended
    /// before descending into a child and restored afterwards, so a `String` is
    /// only allocated for paths that are actually stored.
    fn values_internal(&self, prefix: &mut String, values: &mut Vec<String>) {
        for (key, child) in &self.children {
            prefix.push(*key);
            child.values_internal(prefix, values);
            prefix.pop();
        }

        if self.is_value {
            values.push(prefix.clone());
        }
    }

    /// Returns `true` when `value` was stored in the trie rooted at this node.
    ///
    /// A path that exists only as the prefix of a longer stored string does not
    /// count as present.
    pub fn exists(&self, value: &str) -> bool {
        let mut current = self;

        for key in value.chars() {
            match current.children.get(&key) {
                Some(child) => current = child,
                None => return false,
            }
        }

        current.is_value
    }
}

2 changes: 1 addition & 1 deletion engine/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pub mod pipelines;
pub mod pipeline;
1 change: 1 addition & 0 deletions engine/src/pipeline.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod behaviours;
7 changes: 7 additions & 0 deletions engine/src/pipeline/behaviours.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
mod lower_case_pipeline_behaviour;
mod redundant_tokens_pipeline_behaviour;
mod stemming_pipeline_behaviour;

pub use lower_case_pipeline_behaviour::*;
pub use redundant_tokens_pipeline_behaviour::*;
pub use stemming_pipeline_behaviour::*;
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ impl LowerCasePipelineBehaviour {
}

impl PipelineBehaviour for LowerCasePipelineBehaviour {
fn execute(&self, token: &String) -> Option<String> {
fn execute(&self, token: String) -> Option<String> {
Some(token.to_lowercase())
}
}
Loading

0 comments on commit 861075f

Please sign in to comment.