Skip to content

Commit

Permalink
feat: implement the non-proc macro version of resolve_tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
psteinroe committed Sep 22, 2023
1 parent f25f23a commit 50032f2
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 1 deletion.
30 changes: 30 additions & 0 deletions crates/parser/src/get_children_codegen.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use codegen::get_children;

// Expands at compile time into the `get_children` helper (and its
// `ChildrenNode` result type) consumed by `resolve_tokens`.
get_children!();

#[cfg(test)]
mod tests {
    use crate::get_children_codegen::get_children;

    #[test]
    fn test_get_children() {
        let input = "with c as (insert into contact (id) values ('id')) select * from c;";

        // Parse the statement and extract the root node (the single node at
        // depth 1). `.ok().map()` replaces the original manual `match` on the
        // `Result`, and the lookup failure now carries a message instead of a
        // bare `unwrap` panic.
        let pg_query_root = pg_query::parse(input).ok().map(|parsed| {
            parsed
                .protobuf
                .nodes()
                .iter()
                .find(|n| n.1 == 1)
                .expect("no node at depth 1 in parsed statement")
                .0
                .to_enum()
        });

        let children = get_children(&pg_query_root.unwrap(), input.to_string(), 1);
        assert_eq!(children.len(), 13);
    }
}
3 changes: 3 additions & 0 deletions crates/parser/src/get_location_codegen.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
use codegen::get_location;

// Expands at compile time into the `get_location` helper used by
// `resolve_tokens` to read a node's source location.
get_location!();
3 changes: 3 additions & 0 deletions crates/parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
//! To see how these drawbacks are mitigated, see the `statement.rs` and the `source_file.rs` module.
mod ast_node;
mod get_children_codegen;
mod get_location_codegen;
mod parser;
mod resolve_tokens;
mod sibling_token;
mod source_parser;
mod statement_parser;
Expand Down
141 changes: 141 additions & 0 deletions crates/parser/src/resolve_tokens.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
use crate::get_children_codegen::ChildrenNode;
use crate::get_location_codegen::get_location;
use cstree::text::{TextRange, TextSize};
use pg_query::{protobuf::ScanToken, NodeEnum};

// all tokens of a node beneath it
// get estimation for each node location from tokens
// and also node range
//
// how to handle tokens that cannot be put beneath node based on the ast?
// pass token -> if not beneath current node, apply immediately

#[derive(Debug, Clone)]
pub struct NestedNode {
    /// The wrapped pg_query AST node.
    pub node: NodeEnum,
    /// Depth of the node within the AST (carried over from `ChildrenNode`).
    pub depth: i32,
    /// Dot-separated path identifying the node's position in the tree.
    pub path: String,
    /// Scan token ids (`ScanToken::token`) matched to this node.
    pub tokens: Vec<i32>,
    /// Source text range spanned by the matched tokens (min start to max end).
    pub range: TextRange,
}

/// Turns a `Vec<ChildrenNode>` into a `Vec<NestedNode>` by adding `tokens` and `range` to each node.
///
/// For each node, we walk all properties and search for tokens that match the property value. The
/// token that is closest to the node or a parent is used.
///
/// The node range is the minimum start and maximum end of all tokens.
pub fn resolve_tokens(
children: &Vec<ChildrenNode>,
tokens: &Vec<ScanToken>,
text: &str,
) -> Vec<NestedNode> {
children
.iter()
.map(|c| {
let nearest_parent_location = get_nearest_parent_location(&c, children);
let furthest_child_location = get_furthest_child_location(&c, children);

let mut child_tokens = Vec::new();

let mut find_token = |property: String| {
child_tokens.push(
tokens
.iter()
.filter_map(|t| {
if get_token_text(
usize::try_from(t.start).unwrap(),
usize::try_from(t.end).unwrap(),
text,
) != property
{
return None;
}

if furthest_child_location.is_some()
&& furthest_child_location.unwrap() < t.start as i32
{
return None;
}

let distance = t.start - nearest_parent_location;
if distance > 0 {
Some((distance, t))
} else {
None
}
})
.min_by_key(|(d, _)| d.to_owned())
.map(|(_, t)| t)
.unwrap(),
);
};

match &c.node {
NodeEnum::RangeVar(n) => {
find_token(n.relname.to_owned());
}
_ => {}
};

NestedNode {
node: c.node.to_owned(),
depth: c.depth,
path: c.path.to_owned(),
tokens: child_tokens.iter().map(|t| t.token).collect(),
range: TextRange::new(
TextSize::from(
child_tokens.iter().min_by_key(|t| t.start).unwrap().start as u32,
),
TextSize::from(child_tokens.iter().max_by_key(|t| t.end).unwrap().end as u32),
),
}
})
.collect()
}

/// Extracts the text for the half-open range `[start, end)` of `text`.
///
/// Slices directly (O(1)) instead of the original O(n) char-by-char walk.
/// NOTE(review): the original indexed by *chars* while scanner offsets are
/// commonly byte offsets; the two only agree for ASCII input — confirm against
/// the tokens `resolve_tokens` receives. On an out-of-bounds or
/// non-char-boundary range we fall back to the original char-based behavior
/// rather than panicking.
fn get_token_text(start: usize, end: usize, text: &str) -> String {
    text.get(start..end).map(str::to_string).unwrap_or_else(|| {
        text.chars()
            .skip(start)
            // `saturating_sub` also guards against `end < start` underflow,
            // which panicked in the original.
            .take(end.saturating_sub(start))
            .collect()
    })
}

/// Returns the maximum known location among `c` and its descendants, if any.
///
/// Fix: the original used a raw `starts_with` on the path, so e.g. a node at
/// path "1.12" was wrongly treated as a descendant of "1.1". Descendants must
/// now continue the path with a `.` separator; the node itself still counts.
fn get_furthest_child_location(c: &ChildrenNode, children: &Vec<ChildrenNode>) -> Option<i32> {
    let descendant_prefix = format!("{}.", c.path);
    children
        .iter()
        .filter_map(|n| {
            // Keep `c` itself and true descendants only.
            if n.path != c.path && !n.path.starts_with(descendant_prefix.as_str()) {
                return None;
            }
            get_location(&n.node)
        })
        .max()
}

/// Returns the location of `n`, or of its nearest ancestor that has one,
/// walking up the dot-separated `path`. Falls back to `0` when no node on the
/// ancestor chain reports a location.
fn get_nearest_parent_location(n: &ChildrenNode, children: &Vec<ChildrenNode>) -> i32 {
    // If the node itself has a location, use it directly.
    if let Some(location) = get_location(&n.node) {
        return location;
    }

    // Walk up the tree: drop the last path segment each step and check whether
    // that ancestor has a known location.
    let mut path_elements = n.path.split('.').collect::<Vec<&str>>();
    path_elements.pop();
    while !path_elements.is_empty() {
        let parent_path = path_elements.join(".");
        if let Some(parent) = children.iter().find(|c| c.path == parent_path) {
            if let Some(location) = get_location(&parent.node) {
                return location;
            }
        }
        path_elements.pop();
    }

    // No ancestor carries a location; default to the start of the input.
    0
}
9 changes: 8 additions & 1 deletion crates/parser/src/statement_parser.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use cstree::text::{TextRange, TextSize};
use logos::{Logos, Span};

use crate::{parser::Parser, syntax_kind_codegen::SyntaxKind};
use crate::{get_children_codegen::get_children, parser::Parser, syntax_kind_codegen::SyntaxKind};

/// A super simple lexer for sql statements.
///
Expand Down Expand Up @@ -83,6 +83,13 @@ impl Parser {
}
};

let mut pg_query_nodes = match &pg_query_root {
Some(root) => get_children(root, text.to_string(), 1)
.into_iter()
.peekable(),
None => Vec::new().into_iter().peekable(),
};

let mut lexer = StatementToken::lexer(&text);

// parse root node if no syntax errors
Expand Down

0 comments on commit 50032f2

Please sign in to comment.