Skip to content

Commit

Permalink
fix: minor fixes while making tests green
Browse files Browse the repository at this point in the history
  • Loading branch information
psteinroe committed Oct 4, 2023
1 parent 789673b commit 3ee8664
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 48 deletions.
58 changes: 39 additions & 19 deletions crates/codegen/src/get_child_tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
fn from(value: String) -> TokenProperty {
assert!(value.len() > 0, "String property value has length 0");
TokenProperty {
value: Some(value),
value: Some(value.to_lowercase()),
token: None,
}
}
Expand Down Expand Up @@ -115,11 +115,24 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
}
}

fn get_token_text(start: usize, end: usize, text: &str) -> String {
/// Returns the lowercased slice of `text` that `token` spans.
///
/// `token.start` and `token.end` are interpreted as byte offsets into `text`,
/// so a multi-byte character earlier in the statement no longer shifts the
/// extracted slice.
/// NOTE(review): byte offsets are what the libpg_query scanner documents;
/// confirm that `text` is exactly the string that was handed to the scanner.
///
/// An empty string is returned when the offsets do not describe a valid range
/// of `text` (end before start, out of bounds, or not on a character
/// boundary). Such a token then simply fails every text comparison.
///
/// # Panics
///
/// Panics if either offset cannot be converted to `usize`.
fn get_token_text(token: &ScanToken, text: &str) -> String {
    let start = usize::try_from(token.start).unwrap();
    let end = usize::try_from(token.end).unwrap();
    // `str::get` is a checked slice: it yields `None` instead of panicking on
    // an inverted or out-of-range span.
    text.get(start..end).unwrap_or_default().to_lowercase()
}

/// Returns every interchangeable spelling of `text`, including `text` itself.
///
/// Primarily used for data types, so that e.g. `int` in the source matches a
/// property value of `integer`. Only single-word aliases are listed; multi-word
/// names such as `double precision` are not covered.
///
/// Matching is case-sensitive and the groups are spelled in lowercase. A `text`
/// that belongs to no group is returned as the only element.
///
/// list from https://www.postgresql.org/docs/current/datatype.html
fn aliases(text: &str) -> Vec<&str> {
    // Each inner slice is one group of names PostgreSQL treats as the same type.
    const ALIAS_GROUPS: &[&[&str]] = &[
        &["integer", "int", "int4"],
        &["bigint", "int8"],
        &["smallint", "int2"],
        &["serial", "serial4"],
        &["bigserial", "serial8"],
        &["smallserial", "serial2"],
        &["boolean", "bool"],
        &["character", "char"],
        &["numeric", "decimal"],
        &["real", "float4"],
    ];

    for group in ALIAS_GROUPS {
        if group.contains(&text) {
            return group.to_vec();
        }
    }

    vec![text]
}


Expand All @@ -136,17 +149,19 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
return None;
}
}

// make a string comparison of the text of the token and the property value
if property.value.is_some()
&& get_token_text(
usize::try_from(t.start).unwrap(),
usize::try_from(t.end).unwrap(),
text,
)
.to_lowercase()
!= property.value.as_ref().unwrap().to_lowercase()
{
return None;
if property.value.is_some() {
let mut token_text = get_token_text(t, text);
// if token is Sconst, remove leading and trailing quotes
if t.token() == Token::Sconst {
let string_delimiter: &[char; 2] = &['\'', '$'];
token_text = token_text.trim_start_matches(string_delimiter).trim_end_matches(string_delimiter).to_string();
}

if !aliases(property.value.as_ref().unwrap()).contains(&token_text.as_str()) {
return None;
}
}

// if the furthest child location is set, and it is smaller than the start of the token,
Expand All @@ -170,14 +185,16 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
.min_by_key(|(d, _)| d.to_owned())
.map(|(_, t)| t);

if token.is_none() {
panic!(
"No matching token found for property {:?} in {:#?}",
property, tokens
);
}
// if token.is_none() {
// panic!(
// "No matching token found for property {:#?} of node {:#?} in {:#?} with tokens {:#?}",
// property, node, text, tokens
// );
// }

child_tokens.push(token.unwrap());
if token.is_some() {
child_tokens.push(token.unwrap());
}
};

match node {
Expand Down Expand Up @@ -221,6 +238,9 @@ fn custom_handlers(node: &Node) -> TokenStream {
"Boolean" => quote! {
get_token(TokenProperty::from(n));
},
"AStar" => quote! {
get_token(TokenProperty::from(Token::Ascii42));
},
"AConst" => quote! {
if n.isnull {
get_token(TokenProperty::from(Token::NullP));
Expand Down
58 changes: 33 additions & 25 deletions crates/parser/src/estimate_node_range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::get_child_tokens_codegen::get_child_tokens;
use crate::get_location_codegen::get_location;
use crate::get_nodes_codegen::Node;
use cstree::text::{TextRange, TextSize};
use log::debug;
use pg_query::{protobuf::ScanToken, protobuf::Token, NodeEnum};

#[derive(Debug, Clone)]
Expand Down Expand Up @@ -42,20 +43,23 @@ pub fn estimate_node_range(
// If not available, the closest estimation is the smaller value of the start of the first direct child token,
// and the start of all children ranges. If neither is available, a debug message is logged and the node is skipped.
// The parent location as a fallback should never be required, because any node must have either children with tokens, or a token itself.
let children_ranges = ranged_nodes
.iter()
.filter(|x| x.inner.path.starts_with(n.path.as_str()))
.collect::<Vec<&RangedNode>>();
let location = get_location(&n.node);
let from = if location.is_some() {
location.unwrap()
Some(location.unwrap())
} else {
let start_of_first_child_token = if child_tokens.len() > 0 {
Some(child_tokens.iter().min_by_key(|t| t.start).unwrap().start)
} else {
None
};
let start_of_all_children_ranges = if ranged_nodes.len() > 0 {
let start_of_all_children_ranges = if children_ranges.len() > 0 {
Some(
ranged_nodes
children_ranges
.iter()
.filter(|x| x.inner.path.starts_with(n.path.as_str()))
.min_by_key(|n| n.range.start())
.unwrap()
.range
Expand All @@ -67,17 +71,18 @@ pub fn estimate_node_range(

if start_of_first_child_token.is_some() {
if start_of_all_children_ranges.is_some() {
min(
Some(min(
start_of_first_child_token.unwrap(),
u32::from(start_of_all_children_ranges.unwrap()) as i32,
)
))
} else {
start_of_first_child_token.unwrap()
Some(start_of_first_child_token.unwrap())
}
} else if start_of_all_children_ranges.is_some() {
u32::from(start_of_all_children_ranges.unwrap()) as i32
Some(u32::from(start_of_all_children_ranges.unwrap()) as i32)
} else {
panic!("No location or child tokens found for node {:?}", n);
debug!("No location or child tokens found for node {:?}", n);
None
}
};

Expand All @@ -87,11 +92,10 @@ pub fn estimate_node_range(
} else {
None
};
let end_of_all_children_ranges = if ranged_nodes.len() > 0 {
let end_of_all_children_ranges = if children_ranges.len() > 0 {
Some(
ranged_nodes
children_ranges
.iter()
.filter(|x| x.inner.path.starts_with(n.path.as_str()))
.max_by_key(|n| n.range.end())
.unwrap()
.range
Expand All @@ -102,30 +106,34 @@ pub fn estimate_node_range(
};
let to = if end_of_last_child_token.is_some() {
if end_of_all_children_ranges.is_some() {
max(
Some(max(
end_of_last_child_token.unwrap(),
u32::from(end_of_all_children_ranges.unwrap()) as i32,
)
))
} else {
end_of_last_child_token.unwrap()
Some(end_of_last_child_token.unwrap())
}
} else if end_of_all_children_ranges.is_some() {
u32::from(end_of_all_children_ranges.unwrap()) as i32
Some(u32::from(end_of_all_children_ranges.unwrap()) as i32)
} else {
panic!("No child tokens or children ranges found for node {:?}", n);
debug!("No child tokens or children ranges found for node {:?}", n);
None
};

// TODO: validate that prepending is enough to ensure that `ranged_nodes` is sorted by
// range.start
ranged_nodes.insert(
0,
RangedNode {
if from.is_some() && to.is_some() {
ranged_nodes.push(RangedNode {
inner: n.to_owned(),
range: TextRange::new(TextSize::from(from as u32), TextSize::from(to as u32)),
},
);
range: TextRange::new(
TextSize::from(from.unwrap() as u32),
TextSize::from(to.unwrap() as u32),
),
});
}
});

// sort by start of range, and then by depth
ranged_nodes.sort_by_key(|i| (i.range.start(), i.inner.depth));

ranged_nodes
}

Expand Down
16 changes: 12 additions & 4 deletions crates/parser/src/source_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ impl Parser {
self.token(SyntaxKind::Newline, token.text.as_str());
}
SourceFileToken::Statement => {
// self.parse_statement(
// token.text.as_str(),
// Some(offset + u32::from(token.span.start())),
// );
self.parse_statement_at(
token.text.as_str(),
Some(offset + u32::from(token.span.start())),
);
}
};
}
Expand All @@ -110,6 +110,10 @@ impl Parser {
mod tests {
use super::*;

/// Sets up `env_logger` in test mode so log output shows up in test runs.
///
/// The result of `try_init` is deliberately ignored, so calling this from
/// several tests is harmless.
fn init() {
    let mut logger = env_logger::builder();
    logger.is_test(true);
    let _ = logger.try_init();
}

#[test]
fn test_source_file_lexer() {
let input = "select * from contact where id = '123';\n\n-- test comment\n\nselect wrong statement;\n\nselect id,username from contact\n\nselect id,name\nfrom contact -- test inline comment\nwhere id = '123';\n\n";
Expand Down Expand Up @@ -145,6 +149,8 @@ mod tests {

#[test]
fn test_source_file_parser() {
init();

let input = "select id, name from users where id = '1224';
select select;
Expand All @@ -166,6 +172,8 @@ select 1;

#[test]
fn test_lexer_with_nested_statements() {
init();

let input = "select * from test;
select 123;
Expand Down

0 comments on commit 3ee8664

Please sign in to comment.