diff --git a/crates/codegen/src/get_child_tokens.rs b/crates/codegen/src/get_child_tokens.rs
index ce386882..7d33ef63 100644
--- a/crates/codegen/src/get_child_tokens.rs
+++ b/crates/codegen/src/get_child_tokens.rs
@@ -78,7 +78,7 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
         fn from(value: String) -> TokenProperty {
             assert!(value.len() > 0, "String property value has length 0");
             TokenProperty {
-                value: Some(value),
+                value: Some(value.to_lowercase()),
                 token: None,
             }
         }
@@ -115,11 +115,24 @@ pub fn get_child_tokens_mod(_item: proc_macro2::TokenStream) -> proc_macro2::Tok
             }
         }
 
-        fn get_token_text(start: usize, end: usize, text: &str) -> String {
+        fn get_token_text(token: &ScanToken, text: &str) -> String {
+            let start = usize::try_from(token.start).unwrap();
+            let end = usize::try_from(token.end).unwrap();
             text.chars()
                 .skip(start)
                 .take(end - start)
                 .collect::<String>()
+                .to_lowercase()
+        }
+
+        /// returns a list of aliases for a string. primarily used for data types.
+        ///
+        /// list from https://www.postgresql.org/docs/current/datatype.html
+        fn aliases(text: &str) -> Vec<&str> {
+            match text {
+                "integer" | "int" | "int4" => vec!["integer", "int", "int4"],
+                _ => vec![text],
+            }
         }
 
@@ -136,17 +149,19 @@
                 return None;
             }
         }
+        // make a string comparison of the text of the token and the property value
-        if property.value.is_some()
-            && get_token_text(
-                usize::try_from(t.start).unwrap(),
-                usize::try_from(t.end).unwrap(),
-                text,
-            )
-            .to_lowercase()
-                != property.value.as_ref().unwrap().to_lowercase()
-        {
-            return None;
+        if property.value.is_some() {
+            let mut token_text = get_token_text(t, text);
+            // if token is Sconst, remove leading and trailing quotes
+            if t.token() == Token::Sconst {
+                let string_delimiter: &[char; 2] = &['\'', '$'];
+                token_text = token_text.trim_start_matches(string_delimiter).trim_end_matches(string_delimiter).to_string();
+            }
+
+            if !aliases(property.value.as_ref().unwrap()).contains(&token_text.as_str()) {
+                return None;
+            }
         }
 
        // if the furthest child location is set, and it is smaller than the start of the token,
@@ -170,14 +185,16 @@
            .min_by_key(|(d, _)| d.to_owned())
            .map(|(_, t)| t);
 
-        if token.is_none() {
-            panic!(
-                "No matching token found for property {:?} in {:#?}",
-                property, tokens
-            );
-        }
+        // if token.is_none() {
+        //     panic!(
+        //         "No matching token found for property {:#?} of node {:#?} in {:#?} with tokens {:#?}",
+        //         property, node, text, tokens
+        //     );
+        // }
 
-        child_tokens.push(token.unwrap());
+        if token.is_some() {
+            child_tokens.push(token.unwrap());
+        }
     };
 
     match node {
@@ -221,6 +238,9 @@ fn custom_handlers(node: &Node) -> TokenStream {
        "Boolean" => quote! {
            get_token(TokenProperty::from(n));
        },
+        "AStar" => quote! {
+            get_token(TokenProperty::from(Token::Ascii42));
+        },
        "AConst" => quote! {
            if n.isnull {
                get_token(TokenProperty::from(Token::NullP));
diff --git a/crates/parser/src/estimate_node_range.rs b/crates/parser/src/estimate_node_range.rs
index 8d461c87..e601fab5 100644
--- a/crates/parser/src/estimate_node_range.rs
+++ b/crates/parser/src/estimate_node_range.rs
@@ -4,6 +4,7 @@
 use crate::get_child_tokens_codegen::get_child_tokens;
 use crate::get_location_codegen::get_location;
 use crate::get_nodes_codegen::Node;
 use cstree::text::{TextRange, TextSize};
+use log::debug;
 use pg_query::{protobuf::ScanToken, protobuf::Token, NodeEnum};
 
 #[derive(Debug, Clone)]
@@ -42,20 +43,23 @@ pub fn estimate_node_range(
        // If not available, the closest estimation is the smaller value of the start of the first direct child token,
        // and the start of all children ranges. If neither is available, let’s panic for now.
        // The parent location as a fallback should never be required, because any node must have either children with tokens, or a token itself.
+        let children_ranges = ranged_nodes
+            .iter()
+            .filter(|x| x.inner.path.starts_with(n.path.as_str()))
+            .collect::<Vec<_>>();
        let location = get_location(&n.node);
        let from = if location.is_some() {
-            location.unwrap()
+            Some(location.unwrap())
        } else {
            let start_of_first_child_token = if child_tokens.len() > 0 {
                Some(child_tokens.iter().min_by_key(|t| t.start).unwrap().start)
            } else {
                None
            };
-            let start_of_all_children_ranges = if ranged_nodes.len() > 0 {
+            let start_of_all_children_ranges = if children_ranges.len() > 0 {
                Some(
-                    ranged_nodes
+                    children_ranges
                        .iter()
-                        .filter(|x| x.inner.path.starts_with(n.path.as_str()))
                        .min_by_key(|n| n.range.start())
                        .unwrap()
                        .range
@@ -67,17 +71,18 @@
 
            if start_of_first_child_token.is_some() {
                if start_of_all_children_ranges.is_some() {
-                    min(
+                    Some(min(
                        start_of_first_child_token.unwrap(),
                        u32::from(start_of_all_children_ranges.unwrap()) as i32,
-                    )
+                    ))
                } else {
-                    start_of_first_child_token.unwrap()
+                    Some(start_of_first_child_token.unwrap())
                }
            } else if start_of_all_children_ranges.is_some() {
-                u32::from(start_of_all_children_ranges.unwrap()) as i32
+                Some(u32::from(start_of_all_children_ranges.unwrap()) as i32)
            } else {
-                panic!("No location or child tokens found for node {:?}", n);
+                debug!("No location or child tokens found for node {:?}", n);
+                None
            }
        };
 
@@ -87,11 +92,10 @@
        } else {
            None
        };
-        let end_of_all_children_ranges = if ranged_nodes.len() > 0 {
+        let end_of_all_children_ranges = if children_ranges.len() > 0 {
            Some(
-                ranged_nodes
+                children_ranges
                    .iter()
-                    .filter(|x| x.inner.path.starts_with(n.path.as_str()))
                    .max_by_key(|n| n.range.end())
                    .unwrap()
                    .range
@@ -102,30 +106,34 @@
        };
        let to = if end_of_last_child_token.is_some() {
            if end_of_all_children_ranges.is_some() {
-                max(
+                Some(max(
                    end_of_last_child_token.unwrap(),
                    u32::from(end_of_all_children_ranges.unwrap()) as i32,
-                )
+                ))
            } else {
-                end_of_last_child_token.unwrap()
+                Some(end_of_last_child_token.unwrap())
            }
        } else if end_of_all_children_ranges.is_some() {
-            u32::from(end_of_all_children_ranges.unwrap()) as i32
+            Some(u32::from(end_of_all_children_ranges.unwrap()) as i32)
        } else {
-            panic!("No child tokens or children ranges found for node {:?}", n);
+            debug!("No child tokens or children ranges found for node {:?}", n);
+            None
        };
 
-        // TODO: validate that prepending is enough to ensure that `ranged_nodes` is sorted by
-        // range.start
-        ranged_nodes.insert(
-            0,
-            RangedNode {
+        if from.is_some() && to.is_some() {
+            ranged_nodes.push(RangedNode {
                inner: n.to_owned(),
-                range: TextRange::new(TextSize::from(from as u32), TextSize::from(to as u32)),
-            },
-        );
+                range: TextRange::new(
+                    TextSize::from(from.unwrap() as u32),
+                    TextSize::from(to.unwrap() as u32),
+                ),
+            });
+        }
    });
 
+    // sort by start of range, and then by depth
+    ranged_nodes.sort_by_key(|i| (i.range.start(), i.inner.depth));
+
    ranged_nodes
 }
diff --git a/crates/parser/src/source_parser.rs b/crates/parser/src/source_parser.rs
index 341d6eb9..98e4c91f 100644
--- a/crates/parser/src/source_parser.rs
+++ b/crates/parser/src/source_parser.rs
@@ -93,10 +93,10 @@ impl Parser {
                    self.token(SyntaxKind::Newline, token.text.as_str());
                }
                SourceFileToken::Statement => {
-                    // self.parse_statement(
-                    //     token.text.as_str(),
-                    //     Some(offset + u32::from(token.span.start())),
-                    // );
+                    self.parse_statement_at(
+                        token.text.as_str(),
+                        Some(offset + u32::from(token.span.start())),
+                    );
                }
            };
        }
@@ -110,6 +110,10 @@
 mod tests {
    use super::*;
 
+    fn init() {
+        let _ = env_logger::builder().is_test(true).try_init();
+    }
+
    #[test]
    fn test_source_file_lexer() {
        let input = "select * from contact where id = '123';\n\n-- test comment\n\nselect wrong statement;\n\nselect id,username from contact\n\nselect id,name\nfrom contact -- test inline comment\nwhere id = '123';\n\n";
@@ -145,6 +149,8 @@ mod tests {
 
    #[test]
    fn test_source_file_parser() {
+        init();
+
        let input = "select id, name from users where id = '1224';
 
 select select;
@@ -166,6 +172,8 @@ select 1;
 
    #[test]
    fn test_lexer_with_nested_statements() {
+        init();
+
        let input = "select * from test;
 
 select 123;