From f19f1b169810229dea61b117145c2b0436bbf229 Mon Sep 17 00:00:00 2001 From: Eugene Tolbakov Date: Wed, 26 Jun 2024 23:07:09 +0100 Subject: [PATCH] feat(sql): add casting support for shortened intervals (#4220) * feat(sql): add casting support for shortened intervals * chore(sql): apply CR suggestion, minor renamings --- .../statements/transform/expand_interval.rs | 187 +++++++++++------- .../common/types/interval/interval.result | 16 ++ .../common/types/interval/interval.sql | 4 + 3 files changed, 140 insertions(+), 67 deletions(-) diff --git a/src/sql/src/statements/transform/expand_interval.rs b/src/sql/src/statements/transform/expand_interval.rs index f22792f9bf71..e5c26fa8cf3a 100644 --- a/src/sql/src/statements/transform/expand_interval.rs +++ b/src/sql/src/statements/transform/expand_interval.rs @@ -18,15 +18,15 @@ use std::ops::ControlFlow; use itertools::Itertools; use lazy_static::lazy_static; use regex::Regex; -use sqlparser::ast::{Expr, Interval, Value}; +use sqlparser::ast::{DataType, Expr, Interval, Value}; use crate::statements::transform::TransformRule; lazy_static! { /// Matches either one or more digits `(\d+)` or one or more ASCII characters `[a-zA-Z]` or plus/minus signs - static ref INTERVAL_SHORT_NAME_PATTERN: Regex = Regex::new(r"([+-]?\d+|[a-zA-Z]+|\+|-)").unwrap(); + static ref INTERVAL_ABBREVIATION_PATTERN: Regex = Regex::new(r"([+-]?\d+|[a-zA-Z]+|\+|-)").unwrap(); - static ref INTERVAL_SHORT_NAME_MAPPING: HashMap<&'static str, &'static str> = HashMap::from([ + static ref INTERVAL_ABBREVIATION_MAPPING: HashMap<&'static str, &'static str> = HashMap::from([ ("y","years"), ("mon","months"), ("w","weeks"), @@ -35,14 +35,13 @@ lazy_static! { ("m","minutes"), ("s","seconds"), ("millis","milliseconds"), - ("mils","milliseconds"), - ("ms","microseconds"), + ("ms","milliseconds"), ("us","microseconds"), ("ns","nanoseconds"), ]); } -/// 'Interval' expression transformer +/// 'INTERVAL' abbreviation transformer /// - `y` for `years` /// - `mon` for `months` /// - `w` for `weeks` @@ -51,12 +50,11 @@ lazy_static! { /// - `m` for `minutes` /// - `s` for `seconds` /// - `millis` for `milliseconds` -/// - `mils` for `milliseconds` -/// - `ms` for `microseconds` +/// - `ms` for `milliseconds` /// - `us` for `microseconds` /// - `ns` for `nanoseconds` -/// Required for use cases that use the shortened version of Interval declaration, -/// f.e `select interval '1h'` or `select interval '3w'` +/// Required for scenarios that use the shortened version of `INTERVAL`, +/// f.e `SELECT INTERVAL '1h'` or `SELECT INTERVAL '3w2d'` pub(crate) struct ExpandIntervalTransformRule; impl TransformRule for ExpandIntervalTransformRule { @@ -65,45 +63,73 @@ impl TransformRule for ExpandIntervalTransformRule { /// it's AST has `left` part of type `Value::SingleQuotedString` which needs to be handled specifically. /// To handle the `right` part which is `Interval` no extra steps are needed. fn visit_expr(&self, expr: &mut Expr) -> ControlFlow<()> { - if let Expr::Interval(interval) = expr { - match *interval.value.clone() { + match expr { + Expr::Interval(interval) => match &*interval.value { Expr::Value(Value::SingleQuotedString(value)) | Expr::Value(Value::DoubleQuotedString(value)) => { - if let Some(data) = expand_interval_name(&value) { - *expr = create_interval_with_expanded_name( + if let Some(expanded_name) = expand_interval_name(value) { + *expr = update_existing_interval_with_value( interval, - single_quoted_string_expr(data), + single_quoted_string_expr(expanded_name), ); } } - Expr::BinaryOp { left, op, right } => match *left { + Expr::BinaryOp { left, op, right } => match &**left { Expr::Value(Value::SingleQuotedString(value)) | Expr::Value(Value::DoubleQuotedString(value)) => { - if let Some(data) = expand_interval_name(&value) { - let new_value = Box::new(Expr::BinaryOp { - left: single_quoted_string_expr(data), - op, - right, + if let Some(expanded_name) = expand_interval_name(value) { + let new_expr_value = Box::new(Expr::BinaryOp { + left: single_quoted_string_expr(expanded_name), + op: op.clone(), + right: right.clone(), }); - *expr = create_interval_with_expanded_name(interval, new_value); + *expr = update_existing_interval_with_value(interval, new_expr_value); } } _ => {} }, _ => {} + }, + Expr::Cast { + expr: cast_exp, + data_type, + .. + } => { + if DataType::Interval == *data_type { + match &**cast_exp { + Expr::Value(Value::SingleQuotedString(value)) + | Expr::Value(Value::DoubleQuotedString(value)) => { + let interval_name = + expand_interval_name(value).unwrap_or_else(|| value.to_string()); + *expr = create_interval(single_quoted_string_expr(interval_name)); + } + _ => {} + } + } } + _ => {} } ControlFlow::<()>::Continue(()) } } -fn single_quoted_string_expr(data: String) -> Box { - Box::new(Expr::Value(Value::SingleQuotedString(data))) +fn single_quoted_string_expr(string: String) -> Box { + Box::new(Expr::Value(Value::SingleQuotedString(string))) +} + +fn create_interval(value: Box) -> Expr { + Expr::Interval(Interval { + value, + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }) } -fn create_interval_with_expanded_name(interval: &Interval, new_value: Box) -> Expr { +fn update_existing_interval_with_value(interval: &Interval, value: Box) -> Expr { Expr::Interval(Interval { - value: new_value, + value, leading_field: interval.leading_field.clone(), leading_precision: interval.leading_precision, last_field: interval.last_field.clone(), @@ -111,19 +137,21 @@ fn create_interval_with_expanded_name(interval: &Interval, new_value: Box) }) } -/// Expands a shortened interval name to its full name. -/// Returns an interval's full name (e.g., "years", "hours", "minutes") according to `INTERVAL_SHORT_NAME_MAPPING` mapping +/// Expands an interval abbreviation to its full name. +/// Returns an interval's full name (e.g., "years", "hours", "minutes") according to the `INTERVAL_ABBREVIATION_MAPPING` /// If the `interval_str` contains whitespaces, the interval name is considered to be in a full form. /// Hybrid format "1y 2 days 3h" is not supported. fn expand_interval_name(interval_str: &str) -> Option { return if !interval_str.contains(|c: char| c.is_whitespace()) { Some( - INTERVAL_SHORT_NAME_PATTERN + INTERVAL_ABBREVIATION_PATTERN .find_iter(interval_str) - .map(|mat| match INTERVAL_SHORT_NAME_MAPPING.get(mat.as_str()) { - Some(&expanded_name) => expanded_name, - None => mat.as_str(), - }) + .map( + |mat| match INTERVAL_ABBREVIATION_MAPPING.get(mat.as_str()) { + Some(&expanded_name) => expanded_name, + None => mat.as_str(), + }, + ) .join(" "), ) } else { @@ -135,10 +163,11 @@ fn expand_interval_name(interval_str: &str) -> Option { mod tests { use std::ops::ControlFlow; - use sqlparser::ast::{BinaryOperator, Expr, Interval, Value}; + use sqlparser::ast::{BinaryOperator, DataType, Expr, Interval, Value}; use crate::statements::transform::expand_interval::{ - expand_interval_name, single_quoted_string_expr, ExpandIntervalTransformRule, + create_interval, expand_interval_name, single_quoted_string_expr, + ExpandIntervalTransformRule, }; use crate::statements::transform::TransformRule; @@ -153,8 +182,7 @@ mod tests { ("5s", "5 seconds"), ("2m", "2 minutes"), ("100millis", "100 milliseconds"), - ("150mils", "150 milliseconds"), - ("200ms", "200 microseconds"), + ("200ms", "200 milliseconds"), ("350us", "350 microseconds"), ("400ns", "400 nanoseconds"), ]; @@ -175,22 +203,22 @@ mod tests { ("2y4mon6w", "2 years 4 months 6 weeks"), ("5d3h1m", "5 days 3 hours 1 minutes"), ( - "10s312millis789ms", - "10 seconds 312 milliseconds 789 microseconds", + "10s312ms789ns", + "10 seconds 312 milliseconds 789 nanoseconds", ), ( - "23mils987us754ns", + "23millis987us754ns", "23 milliseconds 987 microseconds 754 nanoseconds", ), ("-1d-5h", "-1 days -5 hours"), ("-2y-4mon-6w", "-2 years -4 months -6 weeks"), ("-5d-3h-1m", "-5 days -3 hours -1 minutes"), ( - "-10s-312millis-789ms", - "-10 seconds -312 milliseconds -789 microseconds", + "-10s-312ms-789ns", + "-10 seconds -312 milliseconds -789 nanoseconds", ), ( - "-23mils-987us-754ns", + "-23millis-987us-754ns", "-23 milliseconds -987 microseconds -754 nanoseconds", ), ]; @@ -204,13 +232,7 @@ mod tests { fn test_visit_expr_when_interval_is_single_quoted_string_expr() { let interval_transformation_rule = ExpandIntervalTransformRule {}; - let mut string_expr = Expr::Interval(Interval { - value: single_quoted_string_expr("5y".to_string()), - leading_field: None, - leading_precision: None, - last_field: None, - fractional_seconds_precision: None, - }); + let mut string_expr = create_interval(single_quoted_string_expr("5y".to_string())); let control_flow = interval_transformation_rule.visit_expr(&mut string_expr); @@ -233,24 +255,12 @@ mod tests { fn test_visit_expr_when_interval_is_binary_op() { let interval_transformation_rule = ExpandIntervalTransformRule {}; - let mut binary_op_expr = Expr::Interval(Interval { - value: Box::new(Expr::BinaryOp { - left: single_quoted_string_expr("2d".to_string()), - op: BinaryOperator::Minus, - right: Box::new(Expr::Interval(Interval { - value: single_quoted_string_expr("1d".to_string()), - leading_field: None, - leading_precision: None, - last_field: None, - fractional_seconds_precision: None, - })), - }), - leading_field: None, - leading_precision: None, - last_field: None, - fractional_seconds_precision: None, + let binary_op = Box::new(Expr::BinaryOp { + left: single_quoted_string_expr("2d".to_string()), + op: BinaryOperator::Minus, + right: Box::new(create_interval(single_quoted_string_expr("1d".to_string()))), }); - + let mut binary_op_expr = create_interval(binary_op); let control_flow = interval_transformation_rule.visit_expr(&mut binary_op_expr); assert_eq!(control_flow, ControlFlow::Continue(())); @@ -275,4 +285,47 @@ mod tests { }) ); } + + #[test] + fn test_visit_expr_when_cast_expr() { + let interval_transformation_rule = ExpandIntervalTransformRule {}; + + let mut cast_to_interval_expr = Expr::Cast { + expr: single_quoted_string_expr("3y2mon".to_string()), + data_type: DataType::Interval, + format: None, + }; + + let control_flow = interval_transformation_rule.visit_expr(&mut cast_to_interval_expr); + + assert_eq!(control_flow, ControlFlow::Continue(())); + assert_eq!( + cast_to_interval_expr, + Expr::Interval(Interval { + value: Box::new(Expr::Value(Value::SingleQuotedString( + "3 years 2 months".to_string() + ))), + leading_field: None, + leading_precision: None, + last_field: None, + fractional_seconds_precision: None, + }) + ); + + let mut cast_to_i64_expr = Expr::Cast { + expr: single_quoted_string_expr("5".to_string()), + data_type: DataType::Int64, + format: None, + }; + let control_flow = interval_transformation_rule.visit_expr(&mut cast_to_i64_expr); + assert_eq!(control_flow, ControlFlow::Continue(())); + assert_eq!( + cast_to_i64_expr, + Expr::Cast { + expr: single_quoted_string_expr("5".to_string()), + data_type: DataType::Int64, + format: None, + } + ); + } } diff --git a/tests/cases/standalone/common/types/interval/interval.result b/tests/cases/standalone/common/types/interval/interval.result index c4c4ad8e60d2..a793b32b5eda 100644 --- a/tests/cases/standalone/common/types/interval/interval.result +++ b/tests/cases/standalone/common/types/interval/interval.result @@ -23,6 +23,14 @@ SELECT INTERVAL '-2 months'; | 0 years -2 mons 0 days 0 hours 0 mins 0.000000000 secs | +---------------------------------------------------------+ +SELECT '3 hours'::INTERVAL; + ++-------------------------------------------------------+ +| IntervalMonthDayNano("10800000000000") | ++-------------------------------------------------------+ +| 0 years 0 mons 0 days 3 hours 0 mins 0.000000000 secs | ++-------------------------------------------------------+ + SELECT INTERVAL '1 year 2 months 3 days 4 hours' + INTERVAL '1 year'; +------------------------------------------------------------------------------------------------------------------+ @@ -305,6 +313,14 @@ SELECT INTERVAL '1y2w3d4h'; | 0 years 12 mons 17 days 4 hours 0 mins 0.000000000 secs | +---------------------------------------------------------+ +SELECT '3y2mon'::INTERVAL; + ++---------------------------------------------------------+ +| IntervalMonthDayNano("3010670175542044828554670112768") | ++---------------------------------------------------------+ +| 0 years 38 mons 0 days 0 hours 0 mins 0.000000000 secs | ++---------------------------------------------------------+ + SELECT INTERVAL '7 days' - INTERVAL '1d'; +----------------------------------------------------------------------------------------------+ diff --git a/tests/cases/standalone/common/types/interval/interval.sql b/tests/cases/standalone/common/types/interval/interval.sql index 9988d27644bd..16c92ef3634d 100644 --- a/tests/cases/standalone/common/types/interval/interval.sql +++ b/tests/cases/standalone/common/types/interval/interval.sql @@ -5,6 +5,8 @@ SELECT INTERVAL '1.5 year'; SELECT INTERVAL '-2 months'; +SELECT '3 hours'::INTERVAL; + SELECT INTERVAL '1 year 2 months 3 days 4 hours' + INTERVAL '1 year'; SELECT INTERVAL '1 year 2 months 3 days 4 hours' - INTERVAL '1 year'; @@ -77,6 +79,8 @@ SELECT INTERVAL '-1h-5m'; SELECT INTERVAL '1y2w3d4h'; +SELECT '3y2mon'::INTERVAL; + SELECT INTERVAL '7 days' - INTERVAL '1d'; SELECT INTERVAL '2h' + INTERVAL '1h';