Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Support for e notation using existing parse_decimal in string to decimal conversion #6905

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
168 changes: 10 additions & 158 deletions arrow-cast/src/cast/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.

use crate::cast::*;
use crate::parse::*;

/// A utility trait that provides checked conversions between
/// decimal types inspired by [`NumCast`]
Expand Down Expand Up @@ -230,106 +231,6 @@ where
)?))
}

/// Parses given string to specified decimal native (i128/i256) based on given
/// scale. Returns an `Err` if it cannot parse given string.
pub(crate) fn parse_string_to_decimal_native<T: DecimalType>(
value_str: &str,
scale: usize,
) -> Result<T::Native, ArrowError>
where
T::Native: DecimalCast + ArrowNativeTypeOp,
{
let value_str = value_str.trim();
let parts: Vec<&str> = value_str.split('.').collect();
if parts.len() > 2 {
return Err(ArrowError::InvalidArgumentError(format!(
"Invalid decimal format: {value_str:?}"
)));
}

let (negative, first_part) = if parts[0].is_empty() {
(false, parts[0])
} else {
match parts[0].as_bytes()[0] {
b'-' => (true, &parts[0][1..]),
b'+' => (false, &parts[0][1..]),
_ => (false, parts[0]),
}
};

let integers = first_part;
let decimals = if parts.len() == 2 { parts[1] } else { "" };

if !integers.is_empty() && !integers.as_bytes()[0].is_ascii_digit() {
return Err(ArrowError::InvalidArgumentError(format!(
"Invalid decimal format: {value_str:?}"
)));
}

if !decimals.is_empty() && !decimals.as_bytes()[0].is_ascii_digit() {
return Err(ArrowError::InvalidArgumentError(format!(
"Invalid decimal format: {value_str:?}"
)));
}

// Adjust decimal based on scale
let mut number_decimals = if decimals.len() > scale {
let decimal_number = i256::from_string(decimals).ok_or_else(|| {
ArrowError::InvalidArgumentError(format!("Cannot parse decimal format: {value_str}"))
})?;

let div = i256::from_i128(10_i128).pow_checked((decimals.len() - scale) as u32)?;

let half = div.div_wrapping(i256::from_i128(2));
let half_neg = half.neg_wrapping();

let d = decimal_number.div_wrapping(div);
let r = decimal_number.mod_wrapping(div);

// Round result
let adjusted = match decimal_number >= i256::ZERO {
true if r >= half => d.add_wrapping(i256::ONE),
false if r <= half_neg => d.sub_wrapping(i256::ONE),
_ => d,
};

let integers = if !integers.is_empty() {
i256::from_string(integers)
.ok_or_else(|| {
ArrowError::InvalidArgumentError(format!(
"Cannot parse decimal format: {value_str}"
))
})
.map(|v| v.mul_wrapping(i256::from_i128(10_i128).pow_wrapping(scale as u32)))?
} else {
i256::ZERO
};

format!("{}", integers.add_wrapping(adjusted))
} else {
let padding = if scale > decimals.len() { scale } else { 0 };

let decimals = format!("{decimals:0<padding$}");
format!("{integers}{decimals}")
};

if negative {
number_decimals.insert(0, '-');
}

let value = i256::from_string(number_decimals.as_str()).ok_or_else(|| {
ArrowError::InvalidArgumentError(format!(
"Cannot convert {} to {}: Overflow",
value_str,
T::PREFIX
))
})?;

T::Native::from_decimal(value).ok_or_else(|| {
ArrowError::InvalidArgumentError(format!("Cannot convert {} to {}", value_str, T::PREFIX))
})
}

pub(crate) fn generic_string_to_decimal_cast<'a, T, S>(
from: &'a S,
precision: u8,
Expand All @@ -342,10 +243,9 @@ where
&'a S: StringArrayType<'a>,
{
if cast_options.safe {
let iter = from.iter().map(|v| {
v.and_then(|v| parse_string_to_decimal_native::<T>(v, scale as usize).ok())
.and_then(|v| T::is_valid_decimal_precision(v, precision).then_some(v))
});
let iter = from
.iter()
.map(|v| v.and_then(|v| parse_decimal::<T>(v, precision, scale).ok()));
// Benefit:
// 20% performance improvement
// Soundness:
Expand All @@ -359,15 +259,12 @@ where
.iter()
.map(|v| {
v.map(|v| {
parse_string_to_decimal_native::<T>(v, scale as usize)
.map_err(|_| {
ArrowError::CastError(format!(
"Cannot cast string '{}' to value of {:?} type",
v,
T::DATA_TYPE,
))
})
.and_then(|v| T::validate_decimal_precision(v, precision).map(|_| v))
parse_decimal::<T>(v, precision, scale).map_err(|_| {
ArrowError::CastError(format!(
"Cannot cast string '{}' to decimal type of precision {} and scale {}",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

T:DATA_TYPE shows default Decimal(38,10) or Decimal256(76,..) in the error message, hiding the precision and scale provided for cast.

v, precision, scale
))
})
})
.transpose()
})
Expand Down Expand Up @@ -622,48 +519,3 @@ where
let array = array.unary::<_, T>(op);
Ok(Arc::new(array))
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_parse_string_to_decimal_native() -> Result<(), ArrowError> {
assert_eq!(
parse_string_to_decimal_native::<Decimal128Type>("0", 0)?,
0_i128
);
assert_eq!(
parse_string_to_decimal_native::<Decimal128Type>("0", 5)?,
0_i128
);

assert_eq!(
parse_string_to_decimal_native::<Decimal128Type>("123", 0)?,
123_i128
);
assert_eq!(
parse_string_to_decimal_native::<Decimal128Type>("123", 5)?,
12300000_i128
);

assert_eq!(
parse_string_to_decimal_native::<Decimal128Type>("123.45", 0)?,
123_i128
);
assert_eq!(
parse_string_to_decimal_native::<Decimal128Type>("123.45", 5)?,
12345000_i128
);

assert_eq!(
parse_string_to_decimal_native::<Decimal128Type>("123.4567891", 0)?,
123_i128
);
assert_eq!(
parse_string_to_decimal_native::<Decimal128Type>("123.4567891", 5)?,
12345679_i128
);
Ok(())
}
}
132 changes: 31 additions & 101 deletions arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2501,6 +2501,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
use crate::parse::parse_decimal;
use arrow_buffer::{Buffer, IntervalDayTime, NullBuffer};
use chrono::NaiveDate;
use half::f16;
Expand Down Expand Up @@ -3843,6 +3844,22 @@ mod tests {
}
}
}
#[test]
fn test_cast_with_options_utf8_to_decimal() {
let array = StringArray::from(vec!["4e7"]);
let result = cast_with_options(
&array,
&DataType::Decimal128(10, 2),
&CastOptions {
safe: false,
format_options: FormatOptions::default(),
},
)
.unwrap();
let output_array = result.as_any().downcast_ref::<Decimal128Array>();
let result_128 = parse_decimal::<Decimal128Type>("40000000", 10, 2);
assert_eq!(output_array.unwrap().value(0), result_128.unwrap());
}

#[test]
fn test_cast_utf8_to_bool() {
Expand Down Expand Up @@ -8481,99 +8498,6 @@ mod tests {
);
}

#[test]
fn test_parse_string_to_decimal() {
assert_eq!(
Decimal128Type::format_decimal(
parse_string_to_decimal_native::<Decimal128Type>("123.45", 2).unwrap(),
38,
2,
),
"123.45"
);
assert_eq!(
Decimal128Type::format_decimal(
parse_string_to_decimal_native::<Decimal128Type>("12345", 2).unwrap(),
38,
2,
),
"12345.00"
);
assert_eq!(
Decimal128Type::format_decimal(
parse_string_to_decimal_native::<Decimal128Type>("0.12345", 2).unwrap(),
38,
2,
),
"0.12"
);
assert_eq!(
Decimal128Type::format_decimal(
parse_string_to_decimal_native::<Decimal128Type>(".12345", 2).unwrap(),
38,
2,
),
"0.12"
);
assert_eq!(
Decimal128Type::format_decimal(
parse_string_to_decimal_native::<Decimal128Type>(".1265", 2).unwrap(),
38,
2,
),
"0.13"
);
assert_eq!(
Decimal128Type::format_decimal(
parse_string_to_decimal_native::<Decimal128Type>(".1265", 2).unwrap(),
38,
2,
),
"0.13"
);

assert_eq!(
Decimal256Type::format_decimal(
parse_string_to_decimal_native::<Decimal256Type>("123.45", 3).unwrap(),
38,
3,
),
"123.450"
);
assert_eq!(
Decimal256Type::format_decimal(
parse_string_to_decimal_native::<Decimal256Type>("12345", 3).unwrap(),
38,
3,
),
"12345.000"
);
assert_eq!(
Decimal256Type::format_decimal(
parse_string_to_decimal_native::<Decimal256Type>("0.12345", 3).unwrap(),
38,
3,
),
"0.123"
);
assert_eq!(
Decimal256Type::format_decimal(
parse_string_to_decimal_native::<Decimal256Type>(".12345", 3).unwrap(),
38,
3,
),
"0.123"
);
assert_eq!(
Decimal256Type::format_decimal(
parse_string_to_decimal_native::<Decimal256Type>(".1265", 3).unwrap(),
38,
3,
),
"0.127"
);
}

fn test_cast_string_to_decimal(array: ArrayRef) {
// Decimal128
let output_type = DataType::Decimal128(38, 2);
Expand Down Expand Up @@ -8832,16 +8756,16 @@ mod tests {
format_options: FormatOptions::default(),
};
let casted_err = cast_with_options(&array, &output_type, &option).unwrap_err();
assert!(casted_err
.to_string()
.contains("Cannot cast string '4.4.5' to value of Decimal128(38, 10) type"));
assert!(casted_err.to_string().contains(
"Cast error: Cannot cast string '4.4.5' to decimal type of precision 38 and scale 2"
));

let str_array = StringArray::from(vec![". 0.123"]);
let array = Arc::new(str_array) as ArrayRef;
let casted_err = cast_with_options(&array, &output_type, &option).unwrap_err();
assert!(casted_err
.to_string()
.contains("Cannot cast string '. 0.123' to value of Decimal128(38, 10) type"));
assert!(casted_err.to_string().contains(
"Cast error: Cannot cast string '. 0.123' to decimal type of precision 38 and scale 2"
));
}

fn test_cast_string_to_decimal128_overflow(overflow_array: ArrayRef) {
Expand Down Expand Up @@ -8885,7 +8809,10 @@ mod tests {
format_options: FormatOptions::default(),
},
);
assert_eq!("Invalid argument error: 100000000000 is too large to store in a Decimal128 of precision 10. Max is 9999999999", err.unwrap_err().to_string());
assert_eq!(
"Cast error: Cannot cast string '1000' to decimal type of precision 10 and scale 8",
err.unwrap_err().to_string()
);
}

#[test]
Expand Down Expand Up @@ -8968,7 +8895,10 @@ mod tests {
format_options: FormatOptions::default(),
},
);
assert_eq!("Invalid argument error: 100000000000 is too large to store in a Decimal256 of precision 10. Max is 9999999999", err.unwrap_err().to_string());
assert_eq!(
"Cast error: Cannot cast string '1000' to decimal type of precision 10 and scale 8",
err.unwrap_err().to_string()
);
}

#[test]
Expand Down
Loading
Loading