Skip to content

Commit

Permalink
fix millisecond fraction being handled with wrong scale (#65)
Browse files Browse the repository at this point in the history
* add tests for #61

* fix millisecond fraction being handled with wrong scale

* also raise error if fraction too long

* additional test cases

* update comment

* fix doctest
  • Loading branch information
davidhewitt committed Jun 26, 2024
1 parent 72f1c79 commit 11d6365
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 19 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,13 @@ assert_eq!(dt.to_string(), "2022-01-01T12:13:14Z");
To control the specifics of time parsing you can use provide a `TimeConfig`:

```rust
use speedate::{DateTime, Date, Time, TimeConfig};
use speedate::{DateTime, Date, Time, TimeConfig, MicrosecondsPrecisionOverflowBehavior};
let dt = DateTime::parse_bytes_with_config(
"1689102037.5586429".as_bytes(),
&TimeConfig::builder().unix_timestamp_offset(Some(0)).build(),
&TimeConfig::builder()
.unix_timestamp_offset(Some(0))
.microseconds_precision_overflow_behavior(MicrosecondsPrecisionOverflowBehavior::Truncate)
.build(),
).unwrap();
assert_eq!(
dt,
Expand Down
12 changes: 6 additions & 6 deletions src/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ impl FromStr for Date {

// 2e10 if greater than this, the number is in ms, if less than or equal, it's in seconds
// (in seconds this is 11th October 2603, in ms it's 20th August 1970)
const MS_WATERSHED: i64 = 20_000_000_000;
pub(crate) const MS_WATERSHED: i64 = 20_000_000_000;
// 1600-01-01 as a unix timestamp used for from_timestamp below
const UNIX_1600: i64 = -11_676_096_000;
// 9999-12-31T23:59:59 as a unix timestamp, used as max allowed value below
Expand Down Expand Up @@ -272,11 +272,11 @@ impl Date {

pub(crate) fn timestamp_watershed(timestamp: i64) -> Result<(i64, u32), ParseError> {
let ts_abs = timestamp.checked_abs().ok_or(ParseError::DateTooSmall)?;
let (mut seconds, mut microseconds) = if ts_abs > MS_WATERSHED {
(timestamp / 1_000, timestamp % 1_000 * 1000)
} else {
(timestamp, 0)
};
if ts_abs <= MS_WATERSHED {
return Ok((timestamp, 0));
}
let mut seconds = timestamp / 1_000;
let mut microseconds = ((timestamp % 1_000) * 1000) as i32;
if microseconds < 0 {
seconds -= 1;
microseconds += 1_000_000;
Expand Down
54 changes: 45 additions & 9 deletions src/datetime.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::numbers::{float_parse_bytes, IntFloat};
use crate::TimeConfigBuilder;
use crate::date::MS_WATERSHED;
use crate::{int_parse_bytes, MicrosecondsPrecisionOverflowBehavior, TimeConfigBuilder};
use crate::{time::TimeConfig, Date, ParseError, Time};
use std::cmp::Ordering;
use std::fmt;
Expand Down Expand Up @@ -339,14 +339,50 @@ impl DateTime {
pub fn parse_bytes_with_config(bytes: &[u8], config: &TimeConfig) -> Result<Self, ParseError> {
match Self::parse_bytes_rfc3339_with_config(bytes, config) {
Ok(d) => Ok(d),
Err(e) => match float_parse_bytes(bytes) {
IntFloat::Int(int) => Self::from_timestamp_with_config(int, 0, config),
IntFloat::Float(float) => {
let micro = (float.fract() * 1_000_000_f64).round() as u32;
Self::from_timestamp_with_config(float.floor() as i64, micro, config)
Err(e) => {
let mut split = bytes.splitn(2, |&b| b == b'.');
let Some(timestamp) =
int_parse_bytes(split.next().expect("splitn always returns at least one element"))
else {
return Err(e);
};
let float_fraction = split.next();
debug_assert!(split.next().is_none()); // at most two elements
match float_fraction {
// If fraction exists but is empty (i.e. trailing `.`), allow for backwards compatibility;
// TODO might want to reconsider this later?
Some(b"") | None => Self::from_timestamp_with_config(timestamp, 0, config),
Some(fract) => {
// fraction is either:
// - up to 3 digits of millisecond fractions, i.e. microseconds
// - or up to 6 digits of second fractions, i.e. milliseconds
let max_digits = if timestamp > MS_WATERSHED { 3 } else { 6 };
let Some(fract_integers) = int_parse_bytes(fract) else {
return Err(e);
};
if config.microseconds_precision_overflow_behavior
== MicrosecondsPrecisionOverflowBehavior::Error
&& fract.len() > max_digits
{
return Err(if timestamp > MS_WATERSHED {
ParseError::MillisecondFractionTooLong
} else {
ParseError::SecondFractionTooLong
});
}
// TODO: Technically this is rounding, but this is what the existing
// behaviour already did. Probably this is always better than "truncating"
// so we might want to change MicrosecondsPrecisionOverflowBehavior and
// make other uses also round / deprecate truncating.
let multiple = 10f64.powf(max_digits as f64 - fract.len() as f64);
Self::from_timestamp_with_config(
timestamp,
(fract_integers as f64 * multiple).round() as u32,
config,
)
}
}
IntFloat::Err => Err(e),
},
}
}
}

Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ pub enum ParseError {
SecondFractionTooLong,
/// second fraction digits missing after `.`
SecondFractionMissing,
/// millisecond fraction value is more than 3 digits long
MillisecondFractionTooLong,
/// invalid digit in duration
DurationInvalidNumber,
/// `t` character repeated in duration
Expand Down
17 changes: 15 additions & 2 deletions tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,13 @@ param_tests! {
dt_unix1: ok => "1654646400", "2022-06-08T00:00:00";
dt_unix2: ok => "1654646404", "2022-06-08T00:00:04";
dt_unix_float: ok => "1654646404.5", "2022-06-08T00:00:04.500000";
dt_unix_float_limit: ok => "1654646404.123456", "2022-06-08T00:00:04.123456";
dt_unix_float_ms: ok => "1654646404000.5", "2022-06-08T00:00:04.000500";
dt_unix_float_ms_limit: ok => "1654646404123.456", "2022-06-08T00:00:04.123456";
dt_unix_float_empty: ok => "1654646404.", "2022-06-08T00:00:04";
dt_unix_float_ms_empty: ok => "1654646404000.", "2022-06-08T00:00:04";
dt_unix_float_too_long: err => "1654646404.1234567", SecondFractionTooLong;
dt_unix_float_ms_too_long: err => "1654646404123.4567", MillisecondFractionTooLong;
dt_short_date: err => "xxx", TooShort;
dt_short_time: err => "2020-01-01T12:0", TooShort;
dt: err => "202x-01-01", InvalidCharYear;
Expand Down Expand Up @@ -1390,7 +1397,10 @@ fn test_datetime_parse_bytes_does_not_add_offset_for_rfc3339() {
fn test_datetime_parse_unix_timestamp_from_bytes_with_utc_offset() {
let time = DateTime::parse_bytes_with_config(
"1689102037.5586429".as_bytes(),
&(TimeConfigBuilder::new().unix_timestamp_offset(Some(0)).build()),
&(TimeConfigBuilder::new()
.unix_timestamp_offset(Some(0))
.microseconds_precision_overflow_behavior(MicrosecondsPrecisionOverflowBehavior::Truncate)
.build()),
)
.unwrap();
assert_eq!(time.to_string(), "2023-07-11T19:00:37.558643Z");
Expand All @@ -1400,7 +1410,10 @@ fn test_datetime_parse_unix_timestamp_from_bytes_with_utc_offset() {
fn test_datetime_parse_unix_timestamp_from_bytes_as_naive() {
let time = DateTime::parse_bytes_with_config(
"1689102037.5586429".as_bytes(),
&(TimeConfigBuilder::new().unix_timestamp_offset(None).build()),
&(TimeConfigBuilder::new()
.unix_timestamp_offset(None)
.microseconds_precision_overflow_behavior(MicrosecondsPrecisionOverflowBehavior::Truncate)
.build()),
)
.unwrap();
assert_eq!(time.to_string(), "2023-07-11T19:00:37.558643");
Expand Down

0 comments on commit 11d6365

Please sign in to comment.