diff --git a/Cargo.lock b/Cargo.lock index 2869429..59e6878 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,7 +4,7 @@ version = 3 [[package]] name = "_arrow_json" -version = "0.8.0" +version = "0.9.0" dependencies = [ "arrow", "pyo3", @@ -14,7 +14,7 @@ dependencies = [ [[package]] name = "_pgpq" -version = "0.8.0" +version = "0.9.0" dependencies = [ "arrow", "arrow-schema", @@ -1303,7 +1303,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "pgpq" -version = "0.8.0" +version = "0.9.0" dependencies = [ "anyhow", "arrow", diff --git a/README.md b/README.md index 033fe12..4782b23 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ There's no reason we can't support struct data types as well. | Timestamp(Millisecond) | TIMESTAMP | | Timestamp(Second) | TIMESTAMP | | Date32 | DATE | -| Date64 | DATE | +| Date64 | Not supported | | Time32(Millisecond) | TIME | | Time32(Second) | TIME | | Time64(Nanosecond) | Not supported | diff --git a/core/Cargo.toml b/core/Cargo.toml index be6c11f..2e6d606 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pgpq" -version = "0.8.0" +version = "0.9.0" edition = "2021" description = "Encode Apache Arrow `RecordBatch`es to Postgres' native binary format" license = "MIT" diff --git a/core/src/encoders.rs b/core/src/encoders.rs index b6709d8..b5bfa89 100644 --- a/core/src/encoders.rs +++ b/core/src/encoders.rs @@ -45,7 +45,6 @@ pub enum Encoder<'a> { TimestampMillisecond(TimestampMillisecondEncoder<'a>), TimestampSecond(TimestampSecondEncoder<'a>), Date32(Date32Encoder<'a>), - Date64(Date64Encoder<'a>), Time32Millisecond(Time32MillisecondEncoder<'a>), Time32Second(Time32SecondEncoder<'a>), Time64Microsecond(Time64MicrosecondEncoder<'a>), @@ -235,23 +234,54 @@ impl_encode!( BufMut::put_f64 ); -const ONE_S_TO_MS: i64 = 1_000; -const ONE_S_TO_US: i64 = 1_000_000; - -// Postgres starts counting on Jan 1st 2000 -// This is Jan 1st 2000 relative to the UNIX Epoch in us -const POSTGRES_BASE_TIMESTAMP_S: i64 = 946_684_800; -const POSTGRES_BASE_TIMESTAMP_MS: i64 = POSTGRES_BASE_TIMESTAMP_S * ONE_S_TO_MS; -const POSTGRES_BASE_TIMESTAMP_US: i64 = POSTGRES_BASE_TIMESTAMP_S * ONE_S_TO_US; +const PG_BASE_TIMESTAMP_OFFSET_US: i64 = 946_684_800_000_000; // microseconds between 2000-01-01 at midnight (Postgres's epoch) and 1970-01-01 (Arrow's / UNIX epoch) +const PG_BASE_TIMESTAMP_OFFSET_MS: i64 = 946_684_800_000; // milliseconds between 2000-01-01 at midnight (Postgres's epoch) and 1970-01-01 (Arrow's / UNIX epoch) +const PG_BASE_TIMESTAMP_OFFSET_S: i64 = 946_684_800; // seconds between 2000-01-01 at midnight (Postgres's epoch) and 1970-01-01 (Arrow's / UNIX epoch) + +#[inline(always)] +fn convert_arrow_timestamp_microseconds_to_pg_timestamp( + _field: &str, + timestamp_us: i64, +) -> Result { + // adjust the timestamp from microseconds since 1970-01-01 to microseconds since 2000-01-01 checking for overflows and underflow + timestamp_us + .checked_sub(PG_BASE_TIMESTAMP_OFFSET_US) + .ok_or_else(|| ErrorKind::Encode { + reason: "Underflow converting microseconds since 1970-01-01 (Arrow) to microseconds since 2000-01-01 (Postgres)".to_string(), + }) +} -const NUM_US_PER_MS: i64 = 1_000; -const NUM_US_PER_S: i64 = 1_000_000; +/// Convert from Arrow timestamps (milliseconds since 1970-01-01) to Postgres timestamps (microseconds since 2000-01-01) +#[inline(always)] +fn convert_arrow_timestamp_milliseconds_to_pg_timestamp( + _field: &str, + timestamp_ms: i64, +) -> Result { + let timestamp_ms = timestamp_ms.checked_sub(PG_BASE_TIMESTAMP_OFFSET_MS).ok_or_else(|| ErrorKind::Encode { + reason: "Underflow converting milliseconds since 1970-01-01 (Arrow) to microseconds since 2000-01-01 (Postgres)".to_string(), + })?; + // convert to microseconds, checking for overflows + timestamp_ms + .checked_mul(1_000) + .ok_or_else(|| ErrorKind::Encode { + reason: "Overflow converting milliseconds to microseconds".to_string(), + }) +} -#[inline] -fn adjust_timestamp(val: i64, offset: i64) -> Result { - val.sub_checked(offset).map_err(|_| ErrorKind::Encode { - reason: "Value too large to transmit".to_string(), - }) +#[inline(always)] +fn convert_arrow_timestamp_seconds_to_pg_timestamp( + _field: &str, + timestamp_s: i64, +) -> Result { + let timestamp_s = timestamp_s.checked_sub(PG_BASE_TIMESTAMP_OFFSET_S).ok_or_else(|| ErrorKind::Encode { + reason: "Underflow converting seconds since 1970-01-01 (Arrow) to microseconds since 2000-01-01 (Postgres)".to_string(), + })?; + // convert to microseconds, checking for overflows + timestamp_s + .checked_mul(1_000_000) + .ok_or_else(|| ErrorKind::Encode { + reason: "Overflow converting seconds to microseconds".to_string(), + }) } #[derive(Debug)] @@ -262,7 +292,7 @@ pub struct TimestampMicrosecondEncoder<'a> { impl_encode_fallible!( TimestampMicrosecondEncoder, type_size_fixed(PostgresType::Timestamp.size()), - |_: &str, v: i64| adjust_timestamp(v, POSTGRES_BASE_TIMESTAMP_US), + convert_arrow_timestamp_microseconds_to_pg_timestamp, BufMut::put_i64 ); @@ -274,15 +304,7 @@ pub struct TimestampMillisecondEncoder<'a> { impl_encode_fallible!( TimestampMillisecondEncoder, type_size_fixed(PostgresType::Timestamp.size()), - |_: &str, v: i64| { - let v = adjust_timestamp(v, POSTGRES_BASE_TIMESTAMP_MS)?; - match v.mul_checked(NUM_US_PER_MS) { - Ok(v) => Ok(v), - Err(_) => Err(ErrorKind::Encode { - reason: "Overflow encoding millisecond timestamp as microseconds".to_string(), - }), - } - }, + convert_arrow_timestamp_milliseconds_to_pg_timestamp, BufMut::put_i64 ); @@ -294,69 +316,80 @@ pub struct TimestampSecondEncoder<'a> { impl_encode_fallible!( TimestampSecondEncoder, type_size_fixed(PostgresType::Timestamp.size()), - |_: &str, v: i64| { - let v = adjust_timestamp(v, POSTGRES_BASE_TIMESTAMP_S)?; - match v.mul_checked(NUM_US_PER_S) { - Ok(v) => Ok(v), - Err(_) => Err(ErrorKind::Encode { - reason: "Overflow encoding seconds timestamp as microseconds".to_string(), - }), - } - }, + convert_arrow_timestamp_seconds_to_pg_timestamp, BufMut::put_i64 ); -#[derive(Debug)] -pub struct Date32Encoder<'a> { - arr: &'a arrow_array::Date32Array, +const PG_BASE_DATE_OFFSET: i32 = 10_957; // Number of days between PostgreSQL's epoch (2000-01-01) and Arrow's / UNIX epoch (1970-01-01) + +#[inline(always)] +fn convert_arrow_date32_to_postgres_date(_field: &str, date: i32) -> Result { + // adjust the date from days since 1970-01-01 to days since 2000-01-01 checking for overflows and underflow + date.checked_sub(PG_BASE_DATE_OFFSET).ok_or_else(|| ErrorKind::Encode { + reason: "Underflow converting days since 1970-01-01 (Arrow) to days since 2000-01-01 (Postgres)".to_string(), + }) } -impl_encode!(Date32Encoder, 4, identity, BufMut::put_i32); #[derive(Debug)] -pub struct Date64Encoder<'a> { - arr: &'a arrow_array::Date64Array, +pub struct Date32Encoder<'a> { + arr: &'a arrow_array::Date32Array, field: String, } impl_encode_fallible!( - Date64Encoder, - type_size_fixed(PostgresType::Date.size()), - |_field: &str, v: i64| { - i32::try_from(v).map_err(|_| ErrorKind::Encode { - reason: "overflow converting 64 bit date to 32 bit date".to_string(), - }) - }, + Date32Encoder, + 4, + convert_arrow_date32_to_postgres_date, BufMut::put_i32 ); +fn convert_arrow_time_seconds_to_postgres_time( + _field: &str, + time_s: i32, +) -> Result { + // convert to microseconds, checking for overflows + let time_s = time_s as i64; + time_s + .checked_mul(1_000_000) + .ok_or_else(|| ErrorKind::Encode { + reason: "Overflow converting seconds to microseconds".to_string(), + }) +} + +fn convert_arrow_time_milliseconds_to_postgres_time( + _field: &str, + time_ms: i32, +) -> Result { + // convert to microseconds, checking for overflows + let time_ms = time_ms as i64; + time_ms.checked_mul(1_000).ok_or_else(|| ErrorKind::Encode { + reason: "Overflow converting milliseconds to microseconds".to_string(), + }) +} + #[derive(Debug)] pub struct Time32MillisecondEncoder<'a> { arr: &'a arrow_array::Time32MillisecondArray, + field: String, } -impl_encode!( +impl_encode_fallible!( Time32MillisecondEncoder, type_size_fixed(PostgresType::Time.size()), - |v| (v as i64) * NUM_US_PER_MS, + convert_arrow_time_milliseconds_to_postgres_time, BufMut::put_i64 ); #[derive(Debug)] pub struct Time32SecondEncoder<'a> { arr: &'a arrow_array::Time32SecondArray, + field: String, } -impl_encode!( +impl_encode_fallible!( Time32SecondEncoder, type_size_fixed(PostgresType::Time.size()), - |v| (v as i64) * NUM_US_PER_S, + convert_arrow_time_seconds_to_postgres_time, BufMut::put_i64 ); -#[inline] -fn write_duration(buf: &mut BytesMut, duration_us: i64) { - buf.put_i64(duration_us); - buf.put_i32(0); // days - buf.put_i32(0); // months -} - #[derive(Debug)] pub struct Time64MicrosecondEncoder<'a> { arr: &'a arrow_array::Time64MicrosecondArray, @@ -369,6 +402,16 @@ pub struct DurationMicrosecondEncoder<'a> { } impl_encode!(DurationMicrosecondEncoder, 16, identity, write_duration); +const NUM_US_PER_MS: i64 = 1_000; +const NUM_US_PER_S: i64 = 1_000_000; + +#[inline] +fn write_duration(buf: &mut BytesMut, duration_us: i64) { + buf.put_i64(duration_us); + buf.put_i32(0); // days + buf.put_i32(0); // months +} + #[derive(Debug)] pub struct DurationMillisecondEncoder<'a> { arr: &'a arrow_array::DurationMillisecondArray, @@ -379,7 +422,7 @@ impl_encode_fallible!( type_size_fixed(PostgresType::Interval.size()), |_: &str, v: i64| v.mul_checked(NUM_US_PER_MS).map_err(|_| { ErrorKind::Encode { - reason: "Overflow encoding millisecond Duration as microseconds".to_string(), + reason: "Overflow encoding millisecond duration as microseconds".to_string(), } }), write_duration @@ -390,12 +433,13 @@ pub struct DurationSecondEncoder<'a> { arr: &'a arrow_array::DurationSecondArray, field: String, } + impl_encode_fallible!( DurationSecondEncoder, type_size_fixed(PostgresType::Interval.size()), |_: &str, v: i64| v.mul_checked(NUM_US_PER_S).map_err(|_| { ErrorKind::Encode { - reason: "Overflow encoding second Duration as microseconds".to_string(), + reason: "Overflow encoding seconds duration as microseconds".to_string(), } }), write_duration @@ -832,7 +876,7 @@ impl_encoder_builder_stateless_with_field!( pub struct Date32EncoderBuilder { field: Arc, } -impl_encoder_builder_stateless!( +impl_encoder_builder_stateless_with_field!( Date32EncoderBuilder, Encoder::Date32, Date32Encoder, @@ -840,23 +884,11 @@ impl_encoder_builder_stateless!( |dt: &DataType| matches!(dt, DataType::Date32) ); -#[derive(Debug, Clone, PartialEq)] -pub struct Date64EncoderBuilder { - field: Arc, -} -impl_encoder_builder_stateless_with_field!( - Date64EncoderBuilder, - Encoder::Date64, - Date64Encoder, - PostgresType::Date, - |dt: &DataType| matches!(dt, DataType::Date64) -); - #[derive(Debug, Clone, PartialEq)] pub struct Time32MillisecondEncoderBuilder { field: Arc, } -impl_encoder_builder_stateless!( +impl_encoder_builder_stateless_with_field!( Time32MillisecondEncoderBuilder, Encoder::Time32Millisecond, Time32MillisecondEncoder, @@ -868,7 +900,7 @@ impl_encoder_builder_stateless!( pub struct Time32SecondEncoderBuilder { field: Arc, } -impl_encoder_builder_stateless!( +impl_encoder_builder_stateless_with_field!( Time32SecondEncoderBuilder, Encoder::Time32Second, Time32SecondEncoder, @@ -1147,7 +1179,6 @@ pub enum EncoderBuilder { TimestampMillisecond(TimestampMillisecondEncoderBuilder), TimestampSecond(TimestampSecondEncoderBuilder), Date32(Date32EncoderBuilder), - Date64(Date64EncoderBuilder), Time32Millisecond(Time32MillisecondEncoderBuilder), Time32Second(Time32SecondEncoderBuilder), Time64Microsecond(Time64MicrosecondEncoderBuilder), @@ -1198,7 +1229,6 @@ impl EncoderBuilder { TimeUnit::Second => Self::TimestampSecond(TimestampSecondEncoderBuilder { field }), }, DataType::Date32 => Self::Date32(Date32EncoderBuilder { field }), - DataType::Date64 => Self::Date64(Date64EncoderBuilder { field }), DataType::Time32(unit) => match unit { TimeUnit::Millisecond => { Self::Time32Millisecond(Time32MillisecondEncoderBuilder { field }) diff --git a/core/tests/generate_test_data.py b/core/tests/generate_test_data.py index 0721173..4ee4ce0 100644 --- a/core/tests/generate_test_data.py +++ b/core/tests/generate_test_data.py @@ -26,6 +26,8 @@ class Col: time_ms = floor(time_s * 1e3) time_us = floor(time_s * 1e6) +date32 = 2**16 - 1 + duration_s = 60 duration_ms = floor(duration_s * 1e3) duration_us = floor(duration_s * 1e6) @@ -44,10 +46,6 @@ class Col: (pa.field("int16", pa.int16()), [-1, 0, 1]), (pa.field("int32", pa.int32()), [-1, 0, 1]), (pa.field("int64", pa.int64()), [-1, 0, 1]), - # ( - # pa.field("float16", pa.float16()), - # [np.float16(v) for v in [-1, 0, 1, float("inf")]], - # ), (pa.field("float32", pa.float32()), [-1, 0, 1, float("inf")]), (pa.field("float64", pa.float64()), [-1, 0, 1, float("inf")]), (pa.field("timestamp_us_notz", pa.timestamp("us", None)), [0, 1, timestamp_us]), @@ -68,6 +66,7 @@ class Col: (pa.field("time_s", pa.time32("s")), [0, 1, time_s]), (pa.field("time_ms", pa.time32("ms")), [0, 1, time_ms]), (pa.field("time_us", pa.time64("us")), [0, 1, time_us]), + (pa.field("date32", pa.date32()), [0, -date32, date32]), (pa.field("duration_us", pa.duration("us")), [0, 1, duration_us]), (pa.field("duration_ms", pa.duration("ms")), [0, 1, duration_ms]), (pa.field("duration_s", pa.duration("s")), [0, 1, duration_s]), diff --git a/core/tests/integration_tests.rs b/core/tests/integration_tests.rs index 8fa1cf1..9d94754 100644 --- a/core/tests/integration_tests.rs +++ b/core/tests/integration_tests.rs @@ -141,6 +141,11 @@ fn test_time_us() { run_test_case("time_us") } +#[test] +fn test_date32() { + run_test_case("date32") +} + #[test] fn test_duration_us() { run_test_case("duration_us") @@ -271,6 +276,11 @@ fn test_time_us_nullable() { run_test_case("time_us_nullable") } +#[test] +fn test_date32_nullable() { + run_test_case("date32_nullable") +} + #[test] fn test_duration_us_nullable() { run_test_case("duration_us_nullable") @@ -401,6 +411,11 @@ fn test_list_time_us() { run_test_case("list_time_us") } +#[test] +fn test_list_date32() { + run_test_case("list_date32") +} + #[test] fn test_list_duration_us() { run_test_case("list_duration_us") @@ -531,6 +546,11 @@ fn test_list_time_us_nullable() { run_test_case("list_time_us_nullable") } +#[test] +fn test_list_date32_nullable() { + run_test_case("list_date32_nullable") +} + #[test] fn test_list_duration_us_nullable() { run_test_case("list_duration_us_nullable") @@ -661,6 +681,11 @@ fn test_list_nullable_time_us() { run_test_case("list_nullable_time_us") } +#[test] +fn test_list_nullable_date32() { + run_test_case("list_nullable_date32") +} + #[test] fn test_list_nullable_duration_us() { run_test_case("list_nullable_duration_us") @@ -791,6 +816,11 @@ fn test_list_nullable_time_us_nullable() { run_test_case("list_nullable_time_us_nullable") } +#[test] +fn test_list_nullable_date32_nullable() { + run_test_case("list_nullable_date32_nullable") +} + #[test] fn test_list_nullable_duration_us_nullable() { run_test_case("list_nullable_duration_us_nullable") diff --git a/core/tests/snapshots/date32.bin b/core/tests/snapshots/date32.bin new file mode 100644 index 0000000..e59e164 Binary files /dev/null and b/core/tests/snapshots/date32.bin differ diff --git a/core/tests/snapshots/date32_nullable.bin b/core/tests/snapshots/date32_nullable.bin new file mode 100644 index 0000000..f499307 Binary files /dev/null and b/core/tests/snapshots/date32_nullable.bin differ diff --git a/core/tests/snapshots/large_binary_nullable.bin b/core/tests/snapshots/large_binary_nullable.bin index 8eb5791..5adc39f 100644 Binary files a/core/tests/snapshots/large_binary_nullable.bin and b/core/tests/snapshots/large_binary_nullable.bin differ diff --git a/core/tests/snapshots/large_string.bin b/core/tests/snapshots/large_string.bin index 47b3b9f..553a681 100644 Binary files a/core/tests/snapshots/large_string.bin and b/core/tests/snapshots/large_string.bin differ diff --git a/core/tests/snapshots/large_string_nullable.bin b/core/tests/snapshots/large_string_nullable.bin index 8eb5791..5adc39f 100644 Binary files a/core/tests/snapshots/large_string_nullable.bin and b/core/tests/snapshots/large_string_nullable.bin differ diff --git a/core/tests/snapshots/list_date32.bin b/core/tests/snapshots/list_date32.bin new file mode 100644 index 0000000..4503410 Binary files /dev/null and b/core/tests/snapshots/list_date32.bin differ diff --git a/core/tests/snapshots/list_date32_nullable.bin b/core/tests/snapshots/list_date32_nullable.bin new file mode 100644 index 0000000..6819565 Binary files /dev/null and b/core/tests/snapshots/list_date32_nullable.bin differ diff --git a/core/tests/snapshots/list_large_binary.bin b/core/tests/snapshots/list_large_binary.bin index dd9e4c5..91269ec 100644 Binary files a/core/tests/snapshots/list_large_binary.bin and b/core/tests/snapshots/list_large_binary.bin differ diff --git a/core/tests/snapshots/list_large_binary_nullable.bin b/core/tests/snapshots/list_large_binary_nullable.bin index 2743c08..22105d8 100644 Binary files a/core/tests/snapshots/list_large_binary_nullable.bin and b/core/tests/snapshots/list_large_binary_nullable.bin differ diff --git a/core/tests/snapshots/list_large_string.bin b/core/tests/snapshots/list_large_string.bin index f289f1c..e4c9b5c 100644 Binary files a/core/tests/snapshots/list_large_string.bin and b/core/tests/snapshots/list_large_string.bin differ diff --git a/core/tests/snapshots/list_large_string_nullable.bin b/core/tests/snapshots/list_large_string_nullable.bin index 5a3c343..bb49903 100644 Binary files a/core/tests/snapshots/list_large_string_nullable.bin and b/core/tests/snapshots/list_large_string_nullable.bin differ diff --git a/core/tests/snapshots/list_nullable_date32.bin b/core/tests/snapshots/list_nullable_date32.bin new file mode 100644 index 0000000..9621ff3 Binary files /dev/null and b/core/tests/snapshots/list_nullable_date32.bin differ diff --git a/core/tests/snapshots/list_nullable_date32_nullable.bin b/core/tests/snapshots/list_nullable_date32_nullable.bin new file mode 100644 index 0000000..2ad4822 Binary files /dev/null and b/core/tests/snapshots/list_nullable_date32_nullable.bin differ diff --git a/core/tests/snapshots/list_nullable_large_binary.bin b/core/tests/snapshots/list_nullable_large_binary.bin index 2253904..fce5a32 100644 Binary files a/core/tests/snapshots/list_nullable_large_binary.bin and b/core/tests/snapshots/list_nullable_large_binary.bin differ diff --git a/core/tests/snapshots/list_nullable_large_binary_nullable.bin b/core/tests/snapshots/list_nullable_large_binary_nullable.bin index d932bd8..088e3a8 100644 Binary files a/core/tests/snapshots/list_nullable_large_binary_nullable.bin and b/core/tests/snapshots/list_nullable_large_binary_nullable.bin differ diff --git a/core/tests/snapshots/list_nullable_large_string.bin b/core/tests/snapshots/list_nullable_large_string.bin index e82a8ee..f2188dd 100644 Binary files a/core/tests/snapshots/list_nullable_large_string.bin and b/core/tests/snapshots/list_nullable_large_string.bin differ diff --git a/core/tests/snapshots/list_nullable_large_string_nullable.bin b/core/tests/snapshots/list_nullable_large_string_nullable.bin index 828da1d..fea9745 100644 Binary files a/core/tests/snapshots/list_nullable_large_string_nullable.bin and b/core/tests/snapshots/list_nullable_large_string_nullable.bin differ diff --git a/core/tests/testdata/date32.arrow b/core/tests/testdata/date32.arrow new file mode 100644 index 0000000..ffb6a14 Binary files /dev/null and b/core/tests/testdata/date32.arrow differ diff --git a/core/tests/testdata/date32_nullable.arrow b/core/tests/testdata/date32_nullable.arrow new file mode 100644 index 0000000..0f2d06e Binary files /dev/null and b/core/tests/testdata/date32_nullable.arrow differ diff --git a/core/tests/testdata/large_binary_nullable.arrow b/core/tests/testdata/large_binary_nullable.arrow index 67e3a8e..bf4f600 100644 Binary files a/core/tests/testdata/large_binary_nullable.arrow and b/core/tests/testdata/large_binary_nullable.arrow differ diff --git a/core/tests/testdata/large_string.arrow b/core/tests/testdata/large_string.arrow index e76ab07..92a907d 100644 Binary files a/core/tests/testdata/large_string.arrow and b/core/tests/testdata/large_string.arrow differ diff --git a/core/tests/testdata/large_string_nullable.arrow b/core/tests/testdata/large_string_nullable.arrow index 32e6cb8..32fdb4f 100644 Binary files a/core/tests/testdata/large_string_nullable.arrow and b/core/tests/testdata/large_string_nullable.arrow differ diff --git a/core/tests/testdata/list_date32.arrow b/core/tests/testdata/list_date32.arrow new file mode 100644 index 0000000..ffaa744 Binary files /dev/null and b/core/tests/testdata/list_date32.arrow differ diff --git a/core/tests/testdata/list_date32_nullable.arrow b/core/tests/testdata/list_date32_nullable.arrow new file mode 100644 index 0000000..aa682a5 Binary files /dev/null and b/core/tests/testdata/list_date32_nullable.arrow differ diff --git a/core/tests/testdata/list_large_binary.arrow b/core/tests/testdata/list_large_binary.arrow index 7015eab..c0ebbc4 100644 Binary files a/core/tests/testdata/list_large_binary.arrow and b/core/tests/testdata/list_large_binary.arrow differ diff --git a/core/tests/testdata/list_large_binary_nullable.arrow b/core/tests/testdata/list_large_binary_nullable.arrow index 04f4dfe..9df1d5e 100644 Binary files a/core/tests/testdata/list_large_binary_nullable.arrow and b/core/tests/testdata/list_large_binary_nullable.arrow differ diff --git a/core/tests/testdata/list_large_string.arrow b/core/tests/testdata/list_large_string.arrow index aa15114..cd872c8 100644 Binary files a/core/tests/testdata/list_large_string.arrow and b/core/tests/testdata/list_large_string.arrow differ diff --git a/core/tests/testdata/list_large_string_nullable.arrow b/core/tests/testdata/list_large_string_nullable.arrow index ea608fe..2cbfdd5 100644 Binary files a/core/tests/testdata/list_large_string_nullable.arrow and b/core/tests/testdata/list_large_string_nullable.arrow differ diff --git a/core/tests/testdata/list_nullable_date32.arrow b/core/tests/testdata/list_nullable_date32.arrow new file mode 100644 index 0000000..506835b Binary files /dev/null and b/core/tests/testdata/list_nullable_date32.arrow differ diff --git a/core/tests/testdata/list_nullable_date32_nullable.arrow b/core/tests/testdata/list_nullable_date32_nullable.arrow new file mode 100644 index 0000000..fa2925f Binary files /dev/null and b/core/tests/testdata/list_nullable_date32_nullable.arrow differ diff --git a/core/tests/testdata/list_nullable_large_binary.arrow b/core/tests/testdata/list_nullable_large_binary.arrow index 410d866..8ee0609 100644 Binary files a/core/tests/testdata/list_nullable_large_binary.arrow and b/core/tests/testdata/list_nullable_large_binary.arrow differ diff --git a/core/tests/testdata/list_nullable_large_binary_nullable.arrow b/core/tests/testdata/list_nullable_large_binary_nullable.arrow index f86c387..ece52b0 100644 Binary files a/core/tests/testdata/list_nullable_large_binary_nullable.arrow and b/core/tests/testdata/list_nullable_large_binary_nullable.arrow differ diff --git a/core/tests/testdata/list_nullable_large_string.arrow b/core/tests/testdata/list_nullable_large_string.arrow index 2cc33f9..1f92db1 100644 Binary files a/core/tests/testdata/list_nullable_large_string.arrow and b/core/tests/testdata/list_nullable_large_string.arrow differ diff --git a/core/tests/testdata/list_nullable_large_string_nullable.arrow b/core/tests/testdata/list_nullable_large_string_nullable.arrow index f85555d..d945ab2 100644 Binary files a/core/tests/testdata/list_nullable_large_string_nullable.arrow and b/core/tests/testdata/list_nullable_large_string_nullable.arrow differ diff --git a/json/Cargo.toml b/json/Cargo.toml index ed244b5..d69e11c 100644 --- a/json/Cargo.toml +++ b/json/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "_arrow_json" -version = "0.8.0" +version = "0.9.0" edition = "2021" readme = "README.md" license = "MIT" diff --git a/json/pyproject.toml b/json/pyproject.toml index d07858c..f103421 100644 --- a/json/pyproject.toml +++ b/json/pyproject.toml @@ -22,11 +22,11 @@ requires-python = ">=3.7" [project.optional-dependencies] test = [ - "pytest >=7.0.0", - "maturin >= 0.14.0", - "pre-commit>=2.16.0", - "pyarrow >= 11.0.0", - "polars >= 0.16.1", + "pytest>=7.0.0", + "maturin>= 1.0.0", + "pre-commit>= 2.16.0", + "pyarrow>= 11.0.0", + "polars>= 0.16.1", ] bench = [] @@ -36,7 +36,7 @@ documentation = "https://github.com/adriangb/pgpq/README.md" repository = "https://github.com/adriangb/pgpq" [build-system] -requires = ["maturin>=0.14.0,<15"] +requires = ["maturin>=1.0.0,<2"] build-backend = "maturin" [tool.maturin] diff --git a/py/Cargo.toml b/py/Cargo.toml index eb02300..da04f8d 100644 --- a/py/Cargo.toml +++ b/py/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "_pgpq" -version = "0.8.0" +version = "0.9.0" edition = "2021" readme = "README.md" license = "MIT" diff --git a/py/python/pgpq/_pgpq.pyi b/py/python/pgpq/_pgpq.pyi index 7dffe64..cd35ecd 100644 --- a/py/python/pgpq/_pgpq.pyi +++ b/py/python/pgpq/_pgpq.pyi @@ -140,9 +140,6 @@ class TimestampSecondEncoderBuilder: class Date32EncoderBuilder: def __init__(self, field: pyarrow.Field) -> None: ... -class Date64EncoderBuilder: - def __init__(self, field: pyarrow.Field) -> None: ... - class Time32MillisecondEncoderBuilder: def __init__(self, field: pyarrow.Field) -> None: ... @@ -211,7 +208,6 @@ EncoderBuilder = ( | TimestampMillisecondEncoderBuilder | TimestampSecondEncoderBuilder | Date32EncoderBuilder - | Date64EncoderBuilder | Time32MillisecondEncoderBuilder | Time32SecondEncoderBuilder | Time64MicrosecondEncoderBuilder diff --git a/py/python/pgpq/encoders.py b/py/python/pgpq/encoders.py index 46bebfc..381a36b 100644 --- a/py/python/pgpq/encoders.py +++ b/py/python/pgpq/encoders.py @@ -14,7 +14,6 @@ TimestampMillisecondEncoderBuilder, TimestampSecondEncoderBuilder, Date32EncoderBuilder, - Date64EncoderBuilder, Time32MillisecondEncoderBuilder, Time32SecondEncoderBuilder, Time64MicrosecondEncoderBuilder, @@ -45,7 +44,6 @@ "TimestampMillisecondEncoderBuilder", "TimestampSecondEncoderBuilder", "Date32EncoderBuilder", - "Date64EncoderBuilder", "Time32MillisecondEncoderBuilder", "Time32SecondEncoderBuilder", "Time64MicrosecondEncoderBuilder", diff --git a/py/src/encoders.rs b/py/src/encoders.rs index 62a04ce..1b14544 100644 --- a/py/src/encoders.rs +++ b/py/src/encoders.rs @@ -273,14 +273,6 @@ pub struct Date32EncoderBuilder { } impl_passthrough_encoder_builder!(Date32EncoderBuilder); -#[pyclass(module = "pgpq._pgpq")] -#[derive(Debug, Clone)] -pub struct Date64EncoderBuilder { - field: Py, - inner: pgpq::encoders::EncoderBuilder, -} -impl_passthrough_encoder_builder!(Date64EncoderBuilder); - #[pyclass(module = "pgpq._pgpq")] #[derive(Debug, Clone)] pub struct Time32MillisecondEncoderBuilder { @@ -491,7 +483,6 @@ pub enum EncoderBuilder { TimestampMillisecond(TimestampMillisecondEncoderBuilder), TimestampSecond(TimestampSecondEncoderBuilder), Date32(Date32EncoderBuilder), - Date64(Date64EncoderBuilder), Time32Millisecond(Time32MillisecondEncoderBuilder), Time32Second(Time32SecondEncoderBuilder), Time64Microsecond(Time64MicrosecondEncoderBuilder), @@ -524,7 +515,6 @@ impl crate::utils::PythonRepr for EncoderBuilder { EncoderBuilder::TimestampMillisecond(inner) => inner.py_repr(py), EncoderBuilder::TimestampSecond(inner) => inner.py_repr(py), EncoderBuilder::Date32(inner) => inner.py_repr(py), - EncoderBuilder::Date64(inner) => inner.py_repr(py), EncoderBuilder::Time32Millisecond(inner) => inner.py_repr(py), EncoderBuilder::Time32Second(inner) => inner.py_repr(py), EncoderBuilder::Time64Microsecond(inner) => inner.py_repr(py), @@ -644,12 +634,6 @@ impl EncoderBuilder { inner, }) } - pgpq::encoders::EncoderBuilder::Date64(_) => { - EncoderBuilder::Date64(Date64EncoderBuilder { - field: py_field.to_object(py), - inner, - }) - } pgpq::encoders::EncoderBuilder::Time32Millisecond(_) => { EncoderBuilder::Time32Millisecond(Time32MillisecondEncoderBuilder { field: py_field.to_object(py), @@ -837,13 +821,6 @@ impl From for EncoderBuilder { inner: value, }) } - pgpq::encoders::EncoderBuilder::Date64(inner) => { - let field = inner.field(); - EncoderBuilder::Date64(Date64EncoderBuilder { - field: field.to_pyarrow(py).unwrap(), - inner: value, - }) - } pgpq::encoders::EncoderBuilder::Time32Millisecond(inner) => { let field = inner.field(); EncoderBuilder::Time32Millisecond(Time32MillisecondEncoderBuilder { @@ -954,7 +931,6 @@ impl From for pgpq::encoders::EncoderBuilder { EncoderBuilder::TimestampMillisecond(inner) => inner.inner, EncoderBuilder::TimestampSecond(inner) => inner.inner, EncoderBuilder::Date32(inner) => inner.inner, - EncoderBuilder::Date64(inner) => inner.inner, EncoderBuilder::Time32Millisecond(inner) => inner.inner, EncoderBuilder::Time32Second(inner) => inner.inner, EncoderBuilder::Time64Microsecond(inner) => inner.inner, @@ -989,7 +965,6 @@ impl IntoPy for EncoderBuilder { EncoderBuilder::TimestampMillisecond(inner) => inner.into_py(py), EncoderBuilder::TimestampSecond(inner) => inner.into_py(py), EncoderBuilder::Date32(inner) => inner.into_py(py), - EncoderBuilder::Date64(inner) => inner.into_py(py), EncoderBuilder::Time32Millisecond(inner) => inner.into_py(py), EncoderBuilder::Time32Second(inner) => inner.into_py(py), EncoderBuilder::Time64Microsecond(inner) => inner.into_py(py), diff --git a/py/src/lib.rs b/py/src/lib.rs index b336c66..9a40ffc 100644 --- a/py/src/lib.rs +++ b/py/src/lib.rs @@ -107,7 +107,6 @@ fn _pgpq(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/py/src/template.py b/py/src/template.py index d8a15bd..cc6ec49 100644 --- a/py/src/template.py +++ b/py/src/template.py @@ -28,7 +28,6 @@ def __init__(self, field: pyarrow.Field) -> None: "TimestampMillisecondEncoderBuilder", "TimestampSecondEncoderBuilder", "Date32EncoderBuilder", - "Date64EncoderBuilder", "Time32MillisecondEncoderBuilder", "Time32SecondEncoderBuilder", "Time64MicrosecondEncoderBuilder", diff --git a/py/test_pgpq/test_pgpq.py b/py/test_pgpq/test_pgpq.py index dd44d04..a43a12f 100644 --- a/py/test_pgpq/test_pgpq.py +++ b/py/test_pgpq/test_pgpq.py @@ -48,9 +48,9 @@ def copy_buffer_and_get_rows( return rows -TESTCASES = [ - f.strip(".bin").split("/")[-1] for f in sorted(glob("core/tests/snapshots/*")) -] +TESTCASES = sorted( + [f.strip(".bin").split("/")[-1] for f in sorted(glob("core/tests/snapshots/*"))] +) @pytest.mark.parametrize(