From 325ee0b47ddaa24a18949532786b0df526badc90 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Fri, 22 Nov 2024 10:24:05 +0100 Subject: [PATCH] Update `re_arrow2` to a version using `half::f16` (#8199) * See https://github.com/rerun-io/re_arrow2/pull/12 --- Cargo.lock | 5 +- Cargo.toml | 4 +- .../re_types_builder/src/codegen/rust/api.rs | 4 +- .../re_types/src/datatypes/tensor_buffer.rs | 4 +- .../re_types/src/datatypes/tensor_data_ext.rs | 94 +------------------ crates/store/re_types/src/tensor_data.rs | 2 +- .../src/testing/datatypes/affix_fuzzer21.rs | 7 +- crates/store/re_types/tests/types/fuzzy.rs | 3 +- crates/store/re_types_core/Cargo.toml | 1 + crates/store/re_types_core/src/size_bytes.rs | 2 +- 10 files changed, 18 insertions(+), 108 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6a11c06acdda..4409be78b369 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5213,8 +5213,7 @@ dependencies = [ [[package]] name = "re_arrow2" version = "0.17.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "787fa1df3020f018e02c1f957edfc6890a73372444de397c36011cda61c9b489" +source = "git+https://github.com/rerun-io/re_arrow2?rev=79ba149e823c88c1baa770a33c6ea8b6244d0597#79ba149e823c88c1baa770a33c6ea8b6244d0597" dependencies = [ "ahash", "arrow-array", @@ -5230,6 +5229,7 @@ dependencies = [ "ethnum", "foreign_vec", "getrandom", + "half", "hash_hasher", "hashbrown 0.14.5", "num-traits", @@ -6347,6 +6347,7 @@ dependencies = [ "bytemuck", "criterion", "document-features", + "half", "itertools 0.13.0", "nohash-hasher", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index b03806b0a3fc..ab4a9697a76f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -582,7 +582,7 @@ egui_tiles = { git = "https://github.com/rerun-io/egui_tiles", rev = "48e0ef5664 # walkers = { git = "https://github.com/rerun-io/walkers", rev = "8939cceb3fa49ca8648ee16fe1d8432f5ab0bdcc" } # https://github.com/podusowski/walkers/pull/222 # commit on `rerun-io/re_arrow2` `main` branch -# https://github.com/rerun-io/re_arrow2/commit/e4717d6debc6d4474ec10db8f629f823f57bad07 -# re_arrow2 = { git = "https://github.com/rerun-io/re_arrow2", rev = "e4717d6debc6d4474ec10db8f629f823f57bad07" } +# https://github.com/rerun-io/re_arrow2/commit/79ba149e823c88c1baa770a33c6ea8b6244d0597 +re_arrow2 = { git = "https://github.com/rerun-io/re_arrow2", rev = "79ba149e823c88c1baa770a33c6ea8b6244d0597" } # dav1d = { path = "/home/cmc/dev/rerun-io/rav1d", package = "re_rav1d", version = "0.1.1" } diff --git a/crates/build/re_types_builder/src/codegen/rust/api.rs b/crates/build/re_types_builder/src/codegen/rust/api.rs index 99feda813e40..28510ccadcbe 100644 --- a/crates/build/re_types_builder/src/codegen/rust/api.rs +++ b/crates/build/re_types_builder/src/codegen/rust/api.rs @@ -751,7 +751,7 @@ impl quote::ToTokens for TypeTokenizer<'_> { Type::Int32 => quote!(i32), Type::Int64 => quote!(i64), Type::Bool => quote!(bool), - Type::Float16 => quote!(arrow2::types::f16), + Type::Float16 => quote!(half::f16), Type::Float32 => quote!(f32), Type::Float64 => quote!(f64), Type::String => quote!(::re_types_core::ArrowString), @@ -789,7 +789,7 @@ impl quote::ToTokens for &ElementType { ElementType::Int32 => quote!(i32), ElementType::Int64 => quote!(i64), ElementType::Bool => quote!(bool), - ElementType::Float16 => quote!(arrow2::types::f16), + ElementType::Float16 => quote!(half::f16), ElementType::Float32 => quote!(f32), ElementType::Float64 => quote!(f64), ElementType::String => quote!(::re_types_core::ArrowString), diff --git a/crates/store/re_types/src/datatypes/tensor_buffer.rs b/crates/store/re_types/src/datatypes/tensor_buffer.rs index ac2a9ea6a8d2..cf18d7717817 100644 --- a/crates/store/re_types/src/datatypes/tensor_buffer.rs +++ b/crates/store/re_types/src/datatypes/tensor_buffer.rs @@ -48,7 +48,7 @@ pub enum TensorBuffer { I64(::re_types_core::ArrowBuffer), /// 16bit IEEE-754 floating point, also known as `half`. - F16(::re_types_core::ArrowBuffer), + F16(::re_types_core::ArrowBuffer), /// 32bit IEEE-754 floating point, also known as `float` or `single`. F32(::re_types_core::ArrowBuffer), @@ -86,7 +86,7 @@ impl ::re_types_core::SizeBytes for TensorBuffer { && <::re_types_core::ArrowBuffer>::is_pod() && <::re_types_core::ArrowBuffer>::is_pod() && <::re_types_core::ArrowBuffer>::is_pod() - && <::re_types_core::ArrowBuffer>::is_pod() + && <::re_types_core::ArrowBuffer>::is_pod() && <::re_types_core::ArrowBuffer>::is_pod() && <::re_types_core::ArrowBuffer>::is_pod() } diff --git a/crates/store/re_types/src/datatypes/tensor_data_ext.rs b/crates/store/re_types/src/datatypes/tensor_data_ext.rs index d9cf2ff39cd0..bca0ddde0c2d 100644 --- a/crates/store/re_types/src/datatypes/tensor_data_ext.rs +++ b/crates/store/re_types/src/datatypes/tensor_data_ext.rs @@ -233,7 +233,7 @@ tensor_type!(i16, I16); tensor_type!(i32, I32); tensor_type!(i64, I64); -tensor_type!(arrow2::types::f16, F16); +tensor_type!(half::f16, F16); tensor_type!(f32, F32); tensor_type!(f64, F64); @@ -256,98 +256,6 @@ impl<'a> TryFrom<&'a TensorData> for ::ndarray::ArrayViewD<'a, u8> { } } -// Manual expansion of tensor_type! macro for `half::f16` types. We need to do this -// because arrow uses its own half type. The two use the same underlying representation -// but are still distinct types. `half::f16`, however, is more full-featured and -// generally a better choice to use when converting to ndarray. -// ========================================== -// TODO(jleibs): would be nice to support this with the macro definition as well -// but the bytemuck casts add a bit of complexity here. -impl<'a> TryFrom<&'a TensorData> for ::ndarray::ArrayViewD<'a, half::f16> { - type Error = TensorCastError; - - fn try_from(value: &'a TensorData) -> Result { - let shape: Vec<_> = value.shape.iter().map(|d| d.size as usize).collect(); - if let TensorBuffer::F16(data) = &value.buffer { - ndarray::ArrayViewD::from_shape(shape, bytemuck::cast_slice(data.as_slice())) - .map_err(|err| TensorCastError::BadTensorShape { source: err }) - } else { - Err(TensorCastError::TypeMismatch) - } - } -} - -impl<'a, D: ::ndarray::Dimension> TryFrom<::ndarray::ArrayView<'a, half::f16, D>> for TensorData { - type Error = TensorCastError; - - fn try_from(view: ::ndarray::ArrayView<'a, half::f16, D>) -> Result { - let shape = view - .shape() - .iter() - .map(|dim| TensorDimension { - size: *dim as u64, - name: None, - }) - .collect(); - match view.to_slice() { - Some(slice) => Ok(Self { - shape, - buffer: TensorBuffer::F16(Vec::from(bytemuck::cast_slice(slice)).into()), - }), - None => Ok(Self { - shape, - buffer: TensorBuffer::F16( - view.iter() - .map(|f| arrow2::types::f16::from_bits(f.to_bits())) - .collect::>() - .into(), - ), - }), - } - } -} - -impl TryFrom<::ndarray::Array> for TensorData { - type Error = TensorCastError; - - fn try_from(value: ndarray::Array) -> Result { - let shape = value - .shape() - .iter() - .map(|dim| TensorDimension { - size: *dim as u64, - name: None, - }) - .collect(); - if value.is_standard_layout() { - let (vec, offset) = value.into_raw_vec_and_offset(); - // into_raw_vec_and_offset() guarantees that the logical element order (.iter()) matches the internal - // storage order in the returned vector if the array is in standard layout. - let vec_slice = if let Some(offset) = offset { - &vec[offset..] - } else { - debug_assert!(vec.is_empty()); - &vec - }; - Ok(Self { - shape, - buffer: TensorBuffer::F16(Vec::from(bytemuck::cast_slice(vec_slice)).into()), - }) - } else { - Ok(Self { - shape, - buffer: TensorBuffer::F16( - value - .iter() - .map(|f| arrow2::types::f16::from_bits(f.to_bits())) - .collect::>() - .into(), - ), - }) - } - } -} - // ---------------------------------------------------------------------------- #[cfg(feature = "image")] diff --git a/crates/store/re_types/src/tensor_data.rs b/crates/store/re_types/src/tensor_data.rs index 08b0d2d5b67e..bc55783f9e33 100644 --- a/crates/store/re_types/src/tensor_data.rs +++ b/crates/store/re_types/src/tensor_data.rs @@ -292,7 +292,7 @@ pub enum TensorElement { /// /// Uses the standard IEEE 754-2008 binary16 format. /// Set . - F16(arrow2::types::f16), + F16(half::f16), /// 32-bit floating point number. F32(f32), diff --git a/crates/store/re_types/src/testing/datatypes/affix_fuzzer21.rs b/crates/store/re_types/src/testing/datatypes/affix_fuzzer21.rs index acec414caa74..37aef1bbab50 100644 --- a/crates/store/re_types/src/testing/datatypes/affix_fuzzer21.rs +++ b/crates/store/re_types/src/testing/datatypes/affix_fuzzer21.rs @@ -20,8 +20,8 @@ use ::re_types_core::{DeserializationError, DeserializationResult}; #[derive(Clone, Debug, Default, PartialEq)] pub struct AffixFuzzer21 { - pub single_half: arrow2::types::f16, - pub many_halves: ::re_types_core::ArrowBuffer, + pub single_half: half::f16, + pub many_halves: ::re_types_core::ArrowBuffer, } impl ::re_types_core::SizeBytes for AffixFuzzer21 { @@ -32,8 +32,7 @@ impl ::re_types_core::SizeBytes for AffixFuzzer21 { #[inline] fn is_pod() -> bool { - ::is_pod() - && <::re_types_core::ArrowBuffer>::is_pod() + ::is_pod() && <::re_types_core::ArrowBuffer>::is_pod() } } diff --git a/crates/store/re_types/tests/types/fuzzy.rs b/crates/store/re_types/tests/types/fuzzy.rs index 4ea2cba3c072..ae2fac91930e 100644 --- a/crates/store/re_types/tests/types/fuzzy.rs +++ b/crates/store/re_types/tests/types/fuzzy.rs @@ -2,7 +2,6 @@ use std::collections::HashMap; -use arrow2::types::f16; use re_types::{ testing::{ archetypes::{AffixFuzzer1, AffixFuzzer2, AffixFuzzer3, AffixFuzzer4}, @@ -11,6 +10,8 @@ use re_types::{ Archetype as _, AsComponents as _, }; +use half::f16; + use crate::util; #[test] diff --git a/crates/store/re_types_core/Cargo.toml b/crates/store/re_types_core/Cargo.toml index 5be97ce1e10e..8545e2421940 100644 --- a/crates/store/re_types_core/Cargo.toml +++ b/crates/store/re_types_core/Cargo.toml @@ -52,6 +52,7 @@ arrow2 = { workspace = true, features = [ backtrace.workspace = true bytemuck.workspace = true document-features.workspace = true +half.workspace = true itertools.workspace = true nohash-hasher.workspace = true once_cell.workspace = true diff --git a/crates/store/re_types_core/src/size_bytes.rs b/crates/store/re_types_core/src/size_bytes.rs index 277dec0c1189..3c821883eb08 100644 --- a/crates/store/re_types_core/src/size_bytes.rs +++ b/crates/store/re_types_core/src/size_bytes.rs @@ -212,7 +212,7 @@ macro_rules! impl_size_bytes_pod { } impl_size_bytes_pod!(u8, u16, u32, u64, u128, i8, i16, i32, i64, i128, bool, f32, f64); -impl_size_bytes_pod!(arrow2::types::f16); +impl_size_bytes_pod!(half::f16); impl SizeBytes for (T, U) where