diff --git a/Cargo.lock b/Cargo.lock index 4409be78b369..4cd0eddc231c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5990,6 +5990,7 @@ name = "re_space_view" version = "0.21.0-alpha.1+dev" dependencies = [ "ahash", + "arrow", "bytemuck", "egui", "glam", @@ -6080,6 +6081,7 @@ version = "0.21.0-alpha.1+dev" dependencies = [ "ahash", "anyhow", + "arrow", "bitflags 2.6.0", "bytemuck", "criterion", diff --git a/crates/store/re_chunk/Cargo.toml b/crates/store/re_chunk/Cargo.toml index de978a9eaeed..676a0d1ddbab 100644 --- a/crates/store/re_chunk/Cargo.toml +++ b/crates/store/re_chunk/Cargo.toml @@ -31,7 +31,7 @@ serde = [ ] ## Enable conversion to and from arrow-rs types -arrow = ["arrow2/arrow", "dep:arrow"] +arrow = ["arrow2/arrow"] [dependencies] @@ -49,6 +49,7 @@ re_types_core.workspace = true # External ahash.workspace = true anyhow.workspace = true +arrow.workspace = true arrow2 = { workspace = true, features = [ "compute_concatenate", "compute_filter", @@ -64,7 +65,6 @@ thiserror.workspace = true # Optional dependencies: serde = { workspace = true, optional = true, features = ["derive", "rc"] } -arrow = { workspace = true, optional = true } # Native dependencies: [target.'cfg(not(target_arch = "wasm32"))'.dependencies] diff --git a/crates/store/re_chunk/src/iter.rs b/crates/store/re_chunk/src/iter.rs index 647e311aefa8..5f4604aecadd 100644 --- a/crates/store/re_chunk/src/iter.rs +++ b/crates/store/re_chunk/src/iter.rs @@ -475,7 +475,7 @@ impl Chunk { /// * [`Self::iter_string`]. /// * [`Self::iter_component_arrays`]. /// * [`Self::iter_component`]. - pub fn iter_buffer( + pub fn iter_buffer( &self, component_name: &ComponentName, ) -> impl Iterator>> + '_ { diff --git a/crates/store/re_types_core/Cargo.toml b/crates/store/re_types_core/Cargo.toml index 8545e2421940..71b15bcd42a9 100644 --- a/crates/store/re_types_core/Cargo.toml +++ b/crates/store/re_types_core/Cargo.toml @@ -45,9 +45,9 @@ anyhow.workspace = true arrow.workspace = true arrow2 = { workspace = true, features = [ "arrow", + "compute_concatenate", "io_ipc", "io_print", - "compute_concatenate", ] } backtrace.workspace = true bytemuck.workspace = true diff --git a/crates/store/re_types_core/src/arrow_buffer.rs b/crates/store/re_types_core/src/arrow_buffer.rs index f386770efa15..a8ed54e38d8d 100644 --- a/crates/store/re_types_core/src/arrow_buffer.rs +++ b/crates/store/re_types_core/src/arrow_buffer.rs @@ -1,4 +1,6 @@ -/// Convenience-wrapper around an [`arrow2::buffer::Buffer`] that is known to contain a +use arrow::datatypes::ArrowNativeType; + +/// Convenience-wrapper around an [`arrow::buffer::ScalarBuffer`] that is known to contain a /// a primitive type. /// /// The [`ArrowBuffer`] object is internally reference-counted and can be @@ -6,30 +8,27 @@ /// This avoids some of the lifetime complexities that would otherwise /// arise from returning a `&[T]` directly, but is significantly more /// performant than doing the full allocation necessary to return a `Vec`. -#[derive(Clone, Debug, Default, PartialEq)] -pub struct ArrowBuffer(arrow2::buffer::Buffer); +#[derive(Clone, Debug, PartialEq)] +pub struct ArrowBuffer(arrow::buffer::ScalarBuffer); + +impl Default for ArrowBuffer { + fn default() -> Self { + Self(arrow::buffer::ScalarBuffer::::from(vec![])) + } +} -impl crate::SizeBytes for ArrowBuffer { +impl crate::SizeBytes for ArrowBuffer { #[inline] fn heap_size_bytes(&self) -> u64 { - let Self(buf) = self; - std::mem::size_of_val(buf.as_slice()) as _ + std::mem::size_of_val(self.as_slice()) as _ } } -impl ArrowBuffer { +impl ArrowBuffer { /// The number of instances of T stored in this buffer. #[inline] pub fn num_instances(&self) -> usize { - // WARNING: If you are touching this code, make sure you know what len() actually does. - // - // There is ambiguity in how arrow2 and arrow-rs talk about buffer lengths, including - // some incorrect documentation: https://github.com/jorgecarleitao/arrow2/issues/1430 - // - // Arrow2 `Buffer` is typed and `len()` is the number of units of `T`, but the documentation - // is currently incorrect. - // Arrow-rs `Buffer` is untyped and len() is in bytes, but `ScalarBuffer`s are in units of T. - self.0.len() + self.as_slice().len() } /// The number of bytes stored in this buffer @@ -45,7 +44,7 @@ impl ArrowBuffer { #[inline] pub fn as_slice(&self) -> &[T] { - self.0.as_slice() + self.0.as_ref() } /// Returns a new [`ArrowBuffer`] that is a slice of this buffer starting at `offset`. @@ -56,19 +55,19 @@ impl ArrowBuffer { /// Panics iff `offset + length` is larger than `len`. #[inline] pub fn sliced(self, range: std::ops::Range) -> Self { - Self(self.0.sliced(range.start, range.len())) + Self(self.0.slice(range.start, range.len())) } } -impl ArrowBuffer { +impl ArrowBuffer { /// Cast POD (plain-old-data) types to another POD type. /// /// For instance: cast a buffer of `u8` to a buffer of `f32`. #[inline] - pub fn cast_pod( + pub fn cast_pod( &self, ) -> Result, bytemuck::PodCastError> { - // TODO(emilk): when we switch from arrow2, see if we can make this function zero-copy + // TODO(#2978): when we switch from arrow2, see if we can make this function zero-copy re_tracing::profile_function!(); let target_slice: &[Target] = bytemuck::try_cast_slice(self.as_slice())?; Ok(ArrowBuffer::from(target_slice.to_vec())) @@ -84,17 +83,17 @@ impl ArrowBuffer { } } -impl Eq for ArrowBuffer {} +impl Eq for ArrowBuffer {} -impl ArrowBuffer { +impl ArrowBuffer { #[inline] pub fn to_vec(&self) -> Vec { - self.0.as_slice().to_vec() + self.as_slice().to_vec() } } -impl - From> for ArrowBuffer +impl From> + for ArrowBuffer { #[inline] fn from(value: arrow::buffer::ScalarBuffer) -> Self { @@ -102,38 +101,61 @@ impl } } -impl From> for ArrowBuffer { +impl From> + for ArrowBuffer +{ #[inline] - fn from(value: arrow2::buffer::Buffer) -> Self { - Self(value) + fn from(arrow2_buffer: arrow2::buffer::Buffer) -> Self { + let num_elements = arrow2_buffer.len(); + let arrow1_buffer = arrow::buffer::Buffer::from(arrow2_buffer); + let scalar_buffer = arrow::buffer::ScalarBuffer::new(arrow1_buffer, 0, num_elements); + Self(scalar_buffer) } } -impl From> for ArrowBuffer { +impl From> for ArrowBuffer { #[inline] fn from(value: Vec) -> Self { Self(value.into()) } } -impl From<&[T]> for ArrowBuffer { +impl From<&[T]> for ArrowBuffer { #[inline] fn from(value: &[T]) -> Self { - Self(value.iter().cloned().collect()) // TODO(emilk): avoid extra clones + Self(value.iter().copied().collect()) } } -impl FromIterator for ArrowBuffer { +impl FromIterator for ArrowBuffer { fn from_iter>(iter: I) -> Self { - Self(arrow2::buffer::Buffer::from_iter(iter)) + Self(arrow::buffer::ScalarBuffer::from_iter(iter)) } } -impl std::ops::Deref for ArrowBuffer { +impl std::ops::Deref for ArrowBuffer { type Target = [T]; #[inline] fn deref(&self) -> &[T] { - self.0.as_slice() + self.as_slice() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_arrow2_compatibility() { + let arrow2_buffer = arrow2::buffer::Buffer::::from(vec![1.0, 2.0, 3.0, 4.0, 5.0]); + assert_eq!(arrow2_buffer.as_slice(), &[1.0, 2.0, 3.0, 4.0, 5.0]); + + let sliced_arrow2_buffer = arrow2_buffer.sliced(1, 3); + assert_eq!(sliced_arrow2_buffer.as_slice(), &[2.0, 3.0, 4.0]); + + let arrow_buffer = ArrowBuffer::::from(sliced_arrow2_buffer); + assert_eq!(arrow_buffer.num_instances(), 3); + assert_eq!(arrow_buffer.as_slice(), &[2.0, 3.0, 4.0]); } } diff --git a/crates/viewer/re_space_view/Cargo.toml b/crates/viewer/re_space_view/Cargo.toml index 348b445d6b02..64993d91ecb7 100644 --- a/crates/viewer/re_space_view/Cargo.toml +++ b/crates/viewer/re_space_view/Cargo.toml @@ -36,6 +36,7 @@ re_viewer_context.workspace = true re_viewport_blueprint.workspace = true ahash.workspace = true +arrow.workspace = true bytemuck.workspace = true egui.workspace = true glam.workspace = true diff --git a/crates/viewer/re_space_view/src/results_ext.rs b/crates/viewer/re_space_view/src/results_ext.rs index 8f0da5517cec..792a5f87c740 100644 --- a/crates/viewer/re_space_view/src/results_ext.rs +++ b/crates/viewer/re_space_view/src/results_ext.rs @@ -500,7 +500,7 @@ impl<'a> HybridResultsChunkIter<'a> { /// Iterate as indexed buffers. /// /// See [`Chunk::iter_buffer`] for more information. - pub fn buffer( + pub fn buffer( &'a self, ) -> impl Iterator>)> + 'a { self.chunks.iter().flat_map(|chunk| { diff --git a/crates/viewer/re_space_view_spatial/Cargo.toml b/crates/viewer/re_space_view_spatial/Cargo.toml index f7334d6485e4..7dce5b2ea216 100644 --- a/crates/viewer/re_space_view_spatial/Cargo.toml +++ b/crates/viewer/re_space_view_spatial/Cargo.toml @@ -41,6 +41,7 @@ re_video.workspace = true re_viewer_context.workspace = true re_viewport_blueprint.workspace = true +arrow.workspace = true arrow2.workspace = true ahash.workspace = true anyhow.workspace = true diff --git a/crates/viewer/re_space_view_spatial/src/visualizers/utilities/entity_iterator.rs b/crates/viewer/re_space_view_spatial/src/visualizers/utilities/entity_iterator.rs index f0b7e04fc351..0e5351386774 100644 --- a/crates/viewer/re_space_view_spatial/src/visualizers/utilities/entity_iterator.rs +++ b/crates/viewer/re_space_view_spatial/src/visualizers/utilities/entity_iterator.rs @@ -227,7 +227,7 @@ pub fn iter_string<'a>( /// /// See [`Chunk::iter_buffer`] for more information. #[allow(unused)] -pub fn iter_buffer<'a, T: arrow2::types::NativeType>( +pub fn iter_buffer<'a, T: arrow::datatypes::ArrowNativeType + arrow2::types::NativeType>( chunks: &'a std::borrow::Cow<'a, [Chunk]>, timeline: Timeline, component_name: ComponentName,