Skip to content

Commit

Permalink
Update re_arrow2 to a version using half::f16 (#8199)
Browse files Browse the repository at this point in the history
  • Loading branch information
emilk authored Nov 22, 2024
1 parent f93d74c commit 325ee0b
Show file tree
Hide file tree
Showing 10 changed files with 18 additions and 108 deletions.
5 changes: 3 additions & 2 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5213,8 +5213,7 @@ dependencies = [
[[package]]
name = "re_arrow2"
version = "0.17.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "787fa1df3020f018e02c1f957edfc6890a73372444de397c36011cda61c9b489"
source = "git+https://github.com/rerun-io/re_arrow2?rev=79ba149e823c88c1baa770a33c6ea8b6244d0597#79ba149e823c88c1baa770a33c6ea8b6244d0597"
dependencies = [
"ahash",
"arrow-array",
Expand All @@ -5230,6 +5229,7 @@ dependencies = [
"ethnum",
"foreign_vec",
"getrandom",
"half",
"hash_hasher",
"hashbrown 0.14.5",
"num-traits",
Expand Down Expand Up @@ -6347,6 +6347,7 @@ dependencies = [
"bytemuck",
"criterion",
"document-features",
"half",
"itertools 0.13.0",
"nohash-hasher",
"once_cell",
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ egui_tiles = { git = "https://github.com/rerun-io/egui_tiles", rev = "48e0ef5664
# walkers = { git = "https://github.com/rerun-io/walkers", rev = "8939cceb3fa49ca8648ee16fe1d8432f5ab0bdcc" } # https://github.com/podusowski/walkers/pull/222

# commit on `rerun-io/re_arrow2` `main` branch
# https://github.com/rerun-io/re_arrow2/commit/e4717d6debc6d4474ec10db8f629f823f57bad07
# re_arrow2 = { git = "https://github.com/rerun-io/re_arrow2", rev = "e4717d6debc6d4474ec10db8f629f823f57bad07" }
# https://github.com/rerun-io/re_arrow2/commit/79ba149e823c88c1baa770a33c6ea8b6244d0597
re_arrow2 = { git = "https://github.com/rerun-io/re_arrow2", rev = "79ba149e823c88c1baa770a33c6ea8b6244d0597" }

# dav1d = { path = "/home/cmc/dev/rerun-io/rav1d", package = "re_rav1d", version = "0.1.1" }
4 changes: 2 additions & 2 deletions crates/build/re_types_builder/src/codegen/rust/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ impl quote::ToTokens for TypeTokenizer<'_> {
Type::Int32 => quote!(i32),
Type::Int64 => quote!(i64),
Type::Bool => quote!(bool),
Type::Float16 => quote!(arrow2::types::f16),
Type::Float16 => quote!(half::f16),
Type::Float32 => quote!(f32),
Type::Float64 => quote!(f64),
Type::String => quote!(::re_types_core::ArrowString),
Expand Down Expand Up @@ -789,7 +789,7 @@ impl quote::ToTokens for &ElementType {
ElementType::Int32 => quote!(i32),
ElementType::Int64 => quote!(i64),
ElementType::Bool => quote!(bool),
ElementType::Float16 => quote!(arrow2::types::f16),
ElementType::Float16 => quote!(half::f16),
ElementType::Float32 => quote!(f32),
ElementType::Float64 => quote!(f64),
ElementType::String => quote!(::re_types_core::ArrowString),
Expand Down
4 changes: 2 additions & 2 deletions crates/store/re_types/src/datatypes/tensor_buffer.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

94 changes: 1 addition & 93 deletions crates/store/re_types/src/datatypes/tensor_data_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ tensor_type!(i16, I16);
tensor_type!(i32, I32);
tensor_type!(i64, I64);

tensor_type!(arrow2::types::f16, F16);
tensor_type!(half::f16, F16);

tensor_type!(f32, F32);
tensor_type!(f64, F64);
Expand All @@ -256,98 +256,6 @@ impl<'a> TryFrom<&'a TensorData> for ::ndarray::ArrayViewD<'a, u8> {
}
}

// Manual expansion of tensor_type! macro for `half::f16` types. We need to do this
// because arrow uses its own half type. The two use the same underlying representation
// but are still distinct types. `half::f16`, however, is more full-featured and
// generally a better choice to use when converting to ndarray.
// ==========================================
// TODO(jleibs): would be nice to support this with the macro definition as well
// but the bytemuck casts add a bit of complexity here.
/// Borrows the `F16` buffer of a [`TensorData`] as a dynamically-dimensioned
/// `ndarray` view over `half::f16` elements.
///
/// The buffer's elements are reinterpreted in place via `bytemuck::cast_slice`;
/// no element data is copied.
///
/// # Errors
/// - [`TensorCastError::TypeMismatch`] if the buffer is not `TensorBuffer::F16`.
/// - [`TensorCastError::BadTensorShape`] if `ndarray` rejects the tensor's shape
///   for the buffer.
impl<'a> TryFrom<&'a TensorData> for ::ndarray::ArrayViewD<'a, half::f16> {
    type Error = TensorCastError;

    fn try_from(value: &'a TensorData) -> Result<Self, Self::Error> {
        match &value.buffer {
            TensorBuffer::F16(data) => {
                // One extent per tensor dimension, in declaration order.
                let dims: Vec<usize> = value.shape.iter().map(|dim| dim.size as usize).collect();
                let elements = bytemuck::cast_slice(data.as_slice());
                ndarray::ArrayViewD::from_shape(dims, elements)
                    .map_err(|source| TensorCastError::BadTensorShape { source })
            }
            _ => Err(TensorCastError::TypeMismatch),
        }
    }
}

/// Copies an `ndarray` view of `half::f16` elements into a new [`TensorData`]
/// with an `F16` buffer.
///
/// Every axis of the view becomes an unnamed [`TensorDimension`]. This
/// conversion has no failing path; it always returns `Ok`.
impl<'a, D: ::ndarray::Dimension> TryFrom<::ndarray::ArrayView<'a, half::f16, D>> for TensorData {
    type Error = TensorCastError;

    fn try_from(view: ::ndarray::ArrayView<'a, half::f16, D>) -> Result<Self, Self::Error> {
        let shape = view
            .shape()
            .iter()
            .map(|&extent| TensorDimension {
                size: extent as u64,
                name: None,
            })
            .collect();

        let buffer = if let Some(contiguous) = view.to_slice() {
            // `to_slice` succeeded, so the elements can be bulk-copied after
            // reinterpreting them as the buffer's element type.
            TensorBuffer::F16(Vec::from(bytemuck::cast_slice(contiguous)).into())
        } else {
            // No single slice is available: walk the view in logical order and
            // convert each element through its raw bits.
            let converted = view
                .iter()
                .map(|h| arrow2::types::f16::from_bits(h.to_bits()))
                .collect::<Vec<_>>();
            TensorBuffer::F16(converted.into())
        };

        Ok(Self { shape, buffer })
    }
}

/// Converts an owned `ndarray` array of `half::f16` elements into a
/// [`TensorData`] with an `F16` buffer.
///
/// Every axis of the array becomes an unnamed [`TensorDimension`]. This
/// conversion has no failing path; it always returns `Ok`.
impl<D: ::ndarray::Dimension> TryFrom<::ndarray::Array<half::f16, D>> for TensorData {
    type Error = TensorCastError;

    fn try_from(value: ndarray::Array<half::f16, D>) -> Result<Self, Self::Error> {
        let shape = value
            .shape()
            .iter()
            .map(|dim| TensorDimension {
                size: *dim as u64,
                name: None,
            })
            .collect();
        if value.is_standard_layout() {
            // Must be read before `value` is consumed below.
            let num_elements = value.len();
            let (vec, offset) = value.into_raw_vec_and_offset();
            // into_raw_vec_and_offset() guarantees that the logical element order (.iter()) matches the internal
            // storage order in the returned vector if the array is in standard layout.
            //
            // The returned vector is the *whole* backing allocation, though: an array that was
            // sliced in place can have extra elements both before `offset` and after its last
            // logical element. Copy exactly `num_elements` starting at the offset so the buffer
            // always agrees with `shape`. An empty array reports no offset; `0..0` is then an
            // empty slice regardless of what the allocation still holds.
            let start = offset.unwrap_or(0);
            let vec_slice = &vec[start..start + num_elements];
            Ok(Self {
                shape,
                buffer: TensorBuffer::F16(Vec::from(bytemuck::cast_slice(vec_slice)).into()),
            })
        } else {
            // Non-standard layout: walk the array in logical order and convert
            // each element through its raw bits.
            Ok(Self {
                shape,
                buffer: TensorBuffer::F16(
                    value
                        .iter()
                        .map(|f| arrow2::types::f16::from_bits(f.to_bits()))
                        .collect::<Vec<_>>()
                        .into(),
                ),
            })
        }
    }
}

// ----------------------------------------------------------------------------

#[cfg(feature = "image")]
Expand Down
2 changes: 1 addition & 1 deletion crates/store/re_types/src/tensor_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ pub enum TensorElement {
///
/// Uses the standard IEEE 754-2008 binary16 format.
/// See <https://en.wikipedia.org/wiki/Half-precision_floating-point_format>.
F16(arrow2::types::f16),
F16(half::f16),

/// 32-bit floating point number.
F32(f32),
Expand Down
7 changes: 3 additions & 4 deletions crates/store/re_types/src/testing/datatypes/affix_fuzzer21.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion crates/store/re_types/tests/types/fuzzy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

use std::collections::HashMap;

use arrow2::types::f16;
use re_types::{
testing::{
archetypes::{AffixFuzzer1, AffixFuzzer2, AffixFuzzer3, AffixFuzzer4},
Expand All @@ -11,6 +10,8 @@ use re_types::{
Archetype as _, AsComponents as _,
};

use half::f16;

use crate::util;

#[test]
Expand Down
1 change: 1 addition & 0 deletions crates/store/re_types_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ arrow2 = { workspace = true, features = [
backtrace.workspace = true
bytemuck.workspace = true
document-features.workspace = true
half.workspace = true
itertools.workspace = true
nohash-hasher.workspace = true
once_cell.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion crates/store/re_types_core/src/size_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ macro_rules! impl_size_bytes_pod {
}

impl_size_bytes_pod!(u8, u16, u32, u64, u128, i8, i16, i32, i64, i128, bool, f32, f64);
impl_size_bytes_pod!(arrow2::types::f16);
impl_size_bytes_pod!(half::f16);

impl<T, U> SizeBytes for (T, U)
where
Expand Down

0 comments on commit 325ee0b

Please sign in to comment.