From cab348d2f6d02ad4327af9d0a8de9550db653aa8 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Fri, 11 Oct 2024 15:30:32 -0700 Subject: [PATCH 1/5] Remove unnecessary use of `unsafe` by reusing existing code. --- arrow-buffer/src/builder/offset.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arrow-buffer/src/builder/offset.rs b/arrow-buffer/src/builder/offset.rs index 1ef0e3170c96..a51ca5f01d76 100644 --- a/arrow-buffer/src/builder/offset.rs +++ b/arrow-buffer/src/builder/offset.rs @@ -70,8 +70,11 @@ impl OffsetBufferBuilder { /// /// Panics if offsets overflow `O` pub fn finish_cloned(&self) -> OffsetBuffer { - O::from_usize(self.last_offset).expect("overflow"); - unsafe { OffsetBuffer::new_unchecked(self.offsets.clone().into()) } + let cloned = Self { + offsets: self.offsets.clone(), + last_offset: self.last_offset, + }; + cloned.finish() } } From 7d23f7e801ea21c55ca139721f934cb0b196e2e5 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Fri, 11 Oct 2024 15:38:47 -0700 Subject: [PATCH 2/5] Remove unnecessarily scary use of MaybeUninit. The MaybeUninit doesn't ever hold any uninitialized bytes, but if it _did_, then this would have been UB! Because the write can write fewer than 8 bytes, it would leave the remaining bytes uninitialized. Since the `MaybeUninit` is never uninitialized, we can remove the use of `MaybeUninit`, and just use `u64`. Then it becomes clear that the only thing for the unsafe block is the `copy_nonoverlapping`. --- arrow-buffer/src/util/bit_mask.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index d4c2fa4744e1..0c95acd12eb1 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -131,12 +131,12 @@ unsafe fn set_upto_64bits( #[inline] unsafe fn read_bytes_to_u64(data: &[u8], offset: usize, count: usize) -> u64 { debug_assert!(count <= 8); - let mut tmp = std::mem::MaybeUninit::::new(0); + let mut tmp : u64 = 0; let src = data.as_ptr().add(offset); unsafe { - std::ptr::copy_nonoverlapping(src, tmp.as_mut_ptr() as *mut u8, count); - tmp.assume_init() + std::ptr::copy_nonoverlapping(src, &mut tmp as *mut _ as *mut u8, count); } + tmp } /// # Safety From c6201c700f6aa9ff3197e25d78bdeb919a90863c Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Fri, 11 Oct 2024 15:42:03 -0700 Subject: [PATCH 3/5] Give read_bytes_to_u64 a more precise safety comment, matching write_u64_bytes. --- arrow-buffer/src/util/bit_mask.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index 0c95acd12eb1..9de260ceb387 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -127,7 +127,7 @@ unsafe fn set_upto_64bits( } /// # Safety -/// The caller must ensure all arguments are within the valid range. +/// The caller must ensure `data` has `offset..(offset + 8)` range #[inline] unsafe fn read_bytes_to_u64(data: &[u8], offset: usize, count: usize) -> u64 { debug_assert!(count <= 8); From c1c8bec68d5c253e9ac355c9afb5ca0c5ce6558c Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Mon, 14 Oct 2024 12:38:38 -0700 Subject: [PATCH 4/5] Push `count <= 8` precondition into doc comment. The alternative would be to actually convert the `debug_assert` to an `assert`, or clamp to 8, but both of these could conceivably have a performance impact, so in the interest of being a pure win, with no downsides, that change is left to a future editor. --- arrow-buffer/src/util/bit_mask.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index 9de260ceb387..7591f33a42e7 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -127,7 +127,7 @@ unsafe fn set_upto_64bits( } /// # Safety -/// The caller must ensure `data` has `offset..(offset + 8)` range +/// The caller must ensure `data` has `offset..(offset + 8)` range, and `count <= 8`. #[inline] unsafe fn read_bytes_to_u64(data: &[u8], offset: usize, count: usize) -> u64 { debug_assert!(count <= 8); From 8abab9a46ae42bc15ee5dfbb2240d2564f690b02 Mon Sep 17 00:00:00 2001 From: Devin Jeanpierre Date: Mon, 14 Oct 2024 13:09:00 -0700 Subject: [PATCH 5/5] cargo fmt I hear git has an `absorb` extension, but I don't have it. :( --- arrow-buffer/src/util/bit_mask.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index 7591f33a42e7..83c395db8c03 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -131,7 +131,7 @@ unsafe fn set_upto_64bits( #[inline] unsafe fn read_bytes_to_u64(data: &[u8], offset: usize, count: usize) -> u64 { debug_assert!(count <= 8); - let mut tmp : u64 = 0; + let mut tmp: u64 = 0; let src = data.as_ptr().add(offset); unsafe { std::ptr::copy_nonoverlapping(src, &mut tmp as *mut _ as *mut u8, count);