From 074e0f82d412f475c0604f40c660a0ba163897de Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 13 Sep 2024 18:04:16 -0400 Subject: [PATCH 1/4] Implement set_bits fuzz test --- arrow-buffer/src/util/bit_mask.rs | 320 ++++++++++++++++++++---------- 1 file changed, 215 insertions(+), 105 deletions(-) diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index 8f81cb7d0469..218ac13c55f5 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -64,125 +64,235 @@ pub fn set_bits( #[cfg(test)] mod tests { use super::*; + use crate::bit_util::unset_bit; + use rand::prelude::StdRng; + use rand::{Fill, Rng, SeedableRng}; + use std::fmt::Display; #[test] fn test_set_bits_aligned() { - let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let source: &[u8] = &[ - 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, - 0b10100101, - ]; - - let destination_offset = 8; - let source_offset = 0; - - let len = 64; - - let expected_data: &[u8] = &[ - 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, - 0b10100101, 0, - ]; - let expected_null_count = 24; - let result = set_bits( - destination.as_mut_slice(), - source, - destination_offset, - source_offset, - len, - ); - - assert_eq!(destination, expected_data); - assert_eq!(result, expected_null_count); + SetBitsTest { + write_data: vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + data: vec![ + 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, + 0b10100101, + ], + offset_write: 8, + offset_read: 0, + len: 64, + expected_data: vec![ + 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, + 0b11100111, 0b10100101, 0, + ], + expected_null_count: 24, + } + .verify(); } #[test] fn test_set_bits_unaligned_destination_start() { - let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let source: &[u8] = &[ - 0b11100111, 0b10100101, 0b10011001, 
0b11011011, 0b11101011, 0b11000011, 0b11100111, - 0b10100101, - ]; - - let destination_offset = 3; - let source_offset = 0; - - let len = 64; - - let expected_data: &[u8] = &[ - 0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110, 0b00011111, 0b00111110, - 0b00101111, 0b00000101, 0b00000000, - ]; - let expected_null_count = 24; - let result = set_bits( - destination.as_mut_slice(), - source, - destination_offset, - source_offset, - len, - ); - - assert_eq!(destination, expected_data); - assert_eq!(result, expected_null_count); + SetBitsTest { + write_data: vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + data: vec![ + 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, + 0b10100101, + ], + offset_write: 3, + offset_read: 0, + len: 64, + expected_data: vec![ + 0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110, 0b00011111, 0b00111110, + 0b00101111, 0b00000101, 0b00000000, + ], + expected_null_count: 24, + } + .verify(); } #[test] fn test_set_bits_unaligned_destination_end() { - let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let source: &[u8] = &[ - 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, - 0b10100101, - ]; - - let destination_offset = 8; - let source_offset = 0; - - let len = 62; - - let expected_data: &[u8] = &[ - 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, - 0b00100101, 0, - ]; - let expected_null_count = 23; - let result = set_bits( - destination.as_mut_slice(), - source, - destination_offset, - source_offset, - len, - ); - - assert_eq!(destination, expected_data); - assert_eq!(result, expected_null_count); + SetBitsTest { + write_data: vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + data: vec![ + 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, + 0b10100101, + ], + offset_write: 8, + offset_read: 0, + len: 62, + expected_data: vec![ + 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 
0b11000011, + 0b11100111, 0b00100101, 0, + ], + expected_null_count: 23, + } + .verify(); } #[test] fn test_set_bits_unaligned() { - let mut destination: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - let source: &[u8] = &[ - 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, - 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, 0b10100101, - 0b10011001, 0b11011011, 0b11101011, 0b11000011, - ]; - - let destination_offset = 3; - let source_offset = 5; - - let len = 95; - - let expected_data: &[u8] = &[ - 0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b01111001, - 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b00000001, - ]; - let expected_null_count = 35; - let result = set_bits( - destination.as_mut_slice(), - source, - destination_offset, - source_offset, - len, - ); - - assert_eq!(destination, expected_data); - assert_eq!(result, expected_null_count); + SetBitsTest { + write_data: vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + data: vec![ + 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, + 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, 0b10100101, + 0b10011001, 0b11011011, 0b11101011, 0b11000011, + ], + offset_write: 3, + offset_read: 5, + len: 95, + expected_data: vec![ + 0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b01111001, + 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b00000001, + ], + expected_null_count: 35, + } + .verify(); + } + + #[test] + fn set_bits_fuz() { + let mut rng = StdRng::seed_from_u64(42); + let mut data = SetBitsTest::new(); + for _ in 0..10000 { + data.regen(&mut rng); + data.verify(); + } + } + + #[derive(Debug, Default)] + struct SetBitsTest { + /// target write data + write_data: Vec<u8>, + /// source data + data: Vec<u8>, + offset_write: usize, + offset_read: usize, + len: usize, + /// the expected contents of write_data after the test + 
expected_data: Vec<u8>, + /// the expected number of nulls copied at the end of the test + expected_null_count: usize, + } + + /// prints a byte slice as a binary string like "01010101 10101010" + struct BinaryFormatter<'a>(&'a [u8]); + impl<'a> Display for BinaryFormatter<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for byte in self.0 { + write!(f, "{:08b} ", byte)?; + } + write!(f, " ")?; + Ok(()) + } + } + + impl Display for SetBitsTest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "SetBitsTest {{")?; + writeln!(f, " write_data: {}", BinaryFormatter(&self.write_data))?; + writeln!(f, " data: {}", BinaryFormatter(&self.data))?; + writeln!( + f, + " expected_data: {}", + BinaryFormatter(&self.expected_data) + )?; + writeln!(f, " offset_write: {}", self.offset_write)?; + writeln!(f, " offset_read: {}", self.offset_read)?; + writeln!(f, " len: {}", self.len)?; + writeln!(f, " expected_null_count: {}", self.expected_null_count)?; + writeln!(f, "}}") + } + } + + impl SetBitsTest { + /// create a new instance of SetBitsTest + fn new() -> Self { + Self::default() + } + + /// Update this instance's fields with randomly selected values and expected data + fn regen(&mut self, rng: &mut StdRng) { + // (read) data + // ------------------+-----------------+------- + // .. offset_read .. | data | ... + // ------------------+-----------------+------- + + // Write data + // -------------------+-----------------+------- + // .. offset_write .. | (data to write) | ... 
+ // -------------------+-----------------+------- + + // length of data to copy + let len = rng.gen_range(0..=200); + + // randomly pick where we will write to + let offset_write_bits = rng.gen_range(0..=200); + let offset_write_bytes = if offset_write_bits % 8 == 0 { + offset_write_bits / 8 + } else { + (offset_write_bits / 8) + 1 + }; + let extra_write_data_bytes = rng.gen_range(0..=5); // ensure 0 shows up often + + // randomly decide where we will read from + let extra_read_data_bytes = rng.gen_range(0..=5); // make sure 0 shows up often + let offset_read_bits = rng.gen_range(0..=200); + let offset_read_bytes = if offset_read_bits % 8 != 0 { + (offset_read_bits / 8) + 1 + } else { + offset_read_bits / 8 + }; + + // create space for writing + self.write_data.clear(); + self.write_data + .resize(offset_write_bytes + len + extra_write_data_bytes, 0); + + // interestingly set_bits seems to assume the output is already zeroed + // the fuzz tests fail when this is uncommented + //self.write_data.try_fill(rng).unwrap(); + self.offset_write = offset_write_bits; + + // make source data + self.data + .resize(offset_read_bytes + len + extra_read_data_bytes, 0); + // fill source data with random bytes + self.data.try_fill(rng).unwrap(); + self.offset_read = offset_read_bits; + + self.len = len; + + // generate expected output (not efficient) + self.expected_data.resize(self.write_data.len(), 0); + self.expected_data.copy_from_slice(&self.write_data); + + self.expected_null_count = 0; + for i in 0..self.len { + let bit = get_bit(&self.data, self.offset_read + i); + if bit { + set_bit(&mut self.expected_data, self.offset_write + i); + } else { + unset_bit(&mut self.expected_data, self.offset_write + i); + self.expected_null_count += 1; + } + } + } + + /// call set_bits with the given parameters and compare with the expected output + fn verify(&self) { + // call set_bits and compare + let mut actual = self.write_data.to_vec(); + let null_count = set_bits( + &mut actual, + 
&self.data, + self.offset_write, + self.offset_read, + self.len, + ); + + assert_eq!(actual, self.expected_data, "self: {}", self); + assert_eq!(null_count, self.expected_null_count, "self: {}", self); + } } } From 938380e130c550fb964f31546e5be83da478f150 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 15 Sep 2024 07:44:50 -0400 Subject: [PATCH 2/4] Update arrow-buffer/src/util/bit_mask.rs --- arrow-buffer/src/util/bit_mask.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index 218ac13c55f5..7e79f3c25b51 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -154,7 +154,7 @@ mod tests { fn set_bits_fuz() { let mut rng = StdRng::seed_from_u64(42); let mut data = SetBitsTest::new(); - for _ in 0..10000 { + for _ in 0..100 { data.regen(&mut rng); data.verify(); } From 9a18e67e9bd03df56bec7ef60dea44cc327e3645 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 15 Sep 2024 07:44:55 -0400 Subject: [PATCH 3/4] Update arrow-buffer/src/util/bit_mask.rs --- arrow-buffer/src/util/bit_mask.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index 7e79f3c25b51..42150b8631b9 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -151,7 +151,7 @@ mod tests { } #[test] - fn set_bits_fuz() { + fn set_bits_fuzz() { let mut rng = StdRng::seed_from_u64(42); let mut data = SetBitsTest::new(); for _ in 0..100 { From e938ef3f7e9b0f02d830ab0139c25ae96e2c011c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 18 Sep 2024 16:22:39 -0400 Subject: [PATCH 4/4] fix import --- arrow-buffer/src/util/bit_mask.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index 3459a80cd35e..6a552eb8f6c5 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ 
b/arrow-buffer/src/util/bit_mask.rs @@ -160,7 +160,7 @@ unsafe fn or_write_u64_bytes(data: &mut [u8], offset: usize, chunk: u64) { #[cfg(test)] mod tests { use super::*; - use crate::bit_util::unset_bit; + use crate::bit_util::{get_bit, set_bit, unset_bit}; use rand::prelude::StdRng; use rand::{Fill, Rng, SeedableRng}; use std::fmt::Display;