diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index 589f7dac6d19..3345595fac19 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -324,6 +324,6 @@ mod tests { assert_eq!(0, private_data.buffers_ptr.len()); - Box::into_raw(private_data); + let _ = Box::into_raw(private_data); } } diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs index ade902f7cafd..b09dcdc5029b 100644 --- a/arrow-ipc/src/writer.rs +++ b/arrow-ipc/src/writer.rs @@ -2572,7 +2572,7 @@ mod tests { let mut fields = Vec::new(); let mut arrays = Vec::new(); for i in 0..num_cols { - let field = Field::new(&format!("col_{}", i), DataType::Decimal128(38, 10), true); + let field = Field::new(format!("col_{}", i), DataType::Decimal128(38, 10), true); let array = Decimal128Array::from(vec![num_cols as i128; num_rows]); fields.push(field); arrays.push(Arc::new(array) as Arc); @@ -2627,7 +2627,7 @@ mod tests { let mut fields = Vec::new(); let mut arrays = Vec::new(); for i in 0..num_cols { - let field = Field::new(&format!("col_{}", i), DataType::Decimal128(38, 10), true); + let field = Field::new(format!("col_{}", i), DataType::Decimal128(38, 10), true); let array = Decimal128Array::from(vec![num_cols as i128; num_rows]); fields.push(field); arrays.push(Arc::new(array) as Arc); @@ -2682,7 +2682,7 @@ mod tests { let mut fields = Vec::new(); let options = IpcWriteOptions::try_new(8, false, MetadataVersion::V5).unwrap(); for i in 0..num_cols { - let field = Field::new(&format!("col_{}", i), DataType::Decimal128(38, 10), true); + let field = Field::new(format!("col_{}", i), DataType::Decimal128(38, 10), true); fields.push(field); } let schema = Schema::new(fields); diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs index 140d878f39aa..168f82747c91 100644 --- a/arrow-ord/src/sort.rs +++ b/arrow-ord/src/sort.rs @@ -403,7 +403,7 @@ fn sort_fixed_size_list( } #[inline(never)] -fn sort_impl( +fn sort_impl( options: SortOptions, valids: &mut [(u32, T)], nulls: &[u32], diff --git a/arrow-schema/src/fields.rs b/arrow-schema/src/fields.rs index 63aef18ddf9c..5b9ce2a6da61 100644 --- a/arrow-schema/src/fields.rs +++ b/arrow-schema/src/fields.rs @@ -389,14 +389,13 @@ impl UnionFields { let mut set = 0_u128; type_ids .into_iter() - .map(|idx| { + .inspect(|&idx| { let mask = 1_u128 << idx; if (set & mask) != 0 { panic!("duplicate type id: {}", idx); } else { set |= mask; } - idx }) .zip(fields) .collect() diff --git a/parquet/src/arrow/arrow_reader/statistics.rs b/parquet/src/arrow/arrow_reader/statistics.rs index 602a9ad5e506..4c0dac05c7d9 100644 --- a/parquet/src/arrow/arrow_reader/statistics.rs +++ b/parquet/src/arrow/arrow_reader/statistics.rs @@ -568,13 +568,9 @@ macro_rules! make_data_page_stats_iterator { let next = self.iter.next(); match next { Some((len, index)) => match index { - $index_type(native_index) => Some( - native_index - .indexes - .iter() - .map(|x| $func(x)) - .collect::>(), - ), + $index_type(native_index) => { + Some(native_index.indexes.iter().map($func).collect::>()) + } // No matching `Index` found; // thus no statistics that can be extracted. // We return vec![None; len] to effectively diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 5c5f28ac0f72..3ec7a3dfea36 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -3082,8 +3082,8 @@ mod tests { let min = byte_array_stats.min_opt().unwrap(); let max = byte_array_stats.max_opt().unwrap(); - assert_eq!(min.as_bytes(), &[b'a']); - assert_eq!(max.as_bytes(), &[b'd']); + assert_eq!(min.as_bytes(), b"a"); + assert_eq!(max.as_bytes(), b"d"); } else { panic!("expecting Statistics::ByteArray"); } @@ -3154,8 +3154,8 @@ mod tests { let min = byte_array_stats.min_opt().unwrap(); let max = byte_array_stats.max_opt().unwrap(); - assert_eq!(min.as_bytes(), &[b'a']); - assert_eq!(max.as_bytes(), &[b'd']); + assert_eq!(min.as_bytes(), b"a"); + assert_eq!(max.as_bytes(), b"d"); } else { panic!("expecting Statistics::ByteArray"); } diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index 324e1c379bcd..a3bcfd16730f 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -1298,11 +1298,8 @@ mod tests { #[test] fn test_byte_array_from() { - assert_eq!( - ByteArray::from(vec![b'A', b'B', b'C']).data(), - &[b'A', b'B', b'C'] - ); - assert_eq!(ByteArray::from("ABC").data(), &[b'A', b'B', b'C']); + assert_eq!(ByteArray::from(b"ABC".to_vec()).data(), b"ABC"); + assert_eq!(ByteArray::from("ABC").data(), b"ABC"); assert_eq!( ByteArray::from(Bytes::from(vec![1u8, 2u8, 3u8, 4u8, 5u8])).data(), &[1u8, 2u8, 3u8, 4u8, 5u8] diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index dad960790e6c..92ce60556c3e 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -55,18 +55,14 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { // write offset index to the file for (row_group_idx, row_group) in self.row_groups.iter_mut().enumerate() { for (column_idx, column_metadata) in row_group.columns.iter_mut().enumerate() { - match &offset_indexes[row_group_idx][column_idx] { - Some(offset_index) => { - let start_offset = self.buf.bytes_written(); - let mut protocol = TCompactOutputProtocol::new(&mut self.buf); - offset_index.write_to_out_protocol(&mut protocol)?; - let end_offset = self.buf.bytes_written(); - // set offset and index for offset index - column_metadata.offset_index_offset = Some(start_offset as i64); - column_metadata.offset_index_length = - Some((end_offset - start_offset) as i32); - } - None => {} + if let Some(offset_index) = &offset_indexes[row_group_idx][column_idx] { + let start_offset = self.buf.bytes_written(); + let mut protocol = TCompactOutputProtocol::new(&mut self.buf); + offset_index.write_to_out_protocol(&mut protocol)?; + let end_offset = self.buf.bytes_written(); + // set offset and index for offset index + column_metadata.offset_index_offset = Some(start_offset as i64); + column_metadata.offset_index_length = Some((end_offset - start_offset) as i32); } } } @@ -84,18 +80,14 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { // write column index to the file for (row_group_idx, row_group) in self.row_groups.iter_mut().enumerate() { for (column_idx, column_metadata) in row_group.columns.iter_mut().enumerate() { - match &column_indexes[row_group_idx][column_idx] { - Some(column_index) => { - let start_offset = self.buf.bytes_written(); - let mut protocol = TCompactOutputProtocol::new(&mut self.buf); - column_index.write_to_out_protocol(&mut protocol)?; - let end_offset = self.buf.bytes_written(); - // set offset and index for offset index - column_metadata.column_index_offset = Some(start_offset as i64); - column_metadata.column_index_length = - Some((end_offset - start_offset) as i32); - } - None => {} + if let Some(column_index) = &column_indexes[row_group_idx][column_idx] { + let start_offset = self.buf.bytes_written(); + let mut protocol = TCompactOutputProtocol::new(&mut self.buf); + column_index.write_to_out_protocol(&mut protocol)?; + let end_offset = self.buf.bytes_written(); + // set offset and index for offset index + column_metadata.column_index_offset = Some(start_offset as i64); + column_metadata.column_index_length = Some((end_offset - start_offset) as i32); } } } @@ -524,7 +516,6 @@ mod tests { async fn load_metadata_from_bytes(file_size: usize, data: Bytes) -> ParquetMetaData { use crate::arrow::async_reader::{MetadataFetch, MetadataLoader}; use crate::errors::Result as ParquetResult; - use bytes::Bytes; use futures::future::BoxFuture; use futures::FutureExt; use std::ops::Range; diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs index 190374fd88b0..2665f28fed54 100644 --- a/parquet/src/schema/types.rs +++ b/parquet/src/schema/types.rs @@ -294,105 +294,102 @@ impl<'a> PrimitiveTypeBuilder<'a> { )); } - match &self.logical_type { - Some(logical_type) => { - // If a converted type is populated, check that it is consistent with - // its logical type - if self.converted_type != ConvertedType::NONE { - if ConvertedType::from(self.logical_type.clone()) != self.converted_type { - return Err(general_err!( - "Logical type {:?} is incompatible with converted type {} for field '{}'", - logical_type, - self.converted_type, - self.name - )); - } - } else { - // Populate the converted type for backwards compatibility - basic_info.converted_type = self.logical_type.clone().into(); + if let Some(logical_type) = &self.logical_type { + // If a converted type is populated, check that it is consistent with + // its logical type + if self.converted_type != ConvertedType::NONE { + if ConvertedType::from(self.logical_type.clone()) != self.converted_type { + return Err(general_err!( + "Logical type {:?} is incompatible with converted type {} for field '{}'", + logical_type, + self.converted_type, + self.name + )); + } + } else { + // Populate the converted type for backwards compatibility + basic_info.converted_type = self.logical_type.clone().into(); + } + // Check that logical type and physical type are compatible + match (logical_type, self.physical_type) { + (LogicalType::Map, _) | (LogicalType::List, _) => { + return Err(general_err!( + "{:?} cannot be applied to a primitive type for field '{}'", + logical_type, + self.name + )); } - // Check that logical type and physical type are compatible - match (logical_type, self.physical_type) { - (LogicalType::Map, _) | (LogicalType::List, _) => { + (LogicalType::Enum, PhysicalType::BYTE_ARRAY) => {} + (LogicalType::Decimal { scale, precision }, _) => { + // Check that scale and precision are consistent with legacy values + if *scale != self.scale { return Err(general_err!( - "{:?} cannot be applied to a primitive type for field '{}'", - logical_type, + "DECIMAL logical type scale {} must match self.scale {} for field '{}'", + scale, + self.scale, self.name )); } - (LogicalType::Enum, PhysicalType::BYTE_ARRAY) => {} - (LogicalType::Decimal { scale, precision }, _) => { - // Check that scale and precision are consistent with legacy values - if *scale != self.scale { - return Err(general_err!( - "DECIMAL logical type scale {} must match self.scale {} for field '{}'", - scale, - self.scale, - self.name - )); - } - if *precision != self.precision { - return Err(general_err!( - "DECIMAL logical type precision {} must match self.precision {} for field '{}'", - precision, - self.precision, - self.name - )); - } - self.check_decimal_precision_scale()?; - } - (LogicalType::Date, PhysicalType::INT32) => {} - ( - LogicalType::Time { - unit: TimeUnit::MILLIS(_), - .. - }, - PhysicalType::INT32, - ) => {} - (LogicalType::Time { unit, .. }, PhysicalType::INT64) => { - if *unit == TimeUnit::MILLIS(Default::default()) { - return Err(general_err!( - "Cannot use millisecond unit on INT64 type for field '{}'", - self.name - )); - } - } - (LogicalType::Timestamp { .. }, PhysicalType::INT64) => {} - (LogicalType::Integer { bit_width, .. }, PhysicalType::INT32) - if *bit_width <= 32 => {} - (LogicalType::Integer { bit_width, .. }, PhysicalType::INT64) - if *bit_width == 64 => {} - // Null type - (LogicalType::Unknown, PhysicalType::INT32) => {} - (LogicalType::String, PhysicalType::BYTE_ARRAY) => {} - (LogicalType::Json, PhysicalType::BYTE_ARRAY) => {} - (LogicalType::Bson, PhysicalType::BYTE_ARRAY) => {} - (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) if self.length == 16 => {} - (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) => { - return Err(general_err!( - "UUID cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(16) field", - self.name - )) - } - (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) - if self.length == 2 => {} - (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) => { + if *precision != self.precision { return Err(general_err!( - "FLOAT16 cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(2) field", + "DECIMAL logical type precision {} must match self.precision {} for field '{}'", + precision, + self.precision, self.name - )) + )); } - (a, b) => { + self.check_decimal_precision_scale()?; + } + (LogicalType::Date, PhysicalType::INT32) => {} + ( + LogicalType::Time { + unit: TimeUnit::MILLIS(_), + .. + }, + PhysicalType::INT32, + ) => {} + (LogicalType::Time { unit, .. }, PhysicalType::INT64) => { + if *unit == TimeUnit::MILLIS(Default::default()) { return Err(general_err!( - "Cannot annotate {:?} from {} for field '{}'", - a, - b, + "Cannot use millisecond unit on INT64 type for field '{}'", self.name - )) + )); } } + (LogicalType::Timestamp { .. }, PhysicalType::INT64) => {} + (LogicalType::Integer { bit_width, .. }, PhysicalType::INT32) + if *bit_width <= 32 => {} + (LogicalType::Integer { bit_width, .. }, PhysicalType::INT64) + if *bit_width == 64 => {} + // Null type + (LogicalType::Unknown, PhysicalType::INT32) => {} + (LogicalType::String, PhysicalType::BYTE_ARRAY) => {} + (LogicalType::Json, PhysicalType::BYTE_ARRAY) => {} + (LogicalType::Bson, PhysicalType::BYTE_ARRAY) => {} + (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) if self.length == 16 => {} + (LogicalType::Uuid, PhysicalType::FIXED_LEN_BYTE_ARRAY) => { + return Err(general_err!( + "UUID cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(16) field", + self.name + )) + } + (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) + if self.length == 2 => {} + (LogicalType::Float16, PhysicalType::FIXED_LEN_BYTE_ARRAY) => { + return Err(general_err!( + "FLOAT16 cannot annotate field '{}' because it is not a FIXED_LEN_BYTE_ARRAY(2) field", + self.name + )) + } + (a, b) => { + return Err(general_err!( + "Cannot annotate {:?} from {} for field '{}'", + a, + b, + self.name + )) + } } - None => {} } match self.converted_type {