From b3f06f6cc4d4f4431a1f86cfc9f30de3a1fc1a1b Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 4 May 2024 07:42:04 -0400
Subject: [PATCH] Fix up clippy for Rust 1.78 (#5710)

* Fix up clippy for Rust 1.78

* Should not be pub
---
 arrow-cast/src/parse.rs                        | 11 +++++++-
 .../src/bin/arrow-json-integration-test.rs     |  8 +++++-
 arrow-json/src/reader/serializer.rs            | 28 ++++++++-----------
 arrow-json/src/writer/encoder.rs               | 17 ++++++++---
 arrow-select/src/take.rs                       |  4 +--
 arrow/src/util/string_writer.rs                |  8 +++---
 parquet/src/column/writer/mod.rs               |  7 -----
 parquet/src/encodings/encoding/mod.rs          |  6 ++--
 parquet/src/file/statistics.rs                 |  4 +--
 9 files changed, 54 insertions(+), 39 deletions(-)

diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs
index 3102fcc58ada..f48a309fdf03 100644
--- a/arrow-cast/src/parse.rs
+++ b/arrow-cast/src/parse.rs
@@ -435,11 +435,20 @@ impl Parser for Float64Type {
     }
 }
 
+/// This API is only stable since 1.70 so can't use it when current MSRV is lower
+#[inline(always)]
+fn is_some_and<T>(opt: Option<T>, f: impl FnOnce(T) -> bool) -> bool {
+    match opt {
+        None => false,
+        Some(x) => f(x),
+    }
+}
+
 macro_rules! parser_primitive {
     ($t:ty) => {
         impl Parser for $t {
             fn parse(string: &str) -> Option<Self::Native> {
-                if !string.as_bytes().last().is_some_and(|x| x.is_ascii_digit()) {
+                if !is_some_and(string.as_bytes().last(), |x| x.is_ascii_digit()) {
                     return None;
                 }
                 match atoi::FromRadix10SignedChecked::from_radix_10_signed_checked(
diff --git a/arrow-integration-testing/src/bin/arrow-json-integration-test.rs b/arrow-integration-testing/src/bin/arrow-json-integration-test.rs
index 9f1abb16a668..cc3dd2110e36 100644
--- a/arrow-integration-testing/src/bin/arrow-json-integration-test.rs
+++ b/arrow-integration-testing/src/bin/arrow-json-integration-test.rs
@@ -40,7 +40,13 @@ struct Args {
     arrow: String,
     #[clap(short, long, help("Path to JSON file"))]
     json: String,
-    #[clap(value_enum, short, long, default_value_t = Mode::Validate, help="Mode of integration testing tool")]
+    #[clap(
+        value_enum,
+        short,
+        long,
+        default_value = "VALIDATE",
+        help = "Mode of integration testing tool"
+    )]
     mode: Mode,
     #[clap(short, long)]
     verbose: bool,
diff --git a/arrow-json/src/reader/serializer.rs b/arrow-json/src/reader/serializer.rs
index 378d77bd9155..2e1d76f98d05 100644
--- a/arrow-json/src/reader/serializer.rs
+++ b/arrow-json/src/reader/serializer.rs
@@ -309,16 +309,16 @@ impl<'a, 'b> SerializeMap for ObjectSerializer<'a, 'b> {
     type Ok = ();
     type Error = SerializerError;
 
-    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
+    fn serialize_key<T>(&mut self, key: &T) -> Result<(), Self::Error>
     where
-        T: Serialize,
+        T: Serialize + ?Sized,
     {
         key.serialize(&mut *self.serializer)
     }
 
-    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    fn serialize_value<T>(&mut self, value: &T) -> Result<(), Self::Error>
     where
-        T: Serialize,
+        T: Serialize + ?Sized,
     {
         value.serialize(&mut *self.serializer)
     }
@@ -333,13 +333,9 @@ impl<'a, 'b> SerializeStruct for ObjectSerializer<'a, 'b> {
     type Ok = ();
     type Error = SerializerError;
 
-    fn serialize_field<T: ?Sized>(
-        &mut self,
-        key: &'static str,
-        value: &T,
-    ) -> Result<(), Self::Error>
+    fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<(), Self::Error>
     where
-        T: Serialize,
+        T: Serialize + ?Sized,
     {
         key.serialize(&mut *self.serializer)?;
         value.serialize(&mut *self.serializer)
@@ -376,9 +372,9 @@ impl<'a, 'b> SerializeSeq for ListSerializer<'a, 'b> {
     type Ok = ();
     type Error = SerializerError;
 
-    fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    fn serialize_element<T>(&mut self, value: &T) -> Result<(), Self::Error>
     where
-        T: Serialize,
+        T: Serialize + ?Sized,
     {
         value.serialize(&mut *self.serializer)
     }
@@ -393,9 +389,9 @@ impl<'a, 'b> SerializeTuple for ListSerializer<'a, 'b> {
     type Ok = ();
     type Error = SerializerError;
 
-    fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    fn serialize_element<T>(&mut self, value: &T) -> Result<(), Self::Error>
     where
-        T: Serialize,
+        T: Serialize + ?Sized,
     {
         value.serialize(&mut *self.serializer)
     }
@@ -410,9 +406,9 @@ impl<'a, 'b> SerializeTupleStruct for ListSerializer<'a, 'b> {
     type Ok = ();
     type Error = SerializerError;
 
-    fn serialize_field<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    fn serialize_field<T>(&mut self, value: &T) -> Result<(), Self::Error>
     where
-        T: Serialize,
+        T: Serialize + ?Sized,
     {
         value.serialize(&mut *self.serializer)
     }
diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs
index 810e65b2268f..056ddf3dd963 100644
--- a/arrow-json/src/writer/encoder.rs
+++ b/arrow-json/src/writer/encoder.rs
@@ -155,12 +155,21 @@ struct StructArrayEncoder<'a> {
     explicit_nulls: bool,
 }
 
+/// This API is only stable since 1.70 so can't use it when current MSRV is lower
+#[inline(always)]
+fn is_some_and<T>(opt: Option<T>, f: impl FnOnce(T) -> bool) -> bool {
+    match opt {
+        None => false,
+        Some(x) => f(x),
+    }
+}
+
 impl<'a> Encoder for StructArrayEncoder<'a> {
     fn encode(&mut self, idx: usize, out: &mut Vec<u8>) {
         out.push(b'{');
         let mut is_first = true;
         for field_encoder in &mut self.encoders {
-            let is_null = field_encoder.nulls.as_ref().is_some_and(|n| n.is_null(idx));
+            let is_null = is_some_and(field_encoder.nulls.as_ref(), |n| n.is_null(idx));
             if is_null && !self.explicit_nulls {
                 continue;
             }
@@ -447,13 +456,13 @@ impl<'a> MapEncoder<'a> {
         let (values, value_nulls) = make_encoder_impl(values, options)?;
 
         // We sanity check nulls as these are currently not enforced by MapArray (#1697)
-        if key_nulls.is_some_and(|x| x.null_count() != 0) {
+        if is_some_and(key_nulls, |x| x.null_count() != 0) {
             return Err(ArrowError::InvalidArgumentError(
                 "Encountered nulls in MapArray keys".to_string(),
             ));
         }
 
-        if array.entries().nulls().is_some_and(|x| x.null_count() != 0) {
+        if is_some_and(array.entries().nulls(), |x| x.null_count() != 0) {
             return Err(ArrowError::InvalidArgumentError(
                 "Encountered nulls in MapArray entries".to_string(),
             ));
@@ -478,7 +487,7 @@ impl<'a> Encoder for MapEncoder<'a> {
         out.push(b'{');
 
         for idx in start..end {
-            let is_null = self.value_nulls.as_ref().is_some_and(|n| n.is_null(idx));
+            let is_null = is_some_and(self.value_nulls.as_ref(), |n| n.is_null(idx));
             if is_null && !self.explicit_nulls {
                 continue;
             }
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index dc9e13040c8e..8939d3f719f0 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -1401,9 +1401,9 @@ mod tests {
         );
     }
 
-    fn _test_take_string<'a, K: 'static>()
+    fn _test_take_string<'a, K>()
     where
-        K: Array + PartialEq + From<Vec<Option<&'a str>>>,
+        K: Array + PartialEq + From<Vec<Option<&'a str>>> + 'static,
     {
         let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(4)]);
 
diff --git a/arrow/src/util/string_writer.rs b/arrow/src/util/string_writer.rs
index 4c61f183e206..57a98d6f51d2 100644
--- a/arrow/src/util/string_writer.rs
+++ b/arrow/src/util/string_writer.rs
@@ -63,6 +63,7 @@
 //! }
 //! ```
 
+use std::fmt::Formatter;
 use std::io::{Error, ErrorKind, Result, Write};
 
 #[derive(Debug)]
@@ -83,10 +84,9 @@ impl Default for StringWriter {
         Self::new()
     }
 }
-
-impl ToString for StringWriter {
-    fn to_string(&self) -> String {
-        self.data.clone()
+impl std::fmt::Display for StringWriter {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.data)
     }
 }
diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 9ef505cc7cf6..e084d9ba41fa 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -1172,13 +1172,6 @@ fn compare_greater<T: ParquetValueType>(descr: &ColumnDescriptor, a: &T, b: &T)
 // https://github.com/apache/parquet-mr/blob/master/parquet-column/src/main/java/org/apache/parquet/column/values/factory/DefaultV1ValuesWriterFactory.java
 // https://github.com/apache/parquet-mr/blob/master/parquet-column/src/main/java/org/apache/parquet/column/values/factory/DefaultV2ValuesWriterFactory.java
 
-/// Trait to define default encoding for types, including whether or not the type
-/// supports dictionary encoding.
-trait EncodingWriteSupport {
-    /// Returns true if dictionary is supported for column writer, false otherwise.
-    fn has_dictionary_support(props: &WriterProperties) -> bool;
-}
-
 /// Returns encoding for a column when no other encoding is provided in writer properties.
 fn fallback_encoding(kind: Type, props: &WriterProperties) -> Encoding {
     match (kind, props.writer_version()) {
diff --git a/parquet/src/encodings/encoding/mod.rs b/parquet/src/encodings/encoding/mod.rs
index 9099187a545c..d797b3cb2f52 100644
--- a/parquet/src/encodings/encoding/mod.rs
+++ b/parquet/src/encodings/encoding/mod.rs
@@ -24,7 +24,7 @@ use crate::data_type::private::ParquetValueType;
 use crate::data_type::*;
 use crate::encodings::rle::RleEncoder;
 use crate::errors::{ParquetError, Result};
-use crate::util::bit_util::{self, num_required_bits, BitWriter};
+use crate::util::bit_util::{num_required_bits, BitWriter};
 use bytes::Bytes;
 
 pub use dict_encoder::DictEncoder;
@@ -47,12 +47,13 @@
     /// identified by `valid_bits`.
     ///
     /// Returns the number of non-null values encoded.
+    #[cfg(test)]
     fn put_spaced(&mut self, values: &[T::T], valid_bits: &[u8]) -> Result<usize> {
         let num_values = values.len();
         let mut buffer = Vec::with_capacity(num_values);
         // TODO: this is pretty inefficient. Revisit in future.
         for (i, item) in values.iter().enumerate().take(num_values) {
-            if bit_util::get_bit(valid_bits, i) {
+            if crate::util::bit_util::get_bit(valid_bits, i) {
                 buffer.push(item.clone());
             }
         }
@@ -727,6 +728,7 @@ mod tests {
     use crate::encodings::decoding::{get_decoder, Decoder, DictDecoder, PlainDecoder};
     use crate::schema::types::{ColumnDescPtr, ColumnDescriptor, ColumnPath, Type as SchemaType};
     use crate::util::test_common::rand_gen::{random_bytes, RandGen};
+    use crate::util::bit_util;
 
     const TEST_SET_SIZE: usize = 1024;
 
diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs
index 1bc003d48854..d24b91741bef 100644
--- a/parquet/src/file/statistics.rs
+++ b/parquet/src/file/statistics.rs
@@ -271,8 +271,8 @@ pub fn to_thrift(stats: Option<&Statistics>) -> Option<TStatistics> {
 
     if stats.is_min_max_backwards_compatible() {
         // Copy to deprecated min, max values for compatibility with older readers
-        thrift_stats.min = min.clone();
-        thrift_stats.max = max.clone();
+        thrift_stats.min.clone_from(&min);
+        thrift_stats.max.clone_from(&max);
     }
 
     if !stats.is_min_max_deprecated() {
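
Note on the `string_writer.rs` hunk above: swapping the manual `ToString` impl for `Display` is the fix clippy asks for when a type implements `ToString` directly, because the standard library already provides a blanket `impl<T: fmt::Display + ?Sized> ToString for T`, so `.to_string()` keeps working unchanged for callers. A minimal standalone sketch of the same pattern, not part of this patch; the `StringBuffer` type below is a hypothetical stand-in rather than arrow-rs code:

```rust
use std::fmt;

/// Hypothetical stand-in for a type that owns a `String` (like `StringWriter`).
struct StringBuffer {
    data: String,
}

/// Implement `Display` instead of `ToString`: `to_string()` is still
/// available through the blanket `impl<T: fmt::Display + ?Sized> ToString for T`.
impl fmt::Display for StringBuffer {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.data)
    }
}

fn main() {
    let buf = StringBuffer {
        data: "hello".to_owned(),
    };
    // `to_string()` comes for free from the `Display` impl above.
    assert_eq!(buf.to_string(), "hello");
    println!("{buf}");
}
```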