Manually run fmt on all files under parquet (#6328)
* manually run fmt on all files under parquet

* apply formatting that had been skipped

* add more to comment

* update formatting instructions
etseidl authored Sep 9, 2024
1 parent 0491294 commit 25e1969
Showing 18 changed files with 143 additions and 204 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/rust.yml
@@ -100,6 +100,13 @@ jobs:
run: rustup component add rustfmt
- name: Format arrow
run: cargo fmt --all -- --check
- name: Format parquet
# Many modules in parquet are skipped, so check parquet separately. If this check fails, run:
# cargo fmt -p parquet -- --config skip_children=true `find ./parquet -name "*.rs" \! -name format.rs`
# from the top level arrow-rs directory and check in the result.
# https://github.com/apache/arrow-rs/issues/6179
working-directory: parquet
run: cargo fmt -p parquet -- --check --config skip_children=true `find . -name "*.rs" \! -name format.rs`
- name: Format object_store
working-directory: object_store
run: cargo fmt --all -- --check
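
The parquet step's comment spells out how to fix a failing check locally. A minimal sketch of that flow, run from the top-level `arrow-rs` directory (the git commands are illustrative additions, not part of the workflow):

```bash
# Reformat every .rs file under parquet except format.rs, which the find
# expression excludes. skip_children=true stops rustfmt from recursing into
# out-of-line child modules on its own, so the explicit file list controls
# exactly which files get formatted.
cargo fmt -p parquet -- --config skip_children=true \
  `find ./parquet -name "*.rs" \! -name format.rs`

# Review what rustfmt touched and check in the result (illustrative git usage).
git diff --stat -- parquet
git commit -am "Run cargo fmt on parquet sources"
```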
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
@@ -160,6 +160,13 @@ PR be sure to run the following and check for lint issues:
cargo +stable fmt --all -- --check
```

Note that currently the above will not check all source files in the parquet crate. To check all
parquet files run the following from the top-level `arrow-rs` directory:

```bash
cargo fmt -p parquet -- --check --config skip_children=true `find . -name "*.rs" \! -name format.rs`
```

## Breaking Changes

Our [release schedule] allows breaking API changes only in major releases.
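
For background on why `cargo +stable fmt --all -- --check` misses some parquet sources (a hedged explanation; apache/arrow-rs#6179 has the authoritative details): rustfmt only recurses into modules it can see as plain `mod` items and generally leaves the contents of macro invocations alone, so code that is only reachable through a macro never gets reformatted by the default check. The snippet below is a self-contained, hypothetical illustration of that behavior; the `experimental!` macro and module name are made up for the example and are not copied from the parquet crate.

```rust
// Hypothetical sketch: rustfmt leaves the tokens of a macro invocation alone,
// so the badly formatted function below survives `cargo fmt` untouched.
macro_rules! experimental {
    ($vis:vis mod $name:ident { $($body:tt)* }) => {
        $vis mod $name { $($body)* }
    };
}

experimental!(pub mod unformatted {
    pub fn  badly_spaced( x:i32 )->i32{ x+1 }   // rustfmt will not reflow this
});

fn main() {
    // The module compiles and works; its formatting simply was never checked.
    println!("{}", unformatted::badly_spaced(1));
}
```

Passing an explicit file list with `skip_children=true`, as in the command above, sidesteps module discovery entirely, which is why the separate parquet invocation catches files the default check skips.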
2 changes: 1 addition & 1 deletion parquet/src/arrow/array_reader/empty_array.rs
@@ -17,9 +17,9 @@

use crate::arrow::array_reader::ArrayReader;
use crate::errors::Result;
use arrow_schema::{DataType as ArrowType, Fields};
use arrow_array::{ArrayRef, StructArray};
use arrow_data::ArrayDataBuilder;
use arrow_schema::{DataType as ArrowType, Fields};
use std::any::Any;
use std::sync::Arc;

93 changes: 23 additions & 70 deletions parquet/src/arrow/array_reader/fixed_size_list_array.rs
@@ -138,7 +138,7 @@ impl ArrayReader for FixedSizeListArrayReader {
"Encountered misaligned row with length {} (expected length {})",
row_len,
self.fixed_size
))
));
}
row_len = 0;

@@ -226,9 +226,7 @@ mod tests {
use super::*;
use crate::arrow::{
array_reader::{test_util::InMemoryArrayReader, ListArrayReader},
arrow_reader::{
ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader,
},
arrow_reader::{ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader},
ArrowWriter,
};
use arrow::datatypes::{Field, Int32Type};
@@ -279,10 +277,7 @@ mod tests {
let mut list_array_reader = FixedSizeListArrayReader::new(
Box::new(item_array_reader),
3,
ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
3,
),
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 3),
2,
1,
true,
@@ -328,10 +323,7 @@ mod tests {
let mut list_array_reader = FixedSizeListArrayReader::new(
Box::new(item_array_reader),
2,
ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
2,
),
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 2),
1,
1,
false,
@@ -354,14 +346,10 @@ mod tests {
// [[4, 5]],
// [[null, null]],
// ]
let l2_type = ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
2,
);
let l1_type = ArrowType::FixedSizeList(
Arc::new(Field::new("item", l2_type.clone(), false)),
1,
);
let l2_type =
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 2);
let l1_type =
ArrowType::FixedSizeList(Arc::new(Field::new("item", l2_type.clone(), false)), 1);

let array = PrimitiveArray::<Int32Type>::from(vec![
None,
@@ -413,14 +401,8 @@ mod tests {
Some(vec![0, 0, 2, 0, 2, 0, 0, 2, 0, 2]),
);

let l2 = FixedSizeListArrayReader::new(
Box::new(item_array_reader),
2,
l2_type,
4,
2,
false,
);
let l2 =
FixedSizeListArrayReader::new(Box::new(item_array_reader), 2, l2_type, 4, 2, false);
let mut l1 = FixedSizeListArrayReader::new(Box::new(l2), 1, l1_type, 3, 1, true);

let expected_1 = expected.slice(0, 2);
@@ -454,10 +436,7 @@ mod tests {
let mut list_array_reader = FixedSizeListArrayReader::new(
Box::new(item_array_reader),
0,
ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
0,
),
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 0),
2,
1,
true,
@@ -473,8 +452,7 @@
#[test]
fn test_nested_var_list() {
// [[[1, null, 3], null], [[4], []], [[5, 6], [null, null]], null]
let mut builder =
FixedSizeListBuilder::new(ListBuilder::new(Int32Builder::new()), 2);
let mut builder = FixedSizeListBuilder::new(ListBuilder::new(Int32Builder::new()), 2);
builder.values().append_value([Some(1), None, Some(3)]);
builder.values().append_null();
builder.append(true);
@@ -503,12 +481,9 @@ mod tests {
None,
]));

let inner_type =
ArrowType::List(Arc::new(Field::new("item", ArrowType::Int32, true)));
let list_type = ArrowType::FixedSizeList(
Arc::new(Field::new("item", inner_type.clone(), true)),
2,
);
let inner_type = ArrowType::List(Arc::new(Field::new("item", ArrowType::Int32, true)));
let list_type =
ArrowType::FixedSizeList(Arc::new(Field::new("item", inner_type.clone(), true)), 2);

let item_array_reader = InMemoryArrayReader::new(
ArrowType::Int32,
@@ -517,22 +492,11 @@
Some(vec![0, 2, 2, 1, 0, 1, 0, 2, 1, 2, 0]),
);

let inner_array_reader = ListArrayReader::<i32>::new(
Box::new(item_array_reader),
inner_type,
4,
2,
true,
);
let inner_array_reader =
ListArrayReader::<i32>::new(Box::new(item_array_reader), inner_type, 4, 2, true);

let mut list_array_reader = FixedSizeListArrayReader::new(
Box::new(inner_array_reader),
2,
list_type,
2,
1,
true,
);
let mut list_array_reader =
FixedSizeListArrayReader::new(Box::new(inner_array_reader), 2, list_type, 2, 1, true);
let actual = list_array_reader.next_batch(1024).unwrap();
let actual = actual
.as_any()
@@ -564,21 +528,13 @@
);

// [null, 2, 3, null, 5]
let primitive = PrimitiveArray::<Int32Type>::from_iter(vec![
None,
Some(2),
Some(3),
None,
Some(5),
]);
let primitive =
PrimitiveArray::<Int32Type>::from_iter(vec![None, Some(2), Some(3), None, Some(5)]);

let schema = Arc::new(Schema::new(vec![
Field::new(
"list",
ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
4,
),
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 4),
true,
),
Field::new("primitive", ArrowType::Int32, true),
Expand Down Expand Up @@ -643,10 +599,7 @@ mod tests {

let schema = Arc::new(Schema::new(vec![Field::new(
"list",
ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
4,
),
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 4),
true,
)]));

29 changes: 8 additions & 21 deletions parquet/src/arrow/array_reader/list_array.rs
@@ -125,8 +125,7 @@ impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
// lists, and for consistency we do the same for nulls.

// The output offsets for the computed ListArray
let mut list_offsets: Vec<OffsetSize> =
Vec::with_capacity(next_batch_array.len() + 1);
let mut list_offsets: Vec<OffsetSize> = Vec::with_capacity(next_batch_array.len() + 1);

// The validity mask of the computed ListArray if nullable
let mut validity = self
@@ -270,9 +269,7 @@ mod tests {
GenericListArray::<OffsetSize>::DATA_TYPE_CONSTRUCTOR(field)
}

fn downcast<OffsetSize: OffsetSizeTrait>(
array: &ArrayRef,
) -> &'_ GenericListArray<OffsetSize> {
fn downcast<OffsetSize: OffsetSizeTrait>(array: &ArrayRef) -> &'_ GenericListArray<OffsetSize> {
array
.as_any()
.downcast_ref::<GenericListArray<OffsetSize>>()
@@ -383,18 +380,12 @@ mod tests {
Some(vec![0, 3, 2, 2, 2, 1, 1, 1, 1, 3, 3, 2, 3, 3, 2, 0, 0, 0]),
);

let l3 = ListArrayReader::<OffsetSize>::new(
Box::new(item_array_reader),
l3_type,
5,
3,
true,
);
let l3 =
ListArrayReader::<OffsetSize>::new(Box::new(item_array_reader), l3_type, 5, 3, true);

let l2 = ListArrayReader::<OffsetSize>::new(Box::new(l3), l2_type, 3, 2, false);

let mut l1 =
ListArrayReader::<OffsetSize>::new(Box::new(l2), l1_type, 2, 1, true);
let mut l1 = ListArrayReader::<OffsetSize>::new(Box::new(l2), l1_type, 2, 1, true);

let expected_1 = expected.slice(0, 2);
let expected_2 = expected.slice(2, 2);
@@ -560,8 +551,7 @@ mod tests {
.unwrap();
writer.close().unwrap();

let file_reader: Arc<dyn FileReader> =
Arc::new(SerializedFileReader::new(file).unwrap());
let file_reader: Arc<dyn FileReader> = Arc::new(SerializedFileReader::new(file).unwrap());

let file_metadata = file_reader.metadata().file_metadata();
let schema = file_metadata.schema_descr();
@@ -573,8 +563,7 @@
)
.unwrap();

let mut array_reader =
build_array_reader(fields.as_ref(), &mask, &file_reader).unwrap();
let mut array_reader = build_array_reader(fields.as_ref(), &mask, &file_reader).unwrap();

let batch = array_reader.next_batch(100).unwrap();
assert_eq!(batch.data_type(), array_reader.get_data_type());
Expand All @@ -584,9 +573,7 @@ mod tests {
"table_info",
ArrowType::List(Arc::new(Field::new(
"table_info",
ArrowType::Struct(
vec![Field::new("name", ArrowType::Binary, false)].into()
),
ArrowType::Struct(vec![Field::new("name", ArrowType::Binary, false)].into()),
false
))),
false
12 changes: 5 additions & 7 deletions parquet/src/arrow/array_reader/map_array.rs
@@ -184,21 +184,19 @@ mod tests {
map_builder.append(true).expect("adding map entry");

// Create record batch
let batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(map_builder.finish())])
.expect("create record batch");
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(map_builder.finish())])
.expect("create record batch");

// Write record batch to file
let mut buffer = Vec::with_capacity(1024);
let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None)
.expect("creat file writer");
let mut writer =
ArrowWriter::try_new(&mut buffer, batch.schema(), None).expect("creat file writer");
writer.write(&batch).expect("writing file");
writer.close().expect("close writer");

// Read file
let reader = Bytes::from(buffer);
let record_batch_reader =
ParquetRecordBatchReader::try_new(reader, 1024).unwrap();
let record_batch_reader = ParquetRecordBatchReader::try_new(reader, 1024).unwrap();
for maybe_record_batch in record_batch_reader {
let record_batch = maybe_record_batch.expect("Getting current batch");
let col = record_batch.column(0);
24 changes: 11 additions & 13 deletions parquet/src/arrow/array_reader/struct_array.rs
@@ -112,10 +112,10 @@ impl ArrayReader for StructArrayReader {
.collect::<Result<Vec<_>>>()?;

// check that array child data has same size
let children_array_len =
children_array.first().map(|arr| arr.len()).ok_or_else(|| {
general_err!("Struct array reader should have at least one child!")
})?;
let children_array_len = children_array
.first()
.map(|arr| arr.len())
.ok_or_else(|| general_err!("Struct array reader should have at least one child!"))?;

let all_children_len_eq = children_array
.iter()
@@ -169,8 +169,7 @@ impl ArrayReader for StructArrayReader {
return Err(general_err!("Failed to decode level data for struct array"));
}

array_data_builder =
array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
array_data_builder = array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
}

let array_data = unsafe { array_data_builder.build_unchecked() };
@@ -282,13 +281,12 @@ mod tests {
// null,
// ]

let expected_l =
Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
Some(vec![Some(1), Some(2), None]),
Some(vec![]),
None,
None,
]));
let expected_l = Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
Some(vec![Some(1), Some(2), None]),
Some(vec![]),
None,
None,
]));

let validity = Buffer::from([0b00000111]);
let struct_fields = vec![(
Expand Down
(Diffs for the remaining 11 changed files are not shown.)