Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Manually run fmt on all files under parquet #6328

Merged
merged 4 commits into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ jobs:
run: rustup component add rustfmt
- name: Format arrow
run: cargo fmt --all -- --check
- name: Format parquet
# Many modules in parquet are skipped, so check parquet separately. If this check fails, run:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💯 thank you

The only other thing I think is important here is to provide instructions (as comments) about how to fix the CI check if it fails. E.g. "if this test fails, run cargo fmt ..... and check in the result" kind of instruction.

As a comment in rust.yml? Can do. I was also thinking some words in the formatting section of CONTRIBUTING.md would be helpful.

Yes, the reason I think it is useful to include this information is so that if the CI test fails, the developer can just look at the CI failure report on github (rather than hunting around in the docs) and know how to fix it.

This looks good to me -- thank you

https://github.com/apache/arrow-rs/blob/ffd216d57469a33303c0d2ec9b974fd25cc0e0f9/.github/workflows/arrow_flight.yml#L75C37-L76C70

# cargo fmt -p parquet -- --config skip_children=true `find ./parquet -name "*.rs" \! -name format.rs`
# from the top level arrow-rs directory and check in the result.
# https://github.com/apache/arrow-rs/issues/6179
working-directory: parquet
run: cargo fmt -p parquet -- --check --config skip_children=true `find . -name "*.rs" \! -name format.rs`
- name: Format object_store
working-directory: object_store
run: cargo fmt --all -- --check
Expand Down
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,13 @@ PR be sure to run the following and check for lint issues:
cargo +stable fmt --all -- --check
```

Note that currently the above will not check all source files in the parquet crate. To check all
parquet files run the following from the top-level `arrow-rs` directory:

```bash
cargo fmt -p parquet -- --check --config skip_children=true `find . -name "*.rs" \! -name format.rs`
```

## Breaking Changes

Our [release schedule] allows breaking API changes only in major releases.
Expand Down
2 changes: 1 addition & 1 deletion parquet/src/arrow/array_reader/empty_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@

use crate::arrow::array_reader::ArrayReader;
use crate::errors::Result;
use arrow_schema::{DataType as ArrowType, Fields};
use arrow_array::{ArrayRef, StructArray};
use arrow_data::ArrayDataBuilder;
use arrow_schema::{DataType as ArrowType, Fields};
use std::any::Any;
use std::sync::Arc;

Expand Down
93 changes: 23 additions & 70 deletions parquet/src/arrow/array_reader/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ impl ArrayReader for FixedSizeListArrayReader {
"Encountered misaligned row with length {} (expected length {})",
row_len,
self.fixed_size
))
));
}
row_len = 0;

Expand Down Expand Up @@ -226,9 +226,7 @@ mod tests {
use super::*;
use crate::arrow::{
array_reader::{test_util::InMemoryArrayReader, ListArrayReader},
arrow_reader::{
ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader,
},
arrow_reader::{ArrowReaderBuilder, ArrowReaderOptions, ParquetRecordBatchReader},
ArrowWriter,
};
use arrow::datatypes::{Field, Int32Type};
Expand Down Expand Up @@ -279,10 +277,7 @@ mod tests {
let mut list_array_reader = FixedSizeListArrayReader::new(
Box::new(item_array_reader),
3,
ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
3,
),
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 3),
2,
1,
true,
Expand Down Expand Up @@ -328,10 +323,7 @@ mod tests {
let mut list_array_reader = FixedSizeListArrayReader::new(
Box::new(item_array_reader),
2,
ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
2,
),
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 2),
1,
1,
false,
Expand All @@ -354,14 +346,10 @@ mod tests {
// [[4, 5]],
// [[null, null]],
// ]
let l2_type = ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
2,
);
let l1_type = ArrowType::FixedSizeList(
Arc::new(Field::new("item", l2_type.clone(), false)),
1,
);
let l2_type =
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 2);
let l1_type =
ArrowType::FixedSizeList(Arc::new(Field::new("item", l2_type.clone(), false)), 1);

let array = PrimitiveArray::<Int32Type>::from(vec![
None,
Expand Down Expand Up @@ -413,14 +401,8 @@ mod tests {
Some(vec![0, 0, 2, 0, 2, 0, 0, 2, 0, 2]),
);

let l2 = FixedSizeListArrayReader::new(
Box::new(item_array_reader),
2,
l2_type,
4,
2,
false,
);
let l2 =
FixedSizeListArrayReader::new(Box::new(item_array_reader), 2, l2_type, 4, 2, false);
let mut l1 = FixedSizeListArrayReader::new(Box::new(l2), 1, l1_type, 3, 1, true);

let expected_1 = expected.slice(0, 2);
Expand Down Expand Up @@ -454,10 +436,7 @@ mod tests {
let mut list_array_reader = FixedSizeListArrayReader::new(
Box::new(item_array_reader),
0,
ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
0,
),
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 0),
2,
1,
true,
Expand All @@ -473,8 +452,7 @@ mod tests {
#[test]
fn test_nested_var_list() {
// [[[1, null, 3], null], [[4], []], [[5, 6], [null, null]], null]
let mut builder =
FixedSizeListBuilder::new(ListBuilder::new(Int32Builder::new()), 2);
let mut builder = FixedSizeListBuilder::new(ListBuilder::new(Int32Builder::new()), 2);
builder.values().append_value([Some(1), None, Some(3)]);
builder.values().append_null();
builder.append(true);
Expand Down Expand Up @@ -503,12 +481,9 @@ mod tests {
None,
]));

let inner_type =
ArrowType::List(Arc::new(Field::new("item", ArrowType::Int32, true)));
let list_type = ArrowType::FixedSizeList(
Arc::new(Field::new("item", inner_type.clone(), true)),
2,
);
let inner_type = ArrowType::List(Arc::new(Field::new("item", ArrowType::Int32, true)));
let list_type =
ArrowType::FixedSizeList(Arc::new(Field::new("item", inner_type.clone(), true)), 2);

let item_array_reader = InMemoryArrayReader::new(
ArrowType::Int32,
Expand All @@ -517,22 +492,11 @@ mod tests {
Some(vec![0, 2, 2, 1, 0, 1, 0, 2, 1, 2, 0]),
);

let inner_array_reader = ListArrayReader::<i32>::new(
Box::new(item_array_reader),
inner_type,
4,
2,
true,
);
let inner_array_reader =
ListArrayReader::<i32>::new(Box::new(item_array_reader), inner_type, 4, 2, true);

let mut list_array_reader = FixedSizeListArrayReader::new(
Box::new(inner_array_reader),
2,
list_type,
2,
1,
true,
);
let mut list_array_reader =
FixedSizeListArrayReader::new(Box::new(inner_array_reader), 2, list_type, 2, 1, true);
let actual = list_array_reader.next_batch(1024).unwrap();
let actual = actual
.as_any()
Expand Down Expand Up @@ -564,21 +528,13 @@ mod tests {
);

// [null, 2, 3, null, 5]
let primitive = PrimitiveArray::<Int32Type>::from_iter(vec![
None,
Some(2),
Some(3),
None,
Some(5),
]);
let primitive =
PrimitiveArray::<Int32Type>::from_iter(vec![None, Some(2), Some(3), None, Some(5)]);

let schema = Arc::new(Schema::new(vec![
Field::new(
"list",
ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
4,
),
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 4),
true,
),
Field::new("primitive", ArrowType::Int32, true),
Expand Down Expand Up @@ -643,10 +599,7 @@ mod tests {

let schema = Arc::new(Schema::new(vec![Field::new(
"list",
ArrowType::FixedSizeList(
Arc::new(Field::new("item", ArrowType::Int32, true)),
4,
),
ArrowType::FixedSizeList(Arc::new(Field::new("item", ArrowType::Int32, true)), 4),
true,
)]));

Expand Down
29 changes: 8 additions & 21 deletions parquet/src/arrow/array_reader/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,7 @@ impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
// lists, and for consistency we do the same for nulls.

// The output offsets for the computed ListArray
let mut list_offsets: Vec<OffsetSize> =
Vec::with_capacity(next_batch_array.len() + 1);
let mut list_offsets: Vec<OffsetSize> = Vec::with_capacity(next_batch_array.len() + 1);

// The validity mask of the computed ListArray if nullable
let mut validity = self
Expand Down Expand Up @@ -270,9 +269,7 @@ mod tests {
GenericListArray::<OffsetSize>::DATA_TYPE_CONSTRUCTOR(field)
}

fn downcast<OffsetSize: OffsetSizeTrait>(
array: &ArrayRef,
) -> &'_ GenericListArray<OffsetSize> {
fn downcast<OffsetSize: OffsetSizeTrait>(array: &ArrayRef) -> &'_ GenericListArray<OffsetSize> {
array
.as_any()
.downcast_ref::<GenericListArray<OffsetSize>>()
Expand Down Expand Up @@ -383,18 +380,12 @@ mod tests {
Some(vec![0, 3, 2, 2, 2, 1, 1, 1, 1, 3, 3, 2, 3, 3, 2, 0, 0, 0]),
);

let l3 = ListArrayReader::<OffsetSize>::new(
Box::new(item_array_reader),
l3_type,
5,
3,
true,
);
let l3 =
ListArrayReader::<OffsetSize>::new(Box::new(item_array_reader), l3_type, 5, 3, true);

let l2 = ListArrayReader::<OffsetSize>::new(Box::new(l3), l2_type, 3, 2, false);

let mut l1 =
ListArrayReader::<OffsetSize>::new(Box::new(l2), l1_type, 2, 1, true);
let mut l1 = ListArrayReader::<OffsetSize>::new(Box::new(l2), l1_type, 2, 1, true);

let expected_1 = expected.slice(0, 2);
let expected_2 = expected.slice(2, 2);
Expand Down Expand Up @@ -560,8 +551,7 @@ mod tests {
.unwrap();
writer.close().unwrap();

let file_reader: Arc<dyn FileReader> =
Arc::new(SerializedFileReader::new(file).unwrap());
let file_reader: Arc<dyn FileReader> = Arc::new(SerializedFileReader::new(file).unwrap());

let file_metadata = file_reader.metadata().file_metadata();
let schema = file_metadata.schema_descr();
Expand All @@ -573,8 +563,7 @@ mod tests {
)
.unwrap();

let mut array_reader =
build_array_reader(fields.as_ref(), &mask, &file_reader).unwrap();
let mut array_reader = build_array_reader(fields.as_ref(), &mask, &file_reader).unwrap();

let batch = array_reader.next_batch(100).unwrap();
assert_eq!(batch.data_type(), array_reader.get_data_type());
Expand All @@ -584,9 +573,7 @@ mod tests {
"table_info",
ArrowType::List(Arc::new(Field::new(
"table_info",
ArrowType::Struct(
vec![Field::new("name", ArrowType::Binary, false)].into()
),
ArrowType::Struct(vec![Field::new("name", ArrowType::Binary, false)].into()),
false
))),
false
Expand Down
12 changes: 5 additions & 7 deletions parquet/src/arrow/array_reader/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,21 +184,19 @@ mod tests {
map_builder.append(true).expect("adding map entry");

// Create record batch
let batch =
RecordBatch::try_new(Arc::new(schema), vec![Arc::new(map_builder.finish())])
.expect("create record batch");
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(map_builder.finish())])
.expect("create record batch");

// Write record batch to file
let mut buffer = Vec::with_capacity(1024);
let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None)
.expect("creat file writer");
let mut writer =
ArrowWriter::try_new(&mut buffer, batch.schema(), None).expect("creat file writer");
writer.write(&batch).expect("writing file");
writer.close().expect("close writer");

// Read file
let reader = Bytes::from(buffer);
let record_batch_reader =
ParquetRecordBatchReader::try_new(reader, 1024).unwrap();
let record_batch_reader = ParquetRecordBatchReader::try_new(reader, 1024).unwrap();
for maybe_record_batch in record_batch_reader {
let record_batch = maybe_record_batch.expect("Getting current batch");
let col = record_batch.column(0);
Expand Down
24 changes: 11 additions & 13 deletions parquet/src/arrow/array_reader/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ impl ArrayReader for StructArrayReader {
.collect::<Result<Vec<_>>>()?;

// check that array child data has same size
let children_array_len =
children_array.first().map(|arr| arr.len()).ok_or_else(|| {
general_err!("Struct array reader should have at least one child!")
})?;
let children_array_len = children_array
.first()
.map(|arr| arr.len())
.ok_or_else(|| general_err!("Struct array reader should have at least one child!"))?;

let all_children_len_eq = children_array
.iter()
Expand Down Expand Up @@ -169,8 +169,7 @@ impl ArrayReader for StructArrayReader {
return Err(general_err!("Failed to decode level data for struct array"));
}

array_data_builder =
array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
array_data_builder = array_data_builder.null_bit_buffer(Some(bitmap_builder.into()));
}

let array_data = unsafe { array_data_builder.build_unchecked() };
Expand Down Expand Up @@ -282,13 +281,12 @@ mod tests {
// null,
// ]

let expected_l =
Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
Some(vec![Some(1), Some(2), None]),
Some(vec![]),
None,
None,
]));
let expected_l = Arc::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
Some(vec![Some(1), Some(2), None]),
Some(vec![]),
None,
None,
]));

let validity = Buffer::from([0b00000111]);
let struct_fields = vec![(
Expand Down
Loading
Loading