Skip to content

Commit

Permalink
Result into error in case of endianness mismatches
Browse files Browse the repository at this point in the history
This commit implements the Byte Order (Endianness) recommendation
from the official Apache Arrow specification (quoted here):

> _"At first we will return an error when trying to read a Schema
> with an endianness that does not match the underlying system."_

Resolves: #3459
  • Loading branch information
pangiole committed Jan 14, 2024
1 parent 4c3e9be commit b1b4e84
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 23 deletions.
31 changes: 8 additions & 23 deletions arrow-integration-testing/tests/ipc_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! Tests for reading the content of [`FileReader`] and [`StreamReader`]
//! in `testing/arrow-ipc-stream/integration/...`
use arrow::error::ArrowError;
use arrow::ipc::reader::{FileReader, StreamReader};
use arrow::util::test_util::arrow_test_data;
use arrow_integration_testing::read_gzip_json;
Expand Down Expand Up @@ -55,26 +56,12 @@ fn read_0_1_7() {
});
}

#[test]
#[should_panic(expected = "Big Endian is not supported for Decimal!")]
fn read_1_0_0_bigendian_decimal_should_panic() {
    // Decimal columns in big-endian files are unsupported, so reading this
    // file is expected to panic with the message above.
    verify_arrow_file(&arrow_test_data(), "1.0.0-bigendian", "generated_decimal");
}

#[test]
#[should_panic(expected = "Last offset 687865856 of Utf8 is larger than values length 41")]
fn read_1_0_0_bigendian_dictionary_should_panic() {
    // Offsets in big-endian files are not translated when read, so the
    // bogus last offset triggers a panic.
    // https://github.com/apache/arrow-rs/issues/859
    verify_arrow_file(&arrow_test_data(), "1.0.0-bigendian", "generated_dictionary");
}

#[test]
fn read_1_0_0_bigendian() {
let testdata = arrow_test_data();
let paths = [
"generated_decimal",
"generated_dictionary",
"generated_interval",
"generated_datetime",
"generated_map",
Expand All @@ -91,14 +78,12 @@ fn read_1_0_0_bigendian() {
))
.unwrap();

FileReader::try_new(file, None).unwrap();
let reader = FileReader::try_new(file, None);

// While the reader doesn't error, the values are not
// read correctly on little endian platforms, so verifying the
// contents fails
//
// https://github.com/apache/arrow-rs/issues/3459
//verify_arrow_file(&testdata, "1.0.0-bigendian", path);
assert!(reader.is_err());
let err = reader.err().unwrap();
assert!(matches!(err, ArrowError::IpcError(_)));
assert_eq!(err.to_string(), "Ipc error: the endianness of the source system does not match the endianness of the target system.");
});
}

Expand Down
9 changes: 9 additions & 0 deletions arrow-ipc/src/gen/Schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1039,6 +1039,15 @@ impl Endianness {
_ => None,
}
}

/// Returns true if the endianness of the source system matches the endianness of the target system.
pub fn equals_to_target_endianness(self) -> bool {
    // Endianness the current binary was compiled for; Rust targets are
    // exactly one of little- or big-endian, so one flag suffices.
    let target_is_little = cfg!(target_endian = "little");
    match self {
        Self::Little => target_is_little,
        Self::Big => !target_is_little,
        // Any other (unknown) value can never match the target.
        _ => false,
    }
}
}
impl core::fmt::Debug for Endianness {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
Expand Down
6 changes: 6 additions & 0 deletions arrow-ipc/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,12 @@ impl FileReaderBuilder {
let total_blocks = blocks.len();

let ipc_schema = footer.schema().unwrap();
if !ipc_schema.endianness().equals_to_target_endianness() {
return Err(ArrowError::IpcError(
"the endianness of the source system does not match the endianness of the target system.".to_owned()
));
}

let schema = crate::convert::fb_to_schema(ipc_schema);

let mut custom_metadata = HashMap::new();
Expand Down

0 comments on commit b1b4e84

Please sign in to comment.