From 006a388b424876c1d7bb5f315bc81c142b736df2 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Sat, 14 Dec 2024 09:23:17 -0800 Subject: [PATCH] modify some tests to truncate in the middle of a multibyte char --- parquet/src/column/writer/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index 2f33698f9d4..80e1ac2b10e 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -3268,8 +3268,8 @@ mod tests { assert_eq!(&r, "yyyyyyyz".as_bytes()); // 2-byte without overflow - let r = truncate_and_increment_utf8("ééééé", 8).unwrap(); - assert_eq!(&r, "éééê".as_bytes()); + let r = truncate_and_increment_utf8("ééééé", 7).unwrap(); + assert_eq!(&r, "ééê".as_bytes()); // 2-byte that overflows lowest byte let r = truncate_and_increment_utf8("\u{ff}\u{ff}\u{ff}\u{ff}\u{ff}", 8).unwrap(); @@ -3281,7 +3281,7 @@ mod tests { // 3-byte without overflow [U+800, U+800, U+800] -> [U+800, U+801] (note that these // characters should render right to left). - let r = truncate_and_increment_utf8("ࠀࠀࠀ", 8).unwrap(); + let r = truncate_and_increment_utf8("ࠀࠀࠀࠀ", 8).unwrap(); assert_eq!(&r, "ࠀࠁ".as_bytes()); // max 3-byte should not truncate as it would need 4-byte code points @@ -3289,7 +3289,7 @@ mod tests { assert!(r.is_none()); // 4-byte without overflow - let r = truncate_and_increment_utf8("𐀀𐀀𐀀", 8).unwrap(); + let r = truncate_and_increment_utf8("𐀀𐀀𐀀𐀀", 9).unwrap(); assert_eq!(&r, "𐀀𐀁".as_bytes()); // max 4-byte should not truncate