From df43dac4fab101ec09b94888e6cd0abdde2de1ec Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Thu, 28 Sep 2023 10:59:25 -0400 Subject: [PATCH] Allow RLE for bools in v1 pages (#885) --- fastparquet/core.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fastparquet/core.py b/fastparquet/core.py index eab78abb..a2156c6f 100644 --- a/fastparquet/core.py +++ b/fastparquet/core.py @@ -126,8 +126,7 @@ def read_data_page(f, helper, header, metadata, skip_nulls=False, nval = daph.num_values - num_nulls se = helper.schema_element(metadata.path_in_schema) if daph.encoding == parquet_thrift.Encoding.PLAIN: - - width = helper.schema_element(metadata.path_in_schema).type_length + width = se.type_length values = read_plain(io_obj.read(), metadata.type, int(daph.num_values - num_nulls), @@ -137,7 +136,9 @@ def read_data_page(f, helper, header, metadata, skip_nulls=False, parquet_thrift.Encoding.RLE_DICTIONARY, parquet_thrift.Encoding.RLE]: # bit_width is stored as single byte. - if daph.encoding == parquet_thrift.Encoding.RLE: + if metadata.type == parquet_thrift.Type.BOOLEAN: + bit_width = 1 + elif daph.encoding == parquet_thrift.Encoding.RLE: bit_width = se.type_length else: bit_width = io_obj.read_byte()