From 58cdab6adc72c377e2460f90b05e90b9140ddf29 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Sat, 7 Oct 2023 12:47:37 -0700 Subject: [PATCH] fixes for numpy2 (#886) * np.lib.pad was removed in numpy2 * skip rapidjson when json dumping np.float64 on numpy 2 * pin cython<3 on pandas build --- .github/workflows/main.yaml | 2 +- fastparquet/test/test_json.py | 22 +++++++++++++--------- fastparquet/writer.py | 4 ++-- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 6836e358..b8321e35 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -120,7 +120,7 @@ jobs: - name: pip-install shell: bash -l {0} run: | - pip install Cython + pip install 'Cython<3' pip install hypothesis pip install pytest-localserver pytest-xdist pytest-asyncio pip install -e . --no-deps # Install fastparquet diff --git a/fastparquet/test/test_json.py b/fastparquet/test/test_json.py index e40c4e68..e52c8a5b 100644 --- a/fastparquet/test/test_json.py +++ b/fastparquet/test/test_json.py @@ -4,6 +4,7 @@ import numpy as np import pytest +from packaging.version import Version from fastparquet.json import ( JsonCodecError, @@ -25,15 +26,15 @@ def _clear_cache(): @pytest.mark.parametrize( - "data", + "data,has_float64", [ - None, - [1, 1, 2, 3, 5], - [1.23, -3.45], - [np.float64(0.12), np.float64(4.56)], - [[1, 2, 4], ["x", "y", "z"]], - {"k1": "value", "k2": "à/è", "k3": 3}, - {"k1": [1, 2, 3], "k2": [4.1, 5.2, 6.3]}, + (None, False), + ([1, 1, 2, 3, 5], False), + ([1.23, -3.45], False), + ([np.float64(0.12), np.float64(4.56)], True), + ([[1, 2, 4], ["x", "y", "z"]], False), + ({"k1": "value", "k2": "à/è", "k3": 3}, False), + ({"k1": [1, 2, 3], "k2": [4.1, 5.2, 6.3]}, False), ], ) @pytest.mark.parametrize( @@ -44,7 +45,10 @@ def _clear_cache(): "decoder_module, decoder_class", list(_codec_classes.items()), ) -def test_engine(encoder_module, encoder_class, decoder_module, decoder_class, data): +def test_engine(encoder_module, encoder_class, decoder_module, decoder_class, data, has_float64): + if encoder_module == "rapidjson" and has_float64 and Version(np.__version__).major >= 2: + pytest.skip(reason="rapidjson cannot json dump np.float64 on numpy 2") + pytest.importorskip(encoder_module) pytest.importorskip(decoder_module) diff --git a/fastparquet/writer.py b/fastparquet/writer.py index 8c8e1364..d439ad5a 100644 --- a/fastparquet/writer.py +++ b/fastparquet/writer.py @@ -243,7 +243,7 @@ def convert(data, se): elif type == parquet_thrift.Type.BOOLEAN: # TODO: with our own bitpack writer, no need to copy for # the padding - padded = np.lib.pad(data.values, (0, 8 - (len(data) % 8)), + padded = np.pad(data.values, (0, 8 - (len(data) % 8)), 'constant', constant_values=(0, 0)) out = np.packbits(padded.reshape(-1, 8)[:, ::-1].ravel()) elif dtype.name in typemap: @@ -265,7 +265,7 @@ def convert(data, se): elif type == parquet_thrift.Type.BOOLEAN: # TODO: with our own bitpack writer, no need to copy for # the padding - padded = np.lib.pad(data.values, (0, 8 - (len(data) % 8)), + padded = np.pad(data.values, (0, 8 - (len(data) % 8)), 'constant', constant_values=(0, 0)) out = np.packbits(padded.reshape(-1, 8)[:, ::-1].ravel()) else: