Skip to content

Commit

Permalink
Add test of #12243
Browse files Browse the repository at this point in the history
  • Loading branch information
wence- committed Mar 22, 2024
1 parent dd53c60 commit 5c00e79
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 4 deletions.
38 changes: 37 additions & 1 deletion python/cudf/cudf/tests/dataframe/test_io_serialization.py
Original file line number Diff line number Diff line change
@@ -1 +1,37 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
from io import BytesIO

import pandas as pd
import pyarrow.parquet as pq
import pytest

import cudf
from cudf.testing._utils import assert_eq


@pytest.mark.parametrize(
    "index",
    [range(1, 11), list(range(1, 11)), range(1, 11)[::2]],
    ids=["RangeIndex", "IntIndex", "StridedRange"],
)
@pytest.mark.parametrize("write_index", [False, True, None])
@pytest.mark.parametrize("empty", [False, True], ids=["nonempty", "empty"])
def test_dataframe_parquet_roundtrip(index, write_index, empty):
    """Compare cudf's and pandas' parquet writers on the same frame.

    The same data/index pair is used to build a cudf and a pandas
    DataFrame; each is written to an in-memory parquet buffer with
    ``index=write_index``. The test then checks that

    * the pandas metadata stored in both parquet schemas is identical, and
    * reading either buffer back with ``cudf.read_parquet`` gives equal
      frames.

    When ``empty`` is true the frames carry no columns, only the index.
    """
    payload = {} if empty else {"a": [value * 2 for value in index]}

    gpu_frame = cudf.DataFrame(data=payload, index=index)
    cpu_frame = pd.DataFrame(data=payload, index=index)

    gpu_sink = BytesIO()
    cpu_sink = BytesIO()
    gpu_frame.to_parquet(gpu_sink, index=write_index)
    cpu_frame.to_parquet(cpu_sink, index=write_index)

    # Both writers must emit the same pandas metadata in the schema.
    gpu_metadata = pq.read_table(gpu_sink).schema.pandas_metadata
    cpu_metadata = pq.read_table(cpu_sink).schema.pandas_metadata
    assert gpu_metadata == cpu_metadata

    # Reading either file back through cudf must give the same frame.
    from_gpu_file = cudf.read_parquet(gpu_sink)
    from_cpu_file = cudf.read_parquet(cpu_sink)
    assert_eq(from_gpu_file, from_cpu_file)
14 changes: 11 additions & 3 deletions python/cudf/cudf/tests/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -2442,9 +2442,17 @@ def test_parquet_index(pdf, index):
run_parquet_index(pdf, index)


@pytest.mark.parametrize("index", [None, True])
@pytest.mark.xfail(
reason="https://github.com/rapidsai/cudf/issues/12243",
@pytest.mark.parametrize(
"index",
[
pytest.param(
None,
marks=pytest.mark.xfail(
reason="https://github.com/apache/arrow/issues/40743"
),
),
True,
],
)
def test_parquet_index_empty(index):
pdf = pd.DataFrame(index=pd.RangeIndex(0, 10, 1))
Expand Down

0 comments on commit 5c00e79

Please sign in to comment.