Skip to content

Commit

Permalink
Closes #3020 dataframe.dropna (#3101)
Browse files Browse the repository at this point in the history
Co-authored-by: Amanda Potts <ajpotts@users.noreply.github.com>
  • Loading branch information
ajpotts and ajpotts committed Apr 19, 2024
1 parent 8c1c57e commit 8ac2645
Show file tree
Hide file tree
Showing 4 changed files with 718 additions and 3 deletions.
118 changes: 118 additions & 0 deletions PROTO_tests/tests/dataframe_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,124 @@ def test_multi_col_merge(self):
# assert_frame_equal(sorted_ak.to_pandas()[sorted_column_names],
# sorted_pd[sorted_column_names])

def test_isna_notna(self):
    """Verify that ``isna`` and ``notna`` agree with their pandas counterparts."""
    # Numeric columns contain NaN entries; the string column contains an empty string.
    columns = {
        "A": [np.nan, 2, 2, 3],
        "B": [3, np.nan, 5, 0],
        "C": [1, np.nan, 2, np.nan],
        "D": ["a", "b", "c", ""],
    }
    df = ak.DataFrame(columns)

    # Missing-value mask: computed by the frame under test vs. computed by pandas.
    ak_isna = df.isna().to_pandas()
    pd_isna = df.to_pandas().isna()
    assert_frame_equal(ak_isna, pd_isna)

    # Complementary mask.
    ak_notna = df.notna().to_pandas()
    pd_notna = df.to_pandas().notna()
    assert_frame_equal(ak_notna, pd_notna)

def test_any_all(self):
    """Compare ``DataFrame.any`` and ``DataFrame.all`` against pandas.

    The frames exercised are: one mixing boolean, integer and string
    columns, one that is entirely True, one that is entirely False, one
    with no boolean columns, and an empty frame. The pandas reference is
    always computed with ``bool_only=True``.
    """
    df1 = ak.DataFrame(
        {
            "A": [True, True, True, True],
            "B": [True, True, True, False],
            "C": [True, False, True, False],
            "D": [False, False, False, False],
            "E": [0, 1, 2, 3],
            "F": ["a", "b", "c", ""],
        }
    )

    df2 = ak.DataFrame(
        {
            "A": [True, True, True, True],
            "B": [True, True, True, True],
        }
    )

    df3 = ak.DataFrame(
        {
            "A": [False, False],
            "B": [False, False],
        }
    )

    df4 = ak.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]})

    df5 = ak.DataFrame()

    for df in [df1, df2, df3, df4, df5]:
        # The pandas conversion does not depend on the axis, so do it once per frame.
        pd_df = df.to_pandas()

        for axis in [0, 1, "index", "columns"]:
            expected_any = pd_df.any(axis=axis, bool_only=True)
            # Empty results are compared via ``.empty`` because, per the original
            # author's note, assert_series_equal does not register two empty
            # series as equal.
            if expected_any.empty:
                assert df.any(axis=axis).to_pandas().empty is True
            else:
                assert_series_equal(df.any(axis=axis).to_pandas(), expected_any)

            expected_all = pd_df.all(axis=axis, bool_only=True)
            if expected_all.empty:
                assert df.all(axis=axis).to_pandas().empty is True
            else:
                assert_series_equal(df.all(axis=axis).to_pandas(), expected_all)

        # Test with axis=None: the reduction yields a single value, so it is
        # independent of the axis loop above and checked once per frame.
        assert df.any(axis=None) == pd_df.any(axis=None, bool_only=True)
        assert df.all(axis=None) == pd_df.all(axis=None, bool_only=True)

def test_dropna(self):
    """Compare ``DataFrame.dropna`` against pandas across its parameters.

    Every combination of ``axis`` (positional and named), ``how`` and
    ``ignore_index`` is checked, followed by a sweep over ``thresh`` values
    for each axis. Indexes are retained on conversion so that they take
    part in the comparison.
    """
    df1 = ak.DataFrame(
        {
            "A": [True, True, True, True],
            "B": [1, np.nan, 2, np.nan],
            "C": [1, 2, 3, np.nan],
            "D": [False, False, False, False],
            "E": [1, 2, 3, 4],
            "F": ["a", "b", "c", "d"],
            "G": [1, 2, 3, 4],
        }
    )

    df2 = ak.DataFrame(
        {
            "A": [True, True, True, True],
            "B": [True, True, True, True],
        }
    )

    df3 = ak.DataFrame(
        {
            "A": [False, False],
            "B": [False, False],
        }
    )

    df4 = ak.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]})

    df5 = ak.DataFrame()

    for df in [df1, df2, df3, df4, df5]:
        # The pandas reference frame is the same for every parameter
        # combination, so convert once per frame instead of once per assertion.
        pd_df = df.to_pandas(retain_index=True)

        for axis in [0, 1, "index", "columns"]:
            for how in ["any", "all"]:
                for ignore_index in [True, False]:
                    assert_frame_equal(
                        df.dropna(axis=axis, how=how, ignore_index=ignore_index).to_pandas(
                            retain_index=True
                        ),
                        pd_df.dropna(axis=axis, how=how, ignore_index=ignore_index),
                    )

            for thresh in [0, 1, 2, 3, 4, 5]:
                expected = pd_df.dropna(axis=axis, thresh=thresh)
                actual = df.dropna(axis=axis, thresh=thresh).to_pandas(retain_index=True)

                # Empty results are only checked for emptiness rather than
                # compared with assert_frame_equal.
                if expected.empty:
                    assert actual.empty is True
                else:
                    assert_frame_equal(actual, expected)

def test_memory_usage(self):
dtypes = [ak.int64, ak.float64, ak.bool]
data = dict([(str(t), ak.ones(5000, dtype=ak.int64).astype(t)) for t in dtypes])
Expand Down
Loading

0 comments on commit 8ac2645

Please sign in to comment.