Skip to content

Commit

Permalink
add deprecation warnings for various IO methods
Browse files Browse the repository at this point in the history
  • Loading branch information
rjzamora committed Nov 1, 2024
1 parent 788cb24 commit 1f54219
Show file tree
Hide file tree
Showing 10 changed files with 170 additions and 58 deletions.
32 changes: 26 additions & 6 deletions python/dask_cudf/dask_cudf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Copyright (c) 2018-2024, NVIDIA CORPORATION.

from dask import config
import warnings
from importlib import import_module

from dask import config
import dask.dataframe as dd
from dask.dataframe import from_delayed # noqa: E402

Expand Down Expand Up @@ -34,26 +36,44 @@ def read_parquet(*args, **kwargs):
return dd.read_parquet(*args, **kwargs)


def raise_not_implemented_error(attr_name):
def _deprecated_api(old_api, new_api=None, rec=None):
    """Build a stand-in callable for a deprecated or removed API.

    Parameters
    ----------
    old_api : str
        Dotted name of the deprecated API. Only used in messages.
    new_api : str, optional
        Dotted path (``"package.module.attr"``) of the callable to forward
        to. When given, calling the stand-in emits a ``FutureWarning`` and
        then calls this target with the caller's arguments.
    rec : str, optional
        Recommendation text. It replaces the default
        ``"Please use <new_api> instead."`` sentence in the warning, or is
        appended to the error message when ``new_api`` is not given.

    Returns
    -------
    callable
        A function accepting arbitrary positional and keyword arguments.
        When called it either returns the result of the ``new_api`` target
        or raises ``NotImplementedError`` if no ``new_api`` was provided.
    """

    def inner_func(*args, **kwargs):
        if new_api:
            # Use alternative
            msg = f"{old_api} is now deprecated. "
            msg += rec or f"Please use {new_api} instead."
            # stacklevel=2 attributes the warning to the code that called
            # the deprecated API instead of to this wrapper.
            warnings.warn(msg, FutureWarning, stacklevel=2)
            # The target is resolved here, at call time, not when the
            # stand-in is created.
            new_attr = new_api.split(".")
            module = import_module(".".join(new_attr[:-1]))
            return getattr(module, new_attr[-1])(*args, **kwargs)

        # No alternative - raise an error
        raise NotImplementedError(
            f"{old_api} is no longer supported. " + (rec or "")
        )

    return inner_func


if QUERY_PLANNING_ON:
from ._expr.expr import _patch_dask_expr
from . import io # noqa: F401

groupby_agg = raise_not_implemented_error("groupby_agg")
groupby_agg = _deprecated_api("dask_cudf.groupby_agg")
read_text = DataFrame.read_text
to_orc = raise_not_implemented_error("to_orc")
_patch_dask_expr()

else:
from ._legacy.groupby import groupby_agg # noqa: F401
from ._legacy.io import read_text, to_orc # noqa: F401
from ._legacy.io import read_text # noqa: F401
from . import io # noqa: F401


# Deprecated top-level alias: calling it emits a FutureWarning carrying the
# `rec` text and then forwards to `dask_cudf._legacy.io.to_orc`.
to_orc = _deprecated_api(
"dask_cudf.to_orc",
new_api="dask_cudf._legacy.io.to_orc",
rec="Please use DataFrame.to_orc instead.",
)


__all__ = [
Expand Down
5 changes: 4 additions & 1 deletion python/dask_cudf/dask_cudf/_expr/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,10 @@ def groupby(
)

def to_orc(self, *args, **kwargs):
    """Forward to ``dask_cudf._legacy.io.to_orc`` with this collection.

    ``self`` is passed as the first argument; all other positional and
    keyword arguments are forwarded unchanged. Returns whatever the
    legacy function returns.
    """
    # Imported inside the method, i.e. resolved at call time.
    from dask_cudf._legacy.io import to_orc

    return to_orc(self, *args, **kwargs)

@staticmethod
def read_text(*args, **kwargs):
Expand Down
48 changes: 0 additions & 48 deletions python/dask_cudf/dask_cudf/_legacy/core.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Copyright (c) 2018-2024, NVIDIA CORPORATION.

import math
import textwrap
import warnings

import numpy as np
Expand Down Expand Up @@ -57,9 +56,6 @@ def __repr__(self):
return s % (type(self).__name__, len(self.dask), self.npartitions)


concat = dd.concat


normalize_token.register(_Frame, lambda a: a._name)


Expand Down Expand Up @@ -688,50 +684,6 @@ def reduction(
return dd.core.new_dd_object(graph, b, meta, (None, None))


@_dask_cudf_performance_tracking
def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None):
    # NOTE: the docstring is assigned to ``from_cudf.__doc__`` right after
    # this definition.
    from dask_cudf import QUERY_PLANNING_ON

    if isinstance(getattr(data, "index", None), cudf.MultiIndex):
        raise NotImplementedError(
            "dask_cudf does not support MultiIndex Dataframes."
        )

    # Dask-expr doesn't support the `name` argument, so it is only
    # forwarded when query planning is off. The keyword is collected in a
    # separate dict: rebinding `name` itself would discard the caller's
    # value before the `name or ...` fallback is evaluated.
    name_kwargs = {}
    if not QUERY_PLANNING_ON:
        name_kwargs["name"] = name or (
            "from_cudf-" + tokenize(data, npartitions or chunksize)
        )

    return dd.from_pandas(
        data,
        npartitions=npartitions,
        chunksize=chunksize,
        sort=sort,
        **name_kwargs,
    )


# Assemble the docstring at import time: a short cuDF-specific preamble
# followed by the docstring of `dd.from_pandas` (or "" when it has none).
from_cudf.__doc__ = (
textwrap.dedent(
"""
Create a :class:`.DataFrame` from a :class:`cudf.DataFrame`.
This function is a thin wrapper around
:func:`dask.dataframe.from_pandas`, accepting the same
arguments (described below) excepting that it operates on cuDF
rather than pandas objects.\n
"""
)
# TODO: `dd.from_pandas.__doc__` is empty when
# `DASK_DATAFRAME__QUERY_PLANNING=True`
# since dask-expr does not provide a docstring for from_pandas.
+ textwrap.dedent(dd.from_pandas.__doc__ or "")
)


for name in (
"add",
"sub",
Expand Down
53 changes: 52 additions & 1 deletion python/dask_cudf/dask_cudf/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

import textwrap

import dask.dataframe as dd
from dask.tokenize import tokenize

import cudf
from cudf.utils.performance_tracking import _dask_cudf_performance_tracking

# This module provides backward compatibility for legacy import patterns.
if dd.DASK_EXPR_ENABLED:
Expand All @@ -12,4 +18,49 @@
else:
from dask_cudf._legacy.core import DataFrame, Index, Series # noqa: F401

from dask_cudf._legacy.core import concat, from_cudf # noqa: F401

concat = dd.concat # noqa: F401


@_dask_cudf_performance_tracking
def from_cudf(data, npartitions=None, chunksize=None, sort=True, name=None):
    # NOTE: the docstring is assigned to ``from_cudf.__doc__`` right after
    # this definition.
    from dask_cudf import QUERY_PLANNING_ON

    if isinstance(getattr(data, "index", None), cudf.MultiIndex):
        raise NotImplementedError(
            "dask_cudf does not support MultiIndex Dataframes."
        )

    # Dask-expr doesn't support the `name` argument, so it is only
    # forwarded when query planning is off. The keyword is collected in a
    # separate dict: rebinding `name` itself would discard the caller's
    # value before the `name or ...` fallback is evaluated.
    name_kwargs = {}
    if not QUERY_PLANNING_ON:
        name_kwargs["name"] = name or (
            "from_cudf-" + tokenize(data, npartitions or chunksize)
        )

    return dd.from_pandas(
        data,
        npartitions=npartitions,
        chunksize=chunksize,
        sort=sort,
        **name_kwargs,
    )


# Assemble the docstring at import time: a short cuDF-specific preamble
# followed by the docstring of `dd.from_pandas` (or "" when it has none).
from_cudf.__doc__ = (
textwrap.dedent(
"""
Create a :class:`.DataFrame` from a :class:`cudf.DataFrame`.
This function is a thin wrapper around
:func:`dask.dataframe.from_pandas`, accepting the same
arguments (described below) excepting that it operates on cuDF
rather than pandas objects.\n
"""
)
# TODO: `dd.from_pandas.__doc__` is empty when
# `DASK_DATAFRAME__QUERY_PLANNING=True`
# since dask-expr does not provide a docstring for from_pandas.
+ textwrap.dedent(dd.from_pandas.__doc__ or "")
)
33 changes: 32 additions & 1 deletion python/dask_cudf/dask_cudf/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,32 @@
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
# Copyright (c) 2024, NVIDIA CORPORATION.

from dask_cudf import _deprecated_api

from . import csv, orc, json, parquet, text # noqa: F401


# Deprecated `dask_cudf.io.*` aliases. Each one is a wrapper that emits a
# FutureWarning when called and then forwards the call to its `new_api`
# target; `rec`, where given, replaces the default "Please use ..." hint.
read_csv = _deprecated_api(
"dask_cudf.io.read_csv", new_api="dask_cudf.read_csv"
)
read_json = _deprecated_api(
"dask_cudf.io.read_json", new_api="dask_cudf.read_json"
)
read_orc = _deprecated_api(
"dask_cudf.io.read_orc", new_api="dask_cudf.read_orc"
)
to_orc = _deprecated_api(
"dask_cudf.io.to_orc",
new_api="dask_cudf._legacy.io.to_orc",
rec="Please use the DataFrame.to_orc method instead.",
)
read_text = _deprecated_api(
"dask_cudf.io.read_text", new_api="dask_cudf.read_text"
)
read_parquet = _deprecated_api(
"dask_cudf.io.read_parquet", new_api="dask_cudf.read_parquet"
)
to_parquet = _deprecated_api(
"dask_cudf.io.to_parquet",
new_api="dask_cudf._legacy.io.parquet.to_parquet",
rec="Please use the DataFrame.to_parquet method instead.",
)
8 changes: 8 additions & 0 deletions python/dask_cudf/dask_cudf/io/csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from dask_cudf import _deprecated_api

# Deprecated alias: warns with a FutureWarning when called, then forwards
# the call to `dask_cudf.read_csv`.
read_csv = _deprecated_api(
"dask_cudf.io.csv.read_csv",
new_api="dask_cudf.read_csv",
)
8 changes: 8 additions & 0 deletions python/dask_cudf/dask_cudf/io/json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from dask_cudf import _deprecated_api

# Deprecated alias: warns with a FutureWarning when called, then forwards
# the call to `dask_cudf.read_json`.
read_json = _deprecated_api(
"dask_cudf.io.json.read_json",
new_api="dask_cudf.read_json",
)
13 changes: 13 additions & 0 deletions python/dask_cudf/dask_cudf/io/orc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from dask_cudf import _deprecated_api

# Deprecated aliases: each warns with a FutureWarning when called and then
# forwards the call to its `new_api` target.
read_orc = _deprecated_api(
"dask_cudf.io.orc.read_orc",
new_api="dask_cudf.read_orc",
)
# The warning recommends the DataFrame method; the call itself is still
# served by the legacy implementation.
to_orc = _deprecated_api(
"dask_cudf.io.orc.to_orc",
new_api="dask_cudf._legacy.io.orc.to_orc",
rec="Please use the DataFrame.to_orc method instead.",
)
20 changes: 19 additions & 1 deletion python/dask_cudf/dask_cudf/io/parquet.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
# Copyright (c) 2024, NVIDIA CORPORATION.
import functools

import pandas as pd
Expand All @@ -7,6 +7,8 @@

import cudf

from dask_cudf import _deprecated_api

# Dask-expr imports CudfEngine from this module
from dask_cudf._legacy.io.parquet import CudfEngine # noqa: F401

Expand Down Expand Up @@ -115,3 +117,19 @@ def _tune_up(self, parent):
if isinstance(parent, CudfFusedParquetIO):
return
return parent.substitute(self, CudfFusedParquetIO(self))


# Deprecated aliases: each warns with a FutureWarning when called and then
# forwards the call to its `new_api` target. Where `rec` is given, it
# replaces the default "Please use ..." hint in the warning text.
read_parquet = _deprecated_api(
"dask_cudf.io.parquet.read_parquet",
new_api="dask_cudf.read_parquet",
)
to_parquet = _deprecated_api(
"dask_cudf.io.parquet.to_parquet",
new_api="dask_cudf._legacy.io.parquet.to_parquet",
rec="Please use the DataFrame.to_parquet method instead.",
)
create_metadata_file = _deprecated_api(
"dask_cudf.io.parquet.create_metadata_file",
new_api="dask_cudf._legacy.io.parquet.create_metadata_file",
rec="Please raise an issue if this feature is needed.",
)
8 changes: 8 additions & 0 deletions python/dask_cudf/dask_cudf/io/text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from dask_cudf import _deprecated_api

# Deprecated alias: warns with a FutureWarning when called, then forwards
# the call to `dask_cudf.read_text`.
read_text = _deprecated_api(
"dask_cudf.io.text.read_text",
new_api="dask_cudf.read_text",
)

0 comments on commit 1f54219

Please sign in to comment.