diff --git a/docs/for_pandas.rst b/docs/for_pandas.rst index 3b552bc8..ced424e3 100644 --- a/docs/for_pandas.rst +++ b/docs/for_pandas.rst @@ -37,7 +37,7 @@ This is useful when dataframe has nullable columns because pandas auto-conversio Easy to Load DataFrame ---------------------- -The :func:`~gokart.task.TaskOnKart.load_data_frame` method is used to load input ``pandas.DataFrame``. +The :func:`~gokart.task.TaskOnKart.load` method is used to load input ``pandas.DataFrame``. .. code:: python @@ -45,11 +45,9 @@ The :func:`~gokart.task.TaskOnKart.load_data_frame` method is used to load input return MakeDataFrameTask() def run(self): - df = self.load_data_frame(required_columns={'colA', 'colB'}, drop_columns=True) + df = self.load() -This allows us to omit ``reset_index`` and ``drop`` when loading. If there is a missing column in an example above, ``AssertionError`` will be raised. This feature is useful for pipelines based on pandas. - -Please refer to :func:`~gokart.task.TaskOnKart.load_data_frame`. +Please refer to :func:`~gokart.task.TaskOnKart.load`. Fail on empty DataFrame diff --git a/docs/task_on_kart.rst b/docs/task_on_kart.rst index 0a924d8a..ce52c6d5 100644 --- a/docs/task_on_kart.rst +++ b/docs/task_on_kart.rst @@ -247,6 +247,9 @@ TaskOnKart.load_data_frame Please refer to :doc:`for_pandas`. +.. warning:: + This function is deprecated. Please use :func:`~gokart.task.TaskOnKart.load` instead. 
+ TaskOnKart.fail_on_empty_dump ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/gokart/task.py b/gokart/task.py index 2f351d2a..a7f7a06d 100644 --- a/gokart/task.py +++ b/gokart/task.py @@ -2,11 +2,17 @@ import inspect import os import random +import sys import types from importlib import import_module from logging import getLogger from typing import Any, Callable, Dict, Generator, Generic, Iterable, List, Optional, Set, TypeVar, Union, overload +if sys.version_info < (3, 13): + from typing_extensions import deprecated +else: + from warnings import deprecated + import luigi import pandas as pd from luigi.parameter import ParameterVisibility @@ -311,6 +317,9 @@ def _load(targets): return _load(self._get_input_targets(target)) + @deprecated("""This function is deprecated. use `load` instead. +If you want to specify `required_columns` and `drop_columns`, please extract the columns after loading. ex: `load()[['colA', 'colB']]` +""") def load_data_frame( self, target: Union[None, str, TargetOnKart] = None, required_columns: Optional[Set[str]] = None, drop_columns: bool = False ) -> pd.DataFrame: diff --git a/poetry.lock b/poetry.lock index 6bc381c5..319b1bee 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. 
[[package]] name = "apscheduler" @@ -2324,4 +2324,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "bd857b87476ecb8812a0546918bc59ed94c91f8e132637bb47daa1251f458461" +content-hash = "5912abedf4502ca2decaf792f500528ec6f9a4c98deb8d5f394a083ee198085d" diff --git a/pyproject.toml b/pyproject.toml index b3fb2a01..ce3a7113 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,9 @@ APScheduler = "*" redis = "*" dill = "*" backoff = "^2.2.1" +typing-extensions = [ + {version = "^4.11.0", python = "<3.13"} +] [tool.poetry.group.dev.dependencies] ruff = "*"