style(ruff): select additional rules such as isort

ibis-project · Mar 12, 2024 · 7f54e85
1 parent d726f70
commit 7f54e85
Show file tree

Hide file tree

Showing 14 changed files with 165 additions and 79 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,7 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    # Ruff version.
-    rev: v0.3.1
+    rev: v0.3.2
     hooks:
       - id: ruff
         args: [--fix]

diff --git a/ibisml/__init__.py b/ibisml/__init__.py
@@ -1,24 +1,25 @@
-from ._version import __version__
 from ibisml.core import Recipe, Step
 from ibisml.select import (
-    selector,
-    everything,
+    categorical,
     cols,
     contains,
+    date,
     endswith,
-    startswith,
-    matches,
+    everything,
+    floating,
     has_type,
-    numeric,
+    integer,
+    matches,
     nominal,
-    categorical,
+    numeric,
+    selector,
+    startswith,
     string,
-    integer,
-    floating,
     temporal,
-    date,
     time,
     timestamp,
     where,
 )
 from ibisml.steps import *  # noqa: F403
+
+from ._version import __version__
diff --git a/ibisml/core.py b/ibisml/core.py
@@ -1,27 +1,29 @@
 from __future__ import annotations
 
 import copy
-from collections.abc import Sequence, Iterable
-from typing import Any, Callable, Literal, cast, TYPE_CHECKING
+from collections.abc import Iterable, Sequence
 from functools import cache
+from typing import TYPE_CHECKING, Any, Callable, Literal, cast
 
-import numpy as np
-import pyarrow as pa
-import pandas as pd
 import ibis
 import ibis.expr.types as ir
+import numpy as np
+import pandas as pd
+import pyarrow as pa
 
 if TYPE_CHECKING:
-    import polars as pl
     import dask.dataframe as dd
+    import polars as pl
     import xgboost as xgb
 
 
 def _as_table(X: Any):
     if isinstance(X, ir.Table):
         return X
     elif isinstance(X, np.ndarray):
-        return ibis.memtable(pd.DataFrame(X, columns=[f"x{i}" for i in range(X.shape[-1])]))
+        return ibis.memtable(
+            pd.DataFrame(X, columns=[f"x{i}" for i in range(X.shape[-1])])
+        )
     else:
         return ibis.memtable(X)
 
@@ -74,10 +76,7 @@ def _get_categorize_chunk() -> Callable[[str, list[str], Any], pd.DataFrame]:
     dask cluster.
     """
 
-    def categorize(
-        df: pd.DataFrame,
-        categories: dict[str, list[Any]],
-    ) -> pd.DataFrame:
+    def categorize(df: pd.DataFrame, categories: dict[str, list[Any]]) -> pd.DataFrame:
         import pandas as pd
 
         new = {}
@@ -130,7 +129,9 @@ def set_params(self, **kwargs):
             self.steps = kwargs.get("steps")
 
     def set_output(
-        self, *, transform: Literal["default", "pandas", "pyarrow", "polars", None] = None
+        self,
+        *,
+        transform: Literal["default", "pandas", "pyarrow", "polars", None] = None,
     ) -> Recipe:
         """Set output type returned by `transform`.
 
@@ -146,14 +147,17 @@ def set_output(
             - `"polars"`: Polars dataframe
             - `"pyarrow"`: Pyarrow table
             - `None`: Transform configuration is unchanged
+
         """
         if transform is None:
             return self
 
-        formats = ("default", "pandas", "polars", "pyarrow")
+        formats = "default", "pandas", "polars", "pyarrow"
 
         if transform not in formats:
-            raise ValueError(f"`transform` must be one of {formats!r}, got {transform}")
+            raise ValueError(
+                f"`transform` must be one of {formats!r}, got {transform!r}"
+            )
 
         self._output_format = transform
         return self
@@ -183,6 +187,7 @@ def fit(self, X, y=None) -> Recipe:
         -------
         self
             Returns the same instance.
+
         """
         table = _as_table(X)
         metadata = Metadata()
@@ -204,6 +209,7 @@ def transform(self, X):
         -------
         Xt
             Transformed data.
+
         """
         if self._output_format == "pandas":
             return self.to_pandas(X)
@@ -229,6 +235,7 @@ def fit_transform(self, X, y=None):
         -------
         Xt
             Transformed training data.
+
         """
         return self.fit(X, y).transform(X)
 
@@ -265,7 +272,9 @@ def _categorize_dask_dataframe(self, ddf: dd.DataFrame) -> dd.DataFrame:
 
         categorize = _get_categorize_chunk()
 
-        categories = {col: cats.values for col, cats in self.metadata_.categories.items()}
+        categories = {
+            col: cats.values for col, cats in self.metadata_.categories.items()
+        }
         return ddf.map_partitions(categorize, categories)
 
     def _categorize_pyarrow_batches(
@@ -297,8 +306,8 @@ def to_table(self, X: ir.Table) -> ir.Table:
         ----------
         X : table-like
             The input data to transform.
-        """
 
+        """
         table = _as_table(X)
         for step in self.steps:
             table = step.transform_table(table)
@@ -316,6 +325,7 @@ def to_pandas(self, X: Any, categories: bool = False) -> pd.DataFrame:
             series. If False (the default) these columns will be returned
             as numeric columns containing only their integral categorical
             codes.
+
         """
         df = self.to_table(X).to_pandas()
         if categories:
@@ -329,6 +339,7 @@ def to_numpy(self, X: Any) -> np.ndarray:
         ----------
         X : table-like
             The input data to transform.
+
         """
         table = self.to_table(X)
         if not all(t.is_numeric() for t in table.schema().types):
@@ -344,6 +355,7 @@ def to_polars(self, X: Any) -> pl.DataFrame:
         ----------
         X : table-like
             The input data to transform.
+
         """
         return self.to_table(X).to_polars()
 
@@ -359,13 +371,16 @@ def to_pyarrow(self, X: Any, categories: bool = False) -> pa.Table:
             columns. If False (the default) these columns will be returned
             as numeric columns containing only their integral categorical
             codes.
+
         """
         table = self.to_table(X).to_pyarrow()
         if categories:
             table = self._categorize_pyarrow(table)
         return table
 
-    def to_pyarrow_batches(self, X: Any, categories: bool = False) -> pa.RecordBatchReader:
+    def to_pyarrow_batches(
+        self, X: Any, categories: bool = False
+    ) -> pa.RecordBatchReader:
         """Transform X and return a ``pyarrow.RecordBatchReader``.
 
         Parameters
@@ -377,6 +392,7 @@ def to_pyarrow_batches(self, X: Any, categories: bool = False) -> pa.RecordBatch
             columns. If False (the default) these columns will be returned
             as numeric columns containing only their integral categorical
             codes.
+
         """
         reader = self.to_table(X).to_pyarrow_batches()
         if categories:
@@ -395,6 +411,7 @@ def to_dask_dataframe(self, X: Any, categories: bool = False) -> dd.DataFrame:
             series. If False (the default) these columns will be returned
             as numeric columns containing only their integral categorical
             codes.
+
         """
         import dask.dataframe as dd
 
@@ -427,7 +444,9 @@ def to_dmatrix(self, X: Any) -> xgb.DMatrix:
         import xgboost as xgb
 
         df = self.to_pandas(X, categories=True)
-        return xgb.DMatrix(df[self.features], df[self.outcomes], enable_categorical=True)
+        return xgb.DMatrix(
+            df[self.features], df[self.outcomes], enable_categorical=True
+        )
 
     def to_dask_dmatrix(self, X: Any) -> xgb.dask.DaskDMatrix:
         """Transform X and return a ``xgboost.dask.DMatrix``
@@ -436,6 +455,7 @@ def to_dask_dmatrix(self, X: Any) -> xgb.dask.DaskDMatrix:
         ----------
         X : table-like
             The input data to transform.
+
         """
         import xgboost as xgb
         from dask.distributed import get_client

diff --git a/ibisml/select.py b/ibisml/select.py
@@ -2,10 +2,10 @@
 
 import re
 from collections.abc import Collection
-from typing import Callable, Union, ClassVar
+from typing import Callable, ClassVar, Union
 
-import ibis.expr.types as ir
 import ibis.expr.datatypes as dt
+import ibis.expr.types as ir
 
 from ibisml.core import Metadata
 
@@ -95,6 +95,7 @@ class and_(Selector):
     ----------
     selectors
         One or more selectors to combine.
+
     """
 
     __slots__ = ("selectors",)
@@ -117,6 +118,7 @@ class or_(Selector):
     ----------
     selectors
         One or more selectors to combine.
+
     """
 
     __slots__ = ("selectors",)
@@ -139,6 +141,7 @@ class not_(Selector):
     ----------
     selector
         The selector to wrap.
+
     """
 
     __slots__ = ("selector",)
@@ -169,6 +172,7 @@ class cols(Selector):
     ----------
     columns
         Names of the columns to select.
+
     """
 
     __slots__ = ("columns",)
@@ -187,6 +191,7 @@ class contains(Selector):
     ----------
     pattern
         The string to search for in column names.
+
     """
 
     __slots__ = ("pattern",)
@@ -205,6 +210,7 @@ class endswith(Selector):
     ----------
     suffix
         The column name suffix to match.
+
     """
 
     __slots__ = ("suffix",)
@@ -223,6 +229,7 @@ class startswith(Selector):
     ----------
     prefix
         The column name prefix to match.
+
     """
 
     __slots__ = ("prefix",)
@@ -241,6 +248,7 @@ class matches(Selector):
     ----------
     pattern
         The pattern to search for in column names.
+
     """
 
     __slots__ = ("pattern",)
@@ -259,6 +267,7 @@ class has_type(Selector):
     ----------
     dtype
         The dtype to match. May be a dtype instance, string, or dtype class.
+
     """
 
     __slots__ = ("dtype",)
@@ -285,7 +294,8 @@ class _TypeSelector(Selector):
 
     def matches(self, col: ir.Column, metadata: Metadata) -> bool:
         return metadata.get_categories(col.get_name()) is None and isinstance(
-            col.type(), self._type
+            col.type(),
+            self._type,
         )
 
 
@@ -373,6 +383,7 @@ class where(Selector):
     predicate
         A predicate function from ``Column`` to ``bool``. Only columns where
         ``predicate`` returns ``True`` will be selected.
+
     """
 
     __slots__ = ("predicate",)

diff --git a/ibisml/steps/__init__.py b/ibisml/steps/__init__.py
@@ -1,9 +1,8 @@
-from ibisml.steps.common import Cast, Drop, MutateAt, Mutate
+from ibisml.steps.common import Cast, Drop, Mutate, MutateAt
+from ibisml.steps.encode import CategoricalEncode, OneHotEncode
 from ibisml.steps.impute import FillNA, ImputeMean, ImputeMedian, ImputeMode
 from ibisml.steps.standardize import ScaleMinMax, ScaleStandard
-from ibisml.steps.encode import OneHotEncode, CategoricalEncode
-from ibisml.steps.temporal import ExpandDateTime, ExpandDate, ExpandTime
-
+from ibisml.steps.temporal import ExpandDate, ExpandDateTime, ExpandTime
 
 __all__ = (
     "Cast",