Skip to content

Commit

Permalink
style(ruff): select additional rules such as isort
Browse files Browse the repository at this point in the history
  • Loading branch information
deepyaman committed Mar 12, 2024
1 parent d726f70 commit 7f54e85
Show file tree
Hide file tree
Showing 14 changed files with 165 additions and 79 deletions.
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.3.1
rev: v0.3.2
hooks:
- id: ruff
args: [--fix]
Expand Down
21 changes: 11 additions & 10 deletions ibisml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
from ._version import __version__
from ibisml.core import Recipe, Step
from ibisml.select import (
selector,
everything,
categorical,
cols,
contains,
date,
endswith,
startswith,
matches,
everything,
floating,
has_type,
numeric,
integer,
matches,
nominal,
categorical,
numeric,
selector,
startswith,
string,
integer,
floating,
temporal,
date,
time,
timestamp,
where,
)
from ibisml.steps import * # noqa: F403

from ._version import __version__
56 changes: 38 additions & 18 deletions ibisml/core.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,29 @@
from __future__ import annotations

import copy
from collections.abc import Sequence, Iterable
from typing import Any, Callable, Literal, cast, TYPE_CHECKING
from collections.abc import Iterable, Sequence
from functools import cache
from typing import TYPE_CHECKING, Any, Callable, Literal, cast

import numpy as np
import pyarrow as pa
import pandas as pd
import ibis
import ibis.expr.types as ir
import numpy as np
import pandas as pd
import pyarrow as pa

if TYPE_CHECKING:
import polars as pl
import dask.dataframe as dd
import polars as pl
import xgboost as xgb


def _as_table(X: Any):
if isinstance(X, ir.Table):
return X
elif isinstance(X, np.ndarray):
return ibis.memtable(pd.DataFrame(X, columns=[f"x{i}" for i in range(X.shape[-1])]))
return ibis.memtable(
pd.DataFrame(X, columns=[f"x{i}" for i in range(X.shape[-1])])
)
else:
return ibis.memtable(X)

Expand Down Expand Up @@ -74,10 +76,7 @@ def _get_categorize_chunk() -> Callable[[str, list[str], Any], pd.DataFrame]:
dask cluster.
"""

def categorize(
df: pd.DataFrame,
categories: dict[str, list[Any]],
) -> pd.DataFrame:
def categorize(df: pd.DataFrame, categories: dict[str, list[Any]]) -> pd.DataFrame:
import pandas as pd

new = {}
Expand Down Expand Up @@ -130,7 +129,9 @@ def set_params(self, **kwargs):
self.steps = kwargs.get("steps")

def set_output(
self, *, transform: Literal["default", "pandas", "pyarrow", "polars", None] = None
self,
*,
transform: Literal["default", "pandas", "pyarrow", "polars", None] = None,
) -> Recipe:
"""Set output type returned by `transform`.
Expand All @@ -146,14 +147,17 @@ def set_output(
- `"polars"`: Polars dataframe
- `"pyarrow"`: Pyarrow table
- `None`: Transform configuration is unchanged
"""
if transform is None:
return self

formats = ("default", "pandas", "polars", "pyarrow")
formats = "default", "pandas", "polars", "pyarrow"

if transform not in formats:
raise ValueError(f"`transform` must be one of {formats!r}, got {transform}")
raise ValueError(
f"`transform` must be one of {formats!r}, got {transform!r}"
)

self._output_format = transform
return self
Expand Down Expand Up @@ -183,6 +187,7 @@ def fit(self, X, y=None) -> Recipe:
-------
self
Returns the same instance.
"""
table = _as_table(X)
metadata = Metadata()
Expand All @@ -204,6 +209,7 @@ def transform(self, X):
-------
Xt
Transformed data.
"""
if self._output_format == "pandas":
return self.to_pandas(X)
Expand All @@ -229,6 +235,7 @@ def fit_transform(self, X, y=None):
-------
Xt
Transformed training data.
"""
return self.fit(X, y).transform(X)

Expand Down Expand Up @@ -265,7 +272,9 @@ def _categorize_dask_dataframe(self, ddf: dd.DataFrame) -> dd.DataFrame:

categorize = _get_categorize_chunk()

categories = {col: cats.values for col, cats in self.metadata_.categories.items()}
categories = {
col: cats.values for col, cats in self.metadata_.categories.items()
}
return ddf.map_partitions(categorize, categories)

def _categorize_pyarrow_batches(
Expand Down Expand Up @@ -297,8 +306,8 @@ def to_table(self, X: ir.Table) -> ir.Table:
----------
X : table-like
The input data to transform.
"""
"""
table = _as_table(X)
for step in self.steps:
table = step.transform_table(table)
Expand All @@ -316,6 +325,7 @@ def to_pandas(self, X: Any, categories: bool = False) -> pd.DataFrame:
series. If False (the default) these columns will be returned
as numeric columns containing only their integral categorical
codes.
"""
df = self.to_table(X).to_pandas()
if categories:
Expand All @@ -329,6 +339,7 @@ def to_numpy(self, X: Any) -> np.ndarray:
----------
X : table-like
The input data to transform.
"""
table = self.to_table(X)
if not all(t.is_numeric() for t in table.schema().types):
Expand All @@ -344,6 +355,7 @@ def to_polars(self, X: Any) -> pl.DataFrame:
----------
X : table-like
The input data to transform.
"""
return self.to_table(X).to_polars()

Expand All @@ -359,13 +371,16 @@ def to_pyarrow(self, X: Any, categories: bool = False) -> pa.Table:
columns. If False (the default) these columns will be returned
as numeric columns containing only their integral categorical
codes.
"""
table = self.to_table(X).to_pyarrow()
if categories:
table = self._categorize_pyarrow(table)
return table

def to_pyarrow_batches(self, X: Any, categories: bool = False) -> pa.RecordBatchReader:
def to_pyarrow_batches(
self, X: Any, categories: bool = False
) -> pa.RecordBatchReader:
"""Transform X and return a ``pyarrow.RecordBatchReader``.
Parameters
Expand All @@ -377,6 +392,7 @@ def to_pyarrow_batches(self, X: Any, categories: bool = False) -> pa.RecordBatch
columns. If False (the default) these columns will be returned
as numeric columns containing only their integral categorical
codes.
"""
reader = self.to_table(X).to_pyarrow_batches()
if categories:
Expand All @@ -395,6 +411,7 @@ def to_dask_dataframe(self, X: Any, categories: bool = False) -> dd.DataFrame:
series. If False (the default) these columns will be returned
as numeric columns containing only their integral categorical
codes.
"""
import dask.dataframe as dd

Expand Down Expand Up @@ -427,7 +444,9 @@ def to_dmatrix(self, X: Any) -> xgb.DMatrix:
import xgboost as xgb

df = self.to_pandas(X, categories=True)
return xgb.DMatrix(df[self.features], df[self.outcomes], enable_categorical=True)
return xgb.DMatrix(
df[self.features], df[self.outcomes], enable_categorical=True
)

def to_dask_dmatrix(self, X: Any) -> xgb.dask.DaskDMatrix:
"""Transform X and return a ``xgboost.dask.DMatrix``
Expand All @@ -436,6 +455,7 @@ def to_dask_dmatrix(self, X: Any) -> xgb.dask.DaskDMatrix:
----------
X : table-like
The input data to transform.
"""
import xgboost as xgb
from dask.distributed import get_client
Expand Down
17 changes: 14 additions & 3 deletions ibisml/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import re
from collections.abc import Collection
from typing import Callable, Union, ClassVar
from typing import Callable, ClassVar, Union

import ibis.expr.types as ir
import ibis.expr.datatypes as dt
import ibis.expr.types as ir

from ibisml.core import Metadata

Expand Down Expand Up @@ -95,6 +95,7 @@ class and_(Selector):
----------
selectors
One or more selectors to combine.
"""

__slots__ = ("selectors",)
Expand All @@ -117,6 +118,7 @@ class or_(Selector):
----------
selectors
One or more selectors to combine.
"""

__slots__ = ("selectors",)
Expand All @@ -139,6 +141,7 @@ class not_(Selector):
----------
selector
The selector to wrap.
"""

__slots__ = ("selector",)
Expand Down Expand Up @@ -169,6 +172,7 @@ class cols(Selector):
----------
columns
Names of the columns to select.
"""

__slots__ = ("columns",)
Expand All @@ -187,6 +191,7 @@ class contains(Selector):
----------
pattern
The string to search for in column names.
"""

__slots__ = ("pattern",)
Expand All @@ -205,6 +210,7 @@ class endswith(Selector):
----------
suffix
The column name suffix to match.
"""

__slots__ = ("suffix",)
Expand All @@ -223,6 +229,7 @@ class startswith(Selector):
----------
prefix
The column name prefix to match.
"""

__slots__ = ("prefix",)
Expand All @@ -241,6 +248,7 @@ class matches(Selector):
----------
pattern
The pattern to search for in column names.
"""

__slots__ = ("pattern",)
Expand All @@ -259,6 +267,7 @@ class has_type(Selector):
----------
dtype
The dtype to match. May be a dtype instance, string, or dtype class.
"""

__slots__ = ("dtype",)
Expand All @@ -285,7 +294,8 @@ class _TypeSelector(Selector):

def matches(self, col: ir.Column, metadata: Metadata) -> bool:
return metadata.get_categories(col.get_name()) is None and isinstance(
col.type(), self._type
col.type(),
self._type,
)


Expand Down Expand Up @@ -373,6 +383,7 @@ class where(Selector):
predicate
A predicate function from ``Column`` to ``bool``. Only columns where
``predicate`` returns ``True`` will be selected.
"""

__slots__ = ("predicate",)
Expand Down
7 changes: 3 additions & 4 deletions ibisml/steps/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from ibisml.steps.common import Cast, Drop, MutateAt, Mutate
from ibisml.steps.common import Cast, Drop, Mutate, MutateAt
from ibisml.steps.encode import CategoricalEncode, OneHotEncode
from ibisml.steps.impute import FillNA, ImputeMean, ImputeMedian, ImputeMode
from ibisml.steps.standardize import ScaleMinMax, ScaleStandard
from ibisml.steps.encode import OneHotEncode, CategoricalEncode
from ibisml.steps.temporal import ExpandDateTime, ExpandDate, ExpandTime

from ibisml.steps.temporal import ExpandDate, ExpandDateTime, ExpandTime

__all__ = (
"Cast",
Expand Down
Loading

0 comments on commit 7f54e85

Please sign in to comment.