Skip to content

Commit

Permalink
update version
Browse files (browse the repository at this point in the history)
  • Loading branch information
oaksharks committed Dec 15, 2023
1 parent ae55533 commit 587335d
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 8 deletions.
2 changes: 1 addition & 1 deletion deeptables/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-__version__ = '0.2.5'
+__version__ = '0.2.6'
2 changes: 1 addition & 1 deletion deeptables/models/deeptable.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import pickle
import time

import dask
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
Expand Down Expand Up @@ -415,6 +414,7 @@ def fit_cross_validation(self, X, y, X_eval=None, X_test=None, num_folds=5, stra
oof_proba = np.full((X_shape[0], 1), np.nan)

if is_dask_installed and DaskToolBox.exist_dask_object(X, y):
import dask
X = DaskToolBox.reset_index(DaskToolBox.to_dask_frame_or_series(X))
y = DaskToolBox.to_dask_type(y)
if DaskToolBox.is_dask_dataframe_or_series(y):
Expand Down
17 changes: 13 additions & 4 deletions deeptables/utils/dataset_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,12 @@
from distutils.version import LooseVersion
from functools import partial

import dask
import dask.dataframe as dd
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical as tf_to_categorical

from deeptables.utils import consts, dt_logging

from hypernets.tabular import get_tool_box, is_dask_installed
logger = dt_logging.get_logger(__name__)

TFDG_DASK_CHUNK = 100
Expand Down Expand Up @@ -105,6 +103,7 @@ def __call__(self, X, y=None, *, batch_size, shuffle, drop_remainder):
return ds

def _to_ds20(self, X, y=None, *, batch_size, shuffle, drop_remainder):
import dask
ds_types = {}
ds_shapes = {}
meta = self._get_meta(X)
Expand All @@ -118,6 +117,7 @@ def _to_ds20(self, X, y=None, *, batch_size, shuffle, drop_remainder):
ds_types[k] = 'int32'

if y is not None:
import dask.dataframe as dd
if isinstance(y, dd.Series):
y = y.to_dask_array(lengths=True)
if self.task == consts.TASK_MULTICLASS:
Expand Down Expand Up @@ -149,6 +149,7 @@ def to_spec(name, dtype, idx):
sig = {k: to_spec(k, dtype, idx) for k, (dtype, idx) in meta.items()}

if y is not None:
import dask.dataframe as dd
if isinstance(y, dd.Series):
y = y.to_dask_array(lengths=True)
if self.task == consts.TASK_MULTICLASS:
Expand All @@ -167,6 +168,7 @@ def to_spec(name, dtype, idx):

@staticmethod
def _generate(meta, X, y, *, batch_size, shuffle, drop_remainder):
import dask
total_size = dask.compute(X.shape)[0][0]
chunk_size = min(total_size, batch_size * TFDG_DASK_CHUNK)
fn = partial(_TFDGForDask._compute_chunk, X, y, chunk_size)
Expand Down Expand Up @@ -205,6 +207,7 @@ def _generate(meta, X, y, *, batch_size, shuffle, drop_remainder):

@staticmethod
def _to_categorical(y, *, num_classes):
import dask
if len(y.shape) == 1:
y = y.reshape(dask.compute(y.shape[0])[0], 1)
fn = partial(tf_to_categorical, num_classes=num_classes, dtype='float32')
Expand All @@ -213,6 +216,7 @@ def _to_categorical(y, *, num_classes):

@staticmethod
def _compute_chunk(X, y, chunk_size, i):
import dask
try:
Xc = X[i:i + chunk_size]
yc = y[i:i + chunk_size] if y is not None else None
Expand All @@ -236,7 +240,12 @@ def _range(start, stop, step, shuffle):
def to_dataset(config, task, num_classes, X, y=None, *,
batch_size, shuffle, drop_remainder,
categorical_columns, continuous_columns, var_len_categorical_columns):
cls = _TFDGForDask if isinstance(X, dd.DataFrame) else _TFDGForPandas

if is_dask_installed:
import dask.dataframe as dd
cls = _TFDGForDask if isinstance(X, dd.DataFrame) else _TFDGForPandas
else:
cls = _TFDGForPandas
logger.info(f'create dataset generator with {cls.__name__}, '
f'batch_size={batch_size}, shuffle={shuffle}, drop_remainder={drop_remainder}')

Expand Down
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ numpy>=1.16.5
scikit-learn>=0.22.1
lightgbm>=2.2.0
category_encoders>=2.1.0
-hypernets>=0.2.5.1
+hypernets>=0.3.0
h5py>=2.10.0
eli5
-dask

0 comments on commit 587335d

Please sign in to comment.