From 90e1c76a02104e691dc95690a47913c6daf30afe Mon Sep 17 00:00:00 2001
From: lixfz
Date: Thu, 4 May 2023 14:32:56 +0800
Subject: [PATCH 1/2] Fix trial pickling

---
 hypernets/core/trial.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/hypernets/core/trial.py b/hypernets/core/trial.py
index 0a09a55..d6dc09c 100644
--- a/hypernets/core/trial.py
+++ b/hypernets/core/trial.py
@@ -15,6 +15,18 @@
 from ..core.searcher import OptimizeDirection
 
 
+def _is_bigdata(v):
+    big_data_types = (pd.Series, pd.DataFrame, np.ndarray)
+    if isinstance(v, big_data_types):
+        return True
+
+    type_name = type(v).__name__.lower()
+    if any(s in type_name for s in ('array', 'dataframe', 'series')):
+        return True
+
+    return False
+
+
 class Trial():
     def __init__(self, space_sample, trial_no, reward, elapsed, model_file=None, succeeded=True):
         self.space_sample = space_sample
@@ -81,10 +93,9 @@ def __getstate__(self):
 
         # state = {k: v for k, v in state.items() if k != 'memo'}
         memo = state.get('memo', None)
-        big_data_types = (pd.Series, pd.DataFrame, np.ndarray)
-        big_data_exists = isinstance(memo, dict) and any(isinstance(v, big_data_types) for v in memo.values())
+        big_data_exists = isinstance(memo, dict) and any(_is_bigdata(v) for v in memo.values())
         if big_data_exists:
-            compacted_memo = {k: v for k, v in memo.items() if not isinstance(v, big_data_types)}
+            compacted_memo = {k: v for k, v in memo.items() if not _is_bigdata(v)}
             state = state.copy()
             state['memo'] = compacted_memo
 

From 729104fe7d13744d9c5be4c0032e41ba0dc15303 Mon Sep 17 00:00:00 2001
From: lixfz
Date: Thu, 4 May 2023 14:33:08 +0800
Subject: [PATCH 2/2] Fix cache

---
 hypernets/tabular/cache.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hypernets/tabular/cache.py b/hypernets/tabular/cache.py
index 437d8b2..fb0ec3f 100644
--- a/hypernets/tabular/cache.py
+++ b/hypernets/tabular/cache.py
@@ -241,7 +241,7 @@ def _store_cache(toolbox, cache_path, data, meta):
     elif isinstance(data, (list, tuple)):
         items = [f'_{i}' for i in range(len(data))]
         for d, i in zip(data, items):
-            _store_cache(f'{cache_path}{i}', d, meta)
+            _store_cache(toolbox, f'{cache_path}{i}', d, meta)
         meta.update({'kind': _KIND_LIST, 'items': items})
     else:
         pq = toolbox.parquet()
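
Note (not part of the patches): below is a minimal, self-contained sketch of the
__getstate__ compaction that PATCH 1 implements, for readers outside the
hypernets codebase. DemoTrial is a hypothetical stand-in for
hypernets.core.trial.Trial, not the library's actual class; only pandas, numpy,
and the standard library are assumed.

# Hypothetical stand-in for hypernets.core.trial.Trial; a sketch of the
# __getstate__ compaction from PATCH 1, runnable with only pandas/numpy.
import pickle

import numpy as np
import pandas as pd


def _is_bigdata(v):
    # Known pandas/numpy containers first, then a duck-typed fallback on the
    # type name, so e.g. cudf.DataFrame or a dask Array is caught without
    # importing those libraries.
    if isinstance(v, (pd.Series, pd.DataFrame, np.ndarray)):
        return True
    type_name = type(v).__name__.lower()
    return any(s in type_name for s in ('array', 'dataframe', 'series'))


class DemoTrial:
    def __init__(self):
        # memo mixes a large object with a small, picklable one
        self.memo = {'X': pd.DataFrame(np.zeros((10_000, 100))),
                     'note': 'kept'}

    def __getstate__(self):
        state = self.__dict__
        memo = state.get('memo', None)
        if isinstance(memo, dict) and any(_is_bigdata(v) for v in memo.values()):
            state = state.copy()  # never mutate the live trial's memo
            state['memo'] = {k: v for k, v in memo.items()
                             if not _is_bigdata(v)}
        return state


blob = pickle.dumps(DemoTrial())
print(len(blob) < 10_000)       # True: the DataFrame was dropped from the pickle
print(pickle.loads(blob).memo)  # {'note': 'kept'}

The state.copy() before stripping is the point of the pattern: __getstate__
compacts only the serialized snapshot, leaving the in-memory trial intact.
PATCH 2 is a plain missing-argument fix: the recursive call that stores each
element of a list/tuple cache had dropped the leading toolbox parameter, so
every remaining argument was bound one position off.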