Skip to content

Commit

Permalink
Disallow use of Pickle since numpy versions diverge
Browse files Browse the repository at this point in the history
  • Loading branch information
PGijsbers committed Nov 29, 2024
1 parent a5d192c commit 0b7fb24
Showing 1 changed file with 23 additions and 13 deletions.
36 changes: 23 additions & 13 deletions frameworks/AutoGluon/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

from amlb.utils import call_script_in_same_dir
from amlb.benchmark import TaskConfig
from amlb.data import Dataset, DatasetType
Expand All @@ -8,8 +7,8 @@
def setup(*args, **kwargs):
    """Install this framework by delegating to the setup.sh next to this file.

    All positional and keyword arguments are forwarded unchanged to
    ``call_script_in_same_dir``.
    """
    call_script_in_same_dir(__file__, "setup.sh", *args, **kwargs)

def run(dataset: Dataset, config: TaskConfig):

def run(dataset: Dataset, config: TaskConfig):
if dataset.type == DatasetType.timeseries:
return run_autogluon_timeseries(dataset, config)
else:
Expand All @@ -18,23 +17,33 @@ def run(dataset: Dataset, config: TaskConfig):

def run_autogluon_tabular(dataset: Dataset, config: TaskConfig):
    """Run AutoGluon-Tabular on ``dataset`` inside the framework's virtualenv.

    Builds the input payload (train/test parquet paths, target spec, problem
    type, and optionally inference-subsample files), then hands everything to
    ``run_in_venv`` to execute ``exec.py`` in the isolated environment.
    """
    from frameworks.shared.caller import run_in_venv

    target_spec = dict(name=dataset.target.name, classes=dataset.target.values)
    data = dict(
        train=dict(path=dataset.train.data_path("parquet")),
        test=dict(path=dataset.test.data_path("parquet")),
        target=target_spec,
        # AutoGluon's problem_type uses the same names as amlb.data.DatasetType.
        problem_type=dataset.type.name,
    )
    if config.measure_inference_time:
        subsamples = dataset.inference_subsample_files(fmt="parquet")
        data["inference_subsample_files"] = subsamples

    # Disallow numpy's pickle-based serialization: pickles are not portable
    # when the numpy version in the venv diverges from the caller's.
    options = {"serialization": {"numpy_allow_pickle": False}}

    return run_in_venv(
        __file__,
        "exec.py",
        input_data=data,
        dataset=dataset,
        config=config,
        options=options,
    )

def run_autogluon_timeseries(dataset: Dataset, config: TaskConfig):
from frameworks.shared.caller import run_in_venv

dataset = deepcopy(dataset)

data = dict(
Expand All @@ -50,5 +59,6 @@ def run_autogluon_timeseries(dataset: Dataset, config: TaskConfig):
repeated_item_id=dataset.repeated_item_id,
)

return run_in_venv(__file__, "exec_ts.py",
input_data=data, dataset=dataset, config=config)
return run_in_venv(
__file__, "exec_ts.py", input_data=data, dataset=dataset, config=config
)

0 comments on commit 0b7fb24

Please sign in to comment.