From ecb6906f6b3883799b6f87ddc86680eea705e8bc Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 18 Nov 2020 08:21:26 +0100
Subject: [PATCH 1/2] Allow loose reinitialization from runs also

---
 openml/runs/functions.py   |  7 +++++--
 openml/setups/functions.py | 11 ++++++-----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 194e4b598..a3c0fbc03 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -321,7 +321,7 @@ def get_run_trace(run_id: int) -> OpenMLRunTrace:
     return run_trace
 
 
-def initialize_model_from_run(run_id: int) -> Any:
+def initialize_model_from_run(run_id: int, strict_version: bool = True) -> Any:
     """
     Initialized a model based on a run_id (i.e., using the exact
     same parameter settings)
@@ -331,12 +331,15 @@ def initialize_model_from_run(run_id: int) -> Any:
     run_id : int
         The Openml run_id
 
+    strict_version: bool (default=True)
+        See `flow_to_model` strict_version.
+
     Returns
     -------
     model
     """
     run = get_run(run_id)
-    return initialize_model(run.setup_id)
+    return initialize_model(run.setup_id, strict_version)
 
 
 def initialize_model_from_trace(
diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index b418a6106..e3540a35b 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -228,16 +228,17 @@ def __list_setups(api_call, output_format="object"):
     return setups
 
 
-def initialize_model(setup_id: int) -> Any:
-    """
-    Initialized a model based on a setup_id (i.e., using the exact
-    same parameter settings)
+def initialize_model(setup_id: int, strict_version: bool = True) -> Any:
+    """ Initialized a model based on a setup_id (i.e., using the exact same parameter settings)
 
     Parameters
     ----------
     setup_id : int
         The Openml setup_id
 
+    strict_version: bool (default=True)
+        See `flow_to_model` strict_version.
+
     Returns
     -------
     model
@@ -256,7 +257,7 @@ def initialize_model(setup_id: int) -> Any:
             subflow = flow
         subflow.parameters[hyperparameter.parameter_name] = hyperparameter.value
 
-    model = flow.extension.flow_to_model(flow)
+    model = flow.extension.flow_to_model(flow, strict_version=strict_version)
     return model
 
 

From 7d97fcde86766547d619e6cb74cee7495dbf721f Mon Sep 17 00:00:00 2001
From: neeratyoy
Date: Wed, 9 Dec 2020 16:15:06 +0100
Subject: [PATCH 2/2] Initial unit test design

---
 tests/test_runs/test_run_functions.py | 42 +++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index b155d6cd5..46f35c104 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -940,6 +940,48 @@ def test_initialize_model_from_run(self):
         self.assertEqual(flowS.components["Imputer"].parameters["strategy"], '"most_frequent"')
         self.assertEqual(flowS.components["VarianceThreshold"].parameters["threshold"], "0.05")
 
+    @unittest.skipIf(
+        LooseVersion(sklearn.__version__) < "0.20",
+        reason="sklearn flows and pipelines changed drastically post 0.20 version",
+    )
+    def test_initialize_model_from_run_nonstrict(self):
+        # Runs available on the test server, grouped by the sklearn version each run uses.
+        sklearn_runs = {
+            "sklearn==0.23.2": [
+                100,
+                111,
+                112,
+                120,
+                123,
+                130,
+                153,
+                164,
+                166,
+                167,
+                168,
+                803,
+                806,
+                807,
+                808,
+                814,
+                1373,
+                1374,
+                1375,
+                1376,
+                1377,
+                1378,
+            ],
+            "sklearn==0.23.1": [386, 480, 512, 516, 520, 772, 1085, 1124, 1178, 1646],
+            "sklearn==0.20.2": [481],
+            "sklearn==0.22.2": [1108, 1612, 1642],
+            "sklearn==0.21.0": [1838],
+        }
+        current_version = "sklearn=={}".format(LooseVersion(sklearn.__version__).vstring)
+        sklearn_runs.pop(current_version, None)  # installed version may not be listed
+        version_choice = np.random.choice(list(sklearn_runs.keys()))
+        run_id_choice = np.random.choice(sklearn_runs[version_choice])
+        _ = openml.runs.initialize_model_from_run(run_id=run_id_choice, strict_version=False)
+
     @unittest.skipIf(
         LooseVersion(sklearn.__version__) < "0.20",
         reason="SimpleImputer doesn't handle mixed type DataFrame as input",