From cf0cd0fa97f59a1dc5ccd02a7f52e6f198ca855a Mon Sep 17 00:00:00 2001 From: Juliano Garcia Date: Wed, 27 Jan 2021 08:37:45 -0300 Subject: [PATCH] Fix Spatial LC check (#158) * fix spatial lc check * Trigger CircleCI * Try to clean cache * Add double quotes * Cache hit in the previous commit * Revert commits (PR #159 will fix the tests) Co-authored-by: Guilherme Beltramini --- src/fklearn/validation/splitters.py | 2 +- tests/validation/test_splitters.py | 26 ++++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/fklearn/validation/splitters.py b/src/fklearn/validation/splitters.py index 92832869..597f0173 100644 --- a/src/fklearn/validation/splitters.py +++ b/src/fklearn/validation/splitters.py @@ -422,7 +422,7 @@ def spatial_learning_curve_splitter(train_data: pd.DataFrame, random_state : int A seed for the random number generator that shuffles the IDs. """ - if np.min(train_percentages) < 0 or np.min(train_percentages) > 1: + if np.min(train_percentages) < 0 or np.max(train_percentages) > 1: raise ValueError('Train percentages must be between 0 and 1') if isinstance(training_limit, str): diff --git a/tests/validation/test_splitters.py b/tests/validation/test_splitters.py index a36b39c5..69b3a5c3 100644 --- a/tests/validation/test_splitters.py +++ b/tests/validation/test_splitters.py @@ -1,7 +1,7 @@ from datetime import timedelta import pandas as pd - +import pytest from fklearn.validation.splitters import \ k_fold_splitter, out_of_time_and_space_splitter, spatial_learning_curve_splitter, time_learning_curve_splitter, \ reverse_time_learning_curve_splitter, stability_curve_time_splitter, stability_curve_time_in_space_splitter, \ @@ -106,7 +106,7 @@ def test_time_and_space_learning_curve_splitter(): assert len(train_4[train_4.space.isin(test_4.space)]) == 0 -def test_spatial_learning_curve_splitte(): +def test_spatial_learning_curve_splitter(): result, logs = spatial_learning_curve_splitter( sample_data, train_percentages=[0.5, 1.0], @@ -132,6 +132,28 @@ def test_spatial_learning_curve_splitte(): assert test_2["time"].min() - train_2["time"].max() >= timedelta(days=180) assert len(train_2) > len(train_1) + # should raise an exception when percentage is off bounds + with pytest.raises(ValueError): + result, logs = spatial_learning_curve_splitter( + sample_data, + train_percentages=[0.5, 1.1], + space_column="space", + time_column="time", + training_limit="2015-09-09", + holdout_gap=timedelta(days=180), + random_state=0 + ) + with pytest.raises(ValueError): + result, logs = spatial_learning_curve_splitter( + sample_data, + train_percentages=[-0.1, 1.0], + space_column="space", + time_column="time", + training_limit="2015-09-09", + holdout_gap=timedelta(days=180), + random_state=0 + ) + def test_time_learning_curve_splitter(): result, logs = time_learning_curve_splitter(sample_data, '2015-05-05', time_column='time',