Skip to content

Commit

Permalink
Fix Spatial LC check (#158)
Browse files Browse the repository at this point in the history
* fix spatial lc check

* Trigger CircleCI

* Try to clean cache

* Add double quotes

* Cache hit in the previous commit

* Revert commits (PR #159 will fix the tests)

Co-authored-by: Guilherme Beltramini <guilherme.beltramini@nubank.com.br>
  • Loading branch information
robotenique and Guilherme Beltramini authored Jan 27, 2021
1 parent 43785bf commit cf0cd0f
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/fklearn/validation/splitters.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ def spatial_learning_curve_splitter(train_data: pd.DataFrame,
random_state : int
A seed for the random number generator that shuffles the IDs.
"""
if np.min(train_percentages) < 0 or np.min(train_percentages) > 1:
if np.min(train_percentages) < 0 or np.max(train_percentages) > 1:
raise ValueError('Train percentages must be between 0 and 1')

if isinstance(training_limit, str):
Expand Down
26 changes: 24 additions & 2 deletions tests/validation/test_splitters.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import timedelta

import pandas as pd

import pytest
from fklearn.validation.splitters import \
k_fold_splitter, out_of_time_and_space_splitter, spatial_learning_curve_splitter, time_learning_curve_splitter, \
reverse_time_learning_curve_splitter, stability_curve_time_splitter, stability_curve_time_in_space_splitter, \
Expand Down Expand Up @@ -106,7 +106,7 @@ def test_time_and_space_learning_curve_splitter():
assert len(train_4[train_4.space.isin(test_4.space)]) == 0


def test_spatial_learning_curve_splitte():
def test_spatial_learning_curve_splitter():
result, logs = spatial_learning_curve_splitter(
sample_data,
train_percentages=[0.5, 1.0],
Expand All @@ -132,6 +132,28 @@ def test_spatial_learning_curve_splitte():
assert test_2["time"].min() - train_2["time"].max() >= timedelta(days=180)
assert len(train_2) > len(train_1)

# should raise a ValueError when any train percentage is out of bounds
with pytest.raises(ValueError):
result, logs = spatial_learning_curve_splitter(
sample_data,
train_percentages=[0.5, 1.1],
space_column="space",
time_column="time",
training_limit="2015-09-09",
holdout_gap=timedelta(days=180),
random_state=0
)
with pytest.raises(ValueError):
result, logs = spatial_learning_curve_splitter(
sample_data,
train_percentages=[-0.1, 1.0],
space_column="space",
time_column="time",
training_limit="2015-09-09",
holdout_gap=timedelta(days=180),
random_state=0
)


def test_time_learning_curve_splitter():
result, logs = time_learning_curve_splitter(sample_data, '2015-05-05', time_column='time',
Expand Down

0 comments on commit cf0cd0f

Please sign in to comment.