Skip to content

Commit

Permalink
Add verbose argument to fklearn validator (#155)
Browse files Browse the repository at this point in the history
* Add verbose argument to fklearn validator

* Fix lint

* Update CHANGELOG

* Fix lint

* Fix tests

* Bump version
  • Loading branch information
bpassanezi authored Nov 20, 2020
1 parent 5cc182f commit 0ee6ab9
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 13 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## [1.22.0] - 2020-11-18
- **Enhancement**
- Add verbose argument to `validator` and `parallel_validator`

## [1.21.0] - 2020-10-02
- **Enhancement**
- Now transformers can create a new column instead of replace the input
Expand Down
2 changes: 1 addition & 1 deletion src/fklearn/resources/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.21.0
1.22.0
31 changes: 23 additions & 8 deletions src/fklearn/validation/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from fklearn.types import EvalFnType, LearnerFnType, LogType
from fklearn.types import SplitterFnType, ValidatorReturnType, PerturbFnType
from tqdm import tqdm


def validator_iteration(data: pd.DataFrame,
Expand All @@ -20,7 +21,8 @@ def validator_iteration(data: pd.DataFrame,
fold_num: int,
train_fn: LearnerFnType,
eval_fn: EvalFnType,
predict_oof: bool = False) -> LogType:
predict_oof: bool = False,
verbose: bool = False) -> LogType:
"""
Perform an iteration of train test split, training and evaluation.
Expand Down Expand Up @@ -65,7 +67,10 @@ def validator_iteration(data: pd.DataFrame,

eval_results = []
oof_predictions = []
for test_index in test_indexes:

if verbose:
print(f"Running validation for {fold_num} fold.")
for test_index in (tqdm(test_indexes) if verbose else test_indexes):
test_predictions = predict_fn(data.iloc[test_index])
eval_results.append(eval_fn(test_predictions))
if predict_oof:
Expand All @@ -85,7 +90,8 @@ def validator(train_data: pd.DataFrame,
eval_fn: EvalFnType,
perturb_fn_train: PerturbFnType = identity,
perturb_fn_test: PerturbFnType = identity,
predict_oof: bool = False) -> ValidatorReturnType:
predict_oof: bool = False,
verbose: bool = False) -> ValidatorReturnType:
"""
Splits the training data into folds given by the split function and
performs a train-evaluation sequence on each fold by calling
Expand Down Expand Up @@ -122,6 +128,9 @@ def validator(train_data: pd.DataFrame,
predict_oof : bool
Whether to return out of fold predictions on the logs
verbose: bool
Whether to show more information about the cross validation or not
Returns
----------
A list of log-like dictionary evaluations.
Expand All @@ -134,7 +143,8 @@ def validator(train_data: pd.DataFrame,

def fold_iter(fold: Tuple[int, Tuple[pd.Index, pd.Index]]) -> LogType:
(fold_num, (train_index, test_indexes)) = fold
return validator_iteration(train_data, train_index, test_indexes, fold_num, train_fn, eval_fn, predict_oof)
return validator_iteration(train_data, train_index, test_indexes, fold_num,
train_fn, eval_fn, predict_oof, verbose)

zipped_logs = pipe(folds,
enumerate,
Expand Down Expand Up @@ -168,11 +178,12 @@ def parallel_validator_iteration(train_data: pd.DataFrame,
fold: Tuple[int, Tuple[pd.Index, pd.Index]],
train_fn: LearnerFnType,
eval_fn: EvalFnType,
predict_oof: bool) -> LogType:
predict_oof: bool,
verbose: bool = False) -> LogType:
(fold_num, (train_index, test_indexes)) = fold
train_fn = cloudpickle.loads(train_fn)
eval_fn = cloudpickle.loads(eval_fn)
return validator_iteration(train_data, train_index, test_indexes, fold_num, train_fn, eval_fn, predict_oof)
return validator_iteration(train_data, train_index, test_indexes, fold_num, train_fn, eval_fn, predict_oof, verbose)


@curry
Expand All @@ -181,7 +192,8 @@ def parallel_validator(train_data: pd.DataFrame,
train_fn: LearnerFnType,
eval_fn: EvalFnType,
n_jobs: int = 1,
predict_oof: bool = False) -> ValidatorReturnType:
predict_oof: bool = False,
verbose: bool = False) -> ValidatorReturnType:
"""
Splits the training data into folds given by the split function and
performs a train-evaluation sequence on each fold. Tries to run each
Expand Down Expand Up @@ -213,6 +225,9 @@ def parallel_validator(train_data: pd.DataFrame,
predict_oof : bool
Whether to return out of fold predictions on the logs
verbose: bool
Whether to show more information about the cross validation or not
Returns
----------
A list log-like dictionary evaluations.
Expand All @@ -223,7 +238,7 @@ def parallel_validator(train_data: pd.DataFrame,
dumped_eval_fn = cloudpickle.dumps(eval_fn)

result = Parallel(n_jobs=n_jobs, backend="threading")(
delayed(parallel_validator_iteration)(train_data, x, dumped_train_fn, dumped_eval_fn, predict_oof)
delayed(parallel_validator_iteration)(train_data, x, dumped_train_fn, dumped_eval_fn, predict_oof, verbose)
for x in enumerate(folds))
gc.collect()

Expand Down
8 changes: 4 additions & 4 deletions tests/training/test_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ def test_capper():

expected2 = pd.DataFrame({"feat1": [7, 9], "feat2": [75, None]})

pred_fn1, data1, log = capper(input_df, ["feat1", "feat2"], {"feat1": 9.0})
pred_fn1, data1, log = capper(input_df, ["feat1", "feat2"], {"feat1": 9})
pred_fn2, data2, log = capper(
input_df, ["feat1", "feat2"], {"feat1": 9.0}, suffix="_suffix"
input_df, ["feat1", "feat2"], {"feat1": 9}, suffix="_suffix"
)
pred_fn3, data3, log = capper(
input_df, ["feat1", "feat2"], {"feat1": 9.0}, prefix="prefix_"
input_df, ["feat1", "feat2"], {"feat1": 9}, prefix="prefix_"
)
pred_fn4, data4, log = capper(
input_df,
["feat1", "feat2"],
{"feat1": 9.0},
{"feat1": 9},
columns_mapping={"feat1": "feat1_raw", "feat2": "feat2_raw"},
)

Expand Down

0 comments on commit 0ee6ab9

Please sign in to comment.