diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b05f8e6..9a6e117d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [1.22.0] - 2020-11-18 +- **Enhancement** + - Add verbose parameter to `validator` and `parallel_validator` + ## [1.21.0] - 2020-10-02 - **Enhancement** - Now transformers can create a new column instead of replace the input diff --git a/src/fklearn/resources/VERSION b/src/fklearn/resources/VERSION index 3500250a..57807d6d 100644 --- a/src/fklearn/resources/VERSION +++ b/src/fklearn/resources/VERSION @@ -1 +1 @@ -1.21.0 +1.22.0 diff --git a/src/fklearn/validation/validator.py b/src/fklearn/validation/validator.py index 1fc1ec9f..98ef37c1 100644 --- a/src/fklearn/validation/validator.py +++ b/src/fklearn/validation/validator.py @@ -12,6 +12,7 @@ from fklearn.types import EvalFnType, LearnerFnType, LogType from fklearn.types import SplitterFnType, ValidatorReturnType, PerturbFnType +from tqdm import tqdm def validator_iteration(data: pd.DataFrame, @@ -20,7 +21,8 @@ def validator_iteration(data: pd.DataFrame, fold_num: int, train_fn: LearnerFnType, eval_fn: EvalFnType, - predict_oof: bool = False) -> LogType: + predict_oof: bool = False, + verbose: bool = False) -> LogType: """ Perform an iteration of train test split, training and evaluation. 
@@ -65,7 +67,10 @@ def validator_iteration(data: pd.DataFrame, eval_results = [] oof_predictions = [] - for test_index in test_indexes: + + if verbose: + print(f"Running validation for {fold_num} fold.") + for test_index in (tqdm(test_indexes) if verbose else test_indexes): test_predictions = predict_fn(data.iloc[test_index]) eval_results.append(eval_fn(test_predictions)) if predict_oof: @@ -85,7 +90,8 @@ def validator(train_data: pd.DataFrame, eval_fn: EvalFnType, perturb_fn_train: PerturbFnType = identity, perturb_fn_test: PerturbFnType = identity, - predict_oof: bool = False) -> ValidatorReturnType: + predict_oof: bool = False, + verbose: bool = False) -> ValidatorReturnType: """ Splits the training data into folds given by the split function and performs a train-evaluation sequence on each fold by calling @@ -122,6 +128,9 @@ def validator(train_data: pd.DataFrame, predict_oof : bool Whether to return out of fold predictions on the logs + verbose: bool + Whether to show more information about the cross validation or not + Returns ---------- A list of log-like dictionary evaluations. 
@@ -134,7 +143,8 @@ def validator(train_data: pd.DataFrame, def fold_iter(fold: Tuple[int, Tuple[pd.Index, pd.Index]]) -> LogType: (fold_num, (train_index, test_indexes)) = fold - return validator_iteration(train_data, train_index, test_indexes, fold_num, train_fn, eval_fn, predict_oof) + return validator_iteration(train_data, train_index, test_indexes, fold_num, + train_fn, eval_fn, predict_oof, verbose) zipped_logs = pipe(folds, enumerate, @@ -168,11 +178,12 @@ def parallel_validator_iteration(train_data: pd.DataFrame, fold: Tuple[int, Tuple[pd.Index, pd.Index]], train_fn: LearnerFnType, eval_fn: EvalFnType, - predict_oof: bool) -> LogType: + predict_oof: bool, + verbose: bool = False) -> LogType: (fold_num, (train_index, test_indexes)) = fold train_fn = cloudpickle.loads(train_fn) eval_fn = cloudpickle.loads(eval_fn) - return validator_iteration(train_data, train_index, test_indexes, fold_num, train_fn, eval_fn, predict_oof) + return validator_iteration(train_data, train_index, test_indexes, fold_num, train_fn, eval_fn, predict_oof, verbose) @curry @@ -181,7 +192,8 @@ def parallel_validator(train_data: pd.DataFrame, train_fn: LearnerFnType, eval_fn: EvalFnType, n_jobs: int = 1, - predict_oof: bool = False) -> ValidatorReturnType: + predict_oof: bool = False, + verbose: bool = False) -> ValidatorReturnType: """ Splits the training data into folds given by the split function and performs a train-evaluation sequence on each fold. Tries to run each @@ -213,6 +225,9 @@ def parallel_validator(train_data: pd.DataFrame, predict_oof : bool Whether to return out of fold predictions on the logs + verbose: bool + Whether to show more information about the cross validation or not + Returns ---------- A list log-like dictionary evaluations. 
@@ -223,7 +238,7 @@ def parallel_validator(train_data: pd.DataFrame, dumped_eval_fn = cloudpickle.dumps(eval_fn) result = Parallel(n_jobs=n_jobs, backend="threading")( - delayed(parallel_validator_iteration)(train_data, x, dumped_train_fn, dumped_eval_fn, predict_oof) + delayed(parallel_validator_iteration)(train_data, x, dumped_train_fn, dumped_eval_fn, predict_oof, verbose) for x in enumerate(folds)) gc.collect() diff --git a/tests/training/test_transformation.py b/tests/training/test_transformation.py index 46c8d511..9578b401 100644 --- a/tests/training/test_transformation.py +++ b/tests/training/test_transformation.py @@ -56,17 +56,17 @@ def test_capper(): expected2 = pd.DataFrame({"feat1": [7, 9], "feat2": [75, None]}) - pred_fn1, data1, log = capper(input_df, ["feat1", "feat2"], {"feat1": 9.0}) + pred_fn1, data1, log = capper(input_df, ["feat1", "feat2"], {"feat1": 9}) pred_fn2, data2, log = capper( - input_df, ["feat1", "feat2"], {"feat1": 9.0}, suffix="_suffix" + input_df, ["feat1", "feat2"], {"feat1": 9}, suffix="_suffix" ) pred_fn3, data3, log = capper( - input_df, ["feat1", "feat2"], {"feat1": 9.0}, prefix="prefix_" + input_df, ["feat1", "feat2"], {"feat1": 9}, prefix="prefix_" ) pred_fn4, data4, log = capper( input_df, ["feat1", "feat2"], - {"feat1": 9.0}, + {"feat1": 9}, columns_mapping={"feat1": "feat1_raw", "feat2": "feat2_raw"}, )