diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index a9ab610f9e..cde7647c97 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -12,6 +12,7 @@ Release Notes * Fixes * Updated ``get_best_sampler_for_data`` to consider all non-numeric datatypes as categorical for SMOTE :pr:`2590` * Fixed inconsistent test results from `TargetDistributionDataCheck` :pr:`2608` + * Adopted vectorized pd.NA checking for Woodwork 0.5.1 support :pr:`2626` * Changes * Renamed SMOTE samplers to SMOTE oversampler :pr:`2595` * Changed ``partial_dependence`` and ``graph_partial_dependence`` to raise a ``PartialDependenceError`` instead of ``ValueError``. This is not a breaking change because ``PartialDependenceError`` is a subclass of ``ValueError`` :pr:`2604` diff --git a/evalml/tests/data_checks_tests/test_invalid_targets_data_check.py b/evalml/tests/data_checks_tests/test_invalid_targets_data_check.py index 6bfe6b4e65..ce5ebfb20c 100644 --- a/evalml/tests/data_checks_tests/test_invalid_targets_data_check.py +++ b/evalml/tests/data_checks_tests/test_invalid_targets_data_check.py @@ -54,13 +54,6 @@ def test_invalid_target_data_check_nan_error(): assert invalid_targets_check.validate(X, y=pd.Series([np.nan, np.nan, np.nan])) == { "warnings": [], "errors": [ - DataCheckError( - message="Target is unsupported Unknown type. Valid Woodwork " - "logical types include: integer, double, boolean", - data_check_name=invalid_targets_data_check_name, - message_code=DataCheckMessageCode.TARGET_UNSUPPORTED_TYPE, - details={"unsupported_type": "unknown"}, - ).to_dict(), DataCheckError( message="Target is either empty or fully null.", data_check_name=invalid_targets_data_check_name, @@ -794,13 +787,6 @@ def test_invalid_target_data_action_for_all_null(problem_type): expected = { "warnings": [], "errors": [ - DataCheckError( - message="Target is unsupported Unknown type. Valid Woodwork " - "logical types include: integer, double, boolean", - data_check_name=invalid_targets_data_check_name, - message_code=DataCheckMessageCode.TARGET_UNSUPPORTED_TYPE, - details={"unsupported_type": "unknown"}, - ).to_dict(), DataCheckError( message="Target is either empty or fully null.", data_check_name=invalid_targets_data_check_name, diff --git a/evalml/utils/woodwork_utils.py b/evalml/utils/woodwork_utils.py index 84b2e154dd..2dc37cc908 100644 --- a/evalml/utils/woodwork_utils.py +++ b/evalml/utils/woodwork_utils.py @@ -66,7 +66,7 @@ def infer_feature_types(data, feature_types=None): def convert_all_nan_unknown_to_double(data): def is_column_pd_na(data, col): - return all([isinstance(x, type(pd.NA)) for x in data[col]]) + return all(data[col].isna()) def is_column_unknown(data, col): return isinstance(data.ww.logical_types[col], Unknown) @@ -100,7 +100,7 @@ def is_column_unknown(data, col): return convert_all_nan_unknown_to_double(data) if isinstance(data, pd.Series): - if all([isinstance(x, type(pd.NA)) for x in data]): + if all(data.isna()): data = data.replace(pd.NA, np.nan) feature_types = "Double" return ww.init_series(data, logical_type=feature_types)