diff --git a/onedal/tests/utils/_dataframes_support.py b/onedal/tests/utils/_dataframes_support.py new file mode 100644 index 0000000000..5e3fdbdecf --- /dev/null +++ b/onedal/tests/utils/_dataframes_support.py @@ -0,0 +1,83 @@ +# =============================================================================== +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =============================================================================== + +import pytest + +try: + import dpctl + import dpctl.tensor as dpt + + dpctl_available = True +except ImportError: + dpctl_available = False + +try: + import dpnp + + dpnp_available = True +except ImportError: + dpnp_available = False + +import numpy as np + +from onedal.tests.utils._device_selection import get_queues + + +def get_dataframes_and_queues( + dataframe_filter_="numpy,dpnp,dpctl", device_filter_="cpu,gpu" +): + dataframes_and_queues = [ + pytest.param("numpy", None, id="numpy"), + ] + + def get_df_and_q(dataframe: str): + df_and_q = [] + for queue in get_queues(device_filter_): + id = "{}-{}".format(dataframe, queue.id) + df_and_q.append(pytest.param(dataframe, queue.values[0], id=id)) + return df_and_q + + if dpctl_available and "dpctl" in dataframe_filter_: + dataframes_and_queues.extend(get_df_and_q("dpctl")) + if dpnp_available and "dpnp" in dataframe_filter_: + dataframes_and_queues.extend(get_df_and_q("dpnp")) + return dataframes_and_queues + + +def _as_numpy(obj, *args, **kwargs): + if dpnp_available and isinstance(obj, dpnp.ndarray): + return obj.asnumpy(*args, **kwargs) + if dpctl_available and isinstance(obj, dpt.usm_ndarray): + return dpt.to_numpy(obj, *args, **kwargs) + return np.asarray(obj, *args, **kwargs) + + +def _convert_to_dataframe(obj, sycl_queue=None, target_df=None, *args, **kwargs): + if target_df is None: + return obj + # Numpy ndarray. + # `sycl_queue` arg is ignored. + if target_df == "numpy": + return np.asarray(obj, *args, **kwargs) + # DPNP ndarray. + if target_df == "dpnp": + return dpnp.asarray( + obj, usm_type="device", sycl_queue=sycl_queue, *args, **kwargs + ) + # DPCtl tensor. + if target_df == "dpctl": + return dpt.asarray(obj, usm_type="device", sycl_queue=sycl_queue, *args, **kwargs) + raise RuntimeError("Unsupported dataframe conversion") diff --git a/sklearnex/model_selection/tests/test_model_selection.py b/sklearnex/model_selection/tests/test_model_selection.py index 78af6b12e0..276965899c 100755 --- a/sklearnex/model_selection/tests/test_model_selection.py +++ b/sklearnex/model_selection/tests/test_model_selection.py @@ -19,6 +19,8 @@ from numpy.testing import assert_allclose +# TODO: +# add pytest params for checking different dataframe inputs/outputs. def test_sklearnex_import_train_test_split(): from sklearnex.model_selection import train_test_split diff --git a/sklearnex/neighbors/tests/test_neighbors.py b/sklearnex/neighbors/tests/test_neighbors.py index 735f40a4b8..9be17cc6ad 100755 --- a/sklearnex/neighbors/tests/test_neighbors.py +++ b/sklearnex/neighbors/tests/test_neighbors.py @@ -16,45 +16,69 @@ # =============================================================================== import numpy as np +import pytest from numpy.testing import assert_allclose +from onedal.tests.utils._dataframes_support import ( + _as_numpy, + _convert_to_dataframe, + get_dataframes_and_queues, +) -def test_sklearnex_import_knn_classifier(): + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +def test_sklearnex_import_knn_classifier(dataframe, queue): from sklearnex.neighbors import KNeighborsClassifier - X = [[0], [1], [2], [3]] - y = [0, 0, 1, 1] + X = _convert_to_dataframe([[0], [1], [2], [3]], sycl_queue=queue, target_df=dataframe) + y = _convert_to_dataframe([0, 0, 1, 1], sycl_queue=queue, target_df=dataframe) neigh = KNeighborsClassifier(n_neighbors=3).fit(X, y) + y_test = _convert_to_dataframe([[1.1]], sycl_queue=queue, target_df=dataframe) + pred = _as_numpy(neigh.predict(y_test)) assert "sklearnex" in neigh.__module__ - assert_allclose(neigh.predict([[1.1]]), [0]) + assert_allclose(pred, [0]) -def test_sklearnex_import_knn_regression(): +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +def test_sklearnex_import_knn_regression(dataframe, queue): from sklearnex.neighbors import KNeighborsRegressor - X = [[0], [1], [2], [3]] - y = [0, 0, 1, 1] + X = _convert_to_dataframe([[0], [1], [2], [3]], sycl_queue=queue, target_df=dataframe) + y = _convert_to_dataframe([0, 0, 1, 1], sycl_queue=queue, target_df=dataframe) neigh = KNeighborsRegressor(n_neighbors=2).fit(X, y) + y_test = _convert_to_dataframe([[1.5]], sycl_queue=queue, target_df=dataframe) + pred = _as_numpy(neigh.predict(y_test)) assert "sklearnex" in neigh.__module__ - assert_allclose(neigh.predict([[1.5]]), [0.5]) + assert_allclose(pred, [0.5]) -def test_sklearnex_import_nn(): +# TODO: +# investigate failure for `dpnp.ndarrays` and `dpctl.tensors`. +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues(dataframe_filter_="numpy") +) +def test_sklearnex_import_nn(dataframe, queue): from sklearnex.neighbors import NearestNeighbors X = [[0, 0, 2], [1, 0, 0], [0, 0, 1]] + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + test = _convert_to_dataframe([[0, 0, 1.3]], sycl_queue=queue, target_df=dataframe) neigh = NearestNeighbors(n_neighbors=2).fit(X) + result = neigh.kneighbors(test, 2, return_distance=False) + result = _as_numpy(result) assert "sklearnex" in neigh.__module__ - result = neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False) assert_allclose(result, [[2, 0]]) -def test_sklearnex_import_lof(): +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +def test_sklearnex_import_lof(dataframe, queue): from sklearnex.neighbors import LocalOutlierFactor X = [[7, 7, 7], [1, 0, 0], [0, 0, 1], [0, 0, 1]] + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) lof = LocalOutlierFactor(n_neighbors=2) result = lof.fit_predict(X) + result = _as_numpy(result) assert hasattr(lof, "_knn") assert "sklearnex" in lof.__module__ assert "sklearnex" in lof._knn.__module__ diff --git a/sklearnex/preview/decomposition/tests/test_preview_pca.py b/sklearnex/preview/decomposition/tests/test_preview_pca.py index 5a3a891bce..27627b4767 100755 --- a/sklearnex/preview/decomposition/tests/test_preview_pca.py +++ b/sklearnex/preview/decomposition/tests/test_preview_pca.py @@ -16,14 +16,23 @@ # =============================================================================== import numpy as np +import pytest from numpy.testing import assert_allclose +from onedal.tests.utils._dataframes_support import ( + _as_numpy, + _convert_to_dataframe, + get_dataframes_and_queues, +) -def test_sklearnex_import(): + +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +def test_sklearnex_import(dataframe, queue): from sklearnex.preview.decomposition import PCA - X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) + X = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]] + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) pca = PCA(n_components=2, svd_solver="full").fit(X) assert "sklearnex" in pca.__module__ assert hasattr(pca, "_onedal_estimator") - assert_allclose(pca.singular_values_, [6.30061232, 0.54980396]) + assert_allclose(_as_numpy(pca.singular_values_), [6.30061232, 0.54980396]) diff --git a/sklearnex/preview/ensemble/tests/test_preview_ensemble.py b/sklearnex/preview/ensemble/tests/test_preview_ensemble.py index 0a064e8ed0..41568e2ebb 100755 --- a/sklearnex/preview/ensemble/tests/test_preview_ensemble.py +++ b/sklearnex/preview/ensemble/tests/test_preview_ensemble.py @@ -15,13 +15,20 @@ # limitations under the License. # =============================================================================== +import pytest from numpy.testing import assert_allclose from sklearn.datasets import make_classification, make_regression from daal4py.sklearn._utils import daal_check_version +from onedal.tests.utils._dataframes_support import ( + _as_numpy, + _convert_to_dataframe, + get_dataframes_and_queues, +) -def test_sklearnex_import_rf_classifier(): +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +def test_sklearnex_import_rf_classifier(dataframe, queue): from sklearnex.preview.ensemble import RandomForestClassifier X, y = make_classification( @@ -32,22 +39,36 @@ def test_sklearnex_import_rf_classifier(): random_state=0, shuffle=False, ) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) rf = RandomForestClassifier(max_depth=2, random_state=0).fit(X, y) assert "sklearnex.preview" in rf.__module__ - assert_allclose([1], rf.predict([[0, 0, 0, 0]])) + assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]]))) -def test_sklearnex_import_rf_regression(): +# TODO: +# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU` +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu") +) +def test_sklearnex_import_rf_regression(dataframe, queue): from sklearnex.preview.ensemble import RandomForestRegressor X, y = make_regression(n_features=4, n_informative=2, random_state=0, shuffle=False) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) rf = RandomForestRegressor(max_depth=2, random_state=0).fit(X, y) assert "sklearnex.preview" in rf.__module__ - pred = rf.predict([[0, 0, 0, 0]]) + pred = _as_numpy(rf.predict([[0, 0, 0, 0]])) assert_allclose([-6.839], pred, atol=1e-2) -def test_sklearnex_import_et_classifier(): +# TODO: +# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU` +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu") +) +def test_sklearnex_import_et_classifier(dataframe, queue): from sklearnex.preview.ensemble import ExtraTreesClassifier X, y = make_classification( @@ -58,22 +79,31 @@ def test_sklearnex_import_et_classifier(): random_state=0, shuffle=False, ) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) # For the 2023.2 release, random_state is not supported # defaults to seed=777, although it is set to 0 rf = ExtraTreesClassifier(max_depth=2, random_state=0).fit(X, y) assert "sklearnex" in rf.__module__ - assert_allclose([1], rf.predict([[0, 0, 0, 0]])) + assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]]))) -def test_sklearnex_import_et_regression(): +# TODO: +# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU` +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu") +) +def test_sklearnex_import_et_regression(dataframe, queue): from sklearnex.preview.ensemble import ExtraTreesRegressor X, y = make_regression(n_features=4, n_informative=2, random_state=0, shuffle=False) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) # For the 2023.2 release, random_state is not supported # defaults to seed=777, although it is set to 0 rf = ExtraTreesRegressor(max_depth=2, random_state=0).fit(X, y) assert "sklearnex" in rf.__module__ - pred = rf.predict([[0, 0, 0, 0]]) + pred = _as_numpy(rf.predict([[0, 0, 0, 0]])) if daal_check_version((2023, "P", 200)): assert_allclose([27.138], pred, atol=1e-2) else: diff --git a/sklearnex/preview/linear_model/tests/test_preview_linear.py b/sklearnex/preview/linear_model/tests/test_preview_linear.py index 80d00324e4..bb643dfd4b 100755 --- a/sklearnex/preview/linear_model/tests/test_preview_linear.py +++ b/sklearnex/preview/linear_model/tests/test_preview_linear.py @@ -16,17 +16,26 @@ # =============================================================================== import numpy as np +import pytest from numpy.testing import assert_allclose from sklearn.datasets import make_regression from daal4py.sklearn._utils import daal_check_version +from onedal.tests.utils._dataframes_support import ( + _as_numpy, + _convert_to_dataframe, + get_dataframes_and_queues, +) -def test_sklearnex_import_linear(): +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) +def test_sklearnex_import_linear(dataframe, queue): from sklearnex.preview.linear_model import LinearRegression X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]]) y = np.dot(X, np.array([1, 2])) + 3 + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) linreg = LinearRegression().fit(X, y) if daal_check_version((2023, "P", 100)): assert "sklearnex" in linreg.__module__ @@ -34,5 +43,5 @@ def test_sklearnex_import_linear(): else: assert "daal4py" in linreg.__module__ assert linreg.n_features_in_ == 2 - assert_allclose(linreg.intercept_, 3.0) - assert_allclose(linreg.coef_, [1.0, 2.0]) + assert_allclose(_as_numpy(linreg.intercept_), 3.0) + assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0]) diff --git a/sklearnex/svm/tests/test_svm.py b/sklearnex/svm/tests/test_svm.py index 6fcfb3718c..e41dd143bf 100755 --- a/sklearnex/svm/tests/test_svm.py +++ b/sklearnex/svm/tests/test_svm.py @@ -16,48 +16,87 @@ # =============================================================================== import numpy as np +import pytest from numpy.testing import assert_allclose +from onedal.tests.utils._dataframes_support import ( + _as_numpy, + _convert_to_dataframe, + get_dataframes_and_queues, +) -def test_sklearnex_import_svc(): + +# TODO: +# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU` +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu") +) +def test_sklearnex_import_svc(dataframe, queue): from sklearnex.svm import SVC X = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]]) y = np.array([1, 1, 1, 2, 2, 2]) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) svc = SVC(kernel="linear").fit(X, y) assert "daal4py" in svc.__module__ or "sklearnex" in svc.__module__ - assert_allclose(svc.dual_coef_, [[-0.25, 0.25]]) - assert_allclose(svc.support_, [1, 3]) + assert_allclose(_as_numpy(svc.dual_coef_), [[-0.25, 0.25]]) + assert_allclose(_as_numpy(svc.support_), [1, 3]) -def test_sklearnex_import_nusvc(): +# TODO: +# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU` +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu") +) +def test_sklearnex_import_nusvc(dataframe, queue): from sklearnex.svm import NuSVC X = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]]) y = np.array([1, 1, 1, 2, 2, 2]) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) svc = NuSVC(kernel="linear").fit(X, y) assert "daal4py" in svc.__module__ or "sklearnex" in svc.__module__ - assert_allclose(svc.dual_coef_, [[-0.04761905, -0.0952381, 0.0952381, 0.04761905]]) - assert_allclose(svc.support_, [0, 1, 3, 4]) + assert_allclose( + _as_numpy(svc.dual_coef_), [[-0.04761905, -0.0952381, 0.0952381, 0.04761905]] + ) + assert_allclose(_as_numpy(svc.support_), [0, 1, 3, 4]) -def test_sklearnex_import_svr(): +# TODO: +# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU` +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu") +) +def test_sklearnex_import_svr(dataframe, queue): from sklearnex.svm import SVR X = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]]) y = np.array([1, 1, 1, 2, 2, 2]) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) svc = SVR(kernel="linear").fit(X, y) assert "daal4py" in svc.__module__ or "sklearnex" in svc.__module__ - assert_allclose(svc.dual_coef_, [[-0.1, 0.1]]) - assert_allclose(svc.support_, [1, 3]) + assert_allclose(_as_numpy(svc.dual_coef_), [[-0.1, 0.1]]) + assert_allclose(_as_numpy(svc.support_), [1, 3]) -def test_sklearnex_import_nusvr(): +# TODO: +# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU` +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu") +) +def test_sklearnex_import_nusvr(dataframe, queue): from sklearnex.svm import NuSVR X = np.array([[-2, -1], [-1, -1], [-1, -2], [+1, +1], [+1, +2], [+2, +1]]) y = np.array([1, 1, 1, 2, 2, 2]) + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe) svc = NuSVR(kernel="linear", nu=0.9).fit(X, y) assert "daal4py" in svc.__module__ or "sklearnex" in svc.__module__ - assert_allclose(svc.dual_coef_, [[-1.0, 0.611111, 1.0, -0.611111]], rtol=1e-3) - assert_allclose(svc.support_, [1, 2, 3, 5]) + assert_allclose( + _as_numpy(svc.dual_coef_), [[-1.0, 0.611111, 1.0, -0.611111]], rtol=1e-3 + ) + assert_allclose(_as_numpy(svc.support_), [1, 2, 3, 5])