intel · samir-nasibli · Aug 14, 2023 · Jul 27, 2023 · Jul 27, 2023 · Jul 27, 2023
diff --git a/onedal/tests/utils/_dataframes_support.py b/onedal/tests/utils/_dataframes_support.py
@@ -0,0 +1,82 @@
+# ===============================================================================
+# Copyright 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===============================================================================
+
+import pytest
+
+try:
+    import dpctl
+    import dpctl.tensor as dpt
+
+    dpctl_available = True
+except ImportError:
+    dpctl_available = False
+
+try:
+    import dpnp
+
+    dpnp_available = True
+except ImportError:
+    dpnp_available = False
+
+import numpy as np
+
+from onedal.tests.utils._device_selection import get_queues
+
+
+def get_dataframes_and_queues(
+    dataframe_filter_="numpy,dpnp,dpctl", device_filter_="cpu,gpu"
+):
+    """Build ``pytest.param`` entries pairing a dataframe name with a queue.
+
+    Intended for
+    ``@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())``.
+
+    Parameters
+    ----------
+    dataframe_filter_ : str, default="numpy,dpnp,dpctl"
+        Names of the dataframe formats to include. A format is selected when
+        its name occurs in this string.
+    device_filter_ : str, default="cpu,gpu"
+        Device filter forwarded unchanged to ``get_queues``.
+
+    Returns
+    -------
+    list
+        ``pytest.param(dataframe_name, queue, id=...)`` entries. "numpy" is
+        paired with ``None`` as its queue; "dpctl" and "dpnp" get one entry
+        per item returned by ``get_queues`` and are left out when the
+        corresponding package could not be imported.
+    """
+    dataframes_and_queues = []
+
+    # Honor the filter for numpy as well, so that e.g. ``dataframe_filter_="dpnp"``
+    # does not silently add a numpy case.
+    if "numpy" in dataframe_filter_:
+        dataframes_and_queues.append(pytest.param("numpy", None, id="numpy"))
+
+    def get_df_and_q(dataframe: str):
+        # NOTE(review): assumes every item from `get_queues` exposes `.id` and
+        # `.values` like a `pytest.param` whose first value is the queue --
+        # confirm against `onedal.tests.utils._device_selection`.
+        df_and_q = []
+        for queue in get_queues(device_filter_):
+            # `param_id` instead of `id` to avoid shadowing the builtin.
+            param_id = "{}-{}".format(dataframe, queue.id)
+            df_and_q.append(pytest.param(dataframe, queue.values[0], id=param_id))
+        return df_and_q
+
+    if dpctl_available and "dpctl" in dataframe_filter_:
+        dataframes_and_queues.extend(get_df_and_q("dpctl"))
+    if dpnp_available and "dpnp" in dataframe_filter_:
+        dataframes_and_queues.extend(get_df_and_q("dpnp"))
+    return dataframes_and_queues
+
+
+def _as_numpy(obj, *args, **kwargs):
+    """Return ``obj`` converted to a NumPy array.
+
+    Dispatches on the type of ``obj``: a ``dpnp.ndarray`` is converted with
+    its ``asnumpy`` method, a ``dpctl.tensor.usm_ndarray`` with
+    ``dpt.to_numpy``, and anything else with ``np.asarray``. The dpnp/dpctl
+    checks run only when the respective package was imported successfully.
+    Extra ``*args`` / ``**kwargs`` are forwarded to whichever call is used.
+    """
+    if dpnp_available and isinstance(obj, dpnp.ndarray):
+        converted = obj.asnumpy(*args, **kwargs)
+    elif dpctl_available and isinstance(obj, dpt.usm_ndarray):
+        converted = dpt.to_numpy(obj, *args, **kwargs)
+    else:
+        # Host-side fallback for lists, NumPy arrays and other array-likes.
+        converted = np.asarray(obj, *args, **kwargs)
+    return converted
+
+
+def _convert_to_dataframe(obj, sycl_queue=None, target_df=None, *args, **kwargs):
+    """Convert ``obj`` to the dataframe format named by ``target_df``.
+
+    Parameters
+    ----------
+    obj : object
+        Input data; handed to the ``asarray`` function of the target library.
+    sycl_queue : optional
+        Forwarded as ``sycl_queue`` to ``dpnp.asarray`` / ``dpt.asarray``.
+        Ignored when ``target_df`` is ``None`` or ``"numpy"``.
+    target_df : {None, "numpy", "dpnp", "dpctl"}, default=None
+        Requested format. ``None`` returns ``obj`` unchanged.
+    *args, **kwargs
+        Extra arguments forwarded to the selected ``asarray`` call.
+
+    Returns
+    -------
+    object
+        ``obj`` itself for ``target_df=None``, otherwise the result of the
+        selected library's ``asarray``.
+
+    Raises
+    ------
+    ValueError
+        If ``target_df`` is not one of the supported names.
+    """
+    if target_df is None:
+        return obj
+    # Numpy ndarray.
+    # `sycl_queue` arg is ignored.
+    if target_df == "numpy":
+        return np.asarray(obj, *args, **kwargs)
+    # DPNP ndarray; always requested with usm_type="device".
+    if target_df == "dpnp":
+        return dpnp.asarray(
+            obj, *args, usm_type="device", sycl_queue=sycl_queue, **kwargs
+        )
+    # DPCtl tensor; always requested with usm_type="device".
+    if target_df == "dpctl":
+        return dpt.asarray(
+            obj, *args, usm_type="device", sycl_queue=sycl_queue, **kwargs
+        )
+    # An unknown name used to fall through and return None, which only
+    # surfaced later as a confusing failure inside the estimator under test.
+    raise ValueError(
+        "Unsupported dataframe conversion: target_df={!r}".format(target_df)
+    )
diff --git a/sklearnex/model_selection/tests/test_model_selection.py b/sklearnex/model_selection/tests/test_model_selection.py
@@ -19,6 +19,8 @@
 from numpy.testing import assert_allclose
 
 
+# TODO:
+# add pytest params for checking different dataframe inputs/outputs.
 def test_sklearnex_import_train_test_split():
     from sklearnex.model_selection import train_test_split
 

diff --git a/sklearnex/neighbors/tests/test_neighbors.py b/sklearnex/neighbors/tests/test_neighbors.py
@@ -16,45 +16,69 @@
 # ===============================================================================
 
 import numpy as np
+import pytest
 from numpy.testing import assert_allclose
 
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
 
-def test_sklearnex_import_knn_classifier():
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_knn_classifier(dataframe, queue):
     from sklearnex.neighbors import KNeighborsClassifier
 
-    X = [[0], [1], [2], [3]]
-    y = [0, 0, 1, 1]
+    X = _convert_to_dataframe([[0], [1], [2], [3]], sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe([0, 0, 1, 1], sycl_queue=queue, target_df=dataframe)
     neigh = KNeighborsClassifier(n_neighbors=3).fit(X, y)
+    y_test = _convert_to_dataframe([[1.1]], sycl_queue=queue, target_df=dataframe)
+    pred = _as_numpy(neigh.predict(y_test))
     assert "sklearnex" in neigh.__module__
-    assert_allclose(neigh.predict([[1.1]]), [0])
+    assert_allclose(pred, [0])
 
 
-def test_sklearnex_import_knn_regression():
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_knn_regression(dataframe, queue):
     from sklearnex.neighbors import KNeighborsRegressor
 
-    X = [[0], [1], [2], [3]]
-    y = [0, 0, 1, 1]
+    X = _convert_to_dataframe([[0], [1], [2], [3]], sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe([0, 0, 1, 1], sycl_queue=queue, target_df=dataframe)
     neigh = KNeighborsRegressor(n_neighbors=2).fit(X, y)
+    y_test = _convert_to_dataframe([[1.5]], sycl_queue=queue, target_df=dataframe)
+    pred = _as_numpy(neigh.predict(y_test))
     assert "sklearnex" in neigh.__module__
-    assert_allclose(neigh.predict([[1.5]]), [0.5])
+    assert_allclose(pred, [0.5])
 
 
-def test_sklearnex_import_nn():
+# TODO: investigate the failure for `dpnp.ndarray` and
+# `dpctl.tensor.usm_ndarray` inputs.
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(dataframe_filter_="numpy")
+)
+def test_sklearnex_import_nn(dataframe, queue):
     from sklearnex.neighbors import NearestNeighbors
 
     X = [[0, 0, 2], [1, 0, 0], [0, 0, 1]]
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    test = _convert_to_dataframe([[0, 0, 1.3]], sycl_queue=queue, target_df=dataframe)
     neigh = NearestNeighbors(n_neighbors=2).fit(X)
+    result = neigh.kneighbors(test, 2, return_distance=False)
+    result = _as_numpy(result)
     assert "sklearnex" in neigh.__module__
-    result = neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False)
     assert_allclose(result, [[2, 0]])
 
 
-def test_sklearnex_import_lof():
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_lof(dataframe, queue):
     from sklearnex.neighbors import LocalOutlierFactor
 
     X = [[7, 7, 7], [1, 0, 0], [0, 0, 1], [0, 0, 1]]
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     lof = LocalOutlierFactor(n_neighbors=2)
     result = lof.fit_predict(X)
+    result = _as_numpy(result)
     assert hasattr(lof, "_knn")
     assert "sklearnex" in lof.__module__
     assert "sklearnex" in lof._knn.__module__

diff --git a/sklearnex/preview/decomposition/tests/test_preview_pca.py b/sklearnex/preview/decomposition/tests/test_preview_pca.py
@@ -16,14 +16,23 @@
 # ===============================================================================
 
 import numpy as np
+import pytest
 from numpy.testing import assert_allclose
 
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
 
-def test_sklearnex_import():
+
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import(dataframe, queue):
     from sklearnex.preview.decomposition import PCA
 
-    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
+    X = [[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
     pca = PCA(n_components=2, svd_solver="full").fit(X)
     assert "sklearnex" in pca.__module__
     assert hasattr(pca, "_onedal_estimator")
-    assert_allclose(pca.singular_values_, [6.30061232, 0.54980396])
+    assert_allclose(_as_numpy(pca.singular_values_), [6.30061232, 0.54980396])
diff --git a/sklearnex/preview/ensemble/tests/test_preview_ensemble.py b/sklearnex/preview/ensemble/tests/test_preview_ensemble.py
@@ -15,13 +15,20 @@
 # limitations under the License.
 # ===============================================================================
 
+import pytest
 from numpy.testing import assert_allclose
 from sklearn.datasets import make_classification, make_regression
 
 from daal4py.sklearn._utils import daal_check_version
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
 
 
-def test_sklearnex_import_rf_classifier():
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_rf_classifier(dataframe, queue):
     from sklearnex.preview.ensemble import RandomForestClassifier
 
     X, y = make_classification(
@@ -32,22 +39,36 @@ def test_sklearnex_import_rf_classifier():
         random_state=0,
         shuffle=False,
     )
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
     rf = RandomForestClassifier(max_depth=2, random_state=0).fit(X, y)
     assert "sklearnex.preview" in rf.__module__
-    assert_allclose([1], rf.predict([[0, 0, 0, 0]]))
+    assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
 
 
-def test_sklearnex_import_rf_regression():
+# TODO: investigate the failure for `dpnp.ndarray` and
+# `dpctl.tensor.usm_ndarray` inputs on GPU devices.
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
+)
+def test_sklearnex_import_rf_regression(dataframe, queue):
     from sklearnex.preview.ensemble import RandomForestRegressor
 
     X, y = make_regression(n_features=4, n_informative=2, random_state=0, shuffle=False)
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
     rf = RandomForestRegressor(max_depth=2, random_state=0).fit(X, y)
     assert "sklearnex.preview" in rf.__module__
-    pred = rf.predict([[0, 0, 0, 0]])
+    pred = _as_numpy(rf.predict([[0, 0, 0, 0]]))
     assert_allclose([-6.839], pred, atol=1e-2)
 
 
-def test_sklearnex_import_et_classifier():
+# TODO: investigate the failure for `dpnp.ndarray` and
+# `dpctl.tensor.usm_ndarray` inputs on GPU devices.
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
+)
+def test_sklearnex_import_et_classifier(dataframe, queue):
     from sklearnex.preview.ensemble import ExtraTreesClassifier
 
     X, y = make_classification(
@@ -58,22 +79,31 @@ def test_sklearnex_import_et_classifier():
         random_state=0,
         shuffle=False,
     )
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
     # For the 2023.2 release, random_state is not supported
     # defaults to seed=777, although it is set to 0
     rf = ExtraTreesClassifier(max_depth=2, random_state=0).fit(X, y)
     assert "sklearnex" in rf.__module__
-    assert_allclose([1], rf.predict([[0, 0, 0, 0]]))
+    assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]])))
 
 
-def test_sklearnex_import_et_regression():
+# TODO: investigate the failure for `dpnp.ndarray` and
+# `dpctl.tensor.usm_ndarray` inputs on GPU devices.
+@pytest.mark.parametrize(
+    "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu")
+)
+def test_sklearnex_import_et_regression(dataframe, queue):
     from sklearnex.preview.ensemble import ExtraTreesRegressor
 
     X, y = make_regression(n_features=4, n_informative=2, random_state=0, shuffle=False)
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
     # For the 2023.2 release, random_state is not supported
     # defaults to seed=777, although it is set to 0
     rf = ExtraTreesRegressor(max_depth=2, random_state=0).fit(X, y)
     assert "sklearnex" in rf.__module__
-    pred = rf.predict([[0, 0, 0, 0]])
+    pred = _as_numpy(rf.predict([[0, 0, 0, 0]]))
     if daal_check_version((2023, "P", 200)):
         assert_allclose([27.138], pred, atol=1e-2)
     else:

diff --git a/sklearnex/preview/linear_model/tests/test_preview_linear.py b/sklearnex/preview/linear_model/tests/test_preview_linear.py
@@ -16,23 +16,32 @@
 # ===============================================================================
 
 import numpy as np
+import pytest
 from numpy.testing import assert_allclose
 from sklearn.datasets import make_regression
 
 from daal4py.sklearn._utils import daal_check_version
+from onedal.tests.utils._dataframes_support import (
+    _as_numpy,
+    _convert_to_dataframe,
+    get_dataframes_and_queues,
+)
 
 
-def test_sklearnex_import_linear():
+@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+def test_sklearnex_import_linear(dataframe, queue):
     from sklearnex.preview.linear_model import LinearRegression
 
     X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
     y = np.dot(X, np.array([1, 2])) + 3
+    X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+    y = _convert_to_dataframe(y, sycl_queue=queue, target_df=dataframe)
     linreg = LinearRegression().fit(X, y)
     if daal_check_version((2023, "P", 100)):
         assert "sklearnex" in linreg.__module__
         assert hasattr(linreg, "_onedal_estimator")
     else:
         assert "daal4py" in linreg.__module__
     assert linreg.n_features_in_ == 2
-    assert_allclose(linreg.intercept_, 3.0)
-    assert_allclose(linreg.coef_, [1.0, 2.0])
+    assert_allclose(_as_numpy(linreg.intercept_), 3.0)
+    assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0])