[fix] changes for GitHub actions PR CI for matching oneDAL Nightly-bu…

…ild (uxlfoundation#2076) * Update ci.yml * Update activate_components.bat * Update test_linear.py * Update test_incremental_linear.py * Update test_kmeans.py * Update deselected_tests.yaml * Update deselected_tests.yaml * add deselection mechanism for Non-Intel Hardware * remove warnings * address codefactor recommendations * make explicit * mistake in deselection process * remove bad code * remove bad code * isort fixes * forgotten change to incremental_linear * add more deselections * match uxlfoundation#2081 * fix errors in formatting * correct english * second english correction * remove some deselections * set 2025.2 fail for recheck
samir-nasibli · Oct 4, 2024 · a2b9bf3 · a2b9bf3
1 parent 2fccf44
commit a2b9bf3
Show file tree

Hide file tree

Showing 9 changed files with 98 additions and 21 deletions.
diff --git a/.github/scripts/activate_components.bat b/.github/scripts/activate_components.bat
@@ -22,8 +22,8 @@ rem %3 - dpcpp activate flag
 rem prepare vc
 call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64
 rem prepare icx only if no parameter is given.
-if "%3"=="" call .\dpcpp\compiler\%1\env\vars.bat
+if "%3"=="" call .\oneapi\compiler\%1\env\vars.bat
 rem prepare tbb
-call .\dpcpp\tbb\%2\env\vars.bat
+call .\oneapi\tbb\%2\env\vars.bat
 rem prepare oneDAL
 call .\__release_win\daal\latest\env\vars.bat
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -219,8 +219,8 @@ jobs:
         shell: cmd
         run: |
           call .\venv\Scripts\activate.bat
-          call .\dpcpp\compiler\${{ env.DPCPP_VERSION }}\env\vars.bat
-          call .\dpcpp\compiler\${{ env.DPCPP_VERSION }}\bin\sycl-ls.exe
+          call .\oneapi\compiler\${{ env.DPCPP_VERSION }}\env\vars.bat
+          call .\oneapi\compiler\${{ env.DPCPP_VERSION }}\bin\sycl-ls.exe
           bash .ci/scripts/describe_system.sh
       - name: Build daal4py/sklearnex
         shell: cmd

diff --git a/sklearnex/cluster/k_means.py b/sklearnex/cluster/k_means.py
@@ -102,8 +102,8 @@ def _onedal_fit_supported(self, method_name, X, y=None, sample_weight=None):
             patching_status = PatchingConditionsChain(f"sklearn.cluster.{class_name}.fit")
 
             sample_count = _num_samples(X)
-            self._algorithm = self.algorithm
             supported_algs = ["auto", "full", "lloyd", "elkan"]
+
             if self.algorithm == "elkan":
                 logging.getLogger("sklearnex").info(
                     "oneDAL does not support 'elkan', using 'lloyd' algorithm instead."

diff --git a/sklearnex/cluster/tests/test_kmeans.py b/sklearnex/cluster/tests/test_kmeans.py
@@ -28,6 +28,7 @@
     get_queues,
 )
 from sklearnex import config_context
+from sklearnex.tests.utils import _IS_INTEL
 
 
 def generate_dense_dataset(n_samples, n_features, density, n_clusters):
@@ -45,11 +46,11 @@ def generate_dense_dataset(n_samples, n_features, density, n_clusters):
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
+@pytest.mark.parametrize(
+    "algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
+)
 @pytest.mark.parametrize("init", ["k-means++", "random"])
 def test_sklearnex_import_for_dense_data(dataframe, queue, algorithm, init):
-    if not sklearn_check_version("1.1") and algorithm == "lloyd":
-        pytest.skip("lloyd requires sklearn>=1.1.")
     from sklearnex.cluster import KMeans
 
     X_dense = generate_dense_dataset(1000, 10, 0.5, 3)
@@ -70,7 +71,9 @@ def test_sklearnex_import_for_dense_data(dataframe, queue, algorithm, init):
     reason="Sparse data requires oneDAL>=2024.7.0",
 )
 @pytest.mark.parametrize("queue", get_queues())
-@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
+@pytest.mark.parametrize(
+    "algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
+)
 @pytest.mark.parametrize("init", ["k-means++", "random"])
 def test_sklearnex_import_for_sparse_data(queue, algorithm, init):
     from sklearnex.cluster import KMeans
@@ -86,11 +89,10 @@ def test_sklearnex_import_for_sparse_data(queue, algorithm, init):
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
+@pytest.mark.parametrize(
+    "algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
+)
 def test_results_on_dense_gold_data(dataframe, queue, algorithm):
-    if not sklearn_check_version("1.1") and algorithm == "lloyd":
-        pytest.skip("lloyd requires sklearn>=1.1.")
-
     from sklearnex.cluster import KMeans
 
     X_train = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
@@ -121,15 +123,19 @@ def test_results_on_dense_gold_data(dataframe, queue, algorithm):
 )
 @pytest.mark.parametrize("queue", get_queues())
 @pytest.mark.parametrize("init", ["k-means++", "random", "arraylike"])
-@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
+@pytest.mark.parametrize(
+    "algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
+)
 @pytest.mark.parametrize(
     "dims", [(1000, 10, 0.95, 3), (50000, 100, 0.75, 10), (10000, 10, 0.8, 5)]
 )
 def test_dense_vs_sparse(queue, init, algorithm, dims):
     from sklearnex.cluster import KMeans
 
-    if init == "random":
-        pytest.skip("Random initialization in sparse K-means is buggy.")
+    if init == "random" or (not _IS_INTEL and init == "k-means++"):
+        if daal_check_version((2025, "P", 200)):
+            pytest.fail("Re-verify failure of k-means++ in 2025.2 oneDAL")
+        pytest.skip(f"{init} initialization for sparse K-means is non-conformant.")
 
     # For higher level of sparsity (smaller density) the test may fail
     n_samples, n_features, density, n_clusters = dims

diff --git a/sklearnex/linear_model/tests/test_incremental_linear.py b/sklearnex/linear_model/tests/test_incremental_linear.py
@@ -24,6 +24,7 @@
     get_dataframes_and_queues,
 )
 from sklearnex.linear_model import IncrementalLinearRegression
+from sklearnex.tests.utils import _IS_INTEL
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@@ -129,7 +130,10 @@ def test_sklearnex_partial_fit_multitarget_on_gold_data(
     np_y_pred = _as_numpy(y_pred)
 
     assert inclin.n_features_in_ == 2
-    tol = 7e-6 if dtype == np.float32 else 1e-7
+    tol = 1e-7
+    if dtype == np.float32:
+        tol = 7e-6 if _IS_INTEL else 2e-5
+
     assert_allclose(inclin.coef_, [1.0, 2.0], atol=tol)
     if fit_intercept:
         assert_allclose(inclin.intercept_, 3.0, atol=tol)

diff --git a/sklearnex/linear_model/tests/test_linear.py b/sklearnex/linear_model/tests/test_linear.py
@@ -29,6 +29,7 @@
     _convert_to_dataframe,
     get_dataframes_and_queues,
 )
+from sklearnex.tests.utils import _IS_INTEL
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@@ -56,7 +57,9 @@ def test_sklearnex_import_linear(dataframe, queue, dtype, macro_block):
     assert "sklearnex" in linreg.__module__
     assert linreg.n_features_in_ == 2
 
-    tol = 1e-5 if _as_numpy(linreg.coef_).dtype == np.float32 else 1e-7
+    tol = 1e-7
+    if _as_numpy(linreg.coef_).dtype == np.float32:
+        tol = 1e-5 if _IS_INTEL else 2e-5
     assert_allclose(_as_numpy(linreg.intercept_), 3.0, rtol=tol)
     assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0], rtol=tol)
 

diff --git a/sklearnex/tests/test_run_to_run_stability.py b/sklearnex/tests/test_run_to_run_stability.py
@@ -45,6 +45,7 @@
 )
 from sklearnex.svm import SVC
 from sklearnex.tests.utils import (
+    _IS_INTEL,
     PATCHED_MODELS,
     SPECIAL_INSTANCES,
     call_method,
@@ -154,6 +155,14 @@ def test_standard_estimator_stability(estimator, method, dataframe, queue):
         pytest.skip(f"variation observed in {estimator}.score")
     if estimator in ["IncrementalEmpiricalCovariance"] and method == "mahalanobis":
         pytest.skip("allowed fallback to sklearn occurs")
+    if (
+        not _IS_INTEL
+        and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
+        and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
+    ):
+        if daal_check_version((2025, "P", 200)):
+            pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
+        pytest.skip(f"{estimator} shows instability on Non-Intel hardware")
 
     if "NearestNeighbors" in estimator and "radius" in method:
         pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
@@ -182,6 +191,14 @@ def test_special_estimator_stability(estimator, method, dataframe, queue):
         pytest.skip(f"variation observed in KMeans.score")
     if "NearestNeighbors" in estimator and "radius" in method:
         pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
+    if (
+        not _IS_INTEL
+        and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
+        and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
+    ):
+        if daal_check_version((2025, "P", 200)):
+            pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
+        pytest.skip(f"{estimator} shows instability on Non-Intel hardware")
 
     est = SPECIAL_INSTANCES[estimator]
 
@@ -200,11 +217,25 @@ def test_special_estimator_stability(estimator, method, dataframe, queue):
 @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy,array_api"))
 @pytest.mark.parametrize("estimator, method", gen_models_info(SPARSE_INSTANCES))
 def test_sparse_estimator_stability(estimator, method, dataframe, queue):
-    if "KMeans" in estimator and method == "score" and queue == None:
-        pytest.skip(f"variation observed in KMeans.score")
-
+    if "KMeans" in estimator and method in "score" and queue == None:
+        pytest.skip(f"variation observed in KMeans.{method}")
+    if (
+        not daal_check_version((2025, "P", 0))
+        and "KMeans()" in estimator
+        and queue == None
+    ):
+        pytest.skip(f"variation observed in KMeans.{method} in 2024.7 oneDAL")
     if "NearestNeighbors" in estimator and "radius" in method:
         pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
+    if (
+        not _IS_INTEL
+        and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
+        and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
+    ):
+        if daal_check_version((2025, "P", 200)):
+            pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
+        pytest.skip(f"{estimator} shows instability on Non-Intel hardware")
+
     est = SPARSE_INSTANCES[estimator]
 
     if method and not hasattr(est, method):
@@ -228,6 +259,14 @@ def test_other_estimator_stability(estimator, method, dataframe, queue):
         pytest.skip(f"variation observed in KMeans.score")
     if "NearestNeighbors" in estimator and "radius" in method:
         pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
+    if (
+        not _IS_INTEL
+        and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
+        and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
+    ):
+        if daal_check_version((2025, "P", 200)):
+            pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
+        pytest.skip(f"{estimator} shows instability on Non-Intel hardware")
 
     est = STABILITY_INSTANCES[estimator]
 

diff --git a/sklearnex/tests/utils/__init__.py b/sklearnex/tests/utils/__init__.py
@@ -21,6 +21,7 @@
     SPECIAL_INSTANCES,
     UNPATCHED_FUNCTIONS,
     UNPATCHED_MODELS,
+    _get_processor_info,
     call_method,
     gen_dataset,
     gen_models_info,
@@ -39,3 +40,5 @@
     "gen_dataset",
     "sklearn_clone_dict",
 ]
+
+_IS_INTEL = "GenuineIntel" in _get_processor_info()
diff --git a/sklearnex/tests/utils/base.py b/sklearnex/tests/utils/base.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 # ==============================================================================
 
+import platform
+import subprocess
 from functools import partial
 from inspect import Parameter, getattr_static, isclass, signature
 
@@ -344,3 +346,23 @@ def gen_dataset(
     np.uint32,
     np.uint64,
 ]
+
+
+def _get_processor_info():
+    """Return a platform-specific text description of the host CPU.
+
+    The result is meant for substring checks such as
+    ``"GenuineIntel" in _get_processor_info()``.
+
+    Returns
+    -------
+    str
+        * Linux: the stripped contents of ``/proc/cpuinfo``.
+        * Windows: the value of ``platform.processor()``.
+        * macOS: the ``machdep.cpu.vendor`` and ``machdep.cpu.brand_string``
+          sysctl values that could be read, joined by newlines.
+        * Any other platform, or when the probe fails: an empty string.
+    """
+    system = platform.system()
+
+    if system == "Linux":
+        # Read procfs directly instead of spawning ``cat`` from a hard-coded
+        # location that is not guaranteed to exist on every distribution.
+        try:
+            with open("/proc/cpuinfo", encoding="utf-8", errors="replace") as cpuinfo:
+                return cpuinfo.read().strip()
+        except OSError:
+            # Unknown CPU is reported the same way as an unknown platform.
+            return ""
+
+    if system == "Windows":
+        return platform.processor()
+
+    if system == "Darwin":
+        # macOS ships sysctl in /usr/sbin; the previously used /usr/bin path
+        # is kept only as a fallback. The vendor id is queried in addition to
+        # the brand string because the brand string alone does not carry the
+        # "GenuineIntel" marker. Keys are queried one at a time so that a key
+        # missing on a given machine does not discard the other value.
+        values = []
+        for key in ("machdep.cpu.vendor", "machdep.cpu.brand_string"):
+            for sysctl in ("/usr/sbin/sysctl", "/usr/bin/sysctl"):
+                try:
+                    raw = subprocess.check_output(
+                        [sysctl, "-n", key], stderr=subprocess.DEVNULL
+                    )
+                except (OSError, subprocess.CalledProcessError):
+                    continue
+                values.append(raw.strip().decode("utf-8", errors="replace"))
+                break
+        return "\n".join(values)
+
+    return ""