diff --git a/.github/scripts/activate_components.bat b/.github/scripts/activate_components.bat
index b99293c4a2..6d2e9427c6 100644
--- a/.github/scripts/activate_components.bat
+++ b/.github/scripts/activate_components.bat
@@ -22,8 +22,8 @@ rem %3 - dpcpp activate flag
 rem prepare vc
 call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64
 rem prepare icx only if no parameter is given.
-if "%3"=="" call .\dpcpp\compiler\%1\env\vars.bat
+if "%3"=="" call .\oneapi\compiler\%1\env\vars.bat
 rem prepare tbb
-call .\dpcpp\tbb\%2\env\vars.bat
+call .\oneapi\tbb\%2\env\vars.bat
 rem prepare oneDAL
 call .\__release_win\daal\latest\env\vars.bat
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 32f3428e77..fd5a059c5e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -219,8 +219,8 @@ jobs:
         shell: cmd
         run: |
           call .\venv\Scripts\activate.bat
-          call .\dpcpp\compiler\${{ env.DPCPP_VERSION }}\env\vars.bat
-          call .\dpcpp\compiler\${{ env.DPCPP_VERSION }}\bin\sycl-ls.exe
+          call .\oneapi\compiler\${{ env.DPCPP_VERSION }}\env\vars.bat
+          call .\oneapi\compiler\${{ env.DPCPP_VERSION }}\bin\sycl-ls.exe
           bash .ci/scripts/describe_system.sh
       - name: Build daal4py/sklearnex
         shell: cmd
diff --git a/sklearnex/cluster/k_means.py b/sklearnex/cluster/k_means.py
index a5cd870965..0b0a84ffb1 100644
--- a/sklearnex/cluster/k_means.py
+++ b/sklearnex/cluster/k_means.py
@@ -102,8 +102,8 @@ def _onedal_fit_supported(self, method_name, X, y=None, sample_weight=None):
         patching_status = PatchingConditionsChain(f"sklearn.cluster.{class_name}.fit")
 
         sample_count = _num_samples(X)
-        self._algorithm = self.algorithm
         supported_algs = ["auto", "full", "lloyd", "elkan"]
+
         if self.algorithm == "elkan":
             logging.getLogger("sklearnex").info(
                 "oneDAL does not support 'elkan', using 'lloyd' algorithm instead."
diff --git a/sklearnex/cluster/tests/test_kmeans.py b/sklearnex/cluster/tests/test_kmeans.py
index a3753e55a6..eb458a6748 100755
--- a/sklearnex/cluster/tests/test_kmeans.py
+++ b/sklearnex/cluster/tests/test_kmeans.py
@@ -28,6 +28,7 @@
     get_queues,
 )
 from sklearnex import config_context
+from sklearnex.tests.utils import _IS_INTEL
 
 
 def generate_dense_dataset(n_samples, n_features, density, n_clusters):
@@ -45,11 +46,11 @@ def generate_dense_dataset(n_samples, n_features, density, n_clusters):
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
+@pytest.mark.parametrize(
+    "algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
+)
 @pytest.mark.parametrize("init", ["k-means++", "random"])
 def test_sklearnex_import_for_dense_data(dataframe, queue, algorithm, init):
-    if not sklearn_check_version("1.1") and algorithm == "lloyd":
-        pytest.skip("lloyd requires sklearn>=1.1.")
     from sklearnex.cluster import KMeans
 
     X_dense = generate_dense_dataset(1000, 10, 0.5, 3)
@@ -70,7 +71,9 @@ def test_sklearnex_import_for_dense_data(dataframe, queue, algorithm, init):
     reason="Sparse data requires oneDAL>=2024.7.0",
 )
 @pytest.mark.parametrize("queue", get_queues())
-@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
+@pytest.mark.parametrize(
+    "algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
+)
 @pytest.mark.parametrize("init", ["k-means++", "random"])
 def test_sklearnex_import_for_sparse_data(queue, algorithm, init):
     from sklearnex.cluster import KMeans
@@ -86,11 +89,10 @@ def test_sklearnex_import_for_sparse_data(queue, algorithm, init):
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
-@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
+@pytest.mark.parametrize(
+    "algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
+)
 def test_results_on_dense_gold_data(dataframe, queue, algorithm):
-    if not sklearn_check_version("1.1") and algorithm == "lloyd":
-        pytest.skip("lloyd requires sklearn>=1.1.")
-
     from sklearnex.cluster import KMeans
 
     X_train = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
@@ -121,15 +123,19 @@ def test_results_on_dense_gold_data(dataframe, queue, algorithm):
 )
 @pytest.mark.parametrize("queue", get_queues())
 @pytest.mark.parametrize("init", ["k-means++", "random", "arraylike"])
-@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
+@pytest.mark.parametrize(
+    "algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
+)
 @pytest.mark.parametrize(
     "dims", [(1000, 10, 0.95, 3), (50000, 100, 0.75, 10), (10000, 10, 0.8, 5)]
 )
 def test_dense_vs_sparse(queue, init, algorithm, dims):
     from sklearnex.cluster import KMeans
 
-    if init == "random":
-        pytest.skip("Random initialization in sparse K-means is buggy.")
+    if init == "random" or (not _IS_INTEL and init == "k-means++"):
+        if daal_check_version((2025, "P", 200)):
+            pytest.fail("Re-verify failure of k-means++ in 2025.2 oneDAL")
+        pytest.skip(f"{init} initialization for sparse K-means is non-conformant.")
 
     # For higher level of sparsity (smaller density) the test may fail
     n_samples, n_features, density, n_clusters = dims
diff --git a/sklearnex/linear_model/tests/test_incremental_linear.py b/sklearnex/linear_model/tests/test_incremental_linear.py
index 20d5c904b6..d4a96b8994 100644
--- a/sklearnex/linear_model/tests/test_incremental_linear.py
+++ b/sklearnex/linear_model/tests/test_incremental_linear.py
@@ -24,6 +24,7 @@
     get_dataframes_and_queues,
 )
 from sklearnex.linear_model import IncrementalLinearRegression
+from sklearnex.tests.utils import _IS_INTEL
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@@ -129,7 +130,10 @@ def test_sklearnex_partial_fit_multitarget_on_gold_data(
     np_y_pred = _as_numpy(y_pred)
 
     assert inclin.n_features_in_ == 2
-    tol = 7e-6 if dtype == np.float32 else 1e-7
+    tol = 1e-7
+    if dtype == np.float32:
+        tol = 7e-6 if _IS_INTEL else 2e-5
+
     assert_allclose(inclin.coef_, [1.0, 2.0], atol=tol)
     if fit_intercept:
         assert_allclose(inclin.intercept_, 3.0, atol=tol)
diff --git a/sklearnex/linear_model/tests/test_linear.py b/sklearnex/linear_model/tests/test_linear.py
index 81a71bd6de..d4a4729df3 100644
--- a/sklearnex/linear_model/tests/test_linear.py
+++ b/sklearnex/linear_model/tests/test_linear.py
@@ -29,6 +29,7 @@
     _convert_to_dataframe,
     get_dataframes_and_queues,
 )
+from sklearnex.tests.utils import _IS_INTEL
 
 
 @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@@ -56,7 +57,9 @@ def test_sklearnex_import_linear(dataframe, queue, dtype, macro_block):
 
     assert "sklearnex" in linreg.__module__
     assert linreg.n_features_in_ == 2
-    tol = 1e-5 if _as_numpy(linreg.coef_).dtype == np.float32 else 1e-7
+    tol = 1e-7
+    if _as_numpy(linreg.coef_).dtype == np.float32:
+        tol = 1e-5 if _IS_INTEL else 2e-5
     assert_allclose(_as_numpy(linreg.intercept_), 3.0, rtol=tol)
     assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0], rtol=tol)
 
diff --git a/sklearnex/tests/test_run_to_run_stability.py b/sklearnex/tests/test_run_to_run_stability.py
index f2e0560a27..8d8892f40f 100755
--- a/sklearnex/tests/test_run_to_run_stability.py
+++ b/sklearnex/tests/test_run_to_run_stability.py
@@ -45,6 +45,7 @@
 )
 from sklearnex.svm import SVC
 from sklearnex.tests.utils import (
+    _IS_INTEL,
     PATCHED_MODELS,
     SPECIAL_INSTANCES,
     call_method,
@@ -154,6 +155,14 @@ def test_standard_estimator_stability(estimator, method, dataframe, queue):
         pytest.skip(f"variation observed in {estimator}.score")
     if estimator in ["IncrementalEmpiricalCovariance"] and method == "mahalanobis":
         pytest.skip("allowed fallback to sklearn occurs")
+    if (
+        not _IS_INTEL
+        and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
+        and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
+    ):
+        if daal_check_version((2025, "P", 200)):
+            pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
+        pytest.skip(f"{estimator} shows instability on Non-Intel hardware")
     if "NearestNeighbors" in estimator and "radius" in method:
         pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
 
@@ -182,6 +191,14 @@ def test_special_estimator_stability(estimator, method, dataframe, queue):
         pytest.skip(f"variation observed in KMeans.score")
     if "NearestNeighbors" in estimator and "radius" in method:
         pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
+    if (
+        not _IS_INTEL
+        and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
+        and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
+    ):
+        if daal_check_version((2025, "P", 200)):
+            pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
+        pytest.skip(f"{estimator} shows instability on Non-Intel hardware")
 
     est = SPECIAL_INSTANCES[estimator]
 
@@ -200,11 +217,25 @@
 @pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy,array_api"))
 @pytest.mark.parametrize("estimator, method", gen_models_info(SPARSE_INSTANCES))
 def test_sparse_estimator_stability(estimator, method, dataframe, queue):
-    if "KMeans" in estimator and method == "score" and queue == None:
-        pytest.skip(f"variation observed in KMeans.score")
-
+    if "KMeans" in estimator and method == "score" and queue == None:
+        pytest.skip(f"variation observed in KMeans.{method}")
+    if (
+        not daal_check_version((2025, "P", 0))
+        and "KMeans()" in estimator
+        and queue == None
+    ):
+        pytest.skip(f"variation observed in KMeans.{method} in 2024.7 oneDAL")
     if "NearestNeighbors" in estimator and "radius" in method:
         pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
+    if (
+        not _IS_INTEL
+        and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
+        and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
+    ):
+        if daal_check_version((2025, "P", 200)):
+            pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
+        pytest.skip(f"{estimator} shows instability on Non-Intel hardware")
+
     est = SPARSE_INSTANCES[estimator]
 
     if method and not hasattr(est, method):
@@ -228,6 +259,14 @@ def test_other_estimator_stability(estimator, method, dataframe, queue):
         pytest.skip(f"variation observed in KMeans.score")
     if "NearestNeighbors" in estimator and "radius" in method:
         pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
+    if (
+        not _IS_INTEL
+        and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
+        and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
+    ):
+        if daal_check_version((2025, "P", 200)):
+            pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
+        pytest.skip(f"{estimator} shows instability on Non-Intel hardware")
 
     est = STABILITY_INSTANCES[estimator]
 
diff --git a/sklearnex/tests/utils/__init__.py b/sklearnex/tests/utils/__init__.py
index 785a44c87b..60ca67fa37 100644
--- a/sklearnex/tests/utils/__init__.py
+++ b/sklearnex/tests/utils/__init__.py
@@ -21,6 +21,7 @@
     SPECIAL_INSTANCES,
     UNPATCHED_FUNCTIONS,
     UNPATCHED_MODELS,
+    _get_processor_info,
     call_method,
     gen_dataset,
     gen_models_info,
@@ -39,3 +40,5 @@
     "gen_dataset",
     "sklearn_clone_dict",
 ]
+
+_IS_INTEL = "GenuineIntel" in _get_processor_info()
diff --git a/sklearnex/tests/utils/base.py b/sklearnex/tests/utils/base.py
index 6361bf8843..4ab317ff12 100755
--- a/sklearnex/tests/utils/base.py
+++ b/sklearnex/tests/utils/base.py
@@ -14,6 +14,8 @@
 # limitations under the License.
 # ==============================================================================
 
+import platform
+import subprocess
 from functools import partial
 from inspect import Parameter, getattr_static, isclass, signature
 
@@ -344,3 +346,23 @@
     np.uint32,
     np.uint64,
 ]
+
+
+def _get_processor_info():
+    proc = ""
+    if platform.system() == "Linux":
+        proc = (
+            subprocess.check_output(["/usr/bin/cat", "/proc/cpuinfo"])
+            .strip()
+            .decode("utf-8")
+        )
+    elif platform.system() == "Windows":
+        proc = platform.processor()
+    elif platform.system() == "Darwin":
+        proc = (
+            subprocess.check_output(["/usr/bin/sysctl", "-n", "machdep.cpu.brand_string"])
+            .strip()
+            .decode("utf-8")
+        )
+
+    return proc
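
Note (not part of the patch above): a minimal usage sketch, assuming sklearnex is installed, of how the new _IS_INTEL flag exported from sklearnex.tests.utils can be consulted to pick a vendor-dependent tolerance, mirroring the pattern added to test_linear.py. The test name and data below are hypothetical.

# Usage sketch (hypothetical test, not part of the patch): loosen the float32
# tolerance on non-Intel hardware via the vendor flag from sklearnex.tests.utils.
import numpy as np
from numpy.testing import assert_allclose

from sklearnex.linear_model import LinearRegression
from sklearnex.tests.utils import _IS_INTEL


def test_linear_tolerance_sketch():
    # Small exactly-linear dataset: y = 1*x0 + 2*x1 + 3
    X = np.array([[1.0, 1.0], [1.0, 2.0], [2.0, 2.0], [2.0, 3.0]], dtype=np.float32)
    y = X @ np.array([1.0, 2.0], dtype=np.float32) + 3.0

    linreg = LinearRegression().fit(X, y)

    # float32 results are less exact; non-Intel CPUs get extra slack.
    tol = 1e-5 if _IS_INTEL else 2e-5
    assert_allclose(linreg.coef_, [1.0, 2.0], rtol=tol)
    assert_allclose(linreg.intercept_, 3.0, rtol=tol)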