From bec3ebbdc6d6ff2a673a99638896d2fa4c11cd85 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Mon, 13 May 2024 14:36:00 -0700 Subject: [PATCH 01/82] BUG: fixing circular import in daal4py/sklearnex device_offloading --- daal4py/sklearn/_device_offload.py | 174 +++++++++++++++++++++++++++-- onedal/common/_policy.py | 11 +- sklearnex/_device_offload.py | 137 +---------------------- 3 files changed, 174 insertions(+), 148 deletions(-) diff --git a/daal4py/sklearn/_device_offload.py b/daal4py/sklearn/_device_offload.py index 80d9595396..e6514fc875 100644 --- a/daal4py/sklearn/_device_offload.py +++ b/daal4py/sklearn/_device_offload.py @@ -14,15 +14,39 @@ # limitations under the License. # ============================================================================== +from collections.abc import Iterable from functools import wraps +import numpy as np + +from daal4py.oneapi import _get_device_name_sycl_ctxt, _get_sycl_ctxt_params + try: - from sklearnex._config import get_config - from sklearnex._device_offload import ( - _copy_to_usm, - _get_global_queue, - _transfer_to_host, - ) + from dpctl import SyclQueue + from dpctl.memory import MemoryUSMDevice, as_usm_memory + from dpctl.tensor import usm_ndarray + + dpctl_available = True +except ImportError: + dpctl_available = False + +try: + import dpnp + + dpnp_available = True +except ImportError: + dpnp_available = False + + +# TODO: +# remove or update +try: + # from sklearnex._config import get_config + # from sklearnex._device_offload import ( + # #_copy_to_usm, + # # _get_global_queue, + # # _transfer_to_host, + # ) _sklearnex_available = True except ImportError: @@ -36,6 +60,129 @@ _sklearnex_available = False +def _copy_to_usm(queue, array): + if not dpctl_available: + raise RuntimeError( + "dpctl need to be installed to work " "with __sycl_usm_array_interface__" + ) + + if hasattr(array, "__array__"): + try: + mem = MemoryUSMDevice(array.nbytes, queue=queue) + mem.copy_from_host(array.tobytes()) + return 
usm_ndarray(array.shape, array.dtype, buffer=mem) + except ValueError as e: + # ValueError will raise if device does not support the dtype + # retry with float32 (needed for fp16 and fp64 support issues) + # try again as float32, if it is a float32 just raise the error. + if array.dtype == np.float32: + raise e + return _copy_to_usm(queue, array.astype(np.float32)) + else: + if isinstance(array, Iterable): + array = [_copy_to_usm(queue, i) for i in array] + return array + + +def _transfer_to_host(queue, *data): + has_usm_data, has_host_data = False, False + + host_data = [] + for item in data: + usm_iface = getattr(item, "__sycl_usm_array_interface__", None) + if usm_iface is not None: + if not dpctl_available: + raise RuntimeError( + "dpctl need to be installed to work " + "with __sycl_usm_array_interface__" + ) + if queue is not None: + if queue.sycl_device != usm_iface["syclobj"].sycl_device: + raise RuntimeError( + "Input data shall be located " "on single target device" + ) + else: + queue = usm_iface["syclobj"] + + buffer = as_usm_memory(item).copy_to_host() + order = "C" + if usm_iface["strides"] is not None: + if usm_iface["strides"][0] < usm_iface["strides"][1]: + order = "F" + item = np.ndarray( + shape=usm_iface["shape"], + dtype=usm_iface["typestr"], + buffer=buffer, + order=order, + ) + has_usm_data = True + else: + has_host_data = True + + mismatch_host_item = usm_iface is None and item is not None and has_usm_data + mismatch_usm_item = usm_iface is not None and has_host_data + + if mismatch_host_item or mismatch_usm_item: + raise RuntimeError("Input data shall be located on single target device") + + host_data.append(item) + return queue, host_data + + +class DummySyclQueue: + """This class is designed to act like dpctl.SyclQueue + to allow device dispatching in scenarios when dpctl is not available""" + + class DummySyclDevice: + def __init__(self, filter_string): + self._filter_string = filter_string + self.is_cpu = "cpu" in filter_string + 
self.is_gpu = "gpu" in filter_string + # TODO: check for possibility of fp64 support + # on other devices in this dummy class + self.has_aspect_fp64 = self.is_cpu + + if not (self.is_cpu): + logging.warning( + "Device support is limited. " + "Please install dpctl for full experience" + ) + + def get_filter_string(self): + return self._filter_string + + def __init__(self, filter_string): + self.sycl_device = self.DummySyclDevice(filter_string) + + +def _get_global_queue(target_offload=None): + d4p_target, _ = _get_device_name_sycl_ctxt(), _get_sycl_ctxt_params() + if d4p_target == "host": + d4p_target = "cpu" + + QueueClass = DummySyclQueue if not dpctl_available else SyclQueue + + if target_offload and target_offload != "auto": + if d4p_target is not None and d4p_target != target_offload: + if not isinstance(target_offload, str): + if d4p_target not in target_offload.sycl_device.get_filter_string(): + raise RuntimeError( + "Cannot use target offload option " + "inside daal4py.oneapi.sycl_context" + ) + else: + raise RuntimeError( + "Cannot use target offload option " + "inside daal4py.oneapi.sycl_context" + ) + if isinstance(target_offload, QueueClass): + return target_offload + return QueueClass(target_offload) + if d4p_target is not None: + return QueueClass(d4p_target) + return None + + def _get_host_inputs(*args, **kwargs): q = _get_global_queue() q, hostargs = _transfer_to_host(q, *args) @@ -51,7 +198,7 @@ def _extract_usm_iface(*args, **kwargs): return getattr(allargs[0], "__sycl_usm_array_interface__", None) -def _run_on_device(func, queue, obj=None, *args, **kwargs): +def _run_on_device(func, queue, obj=None, host_offload=False, *args, **kwargs): def dispatch_by_obj(obj, func, *args, **kwargs): if obj is not None: return func(obj, *args, **kwargs) @@ -61,7 +208,8 @@ def dispatch_by_obj(obj, func, *args, **kwargs): from daal4py.oneapi import _get_in_sycl_ctxt, sycl_context if _get_in_sycl_ctxt() is False: - host_offload = 
get_config()["allow_fallback_to_host"] + # TODO: + # host_offload = get_config()["allow_fallback_to_host"] with sycl_context( "gpu" if queue.sycl_device.is_gpu else "cpu", @@ -71,17 +219,21 @@ def dispatch_by_obj(obj, func, *args, **kwargs): return dispatch_by_obj(obj, func, *args, **kwargs) -def support_usm_ndarray(freefunc=False): +# TODO: +# add daal4py.sklearn.get_config +def support_usm_ndarray(freefunc=False, host_offload=False): def decorator(func): def wrapper_impl(obj, *args, **kwargs): if _sklearnex_available: usm_iface = _extract_usm_iface(*args, **kwargs) q, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) - result = _run_on_device(func, q, obj, *hostargs, **hostkwargs) + result = _run_on_device( + func, q, obj, host_offload, *hostargs, **hostkwargs + ) if usm_iface is not None and hasattr(result, "__array_interface__"): return _copy_to_usm(q, result) return result - return _run_on_device(func, None, obj, *args, **kwargs) + return _run_on_device(func, None, obj, host_offload, *args, **kwargs) if freefunc: diff --git a/onedal/common/_policy.py b/onedal/common/_policy.py index d5991606a6..a45d12e8c6 100644 --- a/onedal/common/_policy.py +++ b/onedal/common/_policy.py @@ -20,7 +20,7 @@ oneapi_is_available = "daal4py.oneapi" in sys.modules if oneapi_is_available: - from daal4py.oneapi import _get_sycl_ctxt, sycl_execution_context + from daal4py.oneapi import _get_sycl_ctxt def _get_policy(queue, *data): @@ -69,10 +69,9 @@ class _DataParallelInteropPolicy(_backend.data_parallel_policy): def __init__(self, queue): self._queue = queue self._d4p_interop = _Daal4PyContextReset() - if "sklearnex" in sys.modules: - from sklearnex._device_offload import DummySyclQueue + from daal4py.sklearn._device_offload import DummySyclQueue - if isinstance(queue, DummySyclQueue): - super().__init__(self._queue.sycl_device.get_filter_string()) - return + if isinstance(queue, DummySyclQueue): + super().__init__(self._queue.sycl_device.get_filter_string()) + return 
super().__init__(self._queue) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 7f409f521e..4c0b2a1420 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -14,16 +14,17 @@ # limitations under the License. # ============================================================================== -import logging import sys from collections.abc import Iterable from functools import wraps -import numpy as np +from daal4py.sklearn._device_offload import ( + _copy_to_usm, + _get_global_queue, + _transfer_to_host, +) try: - from dpctl import SyclQueue - from dpctl.memory import MemoryUSMDevice, as_usm_memory from dpctl.tensor import usm_ndarray dpctl_available = True @@ -38,119 +39,18 @@ dpnp_available = False from ._config import get_config -from ._utils import get_patch_message oneapi_is_available = "daal4py.oneapi" in sys.modules if oneapi_is_available: from daal4py.oneapi import _get_device_name_sycl_ctxt, _get_sycl_ctxt_params -class DummySyclQueue: - """This class is designed to act like dpctl.SyclQueue - to allow device dispatching in scenarios when dpctl is not available""" - - class DummySyclDevice: - def __init__(self, filter_string): - self._filter_string = filter_string - self.is_cpu = "cpu" in filter_string - self.is_gpu = "gpu" in filter_string - # TODO: check for possibility of fp64 support - # on other devices in this dummy class - self.has_aspect_fp64 = self.is_cpu - - if not (self.is_cpu): - logging.warning( - "Device support is limited. 
" - "Please install dpctl for full experience" - ) - - def get_filter_string(self): - return self._filter_string - - def __init__(self, filter_string): - self.sycl_device = self.DummySyclDevice(filter_string) - - def _get_device_info_from_daal4py(): if oneapi_is_available: return _get_device_name_sycl_ctxt(), _get_sycl_ctxt_params() return None, dict() -def _get_global_queue(): - target = get_config()["target_offload"] - d4p_target, _ = _get_device_info_from_daal4py() - if d4p_target == "host": - d4p_target = "cpu" - - QueueClass = DummySyclQueue if not dpctl_available else SyclQueue - - if target != "auto": - if d4p_target is not None and d4p_target != target: - if not isinstance(target, str): - if d4p_target not in target.sycl_device.get_filter_string(): - raise RuntimeError( - "Cannot use target offload option " - "inside daal4py.oneapi.sycl_context" - ) - else: - raise RuntimeError( - "Cannot use target offload option " - "inside daal4py.oneapi.sycl_context" - ) - if isinstance(target, QueueClass): - return target - return QueueClass(target) - if d4p_target is not None: - return QueueClass(d4p_target) - return None - - -def _transfer_to_host(queue, *data): - has_usm_data, has_host_data = False, False - - host_data = [] - for item in data: - usm_iface = getattr(item, "__sycl_usm_array_interface__", None) - if usm_iface is not None: - if not dpctl_available: - raise RuntimeError( - "dpctl need to be installed to work " - "with __sycl_usm_array_interface__" - ) - if queue is not None: - if queue.sycl_device != usm_iface["syclobj"].sycl_device: - raise RuntimeError( - "Input data shall be located " "on single target device" - ) - else: - queue = usm_iface["syclobj"] - - buffer = as_usm_memory(item).copy_to_host() - order = "C" - if usm_iface["strides"] is not None: - if usm_iface["strides"][0] < usm_iface["strides"][1]: - order = "F" - item = np.ndarray( - shape=usm_iface["shape"], - dtype=usm_iface["typestr"], - buffer=buffer, - order=order, - ) - has_usm_data = 
True - else: - has_host_data = True - - mismatch_host_item = usm_iface is None and item is not None and has_usm_data - mismatch_usm_item = usm_iface is not None and has_host_data - - if mismatch_host_item or mismatch_usm_item: - raise RuntimeError("Input data shall be located on single target device") - - host_data.append(item) - return queue, host_data - - def _get_backend(obj, queue, method_name, *data): cpu_device = queue is None or queue.sycl_device.is_cpu gpu_device = queue is not None and queue.sycl_device.is_gpu @@ -185,7 +85,7 @@ def _get_backend(obj, queue, method_name, *data): def dispatch(obj, method_name, branches, *args, **kwargs): - q = _get_global_queue() + q = _get_global_queue(get_config()["target_offload"]) q, hostargs = _transfer_to_host(q, *args) q, hostvalues = _transfer_to_host(q, *kwargs.values()) hostkwargs = dict(zip(kwargs.keys(), hostvalues)) @@ -203,31 +103,6 @@ def dispatch(obj, method_name, branches, *args, **kwargs): ) -def _copy_to_usm(queue, array): - if not dpctl_available: - raise RuntimeError( - "dpctl need to be installed to work " "with __sycl_usm_array_interface__" - ) - - if hasattr(array, "__array__"): - - try: - mem = MemoryUSMDevice(array.nbytes, queue=queue) - mem.copy_from_host(array.tobytes()) - return usm_ndarray(array.shape, array.dtype, buffer=mem) - except ValueError as e: - # ValueError will raise if device does not support the dtype - # retry with float32 (needed for fp16 and fp64 support issues) - # try again as float32, if it is a float32 just raise the error. 
- if array.dtype == np.float32: - raise e - return _copy_to_usm(queue, array.astype(np.float32)) - else: - if isinstance(array, Iterable): - array = [_copy_to_usm(queue, i) for i in array] - return array - - if dpnp_available: def _convert_to_dpnp(array): From 9052a731c9b16a07069531cf0853c3dc60f1a4af Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 15 May 2024 03:20:07 -0700 Subject: [PATCH 02/82] removed onedal4py sklearnex dependence --- daal4py/sklearn/_device_offload.py | 4 ++- onedal/_device_offload.py | 42 ++++++++++-------------------- 2 files changed, 17 insertions(+), 29 deletions(-) diff --git a/daal4py/sklearn/_device_offload.py b/daal4py/sklearn/_device_offload.py index e6514fc875..dbc0d172a6 100644 --- a/daal4py/sklearn/_device_offload.py +++ b/daal4py/sklearn/_device_offload.py @@ -19,7 +19,9 @@ import numpy as np -from daal4py.oneapi import _get_device_name_sycl_ctxt, _get_sycl_ctxt_params +oneapi_is_available = "daal4py.oneapi" in sys.modules +if oneapi_is_available: + from daal4py.oneapi import _get_device_name_sycl_ctxt, _get_sycl_ctxt_params try: from dpctl import SyclQueue diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index fcb9927b4e..f70415ade7 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -23,19 +23,11 @@ except ImportError: dpnp_available = False -try: - from sklearnex._device_offload import ( - _copy_to_usm, - _get_global_queue, - _transfer_to_host, - ) - - _sklearnex_available = True -except ImportError: - import logging - - logging.warning("Device support requires " "Intel(R) Extension for Scikit-learn*.") - _sklearnex_available = False +from daal4py.sklearn._device_offload import ( + _copy_to_usm, + _get_global_queue, + _transfer_to_host, +) def _get_host_inputs(*args, **kwargs): @@ -62,21 +54,15 @@ def _run_on_device(func, obj=None, *args, **kwargs): def support_usm_ndarray(freefunc=False): def decorator(func): def wrapper_impl(obj, *args, **kwargs): - if _sklearnex_available: - 
usm_iface = _extract_usm_iface(*args, **kwargs) - data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) - hostkwargs["queue"] = data_queue - result = _run_on_device(func, obj, *hostargs, **hostkwargs) - if usm_iface is not None and hasattr(result, "__array_interface__"): - result = _copy_to_usm(data_queue, result) - if ( - dpnp_available - and len(args) > 0 - and isinstance(args[0], dpnp.ndarray) - ): - result = dpnp.array(result, copy=False) - return result - return _run_on_device(func, obj, *args, **kwargs) + usm_iface = _extract_usm_iface(*args, **kwargs) + data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) + hostkwargs["queue"] = data_queue + result = _run_on_device(func, obj, *hostargs, **hostkwargs) + if usm_iface is not None and hasattr(result, "__array_interface__"): + result = _copy_to_usm(data_queue, result) + if dpnp_available and len(args) > 0 and isinstance(args[0], dpnp.ndarray): + result = dpnp.array(result, copy=False) + return result if freefunc: From b070c8908fcdf8bb944ab626809b13f5c1a7a913 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 15 May 2024 05:24:17 -0700 Subject: [PATCH 03/82] minor fix --- daal4py/sklearn/_device_offload.py | 1 + 1 file changed, 1 insertion(+) diff --git a/daal4py/sklearn/_device_offload.py b/daal4py/sklearn/_device_offload.py index dbc0d172a6..7a66c20bcb 100644 --- a/daal4py/sklearn/_device_offload.py +++ b/daal4py/sklearn/_device_offload.py @@ -14,6 +14,7 @@ # limitations under the License. 
# ============================================================================== +import sys from collections.abc import Iterable from functools import wraps From 01d73da349645a43b26f0217e0a8d79ddcca440b Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Sat, 8 Jun 2024 01:17:13 -0700 Subject: [PATCH 04/82] minor update --- daal4py/sklearn/_device_offload.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/daal4py/sklearn/_device_offload.py b/daal4py/sklearn/_device_offload.py index 7a66c20bcb..92cce042f9 100644 --- a/daal4py/sklearn/_device_offload.py +++ b/daal4py/sklearn/_device_offload.py @@ -20,9 +20,7 @@ import numpy as np -oneapi_is_available = "daal4py.oneapi" in sys.modules -if oneapi_is_available: - from daal4py.oneapi import _get_device_name_sycl_ctxt, _get_sycl_ctxt_params +from daal4py.oneapi import _get_device_name_sycl_ctxt, _get_sycl_ctxt_params try: from dpctl import SyclQueue From b5f89212555d0eb0466c5cfe57b66792913c9617 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Sat, 8 Jun 2024 15:38:25 -0700 Subject: [PATCH 05/82] added daal4py.sklearn._config for exposing sklearnex settings --- daal4py/sklearn/__init__.py | 3 + daal4py/sklearn/_config.py | 72 ++++++++++++++++ daal4py/sklearn/_device_offload.py | 132 ++++++++++++----------------- onedal/_device_offload.py | 12 +-- sklearnex/_config.py | 9 +- sklearnex/_device_offload.py | 19 +---- sklearnex/tests/test_config.py | 4 + 7 files changed, 149 insertions(+), 102 deletions(-) create mode 100644 daal4py/sklearn/_config.py diff --git a/daal4py/sklearn/__init__.py b/daal4py/sklearn/__init__.py index 6bced0b457..bcb0e2e409 100755 --- a/daal4py/sklearn/__init__.py +++ b/daal4py/sklearn/__init__.py @@ -14,6 +14,7 @@ # limitations under the License. 
# ============================================================================== +from ._config import _get_config, _set_config from .monkeypatch.dispatcher import _get_map_of_algorithms as sklearn_patch_map from .monkeypatch.dispatcher import _patch_names as sklearn_patch_names from .monkeypatch.dispatcher import disable as unpatch_sklearn @@ -21,6 +22,8 @@ from .monkeypatch.dispatcher import patch_is_enabled as sklearn_is_patched __all__ = [ + "_get_config", + "_set_config", "cluster", "decomposition", "ensemble", diff --git a/daal4py/sklearn/_config.py b/daal4py/sklearn/_config.py new file mode 100644 index 0000000000..133d0723d5 --- /dev/null +++ b/daal4py/sklearn/_config.py @@ -0,0 +1,72 @@ +# ============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tools to expose sklearnex's config settings to daal4py level.""" + +import threading + +_default_global_config = { + "target_offload": "auto", + "allow_fallback_to_host": False, +} + +_threadlocal = threading.local() + + +def _get_daal4py_threadlocal_config(): + if not hasattr(_threadlocal, "d4p_global_config"): + _threadlocal.d4p_global_config = _default_global_config.copy() + return _threadlocal.d4p_global_config + + +def _get_config(): + """Retrieve current values for configuration set by :func:`set_config` + Returns + ------- + config : dict + Keys are parameter names that can be passed to :func:`set_config`. + See Also + -------- + _set_config : Set global configuration. + """ + daal4py_config = _get_daal4py_threadlocal_config().copy() + return {**daal4py_config} + + +def _set_config(target_offload=None, allow_fallback_to_host=None): + """Set global configuration + Parameters + ---------- + target_offload : string or dpctl.SyclQueue, default=None + The device primarily used to perform computations. + If string, expected to be "auto" (the execution context + is deduced from input data location), + or SYCL* filter selector string. Global default: "auto". + allow_fallback_to_host : bool, default=None + If True, allows to fallback computation to host device + in case particular estimator does not support the selected one. + Global default: False. + See Also + -------- + _get_config : Retrieve current values of the global configuration. 
+ """ + + local_config = _get_daal4py_threadlocal_config() + + if target_offload is not None: + local_config["target_offload"] = target_offload + if allow_fallback_to_host is not None: + local_config["allow_fallback_to_host"] = allow_fallback_to_host diff --git a/daal4py/sklearn/_device_offload.py b/daal4py/sklearn/_device_offload.py index 92cce042f9..6bdf576f1b 100644 --- a/daal4py/sklearn/_device_offload.py +++ b/daal4py/sklearn/_device_offload.py @@ -14,13 +14,14 @@ # limitations under the License. # ============================================================================== +import logging import sys from collections.abc import Iterable from functools import wraps import numpy as np -from daal4py.oneapi import _get_device_name_sycl_ctxt, _get_sycl_ctxt_params +from ._config import _get_config try: from dpctl import SyclQueue @@ -31,34 +32,42 @@ except ImportError: dpctl_available = False -try: - import dpnp - dpnp_available = True -except ImportError: - dpnp_available = False +oneapi_is_available = "daal4py.oneapi" in sys.modules +if oneapi_is_available: + from daal4py.oneapi import _get_device_name_sycl_ctxt, _get_sycl_ctxt_params -# TODO: -# remove or update -try: - # from sklearnex._config import get_config - # from sklearnex._device_offload import ( - # #_copy_to_usm, - # # _get_global_queue, - # # _transfer_to_host, - # ) - - _sklearnex_available = True -except ImportError: - import logging +def _get_device_info(): + if oneapi_is_available: + return _get_device_name_sycl_ctxt(), _get_sycl_ctxt_params() + return None, dict() - logging.warning( - "Device support is limited in daal4py patching. " - "Use Intel(R) Extension for Scikit-learn* " - "for full experience." 
- ) - _sklearnex_available = False + +class DummySyclQueue: + """This class is designed to act like dpctl.SyclQueue + to allow device dispatching in scenarios when dpctl is not available""" + + class DummySyclDevice: + def __init__(self, filter_string): + self._filter_string = filter_string + self.is_cpu = "cpu" in filter_string + self.is_gpu = "gpu" in filter_string + # TODO: check for possibility of fp64 support + # on other devices in this dummy class + self.has_aspect_fp64 = self.is_cpu + + if not (self.is_cpu): + logging.warning( + "Device support is limited. " + "Please install dpctl for full experience" + ) + + def get_filter_string(self): + return self._filter_string + + def __init__(self, filter_string): + self.sycl_device = self.DummySyclDevice(filter_string) def _copy_to_usm(queue, array): @@ -68,6 +77,7 @@ def _copy_to_usm(queue, array): ) if hasattr(array, "__array__"): + try: mem = MemoryUSMDevice(array.nbytes, queue=queue) mem.copy_from_host(array.tobytes()) @@ -130,43 +140,18 @@ def _transfer_to_host(queue, *data): return queue, host_data -class DummySyclQueue: - """This class is designed to act like dpctl.SyclQueue - to allow device dispatching in scenarios when dpctl is not available""" - - class DummySyclDevice: - def __init__(self, filter_string): - self._filter_string = filter_string - self.is_cpu = "cpu" in filter_string - self.is_gpu = "gpu" in filter_string - # TODO: check for possibility of fp64 support - # on other devices in this dummy class - self.has_aspect_fp64 = self.is_cpu - - if not (self.is_cpu): - logging.warning( - "Device support is limited. 
" - "Please install dpctl for full experience" - ) - - def get_filter_string(self): - return self._filter_string - - def __init__(self, filter_string): - self.sycl_device = self.DummySyclDevice(filter_string) - - -def _get_global_queue(target_offload=None): - d4p_target, _ = _get_device_name_sycl_ctxt(), _get_sycl_ctxt_params() +def _get_global_queue(): + target = _get_config()["target_offload"] + d4p_target, _ = _get_device_info() if d4p_target == "host": d4p_target = "cpu" QueueClass = DummySyclQueue if not dpctl_available else SyclQueue - if target_offload and target_offload != "auto": - if d4p_target is not None and d4p_target != target_offload: - if not isinstance(target_offload, str): - if d4p_target not in target_offload.sycl_device.get_filter_string(): + if target != "auto": + if d4p_target is not None and d4p_target != target: + if not isinstance(target, str): + if d4p_target not in target.sycl_device.get_filter_string(): raise RuntimeError( "Cannot use target offload option " "inside daal4py.oneapi.sycl_context" @@ -176,9 +161,9 @@ def _get_global_queue(target_offload=None): "Cannot use target offload option " "inside daal4py.oneapi.sycl_context" ) - if isinstance(target_offload, QueueClass): - return target_offload - return QueueClass(target_offload) + if isinstance(target, QueueClass): + return target + return QueueClass(target) if d4p_target is not None: return QueueClass(d4p_target) return None @@ -199,18 +184,17 @@ def _extract_usm_iface(*args, **kwargs): return getattr(allargs[0], "__sycl_usm_array_interface__", None) -def _run_on_device(func, queue, obj=None, host_offload=False, *args, **kwargs): +def _run_on_device(func, queue, obj=None, *args, **kwargs): def dispatch_by_obj(obj, func, *args, **kwargs): if obj is not None: return func(obj, *args, **kwargs) return func(*args, **kwargs) - if queue is not None: + if queue is not None and oneapi_is_available: from daal4py.oneapi import _get_in_sycl_ctxt, sycl_context if _get_in_sycl_ctxt() is False: - 
# TODO: - # host_offload = get_config()["allow_fallback_to_host"] + host_offload = _get_config()["allow_fallback_to_host"] with sycl_context( "gpu" if queue.sycl_device.is_gpu else "cpu", @@ -220,21 +204,15 @@ def dispatch_by_obj(obj, func, *args, **kwargs): return dispatch_by_obj(obj, func, *args, **kwargs) -# TODO: -# add daal4py.sklearn.get_config -def support_usm_ndarray(freefunc=False, host_offload=False): +def support_usm_ndarray(freefunc=False): def decorator(func): def wrapper_impl(obj, *args, **kwargs): - if _sklearnex_available: - usm_iface = _extract_usm_iface(*args, **kwargs) - q, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) - result = _run_on_device( - func, q, obj, host_offload, *hostargs, **hostkwargs - ) - if usm_iface is not None and hasattr(result, "__array_interface__"): - return _copy_to_usm(q, result) - return result - return _run_on_device(func, None, obj, host_offload, *args, **kwargs) + usm_iface = _extract_usm_iface(*args, **kwargs) + q, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) + result = _run_on_device(func, q, obj, *hostargs, **hostkwargs) + if usm_iface is not None and hasattr(result, "__array_interface__"): + return _copy_to_usm(q, result) + return result if freefunc: diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index f70415ade7..677178e7e1 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -16,6 +16,12 @@ from functools import wraps +from daal4py.sklearn._device_offload import ( + _copy_to_usm, + _get_global_queue, + _transfer_to_host, +) + try: import dpnp @@ -23,12 +29,6 @@ except ImportError: dpnp_available = False -from daal4py.sklearn._device_offload import ( - _copy_to_usm, - _get_global_queue, - _transfer_to_host, -) - def _get_host_inputs(*args, **kwargs): q = _get_global_queue() diff --git a/sklearnex/_config.py b/sklearnex/_config.py index cf65a66b18..f6b151406f 100644 --- a/sklearnex/_config.py +++ b/sklearnex/_config.py @@ -20,10 +20,8 @@ from sklearn 
import get_config as skl_get_config from sklearn import set_config as skl_set_config -_default_global_config = { - "target_offload": "auto", - "allow_fallback_to_host": False, -} +from daal4py.sklearn import _set_config as _d4py_set_config +from daal4py.sklearn._config import _default_global_config _threadlocal = threading.local() @@ -69,6 +67,9 @@ def set_config(target_offload=None, allow_fallback_to_host=None, **sklearn_confi get_config : Retrieve current values of the global configuration. """ skl_set_config(**sklearn_configs) + _d4py_set_config( + target_offload=target_offload, allow_fallback_to_host=allow_fallback_to_host + ) local_config = _get_sklearnex_threadlocal_config() diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 4c0b2a1420..fc77c86d96 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -14,15 +14,14 @@ # limitations under the License. # ============================================================================== -import sys from collections.abc import Iterable from functools import wraps +from daal4py.sklearn._device_offload import _copy_to_usm from daal4py.sklearn._device_offload import ( - _copy_to_usm, - _get_global_queue, - _transfer_to_host, + _get_device_info as _get_device_info_from_daal4py, ) +from daal4py.sklearn._device_offload import _get_global_queue, _transfer_to_host try: from dpctl.tensor import usm_ndarray @@ -40,16 +39,6 @@ from ._config import get_config -oneapi_is_available = "daal4py.oneapi" in sys.modules -if oneapi_is_available: - from daal4py.oneapi import _get_device_name_sycl_ctxt, _get_sycl_ctxt_params - - -def _get_device_info_from_daal4py(): - if oneapi_is_available: - return _get_device_name_sycl_ctxt(), _get_sycl_ctxt_params() - return None, dict() - def _get_backend(obj, queue, method_name, *data): cpu_device = queue is None or queue.sycl_device.is_cpu @@ -85,7 +74,7 @@ def _get_backend(obj, queue, method_name, *data): def dispatch(obj, method_name, branches, 
*args, **kwargs): - q = _get_global_queue(get_config()["target_offload"]) + q = _get_global_queue() q, hostargs = _transfer_to_host(q, *args) q, hostvalues = _transfer_to_host(q, *kwargs.values()) hostkwargs = dict(zip(kwargs.keys(), hostvalues)) diff --git a/sklearnex/tests/test_config.py b/sklearnex/tests/test_config.py index fc8fd2df3d..fe1f2b876e 100644 --- a/sklearnex/tests/test_config.py +++ b/sklearnex/tests/test_config.py @@ -16,6 +16,7 @@ import sklearn +import daal4py import sklearnex @@ -33,7 +34,10 @@ def test_set_config_works(): ) config = sklearnex.get_config() + config_d4p = daal4py.sklearn._get_config() assert config["target_offload"] == "cpu:0" assert config["allow_fallback_to_host"] assert config["assume_finite"] + assert config_d4p["target_offload"] == "cpu:0" + assert config_d4p["allow_fallback_to_host"] sklearnex.set_config(**default_config) From 8ce81ea6d805f0ae22a78fa268d7b9bdc3dd6af2 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 11 Jun 2024 07:36:03 -0700 Subject: [PATCH 06/82] removed daal4py device_offloading updated onedal4py _device_offload module sklearnex _device_offload depends on onedal4py only --- daal4py/sklearn/__init__.py | 3 - daal4py/sklearn/_device_offload.py | 231 ------------------------- {daal4py/sklearn => onedal}/_config.py | 38 +--- onedal/_device_offload.py | 144 ++++++++++++++- onedal/common/_policy.py | 19 +- sklearnex/_config.py | 19 +- sklearnex/_device_offload.py | 25 +-- sklearnex/tests/test_config.py | 8 +- 8 files changed, 158 insertions(+), 329 deletions(-) delete mode 100644 daal4py/sklearn/_device_offload.py rename {daal4py/sklearn => onedal}/_config.py (50%) diff --git a/daal4py/sklearn/__init__.py b/daal4py/sklearn/__init__.py index bcb0e2e409..6bced0b457 100755 --- a/daal4py/sklearn/__init__.py +++ b/daal4py/sklearn/__init__.py @@ -14,7 +14,6 @@ # limitations under the License. 
# ============================================================================== -from ._config import _get_config, _set_config from .monkeypatch.dispatcher import _get_map_of_algorithms as sklearn_patch_map from .monkeypatch.dispatcher import _patch_names as sklearn_patch_names from .monkeypatch.dispatcher import disable as unpatch_sklearn @@ -22,8 +21,6 @@ from .monkeypatch.dispatcher import patch_is_enabled as sklearn_is_patched __all__ = [ - "_get_config", - "_set_config", "cluster", "decomposition", "ensemble", diff --git a/daal4py/sklearn/_device_offload.py b/daal4py/sklearn/_device_offload.py deleted file mode 100644 index 6bdf576f1b..0000000000 --- a/daal4py/sklearn/_device_offload.py +++ /dev/null @@ -1,231 +0,0 @@ -# ============================================================================== -# Copyright 2014 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -import logging -import sys -from collections.abc import Iterable -from functools import wraps - -import numpy as np - -from ._config import _get_config - -try: - from dpctl import SyclQueue - from dpctl.memory import MemoryUSMDevice, as_usm_memory - from dpctl.tensor import usm_ndarray - - dpctl_available = True -except ImportError: - dpctl_available = False - - -oneapi_is_available = "daal4py.oneapi" in sys.modules -if oneapi_is_available: - from daal4py.oneapi import _get_device_name_sycl_ctxt, _get_sycl_ctxt_params - - -def _get_device_info(): - if oneapi_is_available: - return _get_device_name_sycl_ctxt(), _get_sycl_ctxt_params() - return None, dict() - - -class DummySyclQueue: - """This class is designed to act like dpctl.SyclQueue - to allow device dispatching in scenarios when dpctl is not available""" - - class DummySyclDevice: - def __init__(self, filter_string): - self._filter_string = filter_string - self.is_cpu = "cpu" in filter_string - self.is_gpu = "gpu" in filter_string - # TODO: check for possibility of fp64 support - # on other devices in this dummy class - self.has_aspect_fp64 = self.is_cpu - - if not (self.is_cpu): - logging.warning( - "Device support is limited. 
" - "Please install dpctl for full experience" - ) - - def get_filter_string(self): - return self._filter_string - - def __init__(self, filter_string): - self.sycl_device = self.DummySyclDevice(filter_string) - - -def _copy_to_usm(queue, array): - if not dpctl_available: - raise RuntimeError( - "dpctl need to be installed to work " "with __sycl_usm_array_interface__" - ) - - if hasattr(array, "__array__"): - - try: - mem = MemoryUSMDevice(array.nbytes, queue=queue) - mem.copy_from_host(array.tobytes()) - return usm_ndarray(array.shape, array.dtype, buffer=mem) - except ValueError as e: - # ValueError will raise if device does not support the dtype - # retry with float32 (needed for fp16 and fp64 support issues) - # try again as float32, if it is a float32 just raise the error. - if array.dtype == np.float32: - raise e - return _copy_to_usm(queue, array.astype(np.float32)) - else: - if isinstance(array, Iterable): - array = [_copy_to_usm(queue, i) for i in array] - return array - - -def _transfer_to_host(queue, *data): - has_usm_data, has_host_data = False, False - - host_data = [] - for item in data: - usm_iface = getattr(item, "__sycl_usm_array_interface__", None) - if usm_iface is not None: - if not dpctl_available: - raise RuntimeError( - "dpctl need to be installed to work " - "with __sycl_usm_array_interface__" - ) - if queue is not None: - if queue.sycl_device != usm_iface["syclobj"].sycl_device: - raise RuntimeError( - "Input data shall be located " "on single target device" - ) - else: - queue = usm_iface["syclobj"] - - buffer = as_usm_memory(item).copy_to_host() - order = "C" - if usm_iface["strides"] is not None: - if usm_iface["strides"][0] < usm_iface["strides"][1]: - order = "F" - item = np.ndarray( - shape=usm_iface["shape"], - dtype=usm_iface["typestr"], - buffer=buffer, - order=order, - ) - has_usm_data = True - else: - has_host_data = True - - mismatch_host_item = usm_iface is None and item is not None and has_usm_data - mismatch_usm_item = 
usm_iface is not None and has_host_data - - if mismatch_host_item or mismatch_usm_item: - raise RuntimeError("Input data shall be located on single target device") - - host_data.append(item) - return queue, host_data - - -def _get_global_queue(): - target = _get_config()["target_offload"] - d4p_target, _ = _get_device_info() - if d4p_target == "host": - d4p_target = "cpu" - - QueueClass = DummySyclQueue if not dpctl_available else SyclQueue - - if target != "auto": - if d4p_target is not None and d4p_target != target: - if not isinstance(target, str): - if d4p_target not in target.sycl_device.get_filter_string(): - raise RuntimeError( - "Cannot use target offload option " - "inside daal4py.oneapi.sycl_context" - ) - else: - raise RuntimeError( - "Cannot use target offload option " - "inside daal4py.oneapi.sycl_context" - ) - if isinstance(target, QueueClass): - return target - return QueueClass(target) - if d4p_target is not None: - return QueueClass(d4p_target) - return None - - -def _get_host_inputs(*args, **kwargs): - q = _get_global_queue() - q, hostargs = _transfer_to_host(q, *args) - q, hostvalues = _transfer_to_host(q, *kwargs.values()) - hostkwargs = dict(zip(kwargs.keys(), hostvalues)) - return q, hostargs, hostkwargs - - -def _extract_usm_iface(*args, **kwargs): - allargs = (*args, *kwargs.values()) - if len(allargs) == 0: - return None - return getattr(allargs[0], "__sycl_usm_array_interface__", None) - - -def _run_on_device(func, queue, obj=None, *args, **kwargs): - def dispatch_by_obj(obj, func, *args, **kwargs): - if obj is not None: - return func(obj, *args, **kwargs) - return func(*args, **kwargs) - - if queue is not None and oneapi_is_available: - from daal4py.oneapi import _get_in_sycl_ctxt, sycl_context - - if _get_in_sycl_ctxt() is False: - host_offload = _get_config()["allow_fallback_to_host"] - - with sycl_context( - "gpu" if queue.sycl_device.is_gpu else "cpu", - host_offload_on_fail=host_offload, - ): - return dispatch_by_obj(obj, func, 
*args, **kwargs) - return dispatch_by_obj(obj, func, *args, **kwargs) - - -def support_usm_ndarray(freefunc=False): - def decorator(func): - def wrapper_impl(obj, *args, **kwargs): - usm_iface = _extract_usm_iface(*args, **kwargs) - q, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) - result = _run_on_device(func, q, obj, *hostargs, **hostkwargs) - if usm_iface is not None and hasattr(result, "__array_interface__"): - return _copy_to_usm(q, result) - return result - - if freefunc: - - @wraps(func) - def wrapper_free(*args, **kwargs): - return wrapper_impl(None, *args, **kwargs) - - return wrapper_free - - @wraps(func) - def wrapper_with_self(self, *args, **kwargs): - return wrapper_impl(self, *args, **kwargs) - - return wrapper_with_self - - return decorator diff --git a/daal4py/sklearn/_config.py b/onedal/_config.py similarity index 50% rename from daal4py/sklearn/_config.py rename to onedal/_config.py index 133d0723d5..19c7b9e16b 100644 --- a/daal4py/sklearn/_config.py +++ b/onedal/_config.py @@ -14,7 +14,7 @@ # limitations under the License. # ============================================================================== -"""Tools to expose sklearnex's config settings to daal4py level.""" +"""Tools to expose some sklearnex's config settings to onedal4py level.""" import threading @@ -26,12 +26,14 @@ _threadlocal = threading.local() -def _get_daal4py_threadlocal_config(): - if not hasattr(_threadlocal, "d4p_global_config"): +def _get_onedal_threadlocal_config(): + if not hasattr(_threadlocal, "global_config"): _threadlocal.d4p_global_config = _default_global_config.copy() return _threadlocal.d4p_global_config +# TODO: +# docstrings def _get_config(): """Retrieve current values for configuration set by :func:`set_config` Returns @@ -42,31 +44,5 @@ def _get_config(): -------- _set_config : Set global configuration. 
""" - daal4py_config = _get_daal4py_threadlocal_config().copy() - return {**daal4py_config} - - -def _set_config(target_offload=None, allow_fallback_to_host=None): - """Set global configuration - Parameters - ---------- - target_offload : string or dpctl.SyclQueue, default=None - The device primarily used to perform computations. - If string, expected to be "auto" (the execution context - is deduced from input data location), - or SYCL* filter selector string. Global default: "auto". - allow_fallback_to_host : bool, default=None - If True, allows to fallback computation to host device - in case particular estimator does not support the selected one. - Global default: False. - See Also - -------- - _get_config : Retrieve current values of the global configuration. - """ - - local_config = _get_daal4py_threadlocal_config() - - if target_offload is not None: - local_config["target_offload"] = target_offload - if allow_fallback_to_host is not None: - local_config["allow_fallback_to_host"] = allow_fallback_to_host + onedal_config = _get_onedal_threadlocal_config().copy() + return {**onedal_config} diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 677178e7e1..d553e8d005 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -14,13 +14,22 @@ # limitations under the License. 
# ============================================================================== +import logging +from collections.abc import Iterable from functools import wraps -from daal4py.sklearn._device_offload import ( - _copy_to_usm, - _get_global_queue, - _transfer_to_host, -) +import numpy as np + +from ._config import _get_config + +try: + from dpctl import SyclQueue + from dpctl.memory import MemoryUSMDevice, as_usm_memory + from dpctl.tensor import usm_ndarray + + dpctl_available = True +except ImportError: + dpctl_available = False try: import dpnp @@ -30,6 +39,114 @@ dpnp_available = False +class DummySyclQueue: + """This class is designed to act like dpctl.SyclQueue + to allow device dispatching in scenarios when dpctl is not available""" + + class DummySyclDevice: + def __init__(self, filter_string): + self._filter_string = filter_string + self.is_cpu = "cpu" in filter_string + self.is_gpu = "gpu" in filter_string + # TODO: check for possibility of fp64 support + # on other devices in this dummy class + self.has_aspect_fp64 = self.is_cpu + + if not (self.is_cpu): + logging.warning( + "Device support is limited. " + "Please install dpctl for full experience" + ) + + def get_filter_string(self): + return self._filter_string + + def __init__(self, filter_string): + self.sycl_device = self.DummySyclDevice(filter_string) + + +def _copy_to_usm(queue, array): + if not dpctl_available: + raise RuntimeError( + "dpctl need to be installed to work " "with __sycl_usm_array_interface__" + ) + + if hasattr(array, "__array__"): + + try: + mem = MemoryUSMDevice(array.nbytes, queue=queue) + mem.copy_from_host(array.tobytes()) + return usm_ndarray(array.shape, array.dtype, buffer=mem) + except ValueError as e: + # ValueError will raise if device does not support the dtype + # retry with float32 (needed for fp16 and fp64 support issues) + # try again as float32, if it is a float32 just raise the error. 
+ if array.dtype == np.float32: + raise e + return _copy_to_usm(queue, array.astype(np.float32)) + else: + if isinstance(array, Iterable): + array = [_copy_to_usm(queue, i) for i in array] + return array + + +def _transfer_to_host(queue, *data): + has_usm_data, has_host_data = False, False + + host_data = [] + for item in data: + usm_iface = getattr(item, "__sycl_usm_array_interface__", None) + if usm_iface is not None: + if not dpctl_available: + raise RuntimeError( + "dpctl need to be installed to work " + "with __sycl_usm_array_interface__" + ) + if queue is not None: + if queue.sycl_device != usm_iface["syclobj"].sycl_device: + raise RuntimeError( + "Input data shall be located " "on single target device" + ) + else: + queue = usm_iface["syclobj"] + + buffer = as_usm_memory(item).copy_to_host() + order = "C" + if usm_iface["strides"] is not None: + if usm_iface["strides"][0] < usm_iface["strides"][1]: + order = "F" + item = np.ndarray( + shape=usm_iface["shape"], + dtype=usm_iface["typestr"], + buffer=buffer, + order=order, + ) + has_usm_data = True + else: + has_host_data = True + + mismatch_host_item = usm_iface is None and item is not None and has_usm_data + mismatch_usm_item = usm_iface is not None and has_host_data + + if mismatch_host_item or mismatch_usm_item: + raise RuntimeError("Input data shall be located on single target device") + + host_data.append(item) + return queue, host_data + + +def _get_global_queue(): + target = _get_config()["target_offload"] + + QueueClass = DummySyclQueue if not dpctl_available else SyclQueue + + if target != "auto": + if isinstance(target, QueueClass): + return target + return QueueClass(target) + return None + + def _get_host_inputs(*args, **kwargs): q = _get_global_queue() q, hostargs = _transfer_to_host(q, *args) @@ -51,17 +168,30 @@ def _run_on_device(func, obj=None, *args, **kwargs): return func(*args, **kwargs) +if dpnp_available: + # TODO: + # will be moved to _arrary_api module + def _convert_to_dpnp(array): + 
if isinstance(array, usm_ndarray): + return dpnp.array(array, copy=False) + elif isinstance(array, Iterable): + for i in range(len(array)): + array[i] = _convert_to_dpnp(array[i]) + return array + + def support_usm_ndarray(freefunc=False): def decorator(func): def wrapper_impl(obj, *args, **kwargs): usm_iface = _extract_usm_iface(*args, **kwargs) data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) - hostkwargs["queue"] = data_queue + if "queue" in hostkwargs: + hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) if usm_iface is not None and hasattr(result, "__array_interface__"): result = _copy_to_usm(data_queue, result) if dpnp_available and len(args) > 0 and isinstance(args[0], dpnp.ndarray): - result = dpnp.array(result, copy=False) + result = _convert_to_dpnp(result) return result if freefunc: diff --git a/onedal/common/_policy.py b/onedal/common/_policy.py index a45d12e8c6..9928bbbaf5 100644 --- a/onedal/common/_policy.py +++ b/onedal/common/_policy.py @@ -18,10 +18,6 @@ from onedal import _backend, _is_dpc_backend -oneapi_is_available = "daal4py.oneapi" in sys.modules -if oneapi_is_available: - from daal4py.oneapi import _get_sycl_ctxt - def _get_policy(queue, *data): data_queue = _get_queue(*data) @@ -46,21 +42,9 @@ def _get_queue(*data): return None -class _Daal4PyContextReset: - def __init__(self): - self._d4p_context = None - if oneapi_is_available: - self._d4p_context = _get_sycl_ctxt() - - def __del__(self): - if self._d4p_context: - self._d4p_context.apply() - - class _HostInteropPolicy(_backend.host_policy): def __init__(self): super().__init__() - self._d4p_interop = _Daal4PyContextReset() if _is_dpc_backend: @@ -68,8 +52,7 @@ def __init__(self): class _DataParallelInteropPolicy(_backend.data_parallel_policy): def __init__(self, queue): self._queue = queue - self._d4p_interop = _Daal4PyContextReset() - from daal4py.sklearn._device_offload import DummySyclQueue + from onedal._device_offload 
import DummySyclQueue if isinstance(queue, DummySyclQueue): super().__init__(self._queue.sycl_device.get_filter_string()) diff --git a/sklearnex/_config.py b/sklearnex/_config.py index f6b151406f..10b3e4b7b5 100644 --- a/sklearnex/_config.py +++ b/sklearnex/_config.py @@ -14,22 +14,12 @@ # limitations under the License. # ============================================================================== -import threading from contextlib import contextmanager from sklearn import get_config as skl_get_config from sklearn import set_config as skl_set_config -from daal4py.sklearn import _set_config as _d4py_set_config -from daal4py.sklearn._config import _default_global_config - -_threadlocal = threading.local() - - -def _get_sklearnex_threadlocal_config(): - if not hasattr(_threadlocal, "global_config"): - _threadlocal.global_config = _default_global_config.copy() - return _threadlocal.global_config +from onedal._config import _get_onedal_threadlocal_config def get_config(): @@ -44,7 +34,7 @@ def get_config(): set_config : Set global configuration. """ sklearn = skl_get_config() - sklearnex = _get_sklearnex_threadlocal_config().copy() + sklearnex = _get_onedal_threadlocal_config().copy() return {**sklearn, **sklearnex} @@ -67,11 +57,8 @@ def set_config(target_offload=None, allow_fallback_to_host=None, **sklearn_confi get_config : Retrieve current values of the global configuration. 
""" skl_set_config(**sklearn_configs) - _d4py_set_config( - target_offload=target_offload, allow_fallback_to_host=allow_fallback_to_host - ) - local_config = _get_sklearnex_threadlocal_config() + local_config = _get_onedal_threadlocal_config() if target_offload is not None: local_config["target_offload"] = target_offload diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index fc77c86d96..ce34316829 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -17,11 +17,12 @@ from collections.abc import Iterable from functools import wraps -from daal4py.sklearn._device_offload import _copy_to_usm -from daal4py.sklearn._device_offload import ( - _get_device_info as _get_device_info_from_daal4py, +from onedal._device_offload import ( + _convert_to_dpnp, + _copy_to_usm, + _get_global_queue, + _transfer_to_host, ) -from daal4py.sklearn._device_offload import _get_global_queue, _transfer_to_host try: from dpctl.tensor import usm_ndarray @@ -51,10 +52,7 @@ def _get_backend(obj, queue, method_name, *data): else: return "sklearn", None, patching_status - _, d4p_options = _get_device_info_from_daal4py() - allow_fallback_to_host = get_config()["allow_fallback_to_host"] or d4p_options.get( - "host_offload_on_fail", False - ) + allow_fallback_to_host = get_config()["allow_fallback_to_host"] if gpu_device: patching_status = obj._onedal_gpu_supported(method_name, *data) @@ -92,17 +90,6 @@ def dispatch(obj, method_name, branches, *args, **kwargs): ) -if dpnp_available: - - def _convert_to_dpnp(array): - if isinstance(array, usm_ndarray): - return dpnp.array(array, copy=False) - elif isinstance(array, Iterable): - for i in range(len(array)): - array[i] = _convert_to_dpnp(array[i]) - return array - - def wrap_output_data(func): @wraps(func) def wrapper(self, *args, **kwargs): diff --git a/sklearnex/tests/test_config.py b/sklearnex/tests/test_config.py index fe1f2b876e..f2cad35f98 100644 --- a/sklearnex/tests/test_config.py +++ 
b/sklearnex/tests/test_config.py @@ -16,7 +16,7 @@ import sklearn -import daal4py +import onedal import sklearnex @@ -34,10 +34,10 @@ def test_set_config_works(): ) config = sklearnex.get_config() - config_d4p = daal4py.sklearn._get_config() + onedal_config = onedal._get_config() assert config["target_offload"] == "cpu:0" assert config["allow_fallback_to_host"] assert config["assume_finite"] - assert config_d4p["target_offload"] == "cpu:0" - assert config_d4p["allow_fallback_to_host"] + assert onedal_config["target_offload"] == "cpu:0" + assert onedal_config["allow_fallback_to_host"] sklearnex.set_config(**default_config) From fc01bae27a9933cdabcb255188d1e45de65ce453 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 11 Jun 2024 08:39:52 -0700 Subject: [PATCH 07/82] integrating changes of device offloading for sklearnex primitives/estimators with daal4py backend --- daal4py/sklearn/cluster/dbscan.py | 3 --- daal4py/sklearn/cluster/k_means.py | 7 ------ daal4py/sklearn/decomposition/_pca.py | 5 ----- daal4py/sklearn/ensemble/_forest.py | 6 ----- .../linear_model/_coordinate_descent.py | 10 --------- daal4py/sklearn/linear_model/_linear.py | 2 -- daal4py/sklearn/linear_model/_ridge.py | 5 ----- daal4py/sklearn/linear_model/logistic_path.py | 7 ------ daal4py/sklearn/manifold/_t_sne.py | 3 --- daal4py/sklearn/metrics/_pairwise.py | 2 -- daal4py/sklearn/metrics/_ranking.py | 2 -- daal4py/sklearn/model_selection/_split.py | 2 -- daal4py/sklearn/neighbors/_classification.py | 4 ---- daal4py/sklearn/neighbors/_regression.py | 3 --- daal4py/sklearn/neighbors/_unsupervised.py | 2 -- daal4py/sklearn/tree/decision_tree.py | 4 ---- sklearnex/cluster/dbscan.py | 1 - sklearnex/linear_model/coordinate_descent.py | 9 ++++++++ sklearnex/linear_model/logistic_regression.py | 22 +++++++++---------- sklearnex/linear_model/ridge.py | 7 ++++++ sklearnex/manifold/t_sne.py | 4 ++++ sklearnex/metrics/pairwise.py | 3 +++ sklearnex/metrics/ranking.py | 3 +++ 
sklearnex/model_selection/split.py | 3 +++ 24 files changed, 39 insertions(+), 80 deletions(-) diff --git a/daal4py/sklearn/cluster/dbscan.py b/daal4py/sklearn/cluster/dbscan.py index ebfb7f7f8a..ac312d7df4 100644 --- a/daal4py/sklearn/cluster/dbscan.py +++ b/daal4py/sklearn/cluster/dbscan.py @@ -24,7 +24,6 @@ import daal4py -from .._device_offload import support_usm_ndarray from .._n_jobs_support import control_n_jobs from .._utils import PatchingConditionsChain, getFPType, make2d, sklearn_check_version @@ -83,7 +82,6 @@ def __init__( self.p = p self.n_jobs = n_jobs - @support_usm_ndarray() def fit(self, X, y=None, sample_weight=None): if sklearn_check_version("1.2"): self._validate_params() @@ -160,7 +158,6 @@ def fit(self, X, y=None, sample_weight=None): return self return super().fit(X, y, sample_weight=sample_weight) - @support_usm_ndarray() def fit_predict(self, X, y=None, sample_weight=None): return super().fit_predict(X, y, sample_weight) diff --git a/daal4py/sklearn/cluster/k_means.py b/daal4py/sklearn/cluster/k_means.py index d95e09b024..2ce44a26e6 100755 --- a/daal4py/sklearn/cluster/k_means.py +++ b/daal4py/sklearn/cluster/k_means.py @@ -34,7 +34,6 @@ import daal4py -from .._device_offload import support_usm_ndarray from .._n_jobs_support import control_n_jobs from .._utils import PatchingConditionsChain, getFPType, sklearn_check_version @@ -575,30 +574,24 @@ def __init__( algorithm=algorithm, ) - @support_usm_ndarray() def fit(self, X, y=None, sample_weight=None): return _fit(self, X, y=y, sample_weight=sample_weight) if sklearn_check_version("1.5"): - @support_usm_ndarray() def predict(self, X): return _predict(self, X) else: - @support_usm_ndarray() def predict( self, X, sample_weight="deprecated" if sklearn_check_version("1.3") else None ): return _predict(self, X, sample_weight=sample_weight) - @support_usm_ndarray() def fit_predict(self, X, y=None, sample_weight=None): return super().fit_predict(X, y, sample_weight) - score = 
support_usm_ndarray()(KMeans_original.score) - fit.__doc__ = KMeans_original.fit.__doc__ predict.__doc__ = KMeans_original.predict.__doc__ fit_predict.__doc__ = KMeans_original.fit_predict.__doc__ diff --git a/daal4py/sklearn/decomposition/_pca.py b/daal4py/sklearn/decomposition/_pca.py index 0eb4d90b4c..deabba7e5c 100644 --- a/daal4py/sklearn/decomposition/_pca.py +++ b/daal4py/sklearn/decomposition/_pca.py @@ -25,7 +25,6 @@ import daal4py -from .._device_offload import support_usm_ndarray from .._n_jobs_support import control_n_jobs from .._utils import PatchingConditionsChain, getFPType, sklearn_check_version @@ -376,7 +375,6 @@ def _transform_daal4py(self, X, whiten=False, scale_eigenvalues=True, check_X=Tr if sklearn_check_version("1.3"): - @support_usm_ndarray() @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y=None): """Fit the model with X. @@ -400,7 +398,6 @@ def fit(self, X, y=None): else: - @support_usm_ndarray() def fit(self, X, y=None): """Fit the model with X. @@ -431,7 +428,6 @@ def fit(self, X, y=None): self._fit(X) return self - @support_usm_ndarray() def transform(self, X): """ Apply dimensionality reduction to X. @@ -466,7 +462,6 @@ def transform(self, X): ) return PCA_original.transform(self, X) - @support_usm_ndarray() def fit_transform(self, X, y=None): """ Fit the model with X and apply the dimensionality reduction on X. 
diff --git a/daal4py/sklearn/ensemble/_forest.py b/daal4py/sklearn/ensemble/_forest.py index 6e4524a9d3..9e5678581f 100755 --- a/daal4py/sklearn/ensemble/_forest.py +++ b/daal4py/sklearn/ensemble/_forest.py @@ -43,7 +43,6 @@ sklearn_check_version, ) -from .._device_offload import support_usm_ndarray from .._n_jobs_support import control_n_jobs from ..utils.validation import _daal_num_features @@ -400,7 +399,6 @@ def __init__( self.minBinSize = minBinSize self.binningStrategy = binningStrategy - @support_usm_ndarray() def fit(self, X, y, sample_weight=None): """ Build a forest of trees from the training set (X, y). @@ -530,7 +528,6 @@ def fit(self, X, y, sample_weight=None): return self return super().fit(X, y, sample_weight=sample_weight) - @support_usm_ndarray() def predict(self, X): """ Predict class for X. @@ -582,7 +579,6 @@ def predict(self, X): ) return self._daal_predict_classifier(X) - @support_usm_ndarray() def predict_proba(self, X): """ Predict class probabilities for X. @@ -1037,7 +1033,6 @@ def __init__( self.minBinSize = minBinSize self.binningStrategy = binningStrategy - @support_usm_ndarray() def fit(self, X, y, sample_weight=None): """ Build a forest of trees from the training set (X, y). @@ -1172,7 +1167,6 @@ def fit(self, X, y, sample_weight=None): return self return super().fit(X, y, sample_weight=sample_weight) - @support_usm_ndarray() def predict(self, X): """ Predict class for X. 
diff --git a/daal4py/sklearn/linear_model/_coordinate_descent.py b/daal4py/sklearn/linear_model/_coordinate_descent.py index a35baade57..4a2eceb168 100755 --- a/daal4py/sklearn/linear_model/_coordinate_descent.py +++ b/daal4py/sklearn/linear_model/_coordinate_descent.py @@ -46,8 +46,6 @@ from sklearn.exceptions import ConvergenceWarning from sklearn.preprocessing import normalize -from .._device_offload import support_usm_ndarray - def _daal4py_check(self, X, y, check_input): _fptype = getFPType(X) @@ -686,11 +684,9 @@ def __init__( selection=selection, ) - @support_usm_ndarray() def fit(self, X, y, sample_weight=None, check_input=True): return _fit(self, X, y, sample_weight=sample_weight, check_input=check_input) - @support_usm_ndarray() def predict(self, X): if sklearn_check_version("1.0"): self._check_feature_names(X, reset=False) @@ -734,8 +730,6 @@ def dual_gap_(self, value): def dual_gap_(self): self._gap = None - score = support_usm_ndarray()(ElasticNet_original.score) - fit.__doc__ = ElasticNet_original.fit.__doc__ predict.__doc__ = ElasticNet_original.predict.__doc__ score.__doc__ = ElasticNet_original.score.__doc__ @@ -806,11 +800,9 @@ def __init__( selection=selection, ) - @support_usm_ndarray() def fit(self, X, y, sample_weight=None, check_input=True): return _fit(self, X, y, sample_weight, check_input) - @support_usm_ndarray() def predict(self, X): if sklearn_check_version("1.0"): self._check_feature_names(X, reset=False) @@ -851,8 +843,6 @@ def dual_gap_(self, value): def dual_gap_(self): self._gap = None - score = support_usm_ndarray()(Lasso_original.score) - fit.__doc__ = Lasso_original.fit.__doc__ predict.__doc__ = Lasso_original.predict.__doc__ score.__doc__ = Lasso_original.score.__doc__ diff --git a/daal4py/sklearn/linear_model/_linear.py b/daal4py/sklearn/linear_model/_linear.py index acf949d815..f5798491db 100644 --- a/daal4py/sklearn/linear_model/_linear.py +++ b/daal4py/sklearn/linear_model/_linear.py @@ -19,7 +19,6 @@ from 
sklearn.linear_model import LinearRegression as LinearRegression_original from sklearn.utils import check_array -from .._device_offload import support_usm_ndarray from .._utils import sklearn_check_version from ..utils.base import _daal_validate_data from ..utils.validation import _daal_check_array @@ -238,7 +237,6 @@ def __init__( positive=positive, ) - @support_usm_ndarray() def fit(self, X, y, sample_weight=None): if sklearn_check_version("1.0") and not sklearn_check_version("1.2"): self._normalize = _deprecate_normalize( diff --git a/daal4py/sklearn/linear_model/_ridge.py b/daal4py/sklearn/linear_model/_ridge.py index 7a49938013..adba4c925b 100644 --- a/daal4py/sklearn/linear_model/_ridge.py +++ b/daal4py/sklearn/linear_model/_ridge.py @@ -25,7 +25,6 @@ import daal4py -from .._device_offload import support_usm_ndarray from .._n_jobs_support import control_n_jobs from .._utils import ( PatchingConditionsChain, @@ -298,16 +297,12 @@ def __init__( self.solver = solver self.random_state = random_state - @support_usm_ndarray() def fit(self, X, y, sample_weight=None): return _fit_ridge(self, X, y, sample_weight=sample_weight) - @support_usm_ndarray() def predict(self, X): return _predict_ridge(self, X) - score = support_usm_ndarray()(Ridge_original.score) - fit.__doc__ = Ridge_original.fit.__doc__ predict.__doc__ = Ridge_original.predict.__doc__ score.__doc__ = Ridge_original.score.__doc__ diff --git a/daal4py/sklearn/linear_model/logistic_path.py b/daal4py/sklearn/linear_model/logistic_path.py index 8ab97ea9d2..519279effb 100755 --- a/daal4py/sklearn/linear_model/logistic_path.py +++ b/daal4py/sklearn/linear_model/logistic_path.py @@ -73,8 +73,6 @@ from sklearn.linear_model._logistic import _logistic_regression_path as lr_path_original from sklearn.preprocessing import LabelBinarizer, LabelEncoder -from .._device_offload import support_usm_ndarray - # Code adapted from sklearn.linear_model.logistic version 0.21 def __logistic_regression_path( @@ -880,7 +878,6 @@ def 
daal4py_predict(self, X, resultsToEvaluate): return LogisticRegression_original.predict_log_proba(self, X) -@support_usm_ndarray() def logistic_regression_path( X, y, @@ -997,7 +994,6 @@ def __init__( self.n_jobs = n_jobs self.l1_ratio = l1_ratio - @support_usm_ndarray() def fit(self, X, y, sample_weight=None): if sklearn_check_version("1.0"): self._check_feature_names(X, reset=True) @@ -1005,15 +1001,12 @@ def fit(self, X, y, sample_weight=None): self._validate_params() return daal4py_fit(self, X, y, sample_weight) - @support_usm_ndarray() def predict(self, X): return daal4py_predict(self, X, "computeClassLabels") - @support_usm_ndarray() def predict_log_proba(self, X): return daal4py_predict(self, X, "computeClassLogProbabilities") - @support_usm_ndarray() def predict_proba(self, X): return daal4py_predict(self, X, "computeClassProbabilities") diff --git a/daal4py/sklearn/manifold/_t_sne.py b/daal4py/sklearn/manifold/_t_sne.py index 1b06e7bdd9..614a576b3a 100755 --- a/daal4py/sklearn/manifold/_t_sne.py +++ b/daal4py/sklearn/manifold/_t_sne.py @@ -35,7 +35,6 @@ sklearn_check_version, ) -from .._device_offload import support_usm_ndarray from .._n_jobs_support import control_n_jobs from ..neighbors import NearestNeighbors @@ -47,11 +46,9 @@ class TSNE(BaseTSNE): if sklearn_check_version("1.2"): _parameter_constraints: dict = {**BaseTSNE._parameter_constraints} - @support_usm_ndarray() def fit_transform(self, X, y=None): return super().fit_transform(X, y) - @support_usm_ndarray() def fit(self, X, y=None): return super().fit(X, y) diff --git a/daal4py/sklearn/metrics/_pairwise.py b/daal4py/sklearn/metrics/_pairwise.py index 02a53458fa..432c0d60a1 100755 --- a/daal4py/sklearn/metrics/_pairwise.py +++ b/daal4py/sklearn/metrics/_pairwise.py @@ -45,7 +45,6 @@ def _precompute_metric_params(*args, **kwrds): import daal4py from daal4py.sklearn.utils.validation import _daal_check_array -from .._device_offload import support_usm_ndarray from .._utils import 
PatchingConditionsChain, getFPType, sklearn_check_version if sklearn_check_version("1.3"): @@ -66,7 +65,6 @@ def _daal4py_correlation_distance_dense(X): return res.correlationDistance -@support_usm_ndarray(freefunc=True) def pairwise_distances( X, Y=None, metric="euclidean", *, n_jobs=None, force_all_finite=True, **kwds ): diff --git a/daal4py/sklearn/metrics/_ranking.py b/daal4py/sklearn/metrics/_ranking.py index c541703148..10343efaee 100644 --- a/daal4py/sklearn/metrics/_ranking.py +++ b/daal4py/sklearn/metrics/_ranking.py @@ -29,7 +29,6 @@ import daal4py as d4p -from .._device_offload import support_usm_ndarray from .._utils import PatchingConditionsChain, get_patch_message, sklearn_check_version from ..utils.validation import _assert_all_finite @@ -119,7 +118,6 @@ def _daal_type_of_target(y): return result -@support_usm_ndarray(freefunc=True) def roc_auc_score( y_true, y_score, diff --git a/daal4py/sklearn/model_selection/_split.py b/daal4py/sklearn/model_selection/_split.py index 07c2de72c1..f914f278a0 100644 --- a/daal4py/sklearn/model_selection/_split.py +++ b/daal4py/sklearn/model_selection/_split.py @@ -25,7 +25,6 @@ import daal4py as d4p from daal4py.sklearn._utils import PatchingConditionsChain -from .._device_offload import support_usm_ndarray from .._utils import sklearn_check_version try: @@ -63,7 +62,6 @@ def get_dtypes(data): return None -@support_usm_ndarray(freefunc=True) def train_test_split(*arrays, **options): n_arrays = len(arrays) if n_arrays == 0: diff --git a/daal4py/sklearn/neighbors/_classification.py b/daal4py/sklearn/neighbors/_classification.py index 0a2fc14dca..391b403999 100644 --- a/daal4py/sklearn/neighbors/_classification.py +++ b/daal4py/sklearn/neighbors/_classification.py @@ -24,7 +24,6 @@ ) from sklearn.utils.validation import check_array -from .._device_offload import support_usm_ndarray from .._utils import PatchingConditionsChain, getFPType, sklearn_check_version from ._base import KNeighborsMixin, NeighborsBase, 
parse_auto_method, prediction_algorithm @@ -124,15 +123,12 @@ def __init__( weights if sklearn_check_version("1.0") else _check_weights(weights) ) - @support_usm_ndarray() def fit(self, X, y): return NeighborsBase._fit(self, X, y) - @support_usm_ndarray() def predict(self, X): return daal4py_classifier_predict(self, X, BaseKNeighborsClassifier.predict) - @support_usm_ndarray() def predict_proba(self, X): if sklearn_check_version("1.0"): self._check_feature_names(X, reset=False) diff --git a/daal4py/sklearn/neighbors/_regression.py b/daal4py/sklearn/neighbors/_regression.py index 2fd0ee7c94..c779875c81 100644 --- a/daal4py/sklearn/neighbors/_regression.py +++ b/daal4py/sklearn/neighbors/_regression.py @@ -19,7 +19,6 @@ from sklearn.base import RegressorMixin from sklearn.neighbors._regression import KNeighborsRegressor as BaseKNeighborsRegressor -from .._device_offload import support_usm_ndarray from .._utils import sklearn_check_version from ._base import KNeighborsMixin, NeighborsBase @@ -63,11 +62,9 @@ def __init__( def _more_tags(self): return BaseKNeighborsRegressor._more_tags(self) - @support_usm_ndarray() def fit(self, X, y): return NeighborsBase._fit(self, X, y) - @support_usm_ndarray() def predict(self, X): if sklearn_check_version("1.0"): self._check_feature_names(X, reset=False) diff --git a/daal4py/sklearn/neighbors/_unsupervised.py b/daal4py/sklearn/neighbors/_unsupervised.py index 0f4855dbea..5945ebf8f0 100644 --- a/daal4py/sklearn/neighbors/_unsupervised.py +++ b/daal4py/sklearn/neighbors/_unsupervised.py @@ -19,7 +19,6 @@ from sklearn.neighbors import NearestNeighbors as BaseNearestNeighbors from sklearn.utils.validation import _deprecate_positional_args -from .._device_offload import support_usm_ndarray from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin @@ -50,7 +49,6 @@ def __init__( n_jobs=n_jobs, ) - @support_usm_ndarray() def fit(self, X, y=None): return NeighborsBase._fit(self, X) diff --git 
a/daal4py/sklearn/tree/decision_tree.py b/daal4py/sklearn/tree/decision_tree.py index ad669ada4b..bdc81bf642 100644 --- a/daal4py/sklearn/tree/decision_tree.py +++ b/daal4py/sklearn/tree/decision_tree.py @@ -28,7 +28,6 @@ import daal4py as d4p -from .._device_offload import support_usm_ndarray from .._utils import getFPType, make2d @@ -141,7 +140,6 @@ def get_depth(self): ts = self._get_tree_state() return ts.max_depth - @support_usm_ndarray() def fit(self, X, y, sample_weight=None, pruning_set=None): """Build a decision tree classifier from the training set (X, y). @@ -283,14 +281,12 @@ def _daal4py_predict(self, X): res = alg.compute(X, self.daal_model_) return res.prediction.ravel() - @support_usm_ndarray() def predict(self, X, check_input=True): check_is_fitted(self, "daal_model_") X = self._validate_X_predict(X, check_input) y = self._daal4py_predict(X) return self.classes_.take(np.asarray(y, dtype=np.intp), axis=0) - @support_usm_ndarray() def predict_proba(self, X, check_input=True): check_is_fitted(self, "daal_model_") X = self._validate_X_predict(X, check_input) diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index 0936a4647a..173a62e9f6 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -17,7 +17,6 @@ import numbers from abc import ABC -import numpy as np from scipy import sparse as sp from sklearn.cluster import DBSCAN as sklearn_DBSCAN from sklearn.utils.validation import _check_sample_weight diff --git a/sklearnex/linear_model/coordinate_descent.py b/sklearnex/linear_model/coordinate_descent.py index 3fd6abd7b3..d2125a799a 100644 --- a/sklearnex/linear_model/coordinate_descent.py +++ b/sklearnex/linear_model/coordinate_descent.py @@ -15,3 +15,12 @@ # =============================================================================== from daal4py.sklearn.linear_model import ElasticNet, Lasso +from onedal._device_offload import support_usm_ndarray + +ElasticNet.fit = support_usm_ndarray()(ElasticNet.fit) 
+ElasticNet.predict = support_usm_ndarray()(ElasticNet.predict) +ElasticNet.score = support_usm_ndarray()(ElasticNet.score) + +Lasso.fit = support_usm_ndarray()(Lasso.fit) +Lasso.predict = support_usm_ndarray()(Lasso.predict) +Lasso.score = support_usm_ndarray()(Lasso.score) diff --git a/sklearnex/linear_model/logistic_regression.py b/sklearnex/linear_model/logistic_regression.py index 6e1883f87f..f981282826 100644 --- a/sklearnex/linear_model/logistic_regression.py +++ b/sklearnex/linear_model/logistic_regression.py @@ -21,18 +21,6 @@ from daal4py.sklearn.linear_model.logistic_path import ( LogisticRegression as LogisticRegression_daal4py, ) -from daal4py.sklearn.linear_model.logistic_path import daal4py_fit, daal4py_predict - - -class BaseLogisticRegression(ABC): - def _save_attributes(self): - assert hasattr(self, "_onedal_estimator") - self.classes_ = self._onedal_estimator.classes_ - self.coef_ = self._onedal_estimator.coef_ - self.intercept_ = self._onedal_estimator.intercept_ - self.n_features_in_ = self._onedal_estimator.n_features_in_ - self.n_iter_ = self._onedal_estimator.n_iter_ - if daal_check_version((2024, "P", 1)): import numpy as np @@ -44,6 +32,7 @@ def _save_attributes(self): from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version + from daal4py.sklearn.linear_model.logistic_path import daal4py_fit, daal4py_predict from onedal.linear_model import LogisticRegression as onedal_LogisticRegression from onedal.utils import _num_samples @@ -51,6 +40,15 @@ def _save_attributes(self): from .._utils import PatchingConditionsChain, get_patch_message from ..utils.validation import _assert_all_finite + class BaseLogisticRegression(ABC): + def _save_attributes(self): + assert hasattr(self, "_onedal_estimator") + self.classes_ = self._onedal_estimator.classes_ + self.coef_ = self._onedal_estimator.coef_ + self.intercept_ = self._onedal_estimator.intercept_ + self.n_features_in_ = 
self._onedal_estimator.n_features_in_ + self.n_iter_ = self._onedal_estimator.n_iter_ + @control_n_jobs( decorated_methods=[ "fit", diff --git a/sklearnex/linear_model/ridge.py b/sklearnex/linear_model/ridge.py index 59222deaa8..cea0e323c6 100644 --- a/sklearnex/linear_model/ridge.py +++ b/sklearnex/linear_model/ridge.py @@ -15,3 +15,10 @@ # =============================================================================== from daal4py.sklearn.linear_model import Ridge +from onedal._device_offload import support_usm_ndarray + +# TODO: +# implement GPU offloading via onedal4py backend. +Ridge.fit = support_usm_ndarray()(Ridge.fit) +Ridge.predict = support_usm_ndarray()(Ridge.predict) +Ridge.score = support_usm_ndarray()(Ridge.score) diff --git a/sklearnex/manifold/t_sne.py b/sklearnex/manifold/t_sne.py index d74c91ffee..6cbc2ed9c8 100755 --- a/sklearnex/manifold/t_sne.py +++ b/sklearnex/manifold/t_sne.py @@ -15,3 +15,7 @@ # =============================================================================== from daal4py.sklearn.manifold import TSNE +from onedal._device_offload import support_usm_ndarray + +TSNE.fit = support_usm_ndarray()(TSNE.fit) +TSNE.fit_transform = support_usm_ndarray()(TSNE.fit_transform) diff --git a/sklearnex/metrics/pairwise.py b/sklearnex/metrics/pairwise.py index e42ca6ae65..dd8aec9a2e 100755 --- a/sklearnex/metrics/pairwise.py +++ b/sklearnex/metrics/pairwise.py @@ -15,3 +15,6 @@ # =============================================================================== from daal4py.sklearn.metrics import pairwise_distances +from onedal._device_offload import support_usm_ndarray + +pairwise_distances = support_usm_ndarray(freefunc=True)(pairwise_distances) diff --git a/sklearnex/metrics/ranking.py b/sklearnex/metrics/ranking.py index 7b424b8419..c3bbd4777c 100755 --- a/sklearnex/metrics/ranking.py +++ b/sklearnex/metrics/ranking.py @@ -15,3 +15,6 @@ # =============================================================================== from 
daal4py.sklearn.metrics import roc_auc_score +from onedal._device_offload import support_usm_ndarray + +roc_auc_score = support_usm_ndarray(freefunc=True)(roc_auc_score) diff --git a/sklearnex/model_selection/split.py b/sklearnex/model_selection/split.py index 1a2adaa4c7..fbc6ff0b69 100755 --- a/sklearnex/model_selection/split.py +++ b/sklearnex/model_selection/split.py @@ -15,3 +15,6 @@ # =============================================================================== from daal4py.sklearn.model_selection import train_test_split +from onedal._device_offload import support_usm_ndarray + +train_test_split = support_usm_ndarray(freefunc=True)(train_test_split) From 18e659939c29b08668c09cbd5cd28cddd1e2de26 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 11 Jun 2024 13:29:00 -0700 Subject: [PATCH 08/82] minor fixes --- daal4py/sklearn/cluster/k_means.py | 2 ++ daal4py/sklearn/linear_model/_coordinate_descent.py | 2 ++ daal4py/sklearn/linear_model/_ridge.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/daal4py/sklearn/cluster/k_means.py b/daal4py/sklearn/cluster/k_means.py index 2ce44a26e6..68075efddf 100755 --- a/daal4py/sklearn/cluster/k_means.py +++ b/daal4py/sklearn/cluster/k_means.py @@ -592,6 +592,8 @@ def predict( def fit_predict(self, X, y=None, sample_weight=None): return super().fit_predict(X, y, sample_weight) + score = KMeans_original.score + fit.__doc__ = KMeans_original.fit.__doc__ predict.__doc__ = KMeans_original.predict.__doc__ fit_predict.__doc__ = KMeans_original.fit_predict.__doc__ diff --git a/daal4py/sklearn/linear_model/_coordinate_descent.py b/daal4py/sklearn/linear_model/_coordinate_descent.py index 4a2eceb168..335f5227ba 100755 --- a/daal4py/sklearn/linear_model/_coordinate_descent.py +++ b/daal4py/sklearn/linear_model/_coordinate_descent.py @@ -843,6 +843,8 @@ def dual_gap_(self, value): def dual_gap_(self): self._gap = None + score = Lasso_original.score + fit.__doc__ = Lasso_original.fit.__doc__ predict.__doc__ = 
Lasso_original.predict.__doc__ score.__doc__ = Lasso_original.score.__doc__ diff --git a/daal4py/sklearn/linear_model/_ridge.py b/daal4py/sklearn/linear_model/_ridge.py index adba4c925b..60552e3d3b 100644 --- a/daal4py/sklearn/linear_model/_ridge.py +++ b/daal4py/sklearn/linear_model/_ridge.py @@ -303,6 +303,8 @@ def fit(self, X, y, sample_weight=None): def predict(self, X): return _predict_ridge(self, X) + score = Ridge_original.score + fit.__doc__ = Ridge_original.fit.__doc__ predict.__doc__ = Ridge_original.predict.__doc__ score.__doc__ = Ridge_original.score.__doc__ From 43180645eefd5c76a28ecfa1ea1887fc3bb5c72f Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 11 Jun 2024 13:44:31 -0700 Subject: [PATCH 09/82] minor fix for daal4py/sklearn/linear_model/_coordinate_descent.py --- daal4py/sklearn/linear_model/_coordinate_descent.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/daal4py/sklearn/linear_model/_coordinate_descent.py b/daal4py/sklearn/linear_model/_coordinate_descent.py index 335f5227ba..a410a5bbfa 100755 --- a/daal4py/sklearn/linear_model/_coordinate_descent.py +++ b/daal4py/sklearn/linear_model/_coordinate_descent.py @@ -730,6 +730,8 @@ def dual_gap_(self, value): def dual_gap_(self): self._gap = None + score = ElasticNet_original.score + fit.__doc__ = ElasticNet_original.fit.__doc__ predict.__doc__ = ElasticNet_original.predict.__doc__ score.__doc__ = ElasticNet_original.score.__doc__ From 864c1a1ac79356073ee953e8fcf3982fe4e309d1 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 11 Jun 2024 14:05:28 -0700 Subject: [PATCH 10/82] minor fix for daal4py/sklearn/linear_model/_linear.py --- daal4py/sklearn/linear_model/_linear.py | 1 - 1 file changed, 1 deletion(-) diff --git a/daal4py/sklearn/linear_model/_linear.py b/daal4py/sklearn/linear_model/_linear.py index f5798491db..2b82b6f8a6 100644 --- a/daal4py/sklearn/linear_model/_linear.py +++ b/daal4py/sklearn/linear_model/_linear.py @@ -265,7 +265,6 @@ def fit(self, X, y, 
sample_weight=None): return super(LinearRegression, self).fit(X, y=y, sample_weight=sample_weight) return _fit_linear(self, X, y, sample_weight=sample_weight) - @support_usm_ndarray() def predict(self, X): return _predict_linear(self, X) From 4142bf1a46adf2fd749abac4b9abcb7d95e817c9 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 11 Jun 2024 15:02:32 -0700 Subject: [PATCH 11/82] fix for sklearnex/_device_offload.py --- sklearnex/_device_offload.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index ce34316829..d7bb95698d 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -18,12 +18,15 @@ from functools import wraps from onedal._device_offload import ( - _convert_to_dpnp, _copy_to_usm, _get_global_queue, _transfer_to_host, + dpnp_available, ) +if dpnp_available: + from onedal._device_offload import _convert_to_dpnp + try: from dpctl.tensor import usm_ndarray From 25d87bcce2f35c136389e069f9a59f90409d2102 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 11 Jun 2024 15:28:09 -0700 Subject: [PATCH 12/82] fix for onedal._config --- onedal/_config.py | 4 ++-- sklearnex/tests/test_config.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/onedal/_config.py b/onedal/_config.py index 19c7b9e16b..4992dfd8d2 100644 --- a/onedal/_config.py +++ b/onedal/_config.py @@ -28,8 +28,8 @@ def _get_onedal_threadlocal_config(): if not hasattr(_threadlocal, "global_config"): - _threadlocal.d4p_global_config = _default_global_config.copy() - return _threadlocal.d4p_global_config + _threadlocal.global_config = _default_global_config.copy() + return _threadlocal.global_config # TODO: diff --git a/sklearnex/tests/test_config.py b/sklearnex/tests/test_config.py index f2cad35f98..eb714a806f 100644 --- a/sklearnex/tests/test_config.py +++ b/sklearnex/tests/test_config.py @@ -17,6 +17,7 @@ import sklearn import onedal +import onedal._config import 
sklearnex @@ -34,7 +35,7 @@ def test_set_config_works(): ) config = sklearnex.get_config() - onedal_config = onedal._get_config() + onedal_config = onedal._config._get_config() assert config["target_offload"] == "cpu:0" assert config["allow_fallback_to_host"] assert config["assume_finite"] From 778b88d5ba6ab0c83eaf2139d98cc1a02bca1d21 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 11 Jun 2024 15:57:03 -0700 Subject: [PATCH 13/82] wrapping daal4py.sklearne Kmeans with onedal4py's support_usm_ndarray --- sklearnex/cluster/k_means.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sklearnex/cluster/k_means.py b/sklearnex/cluster/k_means.py index 41171730b6..3c7689afd5 100755 --- a/sklearnex/cluster/k_means.py +++ b/sklearnex/cluster/k_means.py @@ -15,3 +15,9 @@ # =============================================================================== from daal4py.sklearn.cluster import KMeans +from onedal._device_offload import support_usm_ndarray + +KMeans.fit = support_usm_ndarray()(KMeans.fit) +KMeans.fit_predict = support_usm_ndarray()(KMeans.fit_predict) +KMeans.predict = support_usm_ndarray()(KMeans.predict) +KMeans.score = support_usm_ndarray()(KMeans.score) From ecd731d1feabbff0f23d9a3e6755d18e0cfe7641 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 12 Jun 2024 04:45:58 -0700 Subject: [PATCH 14/82] ENH: functional support for Array API --- onedal/_device_offload.py | 25 ++++++++++++++++++++----- sklearnex/_device_offload.py | 10 ++++++---- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index d553e8d005..0323153581 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -96,6 +96,7 @@ def _transfer_to_host(queue, *data): host_data = [] for item in data: usm_iface = getattr(item, "__sycl_usm_array_interface__", None) + array_api = getattr(item, "__array_namespace__", None) if usm_iface is not None: if not dpctl_available: raise RuntimeError( @@ -122,6 +123,11 
@@ def _transfer_to_host(queue, *data): order=order, ) has_usm_data = True + elif array_api is not None: + # TODO: + # get info about the device, for backward conversions. + item = np.from_dlpack(item).copy() + has_host_data = True else: has_host_data = True @@ -155,11 +161,14 @@ def _get_host_inputs(*args, **kwargs): return q, hostargs, hostkwargs -def _extract_usm_iface(*args, **kwargs): +def _extract_array_attr(*args, **kwargs): allargs = (*args, *kwargs.values()) if len(allargs) == 0: - return None - return getattr(allargs[0], "__sycl_usm_array_interface__", None) + return None, None, None + usm_iface = getattr(allargs[0], "__sycl_usm_array_interface__", None) + array_api = getattr(allargs[0], "__array_namespace__", None) + dlpack_device = getattr(allargs[0], "__dlpack_device__", None) + return usm_iface, array_api, dlpack_device def _run_on_device(func, obj=None, *args, **kwargs): @@ -180,10 +189,12 @@ def _convert_to_dpnp(array): return array -def support_usm_ndarray(freefunc=False): +# TODO: +# rename support_array_api +def support_array_api(freefunc=False): def decorator(func): def wrapper_impl(obj, *args, **kwargs): - usm_iface = _extract_usm_iface(*args, **kwargs) + usm_iface, array_api, dlpack_device = _extract_array_attr(*args, **kwargs) data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) if "queue" in hostkwargs: hostkwargs["queue"] = data_queue @@ -192,6 +203,10 @@ def wrapper_impl(obj, *args, **kwargs): result = _copy_to_usm(data_queue, result) if dpnp_available and len(args) > 0 and isinstance(args[0], dpnp.ndarray): result = _convert_to_dpnp(result) + elif array_api: + # TODO: + # avoid for numpy + result = array_api.from_dlpack(result, copy=True, device=dlpack_device) return result if freefunc: diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index d7bb95698d..e1a1682e43 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -19,6 +19,7 @@ from onedal._device_offload import ( 
_copy_to_usm, + _extract_array_attr, _get_global_queue, _transfer_to_host, dpnp_available, @@ -97,15 +98,16 @@ def wrap_output_data(func): @wraps(func) def wrapper(self, *args, **kwargs): data = (*args, *kwargs.values()) - if len(data) == 0: - usm_iface = None - else: - usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) + usm_iface, array_api, dlpack_device = _extract_array_attr(*args, **kwargs) result = func(self, *args, **kwargs) if usm_iface is not None: result = _copy_to_usm(usm_iface["syclobj"], result) if dpnp_available and isinstance(data[0], dpnp.ndarray): result = _convert_to_dpnp(result) + elif array_api: + # TODO: + # avoid for numpy + result = array_api.from_dlpack(result, copy=True, device=dlpack_device) return result return wrapper From f18070f9abc2ff45ee5b5c0775616e4866654824 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 14 Jun 2024 01:59:38 -0700 Subject: [PATCH 15/82] minor update for support_usm_ndarray decorator --- onedal/_device_offload.py | 4 ++-- sklearnex/cluster/k_means.py | 8 ++++---- sklearnex/covariance/incremental_covariance.py | 1 - sklearnex/linear_model/coordinate_descent.py | 12 ++++++------ sklearnex/linear_model/ridge.py | 6 +++--- sklearnex/manifold/t_sne.py | 4 ++-- sklearnex/metrics/pairwise.py | 4 +++- sklearnex/metrics/ranking.py | 2 +- sklearnex/model_selection/split.py | 2 +- 9 files changed, 22 insertions(+), 21 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 0323153581..a2d5f3c5dc 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -191,12 +191,12 @@ def _convert_to_dpnp(array): # TODO: # rename support_array_api -def support_array_api(freefunc=False): +def support_array_api(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): usm_iface, array_api, dlpack_device = _extract_array_attr(*args, **kwargs) data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) - if "queue" in hostkwargs: 
+ if queue_param: hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) if usm_iface is not None and hasattr(result, "__array_interface__"): diff --git a/sklearnex/cluster/k_means.py b/sklearnex/cluster/k_means.py index 3c7689afd5..eac4c22bce 100755 --- a/sklearnex/cluster/k_means.py +++ b/sklearnex/cluster/k_means.py @@ -17,7 +17,7 @@ from daal4py.sklearn.cluster import KMeans from onedal._device_offload import support_usm_ndarray -KMeans.fit = support_usm_ndarray()(KMeans.fit) -KMeans.fit_predict = support_usm_ndarray()(KMeans.fit_predict) -KMeans.predict = support_usm_ndarray()(KMeans.predict) -KMeans.score = support_usm_ndarray()(KMeans.score) +KMeans.fit = support_usm_ndarray(queue_param=False)(KMeans.fit) +KMeans.fit_predict = support_usm_ndarray(queue_param=False)(KMeans.fit_predict) +KMeans.predict = support_usm_ndarray(queue_param=False)(KMeans.predict) +KMeans.score = support_usm_ndarray(queue_param=False)(KMeans.score) diff --git a/sklearnex/covariance/incremental_covariance.py b/sklearnex/covariance/incremental_covariance.py index 63b1316fc9..75f1f8bd7e 100644 --- a/sklearnex/covariance/incremental_covariance.py +++ b/sklearnex/covariance/incremental_covariance.py @@ -25,7 +25,6 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import daal_check_version, sklearn_check_version -from onedal._device_offload import support_usm_ndarray from onedal.covariance import ( IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance, ) diff --git a/sklearnex/linear_model/coordinate_descent.py b/sklearnex/linear_model/coordinate_descent.py index d2125a799a..e7a4b5861e 100644 --- a/sklearnex/linear_model/coordinate_descent.py +++ b/sklearnex/linear_model/coordinate_descent.py @@ -17,10 +17,10 @@ from daal4py.sklearn.linear_model import ElasticNet, Lasso from onedal._device_offload import support_usm_ndarray -ElasticNet.fit = support_usm_ndarray()(ElasticNet.fit) 
-ElasticNet.predict = support_usm_ndarray()(ElasticNet.predict) -ElasticNet.score = support_usm_ndarray()(ElasticNet.score) +ElasticNet.fit = support_usm_ndarray(queue_param=False)(ElasticNet.fit) +ElasticNet.predict = support_usm_ndarray(queue_param=False)(ElasticNet.predict) +ElasticNet.score = support_usm_ndarray(queue_param=False)(ElasticNet.score) -Lasso.fit = support_usm_ndarray()(Lasso.fit) -Lasso.predict = support_usm_ndarray()(Lasso.predict) -Lasso.score = support_usm_ndarray()(Lasso.score) +Lasso.fit = support_usm_ndarray(queue_param=False)(Lasso.fit) +Lasso.predict = support_usm_ndarray(queue_param=False)(Lasso.predict) +Lasso.score = support_usm_ndarray(queue_param=False)(Lasso.score) diff --git a/sklearnex/linear_model/ridge.py b/sklearnex/linear_model/ridge.py index cea0e323c6..096983564e 100644 --- a/sklearnex/linear_model/ridge.py +++ b/sklearnex/linear_model/ridge.py @@ -19,6 +19,6 @@ # TODO: # implement GPU offloading via onedal4py backend. -Ridge.fit = support_usm_ndarray()(Ridge.fit) -Ridge.predict = support_usm_ndarray()(Ridge.predict) -Ridge.score = support_usm_ndarray()(Ridge.score) +Ridge.fit = support_usm_ndarray(queue_param=False)(Ridge.fit) +Ridge.predict = support_usm_ndarray(queue_param=False)(Ridge.predict) +Ridge.score = support_usm_ndarray(queue_param=False)(Ridge.score) diff --git a/sklearnex/manifold/t_sne.py b/sklearnex/manifold/t_sne.py index 6cbc2ed9c8..3e5c99f43f 100755 --- a/sklearnex/manifold/t_sne.py +++ b/sklearnex/manifold/t_sne.py @@ -17,5 +17,5 @@ from daal4py.sklearn.manifold import TSNE from onedal._device_offload import support_usm_ndarray -TSNE.fit = support_usm_ndarray()(TSNE.fit) -TSNE.fit_transform = support_usm_ndarray()(TSNE.fit_transform) +TSNE.fit = support_usm_ndarray(queue_param=False)(TSNE.fit) +TSNE.fit_transform = support_usm_ndarray(queue_param=False)(TSNE.fit_transform) diff --git a/sklearnex/metrics/pairwise.py b/sklearnex/metrics/pairwise.py index dd8aec9a2e..041e3b5b8d 100755 --- 
a/sklearnex/metrics/pairwise.py +++ b/sklearnex/metrics/pairwise.py @@ -17,4 +17,6 @@ from daal4py.sklearn.metrics import pairwise_distances from onedal._device_offload import support_usm_ndarray -pairwise_distances = support_usm_ndarray(freefunc=True)(pairwise_distances) +pairwise_distances = support_usm_ndarray(freefunc=True, queue_param=False)( + pairwise_distances +) diff --git a/sklearnex/metrics/ranking.py b/sklearnex/metrics/ranking.py index c3bbd4777c..b282bb3f94 100755 --- a/sklearnex/metrics/ranking.py +++ b/sklearnex/metrics/ranking.py @@ -17,4 +17,4 @@ from daal4py.sklearn.metrics import roc_auc_score from onedal._device_offload import support_usm_ndarray -roc_auc_score = support_usm_ndarray(freefunc=True)(roc_auc_score) +roc_auc_score = support_usm_ndarray(freefunc=True, queue_param=False)(roc_auc_score) diff --git a/sklearnex/model_selection/split.py b/sklearnex/model_selection/split.py index fbc6ff0b69..db20ca17b8 100755 --- a/sklearnex/model_selection/split.py +++ b/sklearnex/model_selection/split.py @@ -17,4 +17,4 @@ from daal4py.sklearn.model_selection import train_test_split from onedal._device_offload import support_usm_ndarray -train_test_split = support_usm_ndarray(freefunc=True)(train_test_split) +train_test_split = support_usm_ndarray(freefunc=True, queue_param=False)(train_test_split) From df682e60fda5cee5e2b102fe980cf7c6193ec078 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 14 Jun 2024 13:13:51 -0700 Subject: [PATCH 16/82] update sklearnex/dispatcher.py --- sklearnex/dispatcher.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/sklearnex/dispatcher.py b/sklearnex/dispatcher.py index 52b60076fc..78d8c7282f 100644 --- a/sklearnex/dispatcher.py +++ b/sklearnex/dispatcher.py @@ -111,6 +111,9 @@ def get_patch_map_core(preview=False): import sklearn.decomposition as decomposition_module import sklearn.ensemble as ensemble_module import sklearn.linear_model as linear_model_module + import 
sklearn.manifold as manifold_module + import sklearn.metrics as metrics_module + import sklearn.model_selection as model_selection_module import sklearn.neighbors as neighbors_module import sklearn.svm as svm_module @@ -143,6 +146,10 @@ def get_patch_map_core(preview=False): ) from .linear_model import LinearRegression as LinearRegression_sklearnex from .linear_model import LogisticRegression as LogisticRegression_sklearnex + from .manifold import TSNE as TSNE_sklearnex + from .metrics import pairwise_distances as pairwise_distances_sklearnex + from .metrics import roc_auc_score as roc_auc_score_sklearnex + from .model_selection import train_test_split as train_test_split_sklearnex from .neighbors import KNeighborsClassifier as KNeighborsClassifier_sklearnex from .neighbors import KNeighborsRegressor as KNeighborsRegressor_sklearnex from .neighbors import LocalOutlierFactor as LocalOutlierFactor_sklearnex @@ -201,6 +208,40 @@ def get_patch_map_core(preview=False): ] mapping["logisticregression"] = mapping["log_reg"] + # manifold + mapping.pop("tsne") + mapping["tsne"] = [ + [ + (manifold_module, "TSNE", TSNE_sklearnex), + None, + ] + ] + + # metrics + mapping.pop("distances") + mapping["distances"] = [ + [ + (metrics_module, "pairwise_distances", pairwise_distances_sklearnex), + None, + ] + ] + mapping.pop("roc_auc_score") + mapping["roc_auc_score"] = [ + [ + (metrics_module, "roc_auc_score", roc_auc_score_sklearnex), + None, + ] + ] + + # model_selection + mapping.pop("train_test_split") + mapping["train_test_split"] = [ + [ + (model_selection_module, "train_test_split", train_test_split_sklearnex), + None, + ] + ] + # kNN mapping.pop("knn_classifier") mapping.pop("kneighborsclassifier") From 23c84f42e1ab4894c64235f8e432876aab8b8573 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 14 Jun 2024 14:51:48 -0700 Subject: [PATCH 17/82] fixed dispatcher --- sklearnex/dispatcher.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/sklearnex/dispatcher.py b/sklearnex/dispatcher.py index 78d8c7282f..3fa5a7199a 100644 --- a/sklearnex/dispatcher.py +++ b/sklearnex/dispatcher.py @@ -219,13 +219,14 @@ def get_patch_map_core(preview=False): # metrics mapping.pop("distances") + mapping.pop("roc_auc_score") mapping["distances"] = [ [ (metrics_module, "pairwise_distances", pairwise_distances_sklearnex), None, ] ] - mapping.pop("roc_auc_score") + mapping["pairwise_distances"] = mapping["distances"] mapping["roc_auc_score"] = [ [ (metrics_module, "roc_auc_score", roc_auc_score_sklearnex), From 0483c5cde2857f63a60c3960ab111afee5398c79 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 14 Jun 2024 16:46:53 -0700 Subject: [PATCH 18/82] fixed decorator name --- .../spmd/basic_statistics/basic_statistics.py | 4 ++-- onedal/spmd/cluster/kmeans.py | 8 ++++---- onedal/spmd/covariance/covariance.py | 4 ++-- onedal/spmd/decomposition/pca.py | 4 ++-- onedal/spmd/linear_model/linear_model.py | 6 +++--- .../spmd/linear_model/logistic_regression.py | 10 +++++----- onedal/spmd/neighbors/neighbors.py | 20 +++++++++---------- sklearnex/cluster/k_means.py | 10 +++++----- sklearnex/linear_model/coordinate_descent.py | 14 ++++++------- sklearnex/linear_model/ridge.py | 8 ++++---- sklearnex/manifold/t_sne.py | 6 +++--- sklearnex/metrics/pairwise.py | 4 ++-- sklearnex/metrics/ranking.py | 4 ++-- sklearnex/model_selection/split.py | 4 ++-- 14 files changed, 53 insertions(+), 53 deletions(-) diff --git a/onedal/spmd/basic_statistics/basic_statistics.py b/onedal/spmd/basic_statistics/basic_statistics.py index 27e37b1abc..dffbc2bb1c 100644 --- a/onedal/spmd/basic_statistics/basic_statistics.py +++ b/onedal/spmd/basic_statistics/basic_statistics.py @@ -16,11 +16,11 @@ from onedal.basic_statistics import BasicStatistics as BasicStatistics_Batch -from ..._device_offload import support_usm_ndarray +from ..._device_offload import support_array_api from .._base import BaseEstimatorSPMD class BasicStatistics(BaseEstimatorSPMD, 
BasicStatistics_Batch): - @support_usm_ndarray() + @support_array_api() def compute(self, data, weights=None, queue=None): return super().compute(data, weights=weights, queue=queue) diff --git a/onedal/spmd/cluster/kmeans.py b/onedal/spmd/cluster/kmeans.py index f7546eeba2..5eadaded82 100644 --- a/onedal/spmd/cluster/kmeans.py +++ b/onedal/spmd/cluster/kmeans.py @@ -18,7 +18,7 @@ from onedal.cluster import KMeansInit as KMeansInit_Batch from onedal.spmd.basic_statistics import BasicStatistics -from ..._device_offload import support_usm_ndarray +from ..._device_offload import support_array_api from .._base import BaseEstimatorSPMD @@ -37,15 +37,15 @@ def _get_basic_statistics_backend(self, result_options): def _get_kmeans_init(self, cluster_count, seed, algorithm): return KMeansInit(cluster_count=cluster_count, seed=seed, algorithm=algorithm) - @support_usm_ndarray() + @support_array_api() def fit(self, X, y=None, queue=None): return super().fit(X, queue=queue) - @support_usm_ndarray() + @support_array_api() def predict(self, X, queue=None): return super().predict(X, queue=queue) - @support_usm_ndarray() + @support_array_api() def fit_predict(self, X, y=None, queue=None): return super().fit_predict(X, queue=queue) diff --git a/onedal/spmd/covariance/covariance.py b/onedal/spmd/covariance/covariance.py index 4882b7c63c..a1509c1bfa 100644 --- a/onedal/spmd/covariance/covariance.py +++ b/onedal/spmd/covariance/covariance.py @@ -16,11 +16,11 @@ from onedal.covariance import EmpiricalCovariance as EmpiricalCovariance_Batch -from ..._device_offload import support_usm_ndarray +from ..._device_offload import support_array_api from .._base import BaseEstimatorSPMD class EmpiricalCovariance(BaseEstimatorSPMD, EmpiricalCovariance_Batch): - @support_usm_ndarray() + @support_array_api() def fit(self, X, y=None, queue=None): return super().fit(X, queue=queue) diff --git a/onedal/spmd/decomposition/pca.py b/onedal/spmd/decomposition/pca.py index e4a8753ba8..6b46fdc087 100644 --- 
a/onedal/spmd/decomposition/pca.py +++ b/onedal/spmd/decomposition/pca.py @@ -16,11 +16,11 @@ from onedal.decomposition.pca import PCA as PCABatch -from ..._device_offload import support_usm_ndarray +from ..._device_offload import support_array_api from .._base import BaseEstimatorSPMD class PCA(BaseEstimatorSPMD, PCABatch): - @support_usm_ndarray() + @support_array_api() def fit(self, X, y=None, queue=None): return super().fit(X, queue=queue) diff --git a/onedal/spmd/linear_model/linear_model.py b/onedal/spmd/linear_model/linear_model.py index 3c9046d742..93848a6d41 100644 --- a/onedal/spmd/linear_model/linear_model.py +++ b/onedal/spmd/linear_model/linear_model.py @@ -16,15 +16,15 @@ from onedal.linear_model import LinearRegression as LinearRegression_Batch -from ..._device_offload import support_usm_ndarray +from ..._device_offload import support_array_api from .._base import BaseEstimatorSPMD class LinearRegression(BaseEstimatorSPMD, LinearRegression_Batch): - @support_usm_ndarray() + @support_array_api() def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) - @support_usm_ndarray() + @support_array_api() def predict(self, X, queue=None): return super().predict(X, queue=queue) diff --git a/onedal/spmd/linear_model/logistic_regression.py b/onedal/spmd/linear_model/logistic_regression.py index 867da434b6..0e052b3598 100644 --- a/onedal/spmd/linear_model/logistic_regression.py +++ b/onedal/spmd/linear_model/logistic_regression.py @@ -16,23 +16,23 @@ from onedal.linear_model import LogisticRegression as LogisticRegression_Batch -from ..._device_offload import support_usm_ndarray +from ..._device_offload import support_array_api from .._base import BaseEstimatorSPMD class LogisticRegression(BaseEstimatorSPMD, LogisticRegression_Batch): - @support_usm_ndarray() + @support_array_api() def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) - @support_usm_ndarray() + @support_array_api() def predict(self, X, queue=None): return 
super().predict(X, queue=queue) - @support_usm_ndarray() + @support_array_api() def predict_proba(self, X, queue=None): return super().predict_proba(X, queue=queue) - @support_usm_ndarray() + @support_array_api() def predict_log_proba(self, X, queue=None): return super().predict_log_proba(X, queue=queue) diff --git a/onedal/spmd/neighbors/neighbors.py b/onedal/spmd/neighbors/neighbors.py index 1c90cef380..878e6dadeb 100644 --- a/onedal/spmd/neighbors/neighbors.py +++ b/onedal/spmd/neighbors/neighbors.py @@ -17,30 +17,30 @@ from onedal.neighbors import KNeighborsClassifier as KNeighborsClassifier_Batch from onedal.neighbors import KNeighborsRegressor as KNeighborsRegressor_Batch -from ..._device_offload import support_usm_ndarray +from ..._device_offload import support_array_api from .._base import BaseEstimatorSPMD class KNeighborsClassifier(BaseEstimatorSPMD, KNeighborsClassifier_Batch): - @support_usm_ndarray() + @support_array_api() def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) - @support_usm_ndarray() + @support_array_api() def predict(self, X, queue=None): return super().predict(X, queue=queue) - @support_usm_ndarray() + @support_array_api() def predict_proba(self, X, queue=None): raise NotImplementedError("predict_proba not supported in distributed mode.") - @support_usm_ndarray() + @support_array_api() def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): return super().kneighbors(X, n_neighbors, return_distance, queue=queue) class KNeighborsRegressor(BaseEstimatorSPMD, KNeighborsRegressor_Batch): - @support_usm_ndarray() + @support_array_api() def fit(self, X, y, queue=None): if queue is not None and queue.sycl_device.is_gpu: return super()._fit(X, y, queue=queue) @@ -50,11 +50,11 @@ def fit(self, X, y, queue=None): "CPU. Consider running on it on GPU." 
) - @support_usm_ndarray() + @support_array_api() def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): return super().kneighbors(X, n_neighbors, return_distance, queue=queue) - @support_usm_ndarray() + @support_array_api() def predict(self, X, queue=None): return self._predict_gpu(X, queue=queue) @@ -66,10 +66,10 @@ def _get_onedal_params(self, X, y=None): class NearestNeighbors(BaseEstimatorSPMD): - @support_usm_ndarray() + @support_array_api() def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) - @support_usm_ndarray() + @support_array_api() def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): return super().kneighbors(X, n_neighbors, return_distance, queue=queue) diff --git a/sklearnex/cluster/k_means.py b/sklearnex/cluster/k_means.py index eac4c22bce..3f455e8cbf 100755 --- a/sklearnex/cluster/k_means.py +++ b/sklearnex/cluster/k_means.py @@ -15,9 +15,9 @@ # =============================================================================== from daal4py.sklearn.cluster import KMeans -from onedal._device_offload import support_usm_ndarray +from onedal._device_offload import support_array_api -KMeans.fit = support_usm_ndarray(queue_param=False)(KMeans.fit) -KMeans.fit_predict = support_usm_ndarray(queue_param=False)(KMeans.fit_predict) -KMeans.predict = support_usm_ndarray(queue_param=False)(KMeans.predict) -KMeans.score = support_usm_ndarray(queue_param=False)(KMeans.score) +KMeans.fit = support_array_api(queue_param=False)(KMeans.fit) +KMeans.fit_predict = support_array_api(queue_param=False)(KMeans.fit_predict) +KMeans.predict = support_array_api(queue_param=False)(KMeans.predict) +KMeans.score = support_array_api(queue_param=False)(KMeans.score) diff --git a/sklearnex/linear_model/coordinate_descent.py b/sklearnex/linear_model/coordinate_descent.py index e7a4b5861e..f6d03da3af 100644 --- a/sklearnex/linear_model/coordinate_descent.py +++ b/sklearnex/linear_model/coordinate_descent.py @@ 
-15,12 +15,12 @@ # =============================================================================== from daal4py.sklearn.linear_model import ElasticNet, Lasso -from onedal._device_offload import support_usm_ndarray +from onedal._device_offload import support_array_api -ElasticNet.fit = support_usm_ndarray(queue_param=False)(ElasticNet.fit) -ElasticNet.predict = support_usm_ndarray(queue_param=False)(ElasticNet.predict) -ElasticNet.score = support_usm_ndarray(queue_param=False)(ElasticNet.score) +ElasticNet.fit = support_array_api(queue_param=False)(ElasticNet.fit) +ElasticNet.predict = support_array_api(queue_param=False)(ElasticNet.predict) +ElasticNet.score = support_array_api(queue_param=False)(ElasticNet.score) -Lasso.fit = support_usm_ndarray(queue_param=False)(Lasso.fit) -Lasso.predict = support_usm_ndarray(queue_param=False)(Lasso.predict) -Lasso.score = support_usm_ndarray(queue_param=False)(Lasso.score) +Lasso.fit = support_array_api(queue_param=False)(Lasso.fit) +Lasso.predict = support_array_api(queue_param=False)(Lasso.predict) +Lasso.score = support_array_api(queue_param=False)(Lasso.score) diff --git a/sklearnex/linear_model/ridge.py b/sklearnex/linear_model/ridge.py index 096983564e..cbe217ca1b 100644 --- a/sklearnex/linear_model/ridge.py +++ b/sklearnex/linear_model/ridge.py @@ -15,10 +15,10 @@ # =============================================================================== from daal4py.sklearn.linear_model import Ridge -from onedal._device_offload import support_usm_ndarray +from onedal._device_offload import support_array_api # TODO: # implement GPU offloading via onedal4py backend. 
-Ridge.fit = support_usm_ndarray(queue_param=False)(Ridge.fit) -Ridge.predict = support_usm_ndarray(queue_param=False)(Ridge.predict) -Ridge.score = support_usm_ndarray(queue_param=False)(Ridge.score) +Ridge.fit = support_array_api(queue_param=False)(Ridge.fit) +Ridge.predict = support_array_api(queue_param=False)(Ridge.predict) +Ridge.score = support_array_api(queue_param=False)(Ridge.score) diff --git a/sklearnex/manifold/t_sne.py b/sklearnex/manifold/t_sne.py index 3e5c99f43f..caced17934 100755 --- a/sklearnex/manifold/t_sne.py +++ b/sklearnex/manifold/t_sne.py @@ -15,7 +15,7 @@ # =============================================================================== from daal4py.sklearn.manifold import TSNE -from onedal._device_offload import support_usm_ndarray +from onedal._device_offload import support_array_api -TSNE.fit = support_usm_ndarray(queue_param=False)(TSNE.fit) -TSNE.fit_transform = support_usm_ndarray(queue_param=False)(TSNE.fit_transform) +TSNE.fit = support_array_api(queue_param=False)(TSNE.fit) +TSNE.fit_transform = support_array_api(queue_param=False)(TSNE.fit_transform) diff --git a/sklearnex/metrics/pairwise.py b/sklearnex/metrics/pairwise.py index 041e3b5b8d..2997e301c4 100755 --- a/sklearnex/metrics/pairwise.py +++ b/sklearnex/metrics/pairwise.py @@ -15,8 +15,8 @@ # =============================================================================== from daal4py.sklearn.metrics import pairwise_distances -from onedal._device_offload import support_usm_ndarray +from onedal._device_offload import support_array_api -pairwise_distances = support_usm_ndarray(freefunc=True, queue_param=False)( +pairwise_distances = support_array_api(freefunc=True, queue_param=False)( pairwise_distances ) diff --git a/sklearnex/metrics/ranking.py b/sklearnex/metrics/ranking.py index b282bb3f94..624be59f1b 100755 --- a/sklearnex/metrics/ranking.py +++ b/sklearnex/metrics/ranking.py @@ -15,6 +15,6 @@ # 
=============================================================================== from daal4py.sklearn.metrics import roc_auc_score -from onedal._device_offload import support_usm_ndarray +from onedal._device_offload import support_array_api -roc_auc_score = support_usm_ndarray(freefunc=True, queue_param=False)(roc_auc_score) +roc_auc_score = support_array_api(freefunc=True, queue_param=False)(roc_auc_score) diff --git a/sklearnex/model_selection/split.py b/sklearnex/model_selection/split.py index db20ca17b8..de70768a06 100755 --- a/sklearnex/model_selection/split.py +++ b/sklearnex/model_selection/split.py @@ -15,6 +15,6 @@ # =============================================================================== from daal4py.sklearn.model_selection import train_test_split -from onedal._device_offload import support_usm_ndarray +from onedal._device_offload import support_array_api -train_test_split = support_usm_ndarray(freefunc=True, queue_param=False)(train_test_split) +train_test_split = support_array_api(freefunc=True, queue_param=False)(train_test_split) From 459e6387fe9a1a74f0467e94ad5f8fc18c68cce7 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Sat, 15 Jun 2024 07:40:29 -0700 Subject: [PATCH 19/82] minor update for onedal/_device_offload.py --- onedal/_device_offload.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index a2d5f3c5dc..0c5ffc37b0 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -166,7 +166,10 @@ def _extract_array_attr(*args, **kwargs): if len(allargs) == 0: return None, None, None usm_iface = getattr(allargs[0], "__sycl_usm_array_interface__", None) - array_api = getattr(allargs[0], "__array_namespace__", None) + array_api = None + if hasattr(allargs[0], "__array_namespace__", None): + array_api = getattr(allargs[0], "__array_namespace__", None)() + dlpack_device = getattr(allargs[0], "__dlpack_device__", None) return usm_iface, array_api, 
dlpack_device @@ -203,6 +206,8 @@ def wrapper_impl(obj, *args, **kwargs): result = _copy_to_usm(data_queue, result) if dpnp_available and len(args) > 0 and isinstance(args[0], dpnp.ndarray): result = _convert_to_dpnp(result) + # TODO: + # add exception for numpy. elif array_api: # TODO: # avoid for numpy From 75f9c4beeb1f3500ccd616e2595c43bb02a796d6 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 18 Jun 2024 03:33:26 -0700 Subject: [PATCH 20/82] minor updates --- onedal/_device_offload.py | 38 ++++++++++++++++++++--- onedal/tests/utils/_dataframes_support.py | 13 ++++---- sklearnex/_device_offload.py | 19 +++++++++--- 3 files changed, 54 insertions(+), 16 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 0c5ffc37b0..75a8b2698f 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -126,6 +126,7 @@ def _transfer_to_host(queue, *data): elif array_api is not None: # TODO: # get info about the device, for backward conversions. + item._array = item._array.copy() item = np.from_dlpack(item).copy() has_host_data = True else: @@ -167,10 +168,12 @@ def _extract_array_attr(*args, **kwargs): return None, None, None usm_iface = getattr(allargs[0], "__sycl_usm_array_interface__", None) array_api = None - if hasattr(allargs[0], "__array_namespace__", None): + dlpack_device = None + if hasattr(allargs[0], "__array_namespace__"): array_api = getattr(allargs[0], "__array_namespace__", None)() - dlpack_device = getattr(allargs[0], "__dlpack_device__", None) + if hasattr(allargs[0], "__dlpack_device__"): + dlpack_device = getattr(allargs[0], "__dlpack_device__", None) return usm_iface, array_api, dlpack_device @@ -192,12 +195,31 @@ def _convert_to_dpnp(array): return array +def _from_dlpack(data, xp, *args, **kwargs): + def _one_from_dlpack(data, xp, *args, **kwargs): + return xp.from_dlpack(data, *args, **kwargs) + + if isinstance(data, Iterable): + for i in range(len(data)): + data[i] = _one_from_dlpack(data[i], xp, 
*args, **kwargs) + return data + return _one_from_dlpack(data, xp, *args, **kwargs) + + +def _is_numpy_namespace(xp): + """Return True if xp is backed by NumPy.""" + print("\n_is_numpy_namespace call") + return xp.__name__ in {"numpy", "array_api_compat.numpy", "numpy.array_api"} + + # TODO: # rename support_array_api def support_array_api(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): - usm_iface, array_api, dlpack_device = _extract_array_attr(*args, **kwargs) + usm_iface, input_array_api, input_dlpack_device = _extract_array_attr( + *args, **kwargs + ) data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) if queue_param: hostkwargs["queue"] = data_queue @@ -208,10 +230,16 @@ def wrapper_impl(obj, *args, **kwargs): result = _convert_to_dpnp(result) # TODO: # add exception for numpy. - elif array_api: + elif ( + input_array_api + and not _is_numpy_namespace(input_array_api) + and hasattr(result, "__array_namespace__") + ): # TODO: # avoid for numpy - result = array_api.from_dlpack(result, copy=True, device=dlpack_device) + result = _from_dlpack( + result, input_array_api, copy=True, device=input_dlpack_device + ) return result if freefunc: diff --git a/onedal/tests/utils/_dataframes_support.py b/onedal/tests/utils/_dataframes_support.py index cfc40ae021..51d167e14a 100644 --- a/onedal/tests/utils/_dataframes_support.py +++ b/onedal/tests/utils/_dataframes_support.py @@ -57,7 +57,7 @@ def get_dataframes_and_queues( - dataframe_filter_="numpy,pandas,dpnp,dpctl", device_filter_="cpu,gpu" + dataframe_filter_="numpy,pandas,dpnp,dpctl,array_api", device_filter_="cpu,gpu" ): dataframes_and_queues = [] @@ -129,10 +129,11 @@ def _convert_to_dataframe(obj, sycl_queue=None, target_df=None, *args, **kwargs) # standard, but maintaining data on a device # using the method `from_dlpack` is. 
xp = array_api_modules[target_df] - return xp.from_dlpack( - _convert_to_dataframe( - obj, sycl_queue=sycl_queue, target_df="dpctl", *args, **kwargs - ) - ) + # return xp.from_dlpack( + # _convert_to_dataframe( + # obj, sycl_queue=sycl_queue, target_df="dpctl", *args, **kwargs + # ) + # ) + return xp.from_dlpack(obj) raise RuntimeError("Unsupported dataframe conversion") diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index e1a1682e43..8956519b99 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -20,13 +20,15 @@ from onedal._device_offload import ( _copy_to_usm, _extract_array_attr, + _from_dlpack, _get_global_queue, + _is_numpy_namespace, _transfer_to_host, dpnp_available, ) if dpnp_available: - from onedal._device_offload import _convert_to_dpnp + from onedal._device_offload import _convert_to_dpnp, _from_dlpack try: from dpctl.tensor import usm_ndarray @@ -94,6 +96,9 @@ def dispatch(obj, method_name, branches, *args, **kwargs): ) +# TODO: +# support input data +# wrap output data def wrap_output_data(func): @wraps(func) def wrapper(self, *args, **kwargs): @@ -104,10 +109,14 @@ def wrapper(self, *args, **kwargs): result = _copy_to_usm(usm_iface["syclobj"], result) if dpnp_available and isinstance(data[0], dpnp.ndarray): result = _convert_to_dpnp(result) - elif array_api: - # TODO: - # avoid for numpy - result = array_api.from_dlpack(result, copy=True, device=dlpack_device) + # TODO: + # update condition + elif ( + array_api + and not _is_numpy_namespace(array_api) + and hasattr(result, "__array_namespace__") + ): + result = _from_dlpack(result, array_api, copy=True, device=dlpack_device) return result return wrapper From 27b7a7c2ae51431a514b7a11cf91ad0d21bee7b3 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 18 Jun 2024 05:37:18 -0700 Subject: [PATCH 21/82] update docstrings for onedal._config._get_config --- onedal/_config.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff 
--git a/onedal/_config.py b/onedal/_config.py index 4992dfd8d2..4159b291e4 100644 --- a/onedal/_config.py +++ b/onedal/_config.py @@ -32,17 +32,15 @@ def _get_onedal_threadlocal_config(): return _threadlocal.global_config -# TODO: -# docstrings def _get_config(): - """Retrieve current values for configuration set by :func:`set_config` + """Retrieve current values for configuration set + by :func:`sklearnex.set_config` Returns ------- config : dict - Keys are parameter names that can be passed to :func:`set_config`. - See Also - -------- - _set_config : Set global configuration. + Keys are parameter names `target_offload` and + `allow_fallback_to_host` that can be passed + to :func:`sklearnex.set_config`. """ onedal_config = _get_onedal_threadlocal_config().copy() return {**onedal_config} From dec939e371485ef2799b08e51b6af244ca45df50 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 19 Jun 2024 03:36:53 -0700 Subject: [PATCH 22/82] reverted changes for LogReg refactoring moved refactoring into spererate #1877 --- sklearnex/linear_model/logistic_regression.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/sklearnex/linear_model/logistic_regression.py b/sklearnex/linear_model/logistic_regression.py index f981282826..6e1883f87f 100644 --- a/sklearnex/linear_model/logistic_regression.py +++ b/sklearnex/linear_model/logistic_regression.py @@ -21,6 +21,18 @@ from daal4py.sklearn.linear_model.logistic_path import ( LogisticRegression as LogisticRegression_daal4py, ) +from daal4py.sklearn.linear_model.logistic_path import daal4py_fit, daal4py_predict + + +class BaseLogisticRegression(ABC): + def _save_attributes(self): + assert hasattr(self, "_onedal_estimator") + self.classes_ = self._onedal_estimator.classes_ + self.coef_ = self._onedal_estimator.coef_ + self.intercept_ = self._onedal_estimator.intercept_ + self.n_features_in_ = self._onedal_estimator.n_features_in_ + self.n_iter_ = self._onedal_estimator.n_iter_ + if 
daal_check_version((2024, "P", 1)): import numpy as np @@ -32,7 +44,6 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version - from daal4py.sklearn.linear_model.logistic_path import daal4py_fit, daal4py_predict from onedal.linear_model import LogisticRegression as onedal_LogisticRegression from onedal.utils import _num_samples @@ -40,15 +51,6 @@ from .._utils import PatchingConditionsChain, get_patch_message from ..utils.validation import _assert_all_finite - class BaseLogisticRegression(ABC): - def _save_attributes(self): - assert hasattr(self, "_onedal_estimator") - self.classes_ = self._onedal_estimator.classes_ - self.coef_ = self._onedal_estimator.coef_ - self.intercept_ = self._onedal_estimator.intercept_ - self.n_features_in_ = self._onedal_estimator.n_features_in_ - self.n_iter_ = self._onedal_estimator.n_iter_ - @control_n_jobs( decorated_methods=[ "fit", From 38cc61e319802fd50bdaff6094131040837f680e Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 19 Jun 2024 06:38:33 -0700 Subject: [PATCH 23/82] using _get_config instead of _get_onedal_threadlocal_config --- onedal/_config.py | 11 +++++++++-- sklearnex/_config.py | 6 +++--- sklearnex/tests/test_config.py | 1 - 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/onedal/_config.py b/onedal/_config.py index 4159b291e4..9015ee4b3a 100644 --- a/onedal/_config.py +++ b/onedal/_config.py @@ -32,9 +32,14 @@ def _get_onedal_threadlocal_config(): return _threadlocal.global_config -def _get_config(): +def _get_config(copy=True): """Retrieve current values for configuration set by :func:`sklearnex.set_config` + Parameters + ---------- + copy : bool, default=True + If False, the values ​​of the global config are returned, + which can further be overwritten. Returns ------- config : dict @@ -42,5 +47,7 @@ def _get_config(): `allow_fallback_to_host` that can be passed to :func:`sklearnex.set_config`. 
""" - onedal_config = _get_onedal_threadlocal_config().copy() + onedal_config = _get_onedal_threadlocal_config() + if copy: + onedal_config = onedal_config.copy() return {**onedal_config} diff --git a/sklearnex/_config.py b/sklearnex/_config.py index 10b3e4b7b5..bbab9be2bb 100644 --- a/sklearnex/_config.py +++ b/sklearnex/_config.py @@ -19,7 +19,7 @@ from sklearn import get_config as skl_get_config from sklearn import set_config as skl_set_config -from onedal._config import _get_onedal_threadlocal_config +from onedal._config import _get_config as onedal_get_config def get_config(): @@ -34,7 +34,7 @@ def get_config(): set_config : Set global configuration. """ sklearn = skl_get_config() - sklearnex = _get_onedal_threadlocal_config().copy() + sklearnex = onedal_get_config() return {**sklearn, **sklearnex} @@ -58,7 +58,7 @@ def set_config(target_offload=None, allow_fallback_to_host=None, **sklearn_confi """ skl_set_config(**sklearn_configs) - local_config = _get_onedal_threadlocal_config() + local_config = onedal_get_config(copy=False) if target_offload is not None: local_config["target_offload"] = target_offload diff --git a/sklearnex/tests/test_config.py b/sklearnex/tests/test_config.py index eb714a806f..5ff30587d7 100644 --- a/sklearnex/tests/test_config.py +++ b/sklearnex/tests/test_config.py @@ -17,7 +17,6 @@ import sklearn import onedal -import onedal._config import sklearnex From cea22acb244fddbd35b1b5fb8e9b10c8c8194e79 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 19 Jun 2024 13:32:33 -0700 Subject: [PATCH 24/82] minor fix of _get_config --- onedal/_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/_config.py b/onedal/_config.py index 9015ee4b3a..8c93929d87 100644 --- a/onedal/_config.py +++ b/onedal/_config.py @@ -50,4 +50,4 @@ def _get_config(copy=True): onedal_config = _get_onedal_threadlocal_config() if copy: onedal_config = onedal_config.copy() - return {**onedal_config} + return onedal_config From 
1bb3697b4f93c431756064e5e389b41efd94d4d8 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 20 Jun 2024 00:58:15 -0700 Subject: [PATCH 25/82] removed TODO, that covered by ticket --- onedal/_device_offload.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 75a8b2698f..01c427c4dc 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -48,8 +48,6 @@ def __init__(self, filter_string): self._filter_string = filter_string self.is_cpu = "cpu" in filter_string self.is_gpu = "gpu" in filter_string - # TODO: check for possibility of fp64 support - # on other devices in this dummy class self.has_aspect_fp64 = self.is_cpu if not (self.is_cpu): From 4179f22b5671b9ff83baa4d2588556e791459c58 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 21 Jun 2024 10:24:11 +0200 Subject: [PATCH 26/82] added todo comments --- onedal/_device_offload.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 01c427c4dc..a71b100b64 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -180,7 +180,8 @@ def _run_on_device(func, obj=None, *args, **kwargs): return func(obj, *args, **kwargs) return func(*args, **kwargs) - +# TODO: +# move to array api module if dpnp_available: # TODO: # will be moved to _arrary_api module @@ -192,7 +193,8 @@ def _convert_to_dpnp(array): array[i] = _convert_to_dpnp(array[i]) return array - +# TODO: +# move to array api module def _from_dlpack(data, xp, *args, **kwargs): def _one_from_dlpack(data, xp, *args, **kwargs): return xp.from_dlpack(data, *args, **kwargs) @@ -203,7 +205,8 @@ def _one_from_dlpack(data, xp, *args, **kwargs): return data return _one_from_dlpack(data, xp, *args, **kwargs) - +# TODO: +# move to array api module def _is_numpy_namespace(xp): """Return True if xp is backed by NumPy.""" print("\n_is_numpy_namespace call") From 
694d94b9070638c8b6b3d4463ab77a0a44f37dbd Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 20 Jun 2024 06:05:49 -0700 Subject: [PATCH 27/82] moved out from _DataParallelInteropPolicy init import of DummySyclQueue --- onedal/common/_policy.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onedal/common/_policy.py b/onedal/common/_policy.py index 9928bbbaf5..90705854f6 100644 --- a/onedal/common/_policy.py +++ b/onedal/common/_policy.py @@ -48,12 +48,11 @@ def __init__(self): if _is_dpc_backend: + from onedal._device_offload import DummySyclQueue class _DataParallelInteropPolicy(_backend.data_parallel_policy): def __init__(self, queue): self._queue = queue - from onedal._device_offload import DummySyclQueue - if isinstance(queue, DummySyclQueue): super().__init__(self._queue.sycl_device.get_filter_string()) return From 8bf25852efa81e999de1cda865c25e0c08ab4379 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 20 Jun 2024 06:06:44 -0700 Subject: [PATCH 28/82] removed outdated comment; will be covered on #1813 --- onedal/_device_offload.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index a71b100b64..a4af85f938 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -183,8 +183,7 @@ def _run_on_device(func, obj=None, *args, **kwargs): # TODO: # move to array api module if dpnp_available: - # TODO: - # will be moved to _arrary_api module + def _convert_to_dpnp(array): if isinstance(array, usm_ndarray): return dpnp.array(array, copy=False) From e903fbbba20d8b4c7ac4bff620b2ab1e05d5ac8a Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 20 Jun 2024 07:59:19 -0700 Subject: [PATCH 29/82] removed TODO comment from ridge.py the comment addressed on #1843 --- sklearnex/linear_model/ridge.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sklearnex/linear_model/ridge.py b/sklearnex/linear_model/ridge.py index 
cbe217ca1b..0352fdea9b 100644 --- a/sklearnex/linear_model/ridge.py +++ b/sklearnex/linear_model/ridge.py @@ -17,8 +17,6 @@ from daal4py.sklearn.linear_model import Ridge from onedal._device_offload import support_array_api -# TODO: -# implement GPU offloading via onedal4py backend. -Ridge.fit = support_array_api(queue_param=False)(Ridge.fit) -Ridge.predict = support_array_api(queue_param=False)(Ridge.predict) -Ridge.score = support_array_api(queue_param=False)(Ridge.score) +Ridge.fit = support_usm_ndarray(queue_param=False)(Ridge.fit) +Ridge.predict = support_usm_ndarray(queue_param=False)(Ridge.predict) +Ridge.score = support_usm_ndarray(queue_param=False)(Ridge.score) From 1491b389af0daa5cd4f61a73f06c93948d7b443c Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 21 Jun 2024 16:01:35 -0700 Subject: [PATCH 30/82] Added ElasticNet, Lasso, Ridge into sklearnex patching map remove unnecessary docstrings assignments --- daal4py/sklearn/cluster/k_means.py | 3 -- .../linear_model/_coordinate_descent.py | 6 --- daal4py/sklearn/linear_model/_ridge.py | 3 -- sklearnex/dispatcher.py | 42 +++++++++++++++++++ 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/daal4py/sklearn/cluster/k_means.py b/daal4py/sklearn/cluster/k_means.py index 68075efddf..103318004e 100755 --- a/daal4py/sklearn/cluster/k_means.py +++ b/daal4py/sklearn/cluster/k_means.py @@ -592,9 +592,6 @@ def predict( def fit_predict(self, X, y=None, sample_weight=None): return super().fit_predict(X, y, sample_weight) - score = KMeans_original.score - fit.__doc__ = KMeans_original.fit.__doc__ predict.__doc__ = KMeans_original.predict.__doc__ fit_predict.__doc__ = KMeans_original.fit_predict.__doc__ - score.__doc__ = KMeans_original.score.__doc__ diff --git a/daal4py/sklearn/linear_model/_coordinate_descent.py b/daal4py/sklearn/linear_model/_coordinate_descent.py index 4f46afb30a..081d5652d5 100755 --- a/daal4py/sklearn/linear_model/_coordinate_descent.py +++ 
b/daal4py/sklearn/linear_model/_coordinate_descent.py @@ -732,11 +732,8 @@ def dual_gap_(self, value): def dual_gap_(self): self._gap = None - score = ElasticNet_original.score - fit.__doc__ = ElasticNet_original.fit.__doc__ predict.__doc__ = ElasticNet_original.predict.__doc__ - score.__doc__ = ElasticNet_original.score.__doc__ @control_n_jobs(decorated_methods=["fit", "predict"]) @@ -847,8 +844,5 @@ def dual_gap_(self, value): def dual_gap_(self): self._gap = None - score = Lasso_original.score - fit.__doc__ = Lasso_original.fit.__doc__ predict.__doc__ = Lasso_original.predict.__doc__ - score.__doc__ = Lasso_original.score.__doc__ diff --git a/daal4py/sklearn/linear_model/_ridge.py b/daal4py/sklearn/linear_model/_ridge.py index 7fc890895f..037f458407 100644 --- a/daal4py/sklearn/linear_model/_ridge.py +++ b/daal4py/sklearn/linear_model/_ridge.py @@ -319,8 +319,5 @@ def fit(self, X, y, sample_weight=None): def predict(self, X): return _predict_ridge(self, X) - score = Ridge_original.score - fit.__doc__ = Ridge_original.fit.__doc__ predict.__doc__ = Ridge_original.predict.__doc__ - score.__doc__ = Ridge_original.score.__doc__ diff --git a/sklearnex/dispatcher.py b/sklearnex/dispatcher.py index 3fa5a7199a..8ac86f2539 100644 --- a/sklearnex/dispatcher.py +++ b/sklearnex/dispatcher.py @@ -141,11 +141,14 @@ def get_patch_map_core(preview=False): from .ensemble import ExtraTreesRegressor as ExtraTreesRegressor_sklearnex from .ensemble import RandomForestClassifier as RandomForestClassifier_sklearnex from .ensemble import RandomForestRegressor as RandomForestRegressor_sklearnex + from .linear_model import ElasticNet as ElasticNet_sklearnex from .linear_model import ( IncrementalLinearRegression as IncrementalLinearRegression_sklearnex, ) + from .linear_model import Lasso as Lasso_sklearnex from .linear_model import LinearRegression as LinearRegression_sklearnex from .linear_model import LogisticRegression as LogisticRegression_sklearnex + from .linear_model import Ridge 
as Ridge_sklearnex from .manifold import TSNE as TSNE_sklearnex from .metrics import pairwise_distances as pairwise_distances_sklearnex from .metrics import roc_auc_score as roc_auc_score_sklearnex @@ -175,6 +178,32 @@ def get_patch_map_core(preview=False): mapping["nusvr"] = [[(svm_module, "NuSVR", NuSVR_sklearnex), None]] mapping["nusvc"] = [[(svm_module, "NuSVC", NuSVC_sklearnex), None]] + # ElasticNet + mapping.pop("elasticnet") + mapping["elasticnet"] = [ + [ + ( + linear_model_module, + "ElasticNet", + ElasticNet_sklearnex, + ), + None, + ] + ] + + # Lasso + mapping.pop("lasso") + mapping["lasso"] = [ + [ + ( + linear_model_module, + "Lasso", + Lasso_sklearnex, + ), + None, + ] + ] + # Linear Regression mapping.pop("linear") mapping.pop("linearregression") @@ -208,6 +237,19 @@ def get_patch_map_core(preview=False): ] mapping["logisticregression"] = mapping["log_reg"] + # Ridge + mapping.pop("ridge") + mapping["ridge"] = [ + [ + ( + linear_model_module, + "Ridge", + Ridge_sklearnex, + ), + None, + ] + ] + # manifold mapping.pop("tsne") mapping["tsne"] = [ From 3dd5000d8459a5cbcb9bcdec695b2a70a6ee9193 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 27 Jun 2024 06:15:47 -0700 Subject: [PATCH 31/82] lint --- onedal/_device_offload.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index a4af85f938..02c511df0a 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -180,6 +180,7 @@ def _run_on_device(func, obj=None, *args, **kwargs): return func(obj, *args, **kwargs) return func(*args, **kwargs) + # TODO: # move to array api module if dpnp_available: @@ -192,6 +193,7 @@ def _convert_to_dpnp(array): array[i] = _convert_to_dpnp(array[i]) return array + # TODO: # move to array api module def _from_dlpack(data, xp, *args, **kwargs): @@ -204,6 +206,7 @@ def _one_from_dlpack(data, xp, *args, **kwargs): return data return _one_from_dlpack(data, xp, *args, **kwargs) + # TODO: # move to 
array api module def _is_numpy_namespace(xp): From adc6e68fae32da9194653906bdecd159a17b0871 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 27 Jun 2024 07:10:19 -0700 Subject: [PATCH 32/82] removed debug print --- onedal/_device_offload.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 02c511df0a..8a040bedef 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -211,7 +211,6 @@ def _one_from_dlpack(data, xp, *args, **kwargs): # move to array api module def _is_numpy_namespace(xp): """Return True if xp is backed by NumPy.""" - print("\n_is_numpy_namespace call") return xp.__name__ in {"numpy", "array_api_compat.numpy", "numpy.array_api"} From 652d014b9c31c871ee6a8456878ecd222f668129 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 27 Jun 2024 07:12:52 -0700 Subject: [PATCH 33/82] enabled more array api test --- sklearnex/tests/test_memory_usage.py | 2 +- sklearnex/tests/test_run_to_run_stability.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index b072fd7814..e2318897a6 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -259,7 +259,7 @@ def _kfold_function_template(estimator, dataframe, data_shape, queue=None, func= @pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu") + "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl,array_api", "cpu") ) @pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys()) @pytest.mark.parametrize("data_shape", data_shapes) diff --git a/sklearnex/tests/test_run_to_run_stability.py b/sklearnex/tests/test_run_to_run_stability.py index 9e4a670fdf..796d78022d 100755 --- a/sklearnex/tests/test_run_to_run_stability.py +++ b/sklearnex/tests/test_run_to_run_stability.py @@ -143,7 +143,7 @@ def 
_run_test(estimator, method, datasets): ) -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy")) +@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy,array_api")) @pytest.mark.parametrize("estimator, method", gen_models_info(PATCHED_MODELS)) def test_standard_estimator_stability(estimator, method, dataframe, queue): if estimator in ["LogisticRegression", "TSNE"]: @@ -166,7 +166,7 @@ def test_standard_estimator_stability(estimator, method, dataframe, queue): @pytest.mark.allow_sklearn_fallback -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy")) +@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy,array_api")) @pytest.mark.parametrize("estimator, method", gen_models_info(SPECIAL_INSTANCES)) def test_special_estimator_stability(estimator, method, dataframe, queue): if queue is None and estimator in ["LogisticRegression(solver='newton-cg')"]: @@ -188,7 +188,7 @@ def test_special_estimator_stability(estimator, method, dataframe, queue): _run_test(est, method, datasets) -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy")) +@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy,array_api")) @pytest.mark.parametrize("estimator, method", gen_models_info(SPARSE_INSTANCES)) def test_sparse_estimator_stability(estimator, method, dataframe, queue): if "KMeans" in estimator and method == "score" and queue == None: @@ -210,7 +210,7 @@ def test_sparse_estimator_stability(estimator, method, dataframe, queue): _run_test(est, method, datasets) -@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy")) +@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy,array_api")) @pytest.mark.parametrize("estimator, method", gen_models_info(STABILITY_INSTANCES)) def test_other_estimator_stability(estimator, method, dataframe, queue): if "KMeans" in estimator and method == "score" and queue 
== None: From ad4275044b23dc48388111b1c6264732ac5ecc8c Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 16 Jul 2024 03:34:14 -0700 Subject: [PATCH 34/82] created separate array api module --- onedal/_device_offload.py | 40 +++-------------------- onedal/utils/_array_api.py | 61 ++++++++++++++++++++++++++++++++++++ sklearnex/_device_offload.py | 6 ++-- 3 files changed, 69 insertions(+), 38 deletions(-) create mode 100644 onedal/utils/_array_api.py diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 8a040bedef..8c1fba7d55 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -22,6 +22,8 @@ from ._config import _get_config +from .utils._array_api import _from_dlpack, _is_numpy_namespace + try: from dpctl import SyclQueue from dpctl.memory import MemoryUSMDevice, as_usm_memory @@ -38,6 +40,9 @@ except ImportError: dpnp_available = False +if dpnp_available: + from .utils._array_api import _convert_to_dpnp + class DummySyclQueue: """This class is designed to act like dpctl.SyclQueue @@ -181,41 +186,6 @@ def _run_on_device(func, obj=None, *args, **kwargs): return func(*args, **kwargs) -# TODO: -# move to array api module -if dpnp_available: - - def _convert_to_dpnp(array): - if isinstance(array, usm_ndarray): - return dpnp.array(array, copy=False) - elif isinstance(array, Iterable): - for i in range(len(array)): - array[i] = _convert_to_dpnp(array[i]) - return array - - -# TODO: -# move to array api module -def _from_dlpack(data, xp, *args, **kwargs): - def _one_from_dlpack(data, xp, *args, **kwargs): - return xp.from_dlpack(data, *args, **kwargs) - - if isinstance(data, Iterable): - for i in range(len(data)): - data[i] = _one_from_dlpack(data[i], xp, *args, **kwargs) - return data - return _one_from_dlpack(data, xp, *args, **kwargs) - - -# TODO: -# move to array api module -def _is_numpy_namespace(xp): - """Return True if xp is backed by NumPy.""" - return xp.__name__ in {"numpy", "array_api_compat.numpy", 
"numpy.array_api"} - - -# TODO: -# rename support_array_api def support_array_api(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py new file mode 100644 index 0000000000..f9daf7d86b --- /dev/null +++ b/onedal/utils/_array_api.py @@ -0,0 +1,61 @@ +# ============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Tools to support array_api.""" + +from collections.abc import Iterable + +try: + from dpctl.tensor import usm_ndarray + + dpctl_available = True +except ImportError: + dpctl_available = False + +try: + import dpnp + + dpnp_available = True +except ImportError: + dpnp_available = False + + +if dpnp_available: + import dpnp + + def _convert_to_dpnp(array): + if isinstance(array, usm_ndarray): + return dpnp.array(array, copy=False) + elif isinstance(array, Iterable): + for i in range(len(array)): + array[i] = _convert_to_dpnp(array[i]) + return array + + +def _from_dlpack(data, xp, *args, **kwargs): + def _one_from_dlpack(data, xp, *args, **kwargs): + return xp.from_dlpack(data, *args, **kwargs) + + if isinstance(data, Iterable): + for i in range(len(data)): + data[i] = _one_from_dlpack(data[i], xp, *args, **kwargs) + return data + return _one_from_dlpack(data, xp, *args, **kwargs) + + +def _is_numpy_namespace(xp): + """Return True if xp is backed by NumPy.""" + return xp.__name__ in {"numpy", "array_api_compat.numpy", "numpy.array_api"} diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index e7af02070a..6d3ff9e38e 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -19,16 +19,16 @@ from onedal._device_offload import ( _copy_to_usm, _extract_array_attr, - _from_dlpack, _get_global_queue, - _is_numpy_namespace, _transfer_to_host, dpnp_available, ) +from onedal.utils._array_api import _from_dlpack, _is_numpy_namespace + if dpnp_available: import dpnp - from onedal._device_offload import _convert_to_dpnp, _from_dlpack + from onedal.utils._array_api import _convert_to_dpnp from ._config import get_config From 7bef6c411e45ac119436a81bb9d73421c60093a3 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 17 Jul 2024 03:18:16 -0700 Subject: [PATCH 35/82] currently disabled array_api for test_memory_leaks --- 
sklearnex/tests/test_memory_usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index e2318897a6..b072fd7814 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -259,7 +259,7 @@ def _kfold_function_template(estimator, dataframe, data_shape, queue=None, func= @pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl,array_api", "cpu") + "dataframe,queue", get_dataframes_and_queues("numpy,pandas,dpctl", "cpu") ) @pytest.mark.parametrize("estimator", CPU_ESTIMATORS.keys()) @pytest.mark.parametrize("data_shape", data_shapes) From 88e2a22a60c39d41672d35f67d98e00b0970574f Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 17 Jul 2024 03:19:06 -0700 Subject: [PATCH 36/82] update _convert_to_dataframe --- onedal/tests/utils/_dataframes_support.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/onedal/tests/utils/_dataframes_support.py b/onedal/tests/utils/_dataframes_support.py index 45ecd55713..ef95556fa4 100644 --- a/onedal/tests/utils/_dataframes_support.py +++ b/onedal/tests/utils/_dataframes_support.py @@ -39,6 +39,8 @@ # GPU-no-copy. 
import array_api_strict + # TODO: + # get this from onedal._config._get_config # Run check if "array_api_dispatch" is configurable array_api_enabled = lambda: get_config()["array_api_dispatch"] array_api_enabled() @@ -164,6 +166,9 @@ def _convert_to_dataframe(obj, sycl_queue=None, target_df=None, *args, **kwargs) # obj, sycl_queue=sycl_queue, target_df="dpctl", *args, **kwargs # ) # ) - return xp.from_dlpack(obj) + if hasattr(obj, "__dlpack__"): + return xp.from_dlpack(obj) + else: + return xp.asarray(obj) raise RuntimeError("Unsupported dataframe conversion") From d8d0dc4d587e1fad898dccaeda96287c901ec5ab Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 17 Jul 2024 03:33:03 -0700 Subject: [PATCH 37/82] linting --- onedal/_device_offload.py | 2 +- sklearnex/_device_offload.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 849f1eabab..68f4cd695c 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -21,7 +21,6 @@ import numpy as np from ._config import _get_config - from .utils._array_api import _from_dlpack, _is_numpy_namespace try: @@ -185,6 +184,7 @@ def _run_on_device(func, obj=None, *args, **kwargs): return func(obj, *args, **kwargs) return func(*args, **kwargs) + # TODO: # update docstrings. 
def support_array_api(freefunc=False, queue_param=True): diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 6d3ff9e38e..b88481e8ee 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -23,14 +23,12 @@ _transfer_to_host, dpnp_available, ) - from onedal.utils._array_api import _from_dlpack, _is_numpy_namespace if dpnp_available: import dpnp from onedal.utils._array_api import _convert_to_dpnp - from ._config import get_config From 45b920fb49172f317cf46d1696da1fce6191c0eb Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 18 Jul 2024 01:52:05 -0700 Subject: [PATCH 38/82] update condition for _transfer_to_host --- onedal/_device_offload.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 68f4cd695c..7766a2e418 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -125,7 +125,9 @@ def _transfer_to_host(queue, *data): order=order, ) has_usm_data = True - elif array_api is not None: + # TODO: + # update conditions. + elif array_api and not _is_numpy_namespace(array_api): # TODO: # get info about the device, for backward conversions. 
item._array = item._array.copy() From f74ff13de584f16e2db770e9f9fbacc2ad8c4a45 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 26 Jul 2024 02:57:43 -0700 Subject: [PATCH 39/82] fixed for bs and ridge --- onedal/spmd/basic_statistics/basic_statistics.py | 2 +- sklearnex/preview/linear_model/ridge.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/onedal/spmd/basic_statistics/basic_statistics.py b/onedal/spmd/basic_statistics/basic_statistics.py index 5aee935814..9fd00412b5 100644 --- a/onedal/spmd/basic_statistics/basic_statistics.py +++ b/onedal/spmd/basic_statistics/basic_statistics.py @@ -27,6 +27,6 @@ class BasicStatistics(BaseEstimatorSPMD, BasicStatistics_Batch): def compute(self, data, weights=None, queue=None): return super().compute(data, weights=weights, queue=queue) - @support_usm_ndarray() + @support_array_api() def fit(self, data, sample_weight=None, queue=None): return super().fit(data, sample_weight=sample_weight, queue=queue) diff --git a/sklearnex/preview/linear_model/ridge.py b/sklearnex/preview/linear_model/ridge.py index d663055f87..e593e0732c 100644 --- a/sklearnex/preview/linear_model/ridge.py +++ b/sklearnex/preview/linear_model/ridge.py @@ -362,11 +362,11 @@ def _save_attributes(self): else: from daal4py.sklearn.linear_model._ridge import Ridge - from onedal._device_offload import support_usm_ndarray + from onedal._device_offload import support_array_api - Ridge.fit = support_usm_ndarray(queue_param=False)(Ridge.fit) - Ridge.predict = support_usm_ndarray(queue_param=False)(Ridge.predict) - Ridge.score = support_usm_ndarray(queue_param=False)(Ridge.score) + Ridge.fit = support_array_api(queue_param=False)(Ridge.fit) + Ridge.predict = support_array_api(queue_param=False)(Ridge.predict) + Ridge.score = support_array_api(queue_param=False)(Ridge.score) logging.warning( "Preview Ridge requires oneDAL version >= 2024.6 but it was not found" From 0300f72e33561a5625d30f85a7d95b9dbae8b14e Mon Sep 17 00:00:00 2001 
From: Samir Nasibli Date: Thu, 8 Aug 2024 01:55:33 -0700 Subject: [PATCH 40/82] update fallback when array_api_dispatch enabled --- sklearnex/_device_offload.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index b88481e8ee..b019aed6d0 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -74,8 +74,14 @@ def dispatch(obj, method_name, branches, *args, **kwargs): patching_status.write_log(queue=q) return branches[backend](obj, *hostargs, **hostkwargs, queue=q) if backend == "sklearn": - patching_status.write_log() - return branches[backend](obj, *hostargs, **hostkwargs) + if "array_api_dispatch" in get_config() and get_config()["array_api_dispatch"]: + # TODO: + # logs require update for this branch. + patching_status.write_log() + return branches[backend](obj, *args, **kwargs) + else: + patching_status.write_log() + return branches[backend](obj, *hostargs, **hostkwargs) raise RuntimeError( f"Undefined backend {backend} in " f"{obj.__class__.__name__}.{method_name}" ) From 5cae80bdf6ad46bce0add2b9a41cb3ace2bb286f Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 8 Aug 2024 02:05:40 -0700 Subject: [PATCH 41/82] refactor sklearnex get_namespace usage --- onedal/utils/_array_api.py | 23 +++++++++++++++++ .../covariance/incremental_covariance.py | 2 +- sklearnex/decomposition/pca.py | 2 +- sklearnex/ensemble/_forest.py | 2 +- sklearnex/neighbors/_lof.py | 3 ++- sklearnex/neighbors/common.py | 2 +- sklearnex/svm/nusvc.py | 2 +- sklearnex/svm/svc.py | 2 +- sklearnex/tests/test_memory_usage.py | 3 ++- sklearnex/utils/__init__.py | 3 +-- .../utils/{_namespace.py => _array_api.py} | 25 ++++--------------- 11 files changed, 39 insertions(+), 30 deletions(-) rename sklearnex/utils/{_namespace.py => _array_api.py} (78%) diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index f9daf7d86b..09329da03c 100644 --- a/onedal/utils/_array_api.py 
+++ b/onedal/utils/_array_api.py @@ -59,3 +59,26 @@ def _one_from_dlpack(data, xp, *args, **kwargs): def _is_numpy_namespace(xp): """Return True if xp is backed by NumPy.""" return xp.__name__ in {"numpy", "array_api_compat.numpy", "numpy.array_api"} + + +def _get_sycl_namespace(*arrays): + """Get namespace of sycl arrays.""" + + # sycl support designed to work regardless of array_api_dispatch sklearn global value + sycl_type = {type(x): x for x in arrays if hasattr(x, "__sycl_usm_array_interface__")} + + if len(sycl_type) > 1: + raise ValueError(f"Multiple SYCL types for array inputs: {sycl_type}") + + if sycl_type: + (X,) = sycl_type.values() + + if hasattr(X, "__array_namespace__"): + return sycl_type, X.__array_namespace__(), True + elif dpnp_available and isinstance(X, dpnp.ndarray): + # convert it to dpctl.tensor with namespace. + return sycl_type, dpnp, False + else: + raise ValueError(f"SYCL type not recognized: {sycl_type}") + + return sycl_type, None, False diff --git a/sklearnex/covariance/incremental_covariance.py b/sklearnex/covariance/incremental_covariance.py index 26c5acc054..5b01f67c4d 100644 --- a/sklearnex/covariance/incremental_covariance.py +++ b/sklearnex/covariance/incremental_covariance.py @@ -35,7 +35,7 @@ from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain, register_hyperparameters from ..metrics import pairwise_distances -from ..utils import get_namespace +from ..utils._array_api import get_namespace if sklearn_check_version("1.2"): from sklearn.utils._param_validation import Interval diff --git a/sklearnex/decomposition/pca.py b/sklearnex/decomposition/pca.py index a2353536ff..98f786c31a 100755 --- a/sklearnex/decomposition/pca.py +++ b/sklearnex/decomposition/pca.py @@ -32,7 +32,7 @@ from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain - from ..utils import get_namespace + from ..utils._array_api import get_namespace if 
sklearn_check_version("1.1") and not sklearn_check_version("1.2"): from sklearn.utils import check_scalar diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 64948b6ef7..4b5a56af53 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -57,10 +57,10 @@ from onedal.ensemble import RandomForestRegressor as onedal_RandomForestRegressor from onedal.primitives import get_tree_state_cls, get_tree_state_reg from onedal.utils import _num_features, _num_samples -from sklearnex.utils import get_namespace from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain +from ..utils._array_api import get_namespace if sklearn_check_version("1.2"): from sklearn.utils._param_validation import Interval diff --git a/sklearnex/neighbors/_lof.py b/sklearnex/neighbors/_lof.py index 0a9a72ee1e..0d066fc62e 100644 --- a/sklearnex/neighbors/_lof.py +++ b/sklearnex/neighbors/_lof.py @@ -26,7 +26,8 @@ from sklearnex._device_offload import dispatch, wrap_output_data from sklearnex.neighbors.common import KNeighborsDispatchingBase from sklearnex.neighbors.knn_unsupervised import NearestNeighbors -from sklearnex.utils import get_namespace + +from ..utils._array_api import get_namespace @control_n_jobs(decorated_methods=["fit", "_kneighbors"]) diff --git a/sklearnex/neighbors/common.py b/sklearnex/neighbors/common.py index 92ef00250c..7ec58dc960 100644 --- a/sklearnex/neighbors/common.py +++ b/sklearnex/neighbors/common.py @@ -28,7 +28,7 @@ from onedal.utils import _check_array, _num_features, _num_samples from .._utils import PatchingConditionsChain -from ..utils import get_namespace +from ..utils._array_api import get_namespace class KNeighborsDispatchingBase: diff --git a/sklearnex/svm/nusvc.py b/sklearnex/svm/nusvc.py index 421546a203..6c7c465291 100644 --- a/sklearnex/svm/nusvc.py +++ b/sklearnex/svm/nusvc.py @@ -22,9 +22,9 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from 
daal4py.sklearn._utils import sklearn_check_version -from sklearnex.utils import get_namespace from .._device_offload import dispatch, wrap_output_data +from ..utils._array_api import get_namespace from ._common import BaseSVC if sklearn_check_version("1.0"): diff --git a/sklearnex/svm/svc.py b/sklearnex/svm/svc.py index 337f44ba4b..04f9d6aa6f 100644 --- a/sklearnex/svm/svc.py +++ b/sklearnex/svm/svc.py @@ -23,10 +23,10 @@ from daal4py.sklearn._n_jobs_support import control_n_jobs from daal4py.sklearn._utils import sklearn_check_version -from sklearnex.utils import get_namespace from .._device_offload import dispatch, wrap_output_data from .._utils import PatchingConditionsChain +from ..utils._array_api import get_namespace from ._common import BaseSVC if sklearn_check_version("1.0"): diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index b072fd7814..cd4c846c8b 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -38,7 +38,8 @@ from onedal.tests.utils._device_selection import get_queues, is_dpctl_available from sklearnex import config_context from sklearnex.tests._utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES -from sklearnex.utils import get_namespace + +from ..utils._array_api import get_namespace if _is_dpc_backend: from onedal import _backend diff --git a/sklearnex/utils/__init__.py b/sklearnex/utils/__init__.py index 2523e39330..4c3fe21154 100755 --- a/sklearnex/utils/__init__.py +++ b/sklearnex/utils/__init__.py @@ -14,7 +14,6 @@ # limitations under the License. 
# =============================================================================== -from ._namespace import get_namespace from .validation import _assert_all_finite -__all__ = ["get_namespace", "_assert_all_finite"] +__all__ = ["_assert_all_finite"] diff --git a/sklearnex/utils/_namespace.py b/sklearnex/utils/_array_api.py similarity index 78% rename from sklearnex/utils/_namespace.py rename to sklearnex/utils/_array_api.py index 2f67737023..f4913ebee2 100644 --- a/sklearnex/utils/_namespace.py +++ b/sklearnex/utils/_array_api.py @@ -17,16 +17,14 @@ import numpy as np from daal4py.sklearn._utils import sklearn_check_version - -from .._device_offload import dpnp_available +from onedal.utils._array_api import _get_sycl_namespace if sklearn_check_version("1.2"): from sklearn.utils._array_api import get_namespace as sklearn_get_namespace -if dpnp_available: - import dpnp - +# TODO: +# update it for supported versions of scikit-learn. def get_namespace(*arrays): """Get namespace of arrays. @@ -74,23 +72,10 @@ def get_namespace(*arrays): True of the arrays are containers that implement the Array API spec. 
""" - # sycl support designed to work regardless of array_api_dispatch sklearn global value - sycl_type = {type(x): x for x in arrays if hasattr(x, "__sycl_usm_array_interface__")} - - if len(sycl_type) > 1: - raise ValueError(f"Multiple SYCL types for array inputs: {sycl_type}") + sycl_type, xp, is_array_api_compliant = _get_sycl_namespace(*arrays) if sycl_type: - - (X,) = sycl_type.values() - - if hasattr(X, "__array_namespace__"): - return X.__array_namespace__(), True - elif dpnp_available and isinstance(X, dpnp.ndarray): - return dpnp, False - else: - raise ValueError(f"SYCL type not recognized: {sycl_type}") - + return xp, is_array_api_compliant elif sklearn_check_version("1.2"): return sklearn_get_namespace(*arrays) else: From ac3aacb02e5af94980c7cfb301336bad2869ae46 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 8 Aug 2024 02:28:04 -0700 Subject: [PATCH 42/82] updated array apiconditions for get_dataframes_and_queues --- onedal/tests/utils/_dataframes_support.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/onedal/tests/utils/_dataframes_support.py b/onedal/tests/utils/_dataframes_support.py index 16a46fcc4d..a59e4683ee 100644 --- a/onedal/tests/utils/_dataframes_support.py +++ b/onedal/tests/utils/_dataframes_support.py @@ -16,7 +16,8 @@ import pytest import scipy.sparse as sp -from sklearn import get_config + +from sklearnex import get_config try: import dpctl @@ -109,7 +110,13 @@ def get_df_and_q(dataframe: str): dataframes_and_queues.extend(get_df_and_q("dpctl")) if dpnp_available and "dpnp" in dataframe_filter_: dataframes_and_queues.extend(get_df_and_q("dpnp")) - if "array_api" in dataframe_filter_ or array_api_enabled(): + # TODO: + # condition requires refactoring. 
+ if ( + "array_api" in dataframe_filter_ + and "array_api" in array_api_modules + or array_api_enabled() + ): dataframes_and_queues.append(pytest.param("array_api", None, id="array_api")) return dataframes_and_queues From 80da7111c13388d3b5ef208db319360fa92c173b Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 8 Aug 2024 05:30:11 -0700 Subject: [PATCH 43/82] fix import in test_memory_usage.py --- sklearnex/tests/test_memory_usage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index cd4c846c8b..1f4e553aca 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -38,8 +38,7 @@ from onedal.tests.utils._device_selection import get_queues, is_dpctl_available from sklearnex import config_context from sklearnex.tests._utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES - -from ..utils._array_api import get_namespace +from sklearnex.utils._array_api import get_namespace if _is_dpc_backend: from onedal import _backend From 1de8fc9c351f412b4c5f962b6ef414f4c90947b7 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 8 Aug 2024 23:16:31 -0700 Subject: [PATCH 44/82] first temp commit address py312 fails --- onedal/_device_offload.py | 6 +++--- onedal/utils/_array_api.py | 6 +++++- sklearnex/_device_offload.py | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 7766a2e418..993147aa66 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -99,6 +99,8 @@ def _transfer_to_host(queue, *data): for item in data: usm_iface = getattr(item, "__sycl_usm_array_interface__", None) array_api = getattr(item, "__array_namespace__", None) + if array_api: + array_api = array_api() if usm_iface is not None: if not dpctl_available: raise RuntimeError( @@ -228,9 +230,7 @@ def wrapper_impl(obj, *args, **kwargs): ): # TODO: # avoid for numpy 
- result = _from_dlpack( - result, input_array_api, copy=True, device=input_dlpack_device - ) + result = _from_dlpack(result, input_array_api, device=input_dlpack_device) return result if freefunc: diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 09329da03c..6871a975ba 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -18,6 +18,8 @@ from collections.abc import Iterable +import numpy as np + try: from dpctl.tensor import usm_ndarray @@ -49,7 +51,9 @@ def _from_dlpack(data, xp, *args, **kwargs): def _one_from_dlpack(data, xp, *args, **kwargs): return xp.from_dlpack(data, *args, **kwargs) - if isinstance(data, Iterable): + if isinstance(data, np.ndarray): + return _one_from_dlpack(data, xp, *args, **kwargs) + elif isinstance(data, Iterable): for i in range(len(data)): data[i] = _one_from_dlpack(data[i], xp, *args, **kwargs) return data diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index b019aed6d0..2ee41ebf30 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -107,7 +107,8 @@ def wrapper(self, *args, **kwargs): and not _is_numpy_namespace(array_api) and hasattr(result, "__array_namespace__") ): - result = _from_dlpack(result, array_api, copy=True, device=dlpack_device) + # result = _from_dlpack(result, array_api, copy=True, device=dlpack_device) + result = _from_dlpack(result, array_api) return result return wrapper From 621dfc2076a3f498141e49cf8c819d6d5e2d1b9a Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 9 Aug 2024 06:43:42 -0700 Subject: [PATCH 45/82] small tmp workaround for py312 --- onedal/_device_offload.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 993147aa66..3f42507c4c 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -230,7 +230,8 @@ def wrapper_impl(obj, *args, **kwargs): ): # TODO: # avoid for numpy - result = _from_dlpack(result, 
input_array_api, device=input_dlpack_device) + # result = _from_dlpack(result, input_array_api, device=input_dlpack_device) + result = _from_dlpack(result, input_array_api) return result if freefunc: From d2f4383840dcccc772fe35f47a57b6cad7f56faa Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 9 Aug 2024 19:39:35 -0700 Subject: [PATCH 46/82] removed from_dlpack from the non-zero support logic --- onedal/_device_offload.py | 47 ++++++++++++----------- onedal/tests/utils/_dataframes_support.py | 2 + onedal/utils/_array_api.py | 16 +++++++- sklearnex/_device_offload.py | 17 ++++---- sklearnex/utils/_array_api.py | 2 + 5 files changed, 52 insertions(+), 32 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 3f42507c4c..2009f05cbb 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -21,7 +21,7 @@ import numpy as np from ._config import _get_config -from .utils._array_api import _from_dlpack, _is_numpy_namespace +from .utils._array_api import _asarray, _is_numpy_namespace try: from dpctl import SyclQueue @@ -168,19 +168,26 @@ def _get_host_inputs(*args, **kwargs): return q, hostargs, hostkwargs +# TODO: +# add docstrings. def _extract_array_attr(*args, **kwargs): allargs = (*args, *kwargs.values()) if len(allargs) == 0: return None, None, None - usm_iface = getattr(allargs[0], "__sycl_usm_array_interface__", None) + # Getting first argument attr. For all sklearn-like functions + # all data provided in the first position. Other data arguments expected + # to have the same attributs. 
+ firstarg = allargs[0] + usm_iface = getattr(firstarg, "__sycl_usm_array_interface__", None) array_api = None - dlpack_device = None - if hasattr(allargs[0], "__array_namespace__"): - array_api = getattr(allargs[0], "__array_namespace__", None)() - - if hasattr(allargs[0], "__dlpack_device__"): - dlpack_device = getattr(allargs[0], "__dlpack_device__", None) - return usm_iface, array_api, dlpack_device + array_api_device = None + # TODO: + # refactor + if hasattr(firstarg, "__array_namespace__"): + array_api = getattr(firstarg, "__array_namespace__", None)() + if array_api: + array_api_device = firstarg.device + return usm_iface, array_api, array_api_device def _run_on_device(func, obj=None, *args, **kwargs): @@ -208,7 +215,7 @@ def support_array_api(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): - usm_iface, input_array_api, input_dlpack_device = _extract_array_attr( + usm_iface, input_array_api, input_array_api_device = _extract_array_attr( *args, **kwargs ) data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) @@ -217,21 +224,17 @@ def wrapper_impl(obj, *args, **kwargs): ): hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) - if usm_iface is not None and hasattr(result, "__array_interface__"): + # if usm_iface is not None and hasattr(result, "__array_interface__"): + if usm_iface is not None and ( + hasattr(result, "__array_interface__") + or isinstance(result, Iterable) + and hasattr(result[0], "__array_interface__") + ): result = _copy_to_usm(data_queue, result) if dpnp_available and len(args) > 0 and isinstance(args[0], dpnp.ndarray): result = _convert_to_dpnp(result) - # TODO: - # add exception for numpy. 
- elif ( - input_array_api - and not _is_numpy_namespace(input_array_api) - and hasattr(result, "__array_namespace__") - ): - # TODO: - # avoid for numpy - # result = _from_dlpack(result, input_array_api, device=input_dlpack_device) - result = _from_dlpack(result, input_array_api) + elif input_array_api and not _is_numpy_namespace(input_array_api): + result = _asarray(result, input_array_api, device=input_array_api_device) return result if freefunc: diff --git a/onedal/tests/utils/_dataframes_support.py b/onedal/tests/utils/_dataframes_support.py index a59e4683ee..793dbdb2ef 100644 --- a/onedal/tests/utils/_dataframes_support.py +++ b/onedal/tests/utils/_dataframes_support.py @@ -164,6 +164,8 @@ def _convert_to_dataframe(obj, sycl_queue=None, target_df=None, *args, **kwargs) # DPCtl tensor. return dpt.asarray(obj, usm_type="device", sycl_queue=sycl_queue, *args, **kwargs) elif target_df in array_api_modules: + # TODO: + # move this comment from here. # use dpctl to define gpu devices via queues and # move data to the device. 
This is necessary as # the standard for defining devices is diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 6871a975ba..76d5ee9b25 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -56,8 +56,19 @@ def _one_from_dlpack(data, xp, *args, **kwargs): elif isinstance(data, Iterable): for i in range(len(data)): data[i] = _one_from_dlpack(data[i], xp, *args, **kwargs) - return data - return _one_from_dlpack(data, xp, *args, **kwargs) + return data + + +def _asarray(data, xp, *args, **kwargs): + def _one_asarray(data, xp, *args, **kwargs): + return xp.asarray(data, *args, **kwargs) + + if hasattr(data, "__array_namespace__"): + return _one_asarray(data, xp, *args, **kwargs) + elif isinstance(data, Iterable): + for i in range(len(data)): + data[i] = _one_asarray(data[i], xp, *args, **kwargs) + return data def _is_numpy_namespace(xp): @@ -80,6 +91,7 @@ def _get_sycl_namespace(*arrays): if hasattr(X, "__array_namespace__"): return sycl_type, X.__array_namespace__(), True elif dpnp_available and isinstance(X, dpnp.ndarray): + # TODO: # convert it to dpctl.tensor with namespace. return sycl_type, dpnp, False else: diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 2ee41ebf30..aad6fa829a 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -23,7 +23,7 @@ _transfer_to_host, dpnp_available, ) -from onedal.utils._array_api import _from_dlpack, _is_numpy_namespace +from onedal.utils._array_api import _asarray, _is_numpy_namespace if dpnp_available: import dpnp @@ -90,25 +90,26 @@ def dispatch(obj, method_name, branches, *args, **kwargs): # TODO: # support input data # wrap output data +# TODO: +# add docstrings. 
def wrap_output_data(func): @wraps(func) def wrapper(self, *args, **kwargs): data = (*args, *kwargs.values()) - usm_iface, array_api, dlpack_device = _extract_array_attr(*args, **kwargs) + usm_iface, input_array_api, input_array_api_device = _extract_array_attr( + *args, **kwargs + ) result = func(self, *args, **kwargs) if usm_iface is not None: result = _copy_to_usm(usm_iface["syclobj"], result) if dpnp_available and isinstance(data[0], dpnp.ndarray): result = _convert_to_dpnp(result) - # TODO: - # update condition elif ( - array_api - and not _is_numpy_namespace(array_api) + input_array_api + and not _is_numpy_namespace(input_array_api) and hasattr(result, "__array_namespace__") ): - # result = _from_dlpack(result, array_api, copy=True, device=dlpack_device) - result = _from_dlpack(result, array_api) + result = _asarray(result, input_array_api, device=input_array_api_device) return result return wrapper diff --git a/sklearnex/utils/_array_api.py b/sklearnex/utils/_array_api.py index f4913ebee2..360d09fc37 100644 --- a/sklearnex/utils/_array_api.py +++ b/sklearnex/utils/_array_api.py @@ -14,6 +14,8 @@ # limitations under the License. 
# ============================================================================== +"""Tools to support array_api.""" + import numpy as np from daal4py.sklearn._utils import sklearn_check_version From 16ddb694263773abbcd730a38e8b08284c84b2de Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Sun, 11 Aug 2024 00:53:34 -0700 Subject: [PATCH 47/82] fixing tests for incremenatal estimators --- .../linear_model/tests/test_incremental_linear.py | 15 +++++++++------ .../decomposition/tests/test_incremental_pca.py | 8 ++++---- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/sklearnex/linear_model/tests/test_incremental_linear.py b/sklearnex/linear_model/tests/test_incremental_linear.py index 54c33239ee..b3c773ed50 100644 --- a/sklearnex/linear_model/tests/test_incremental_linear.py +++ b/sklearnex/linear_model/tests/test_incremental_linear.py @@ -46,12 +46,13 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, fit_intercept, macro_block inclin.fit(X_df, y_df) y_pred = inclin.predict(X_df) + y_pred_as_numpy = _as_numpy(y_pred) - tol = 2e-6 if y_pred.dtype == np.float32 else 1e-7 + tol = 2e-6 if y_pred_as_numpy.dtype == np.float32 else 1e-7 assert_allclose(inclin.coef_, [1], atol=tol) if fit_intercept: assert_allclose(inclin.intercept_, [0], atol=tol) - assert_allclose(_as_numpy(y_pred), y, atol=tol) + assert_allclose(y_pred_as_numpy, y, atol=tol) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -84,14 +85,15 @@ def test_sklearnex_partial_fit_on_gold_data( X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) y_pred = inclin.predict(X_df) + y_pred_as_numpy = _as_numpy(y_pred) assert inclin.n_features_in_ == 1 - tol = 2e-6 if y_pred.dtype == np.float32 else 1e-7 + tol = 2e-6 if y_pred_as_numpy.dtype == np.float32 else 1e-7 assert_allclose(inclin.coef_, [[1]], atol=tol) if fit_intercept: assert_allclose(inclin.intercept_, 3, atol=tol) - assert_allclose(_as_numpy(y_pred), y, atol=tol) + 
assert_allclose(y_pred_as_numpy, y, atol=tol) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -124,14 +126,15 @@ def test_sklearnex_partial_fit_multitarget_on_gold_data( X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) y_pred = inclin.predict(X_df) + y_pred_as_numpy = _as_numpy(y_pred) assert inclin.n_features_in_ == 2 - tol = 7e-6 if y_pred.dtype == np.float32 else 1e-7 + tol = 7e-6 if y_pred_as_numpy.dtype == np.float32 else 1e-7 assert_allclose(inclin.coef_, [1.0, 2.0], atol=tol) if fit_intercept: assert_allclose(inclin.intercept_, 3.0, atol=tol) - assert_allclose(_as_numpy(y_pred), y, atol=tol) + assert_allclose(y_pred_as_numpy, y, atol=tol) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) diff --git a/sklearnex/preview/decomposition/tests/test_incremental_pca.py b/sklearnex/preview/decomposition/tests/test_incremental_pca.py index 786ae4fef0..da3950e577 100644 --- a/sklearnex/preview/decomposition/tests/test_incremental_pca.py +++ b/sklearnex/preview/decomposition/tests/test_incremental_pca.py @@ -200,7 +200,7 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, whiten, num_blocks X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) transformed_data = incpca.transform(X_df) - check_pca_on_gold_data(incpca, dtype, whiten, transformed_data) + check_pca_on_gold_data(incpca, dtype, whiten, _as_numpy(transformed_data)) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -217,7 +217,7 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, whiten, num_blocks, dtype) incpca.fit(X_df) transformed_data = incpca.transform(X_df) - check_pca_on_gold_data(incpca, dtype, whiten, transformed_data) + check_pca_on_gold_data(incpca, dtype, whiten, _as_numpy(transformed_data)) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -235,7 +235,7 @@ def test_sklearnex_fit_transform_on_gold_data( X_df = _convert_to_dataframe(X, 
sycl_queue=queue, target_df=dataframe) transformed_data = incpca.fit_transform(X_df) - check_pca_on_gold_data(incpca, dtype, whiten, transformed_data) + check_pca_on_gold_data(incpca, dtype, whiten, _as_numpy(transformed_data)) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -263,4 +263,4 @@ def test_sklearnex_partial_fit_on_random_data( X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) transformed_data = incpca.transform(X_df) - check_pca(incpca, dtype, whiten, X, transformed_data) + check_pca(incpca, dtype, whiten, X, _as_numpy(transformed_data)) From 9be33c2206c0e3a2e7dcf3b86a04f3c8f63cbce5 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Sun, 11 Aug 2024 17:25:06 -0700 Subject: [PATCH 48/82] using asarray instead of dlpack conversions --- onedal/_device_offload.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 2009f05cbb..0e2e725356 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -127,13 +127,10 @@ def _transfer_to_host(queue, *data): order=order, ) has_usm_data = True - # TODO: - # update conditions. elif array_api and not _is_numpy_namespace(array_api): - # TODO: - # get info about the device, for backward conversions. - item._array = item._array.copy() - item = np.from_dlpack(item).copy() + # `copy`` param for the `asarray`` is not setted. + # The object is copied only if needed. 
+ item = np.asarray(item) has_host_data = True else: has_host_data = True From e2fa37af90e731fda0abecc1d97366a554e6adff Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Sun, 11 Aug 2024 17:33:28 -0700 Subject: [PATCH 49/82] FIX: fixing spmd tests utilities for dpctl inputs --- sklearnex/tests/_utils_spmd.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sklearnex/tests/_utils_spmd.py b/sklearnex/tests/_utils_spmd.py index 172db788be..408f4545d7 100644 --- a/sklearnex/tests/_utils_spmd.py +++ b/sklearnex/tests/_utils_spmd.py @@ -146,8 +146,11 @@ def _assert_unordered_allclose(spmd_result, batch_result, localize=False, **kwar Raises: AssertionError: If results do not match. """ + spmd_result_as_numpy = _as_numpy(spmd_result) - sorted_spmd_result = spmd_result[np.argsort(np.linalg.norm(spmd_result, axis=1))] + sorted_spmd_result = spmd_result_as_numpy[ + np.argsort(np.linalg.norm(spmd_result_as_numpy, axis=1)) + ] if localize: local_batch_result = _get_local_tensor(batch_result) sorted_batch_result = local_batch_result[ @@ -158,7 +161,7 @@ def _assert_unordered_allclose(spmd_result, batch_result, localize=False, **kwar np.argsort(np.linalg.norm(batch_result, axis=1)) ] - assert_allclose(_as_numpy(sorted_spmd_result), sorted_batch_result, **kwargs) + assert_allclose(sorted_spmd_result, sorted_batch_result, **kwargs) def _assert_kmeans_labels_allclose( @@ -179,7 +182,11 @@ def _assert_kmeans_labels_allclose( AssertionError: If clusters are not correctly assigned. 
""" + spmd_labels_as_numpy = _as_numpy(spmd_labels) + spmd_centers_as_numpy = _as_numpy(spmd_centers) local_batch_labels = _get_local_tensor(batch_labels) assert_allclose( - spmd_centers[_as_numpy(spmd_labels)], batch_centers[local_batch_labels], **kwargs + spmd_centers_as_numpy[spmd_labels_as_numpy], + batch_centers[local_batch_labels], + **kwargs, ) From 016f5502ecd4cef1e91f739ed60d823d9107ad07 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Sun, 11 Aug 2024 18:28:14 -0700 Subject: [PATCH 50/82] Deselect LOF stability test with array api LocalOutlierFactor estimator with array_api input has some accuracy issues on Windows --- sklearnex/tests/test_run_to_run_stability.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sklearnex/tests/test_run_to_run_stability.py b/sklearnex/tests/test_run_to_run_stability.py index 29857cab8e..d5bdcdce98 100755 --- a/sklearnex/tests/test_run_to_run_stability.py +++ b/sklearnex/tests/test_run_to_run_stability.py @@ -18,6 +18,7 @@ from collections.abc import Iterable from functools import partial from numbers import Number +from sys import platform import numpy as np import pytest @@ -157,6 +158,15 @@ def test_standard_estimator_stability(estimator, method, dataframe, queue): if "NearestNeighbors" in estimator and "radius" in method: pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex") + if ( + "LocalOutlierFactor" in estimator + and "array_api" in dataframe + and platform == "win32" + ): + pytest.skip( + f"LocalOutlierFactor estimator with array_api input has some accuracy issues on Windows" + ) + est = PATCHED_MODELS[estimator]() if method and not hasattr(est, method): @@ -181,6 +191,14 @@ def test_special_estimator_stability(estimator, method, dataframe, queue): pytest.skip(f"variation observed in KMeans.score") if "NearestNeighbors" in estimator and "radius" in method: pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex") + if ( + "LocalOutlierFactor" in estimator + and 
"array_api" in dataframe + and platform == "win32" + ): + pytest.skip( + f"LocalOutlierFactor estimator with array_api input has some accuracy issues on Windows" + ) est = SPECIAL_INSTANCES[estimator] From 3e833d737b74136fbe82ab8dc2e2f24df3bf7c79 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Sun, 11 Aug 2024 18:30:44 -0700 Subject: [PATCH 51/82] Revert "Deselect LOF stability test with array api" This reverts commit 016f5502ecd4cef1e91f739ed60d823d9107ad07. --- sklearnex/tests/test_run_to_run_stability.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/sklearnex/tests/test_run_to_run_stability.py b/sklearnex/tests/test_run_to_run_stability.py index d5bdcdce98..29857cab8e 100755 --- a/sklearnex/tests/test_run_to_run_stability.py +++ b/sklearnex/tests/test_run_to_run_stability.py @@ -18,7 +18,6 @@ from collections.abc import Iterable from functools import partial from numbers import Number -from sys import platform import numpy as np import pytest @@ -158,15 +157,6 @@ def test_standard_estimator_stability(estimator, method, dataframe, queue): if "NearestNeighbors" in estimator and "radius" in method: pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex") - if ( - "LocalOutlierFactor" in estimator - and "array_api" in dataframe - and platform == "win32" - ): - pytest.skip( - f"LocalOutlierFactor estimator with array_api input has some accuracy issues on Windows" - ) - est = PATCHED_MODELS[estimator]() if method and not hasattr(est, method): @@ -191,14 +181,6 @@ def test_special_estimator_stability(estimator, method, dataframe, queue): pytest.skip(f"variation observed in KMeans.score") if "NearestNeighbors" in estimator and "radius" in method: pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex") - if ( - "LocalOutlierFactor" in estimator - and "array_api" in dataframe - and platform == "win32" - ): - pytest.skip( - f"LocalOutlierFactor estimator with array_api input has some accuracy issues on Windows" - ) est 
= SPECIAL_INSTANCES[estimator] From d0fc95dbb53bcb1d6db91ac2c4a4dcc1f6473b4c Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Sun, 11 Aug 2024 20:01:33 -0700 Subject: [PATCH 52/82] MAINT: minor refactoring and docstrings added --- onedal/_device_offload.py | 26 +++++++++++------- onedal/tests/utils/_dataframes_support.py | 32 +++++++++-------------- onedal/utils/_array_api.py | 15 +---------- sklearnex/_device_offload.py | 10 +++---- 4 files changed, 36 insertions(+), 47 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 0e2e725356..9eef17057a 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -165,9 +165,21 @@ def _get_host_inputs(*args, **kwargs): return q, hostargs, hostkwargs -# TODO: -# add docstrings. def _extract_array_attr(*args, **kwargs): + """Extracts USM iface, array namespace and Hardware device + the array data resides on. + + Returns + ------- + usm_iface : Dict + SUA protocol dictionary describing the array. + array_api : str + The name of the Array API namespace. + array_api_device : Array API device object + Hardware device the array data resides on. + + """ + allargs = (*args, *kwargs.values()) if len(allargs) == 0: return None, None, None @@ -178,8 +190,6 @@ def _extract_array_attr(*args, **kwargs): usm_iface = getattr(firstarg, "__sycl_usm_array_interface__", None) array_api = None array_api_device = None - # TODO: - # refactor if hasattr(firstarg, "__array_namespace__"): array_api = getattr(firstarg, "__array_namespace__", None)() if array_api: @@ -193,12 +203,11 @@ def _run_on_device(func, obj=None, *args, **kwargs): return func(*args, **kwargs) -# TODO: -# update docstrings. def support_array_api(freefunc=False, queue_param=True): """ - Handles USMArray input. Puts SYCLQueue from data to decorated function arguments. - Converts output of decorated function to dpctl.tensor/dpnp.ndarray if input was of this type. + Handles Array API input. 
Converts output of decorated function + to input Array API format on the same device. + Puts SYCLQueue from data to decorated function arguments. Parameters ---------- @@ -221,7 +230,6 @@ def wrapper_impl(obj, *args, **kwargs): ): hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) - # if usm_iface is not None and hasattr(result, "__array_interface__"): if usm_iface is not None and ( hasattr(result, "__array_interface__") or isinstance(result, Iterable) diff --git a/onedal/tests/utils/_dataframes_support.py b/onedal/tests/utils/_dataframes_support.py index 793dbdb2ef..095d83d7f7 100644 --- a/onedal/tests/utils/_dataframes_support.py +++ b/onedal/tests/utils/_dataframes_support.py @@ -20,7 +20,6 @@ from sklearnex import get_config try: - import dpctl import dpctl.tensor as dpt dpctl_available = True @@ -110,19 +109,24 @@ def get_df_and_q(dataframe: str): dataframes_and_queues.extend(get_df_and_q("dpctl")) if dpnp_available and "dpnp" in dataframe_filter_: dataframes_and_queues.extend(get_df_and_q("dpnp")) - # TODO: - # condition requires refactoring. if ( "array_api" in dataframe_filter_ and "array_api" in array_api_modules or array_api_enabled() ): + # TODO: + # Generally extend Array API testing with ability to add supported + # devices which are available at runtime. dataframes_and_queues.append(pytest.param("array_api", None, id="array_api")) return dataframes_and_queues +# TODO: +# generelize it with scikit-learn's _array_api module +# `_convert_to_numpy` utility. def _as_numpy(obj, *args, **kwargs): + """Converted input object to numpy.ndarray format.""" if dpnp_available and isinstance(obj, dpnp.ndarray): return obj.asnumpy(*args, **kwargs) if dpctl_available and isinstance(obj, dpt.usm_ndarray): @@ -164,23 +168,13 @@ def _convert_to_dataframe(obj, sycl_queue=None, target_df=None, *args, **kwargs) # DPCtl tensor. 
return dpt.asarray(obj, usm_type="device", sycl_queue=sycl_queue, *args, **kwargs) elif target_df in array_api_modules: + # Array API input other than DPNP ndarray, DPCtl tensor or + # Numpy ndarray. + # TODO: - # move this comment from here. - # use dpctl to define gpu devices via queues and - # move data to the device. This is necessary as - # the standard for defining devices is - # purposefully not defined in the array_api - # standard, but maintaining data on a device - # using the method `from_dlpack` is. + # Generally extend Array API testing with ability to add supported + # devices which are available at runtime. xp = array_api_modules[target_df] - # return xp.from_dlpack( - # _convert_to_dataframe( - # obj, sycl_queue=sycl_queue, target_df="dpctl", *args, **kwargs - # ) - # ) - if hasattr(obj, "__dlpack__"): - return xp.from_dlpack(obj) - else: - return xp.asarray(obj) + return xp.asarray(obj) raise RuntimeError("Unsupported dataframe conversion") diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 76d5ee9b25..74cffab995 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -18,8 +18,6 @@ from collections.abc import Iterable -import numpy as np - try: from dpctl.tensor import usm_ndarray @@ -39,6 +37,7 @@ import dpnp def _convert_to_dpnp(array): + """Converted input object to dpnp.ndarray format.""" if isinstance(array, usm_ndarray): return dpnp.array(array, copy=False) elif isinstance(array, Iterable): @@ -47,18 +46,6 @@ def _convert_to_dpnp(array): return array -def _from_dlpack(data, xp, *args, **kwargs): - def _one_from_dlpack(data, xp, *args, **kwargs): - return xp.from_dlpack(data, *args, **kwargs) - - if isinstance(data, np.ndarray): - return _one_from_dlpack(data, xp, *args, **kwargs) - elif isinstance(data, Iterable): - for i in range(len(data)): - data[i] = _one_from_dlpack(data[i], xp, *args, **kwargs) - return data - - def _asarray(data, xp, *args, **kwargs): def _one_asarray(data, xp, *args, 
**kwargs): return xp.asarray(data, *args, **kwargs) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index aad6fa829a..26cd36b101 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -87,12 +87,12 @@ def dispatch(obj, method_name, branches, *args, **kwargs): ) -# TODO: -# support input data -# wrap output data -# TODO: -# add docstrings. def wrap_output_data(func): + """ + Handles Array API input. Converts output of decorated function + to input Array API format on the same device. + """ + @wraps(func) def wrapper(self, *args, **kwargs): data = (*args, *kwargs.values()) From 77b823108880a9a122ee07f27d147bdf187472fd Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 27 Aug 2024 01:27:23 -0700 Subject: [PATCH 53/82] ENH: DBSCAN via Array API DDBSCAN rewrite via Array API --- onedal/cluster/dbscan.py | 37 +++++++++++++++++++------------------ sklearnex/cluster/dbscan.py | 8 +++++++- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index f91325b65c..302d30a997 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -14,14 +14,14 @@ # limitations under the License. 
# =============================================================================== -import numpy as np +from sklearn.utils import check_array -from daal4py.sklearn._utils import get_dtype, make2d +from onedal.datatypes._data_conversion import get_dtype, make2d from ..common._base import BaseEstimator from ..common._mixin import ClusterMixin from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _check_array +from ..utils._array_api import get_namespace class BaseDBSCAN(BaseEstimator, ClusterMixin): @@ -46,9 +46,9 @@ def __init__( self.p = p self.n_jobs = n_jobs - def _get_onedal_params(self, dtype=np.float32): + def _get_onedal_params(self, xp, dtype): return { - "fptype": "float" if dtype == np.float32 else "double", + "fptype": "float" if dtype == xp.float32 else "double", "method": "by_default", "min_observations": int(self.min_samples), "epsilon": float(self.eps), @@ -56,28 +56,30 @@ def _get_onedal_params(self, dtype=np.float32): "result_options": "core_observation_indices|responses", } - def _fit(self, X, y, sample_weight, module, queue): + def _fit(self, X, xp, is_array_api_compliant, y, sample_weight, queue): policy = self._get_policy(queue, X) - X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) + X = check_array(X, accept_sparse="csr", dtype=[xp.float64, xp.float32]) sample_weight = make2d(sample_weight) if sample_weight is not None else None X = make2d(X) - types = [np.float32, np.float64] + types = [xp.float32, xp.float64] if get_dtype(X) not in types: - X = X.astype(np.float64) + X = X.astype(xp.float64) X = _convert_to_supported(policy, X) dtype = get_dtype(X) - params = self._get_onedal_params(dtype) - result = module.compute(policy, params, to_table(X), to_table(sample_weight)) + params = self._get_onedal_params(xp, dtype) + result = self._get_backend("dbscan", "clustering", None).compute( + policy, params, to_table(X), to_table(sample_weight) + ) - self.labels_ = 
from_table(result.responses).ravel() + self.labels_ = from_table(result.responses).reshape(-1) if result.core_observation_indices is not None: self.core_sample_indices_ = from_table( result.core_observation_indices - ).ravel() + ).reshape(-1) else: - self.core_sample_indices_ = np.array([], dtype=np.intc) - self.components_ = np.take(X, self.core_sample_indices_, axis=0) + self.core_sample_indices_ = xp.array([], dtype=xp.int32) + self.components_ = xp.take(X, self.core_sample_indices_, axis=0) self.n_features_in_ = X.shape[1] return self @@ -105,6 +107,5 @@ def __init__( self.n_jobs = n_jobs def fit(self, X, y=None, sample_weight=None, queue=None): - return super()._fit( - X, y, sample_weight, self._get_backend("dbscan", "clustering", None), queue - ) + xp, is_array_api_compliant = get_namespace(X) + return super()._fit(X, xp, is_array_api_compliant, y, sample_weight, queue) diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index f8d080cfbe..fbd6ff9e8f 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -27,6 +27,7 @@ from .._device_offload import dispatch from .._utils import PatchingConditionsChain +from ..utils._array_api import get_namespace if sklearn_check_version("1.1") and not sklearn_check_version("1.2"): from sklearn.utils import check_scalar @@ -86,6 +87,7 @@ def __init__( def _onedal_fit(self, X, y, sample_weight=None, queue=None): if sklearn_check_version("1.0"): X = self._validate_data(X, force_all_finite=False) + xp, is_array_api_compliant = get_namespace(X) onedal_params = { "eps": self.eps, @@ -99,7 +101,9 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): } self._onedal_estimator = self._onedal_dbscan(**onedal_params) - self._onedal_estimator.fit(X, y=y, sample_weight=sample_weight, queue=queue) + self._onedal_estimator._fit( + X, xp, is_array_api_compliant, y, sample_weight, queue=queue + ) self._save_attributes() def _onedal_supported(self, method_name, *data): @@ -173,6 +177,8 @@ def 
fit(self, X, y=None, sample_weight=None): if self.eps <= 0.0: raise ValueError(f"eps == {self.eps}, must be > 0.0.") + # TODO: + # should be checked for Array API inputs. if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X) dispatch( From 2094fadb282b3d11ed0fc9ceb5f82dc3e77c6fb3 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 30 Aug 2024 03:13:01 -0700 Subject: [PATCH 54/82] refactoring --- onedal/_device_offload.py | 9 ++++----- sklearnex/utils/_array_api.py | 2 -- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 9eef17057a..a4d6dffa26 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -35,13 +35,12 @@ try: import dpnp + from .utils._array_api import _convert_to_dpnp + dpnp_available = True except ImportError: dpnp_available = False -if dpnp_available: - from .utils._array_api import _convert_to_dpnp - class DummySyclQueue: """This class is designed to act like dpctl.SyclQueue @@ -166,13 +165,13 @@ def _get_host_inputs(*args, **kwargs): def _extract_array_attr(*args, **kwargs): - """Extracts USM iface, array namespace and Hardware device + """Extracts USM iface, array namespace and hardware device the array data resides on. Returns ------- usm_iface : Dict - SUA protocol dictionary describing the array. + Sycl USM Array (SUA) interface protocol dictionary describing the array. array_api : str The name of the Array API namespace. array_api_device : Array API device object diff --git a/sklearnex/utils/_array_api.py b/sklearnex/utils/_array_api.py index 360d09fc37..bc30be5021 100644 --- a/sklearnex/utils/_array_api.py +++ b/sklearnex/utils/_array_api.py @@ -25,8 +25,6 @@ from sklearn.utils._array_api import get_namespace as sklearn_get_namespace -# TODO: -# update it for supported versions of scikit-learn. def get_namespace(*arrays): """Get namespace of arrays. 
From 0fcaafe55e3192991118d4d12b787f5c859b4f5a Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Mon, 2 Sep 2024 08:08:54 -0700 Subject: [PATCH 55/82] minor update for _extract_array_attr --- onedal/_device_offload.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index a4d6dffa26..6ffc00ee9f 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -187,10 +187,10 @@ def _extract_array_attr(*args, **kwargs): # to have the same attributs. firstarg = allargs[0] usm_iface = getattr(firstarg, "__sycl_usm_array_interface__", None) - array_api = None + array_api = getattr(firstarg, "__array_namespace__", None) array_api_device = None - if hasattr(firstarg, "__array_namespace__"): - array_api = getattr(firstarg, "__array_namespace__", None)() + if array_api: + array_api = array_api() if array_api: array_api_device = firstarg.device return usm_iface, array_api, array_api_device From 55217208363bcd772e14b6051c1cbd3e9ba66641 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Mon, 2 Sep 2024 08:30:34 -0700 Subject: [PATCH 56/82] minor update --- onedal/_device_offload.py | 1 - 1 file changed, 1 deletion(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 6ffc00ee9f..1a95d33572 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -191,7 +191,6 @@ def _extract_array_attr(*args, **kwargs): array_api_device = None if array_api: array_api = array_api() - if array_api: array_api_device = firstarg.device return usm_iface, array_api, array_api_device From 7bd3df033a71275987e0043a84442cda04323911 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Mon, 2 Sep 2024 09:12:54 -0700 Subject: [PATCH 57/82] update conditions for support_array_api --- onedal/_device_offload.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 1a95d33572..fe7305a194 100644 --- 
a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -228,11 +228,7 @@ def wrapper_impl(obj, *args, **kwargs): ): hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) - if usm_iface is not None and ( - hasattr(result, "__array_interface__") - or isinstance(result, Iterable) - and hasattr(result[0], "__array_interface__") - ): + if usm_iface is not None: result = _copy_to_usm(data_queue, result) if dpnp_available and len(args) > 0 and isinstance(args[0], dpnp.ndarray): result = _convert_to_dpnp(result) From 19df8f13788867a3f3381f1f8c30e07b5fd459a6 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Mon, 2 Sep 2024 19:13:34 +0200 Subject: [PATCH 58/82] update dispatch for array api --- sklearnex/_device_offload.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 26cd36b101..d50c530f4b 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -74,9 +74,14 @@ def dispatch(obj, method_name, branches, *args, **kwargs): patching_status.write_log(queue=q) return branches[backend](obj, *hostargs, **hostkwargs, queue=q) if backend == "sklearn": - if "array_api_dispatch" in get_config() and get_config()["array_api_dispatch"]: - # TODO: - # logs require update for this branch. + if ( + "array_api_dispatch" in get_config() + and get_config()["array_api_dispatch"] + and "array_api_support" in obj._get_tags() + and obj._get_tags()["array_api_support"] + ): + # If `array_api_dispatch` enabled and array api is supported for the stock scikit-learn, + # then raw inputs are used for the fallback. 
patching_status.write_log() return branches[backend](obj, *args, **kwargs) else: From 442826ece38068dc682f0db9d240aa9cc9c5cc49 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Mon, 2 Sep 2024 10:56:20 -0700 Subject: [PATCH 59/82] covered by tickets TODOs removed TODOs --- onedal/tests/utils/_dataframes_support.py | 12 ------------ onedal/utils/_array_api.py | 2 -- 2 files changed, 14 deletions(-) diff --git a/onedal/tests/utils/_dataframes_support.py b/onedal/tests/utils/_dataframes_support.py index 095d83d7f7..4a62a5f4d3 100644 --- a/onedal/tests/utils/_dataframes_support.py +++ b/onedal/tests/utils/_dataframes_support.py @@ -40,9 +40,6 @@ # GPU-no-copy. import array_api_strict - # TODO: - # get this from onedal._config._get_config - # Run check if "array_api_dispatch" is configurable array_api_enabled = lambda: get_config()["array_api_dispatch"] array_api_enabled() array_api_modules = {"array_api": array_api_strict} @@ -114,17 +111,11 @@ def get_df_and_q(dataframe: str): and "array_api" in array_api_modules or array_api_enabled() ): - # TODO: - # Generally extend Array API testing with ability to add supported - # devices which are available at runtime. dataframes_and_queues.append(pytest.param("array_api", None, id="array_api")) return dataframes_and_queues -# TODO: -# generelize it with scikit-learn's _array_api module -# `_convert_to_numpy` utility. def _as_numpy(obj, *args, **kwargs): """Converted input object to numpy.ndarray format.""" if dpnp_available and isinstance(obj, dpnp.ndarray): @@ -171,9 +162,6 @@ def _convert_to_dataframe(obj, sycl_queue=None, target_df=None, *args, **kwargs) # Array API input other than DPNP ndarray, DPCtl tensor or # Numpy ndarray. - # TODO: - # Generally extend Array API testing with ability to add supported - # devices which are available at runtime. 
xp = array_api_modules[target_df] return xp.asarray(obj) diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 74cffab995..7b8bf20145 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -78,8 +78,6 @@ def _get_sycl_namespace(*arrays): if hasattr(X, "__array_namespace__"): return sycl_type, X.__array_namespace__(), True elif dpnp_available and isinstance(X, dpnp.ndarray): - # TODO: - # convert it to dpctl.tensor with namespace. return sycl_type, dpnp, False else: raise ValueError(f"SYCL type not recognized: {sycl_type}") From ec634c18a650b558a5243a0ea1f0bd5638186d29 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 4 Sep 2024 02:15:20 -0700 Subject: [PATCH 60/82] refactor _asarray --- onedal/utils/_array_api.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 7b8bf20145..5f69436a70 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -47,14 +47,11 @@ def _convert_to_dpnp(array): def _asarray(data, xp, *args, **kwargs): - def _one_asarray(data, xp, *args, **kwargs): - return xp.asarray(data, *args, **kwargs) - if hasattr(data, "__array_namespace__"): - return _one_asarray(data, xp, *args, **kwargs) + return xp.asarray(data, *args, **kwargs) elif isinstance(data, Iterable): for i in range(len(data)): - data[i] = _one_asarray(data[i], xp, *args, **kwargs) + data[i] = xp.asarray(data[i], *args, **kwargs) return data From 844745f591f30a537638ae96f4deaf33bb65fd69 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 4 Sep 2024 05:53:21 -0700 Subject: [PATCH 61/82] update a bit logic --- onedal/_device_offload.py | 43 +++++++----------------------------- onedal/utils/_array_api.py | 8 +++++-- sklearnex/_device_offload.py | 14 +++++------- 3 files changed, 19 insertions(+), 46 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index fe7305a194..d92cd05594 100644 --- 
a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -164,37 +164,6 @@ def _get_host_inputs(*args, **kwargs): return q, hostargs, hostkwargs -def _extract_array_attr(*args, **kwargs): - """Extracts USM iface, array namespace and hardware device - the array data resides on. - - Returns - ------- - usm_iface : Dict - Sycl USM Array (SUA) interface protocol dictionary describing the array. - array_api : str - The name of the Array API namespace. - array_api_device : Array API device object - Hardware device the array data resides on. - - """ - - allargs = (*args, *kwargs.values()) - if len(allargs) == 0: - return None, None, None - # Getting first argument attr. For all sklearn-like functions - # all data provided in the first position. Other data arguments expected - # to have the same attributs. - firstarg = allargs[0] - usm_iface = getattr(firstarg, "__sycl_usm_array_interface__", None) - array_api = getattr(firstarg, "__array_namespace__", None) - array_api_device = None - if array_api: - array_api = array_api() - array_api_device = firstarg.device - return usm_iface, array_api, array_api_device - - def _run_on_device(func, obj=None, *args, **kwargs): if obj is not None: return func(obj, *args, **kwargs) @@ -219,20 +188,24 @@ def support_array_api(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): - usm_iface, input_array_api, input_array_api_device = _extract_array_attr( - *args, **kwargs - ) + # TODO: + # refactor check the len. 
+ data = (*args, *kwargs.values()) data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) if queue_param and not ( "queue" in hostkwargs and hostkwargs["queue"] is not None ): hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) + usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) if usm_iface is not None: result = _copy_to_usm(data_queue, result) if dpnp_available and len(args) > 0 and isinstance(args[0], dpnp.ndarray): result = _convert_to_dpnp(result) - elif input_array_api and not _is_numpy_namespace(input_array_api): + return result + input_array_api = getattr(data[0], "__array_namespace__", print)() + input_array_api_device = data[0].device if input_array_api else None + if input_array_api: result = _asarray(result, input_array_api, device=input_array_api_device) return result diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 5f69436a70..92cb152368 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -47,11 +47,15 @@ def _convert_to_dpnp(array): def _asarray(data, xp, *args, **kwargs): + def _one_asarray(data, xp, *args, **kwargs): + if data is not None: + return xp.asarray(data, *args, **kwargs) + if hasattr(data, "__array_namespace__"): - return xp.asarray(data, *args, **kwargs) + return _one_asarray(data, xp, *args, **kwargs) elif isinstance(data, Iterable): for i in range(len(data)): - data[i] = xp.asarray(data[i], *args, **kwargs) + data[i] = _one_asarray(data[i], xp, *args, **kwargs) return data diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index d50c530f4b..72ca7b38c9 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -18,7 +18,6 @@ from onedal._device_offload import ( _copy_to_usm, - _extract_array_attr, _get_global_queue, _transfer_to_host, dpnp_available, @@ -101,19 +100,16 @@ def wrap_output_data(func): @wraps(func) def wrapper(self, *args, **kwargs): data = (*args, 
*kwargs.values()) - usm_iface, input_array_api, input_array_api_device = _extract_array_attr( - *args, **kwargs - ) + usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) result = func(self, *args, **kwargs) if usm_iface is not None: result = _copy_to_usm(usm_iface["syclobj"], result) if dpnp_available and isinstance(data[0], dpnp.ndarray): result = _convert_to_dpnp(result) - elif ( - input_array_api - and not _is_numpy_namespace(input_array_api) - and hasattr(result, "__array_namespace__") - ): + return result + input_array_api = getattr(data[0], "__array_namespace__", print)() + input_array_api_device = data[0].device if input_array_api else None + if input_array_api: result = _asarray(result, input_array_api, device=input_array_api_device) return result From 752df2290fe8b01bfc5f4be0c2c433ae56c2a604 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 4 Sep 2024 10:00:39 -0700 Subject: [PATCH 62/82] addressed test failures --- onedal/_device_offload.py | 4 +++- onedal/utils/_array_api.py | 19 ++++++++++++------- sklearnex/_device_offload.py | 2 ++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index d92cd05594..2b6e412ed7 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -191,6 +191,8 @@ def wrapper_impl(obj, *args, **kwargs): # TODO: # refactor check the len.
data = (*args, *kwargs.values()) + if len(data) == 0: + return _run_on_device(func, obj, *args, **kwargs) data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) if queue_param and not ( "queue" in hostkwargs and hostkwargs["queue"] is not None @@ -200,7 +202,7 @@ def wrapper_impl(obj, *args, **kwargs): usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) if usm_iface is not None: result = _copy_to_usm(data_queue, result) - if dpnp_available and len(args) > 0 and isinstance(args[0], dpnp.ndarray): + if dpnp_available and isinstance(args[0], dpnp.ndarray): result = _convert_to_dpnp(result) return result input_array_api = getattr(data[0], "__array_namespace__", print)() diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 92cb152368..118bc5b314 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -38,6 +38,8 @@ def _convert_to_dpnp(array): """Converted input object to dpnp.ndarray format.""" + # Will be removed and `onedal.utils._array_api._asarray` will be + # used instead after DPNP Array API enabling. 
if isinstance(array, usm_ndarray): return dpnp.array(array, copy=False) elif isinstance(array, Iterable): @@ -47,15 +49,18 @@ def _convert_to_dpnp(array): def _asarray(data, xp, *args, **kwargs): - def _one_asarray(data, xp, *args, **kwargs): - if data is not None: - return xp.asarray(data, *args, **kwargs) - + """Converted input object to array format of xp namespace provided.""" if hasattr(data, "__array_namespace__"): - return _one_asarray(data, xp, *args, **kwargs) + return xp.asarray(data, xp, *args, **kwargs) elif isinstance(data, Iterable): - for i in range(len(data)): - data[i] = _one_asarray(data[i], xp, *args, **kwargs) + if isinstance(data, tuple): + result_data = [] + for i in range(len(data)): + result_data.append(_asarray(data[i], xp, *args, **kwargs)) + data = tuple(result_data) + else: + for i in range(len(data)): + data[i] = _asarray(data[i], xp, *args, **kwargs) return data diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 72ca7b38c9..4d6b5737c4 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -100,6 +100,8 @@ def wrap_output_data(func): @wraps(func) def wrapper(self, *args, **kwargs): data = (*args, *kwargs.values()) + if len(data) == 0: + return func(self, *args, **kwargs) usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) result = func(self, *args, **kwargs) if usm_iface is not None: From fe38790aa3b10d4823b935998637ce8d485aa96e Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 4 Sep 2024 10:05:18 -0700 Subject: [PATCH 63/82] update docstring --- onedal/_device_offload.py | 2 -- sklearnex/_device_offload.py | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 2b6e412ed7..bcff132947 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -188,8 +188,6 @@ def support_array_api(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): 
- # TODO: - # refactor check the len. data = (*args, *kwargs.values()) if len(data) == 0: return _run_on_device(func, obj, *args, **kwargs) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 4d6b5737c4..0f10dd1225 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -93,8 +93,8 @@ def dispatch(obj, method_name, branches, *args, **kwargs): def wrap_output_data(func): """ - Handles Array API input. Converts output of decorated function - to input Array API format on the same device. + Converts and moves the output arrays of the decorated function + to match the input array type and device. """ @wraps(func) From 698904979828c68a3a82327287caf7bc2a1ef3b2 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 4 Sep 2024 21:23:16 +0200 Subject: [PATCH 64/82] Update _array_api.py --- onedal/utils/_array_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 118bc5b314..4accdd3ac0 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -51,7 +51,7 @@ def _convert_to_dpnp(array): def _asarray(data, xp, *args, **kwargs): """Converted input object to array format of xp namespace provided.""" if hasattr(data, "__array_namespace__"): - return xp.asarray(data, xp, *args, **kwargs) + return xp.asarray(data, *args, **kwargs) elif isinstance(data, Iterable): if isinstance(data, tuple): result_data = [] From 047d6982034a851444ddae0970b0bad4378c0cdc Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 4 Sep 2024 13:16:08 -0700 Subject: [PATCH 65/82] minor update --- onedal/utils/_array_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 4accdd3ac0..db6fa72447 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -56,11 +56,11 @@ def _asarray(data, xp, *args, **kwargs): if isinstance(data, tuple): result_data = [] for i in 
range(len(data)): - result_data.append(_asarray(data[i], xp, *args, **kwargs)) + result_data.append(xp.asarray(data[i], *args, **kwargs)) data = tuple(result_data) else: for i in range(len(data)): - data[i] = _asarray(data[i], xp, *args, **kwargs) + data[i] = xp.asarray(data[i], *args, **kwargs) return data From c5f828142f603575d586075e9d8a2f779bf197aa Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 4 Sep 2024 14:07:53 -0700 Subject: [PATCH 66/82] minor update try --- onedal/_device_offload.py | 4 +--- onedal/utils/_array_api.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index bcff132947..3e26f4f1c4 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -97,9 +97,7 @@ def _transfer_to_host(queue, *data): host_data = [] for item in data: usm_iface = getattr(item, "__sycl_usm_array_interface__", None) - array_api = getattr(item, "__array_namespace__", None) - if array_api: - array_api = array_api() + array_api = getattr(item, "__array_namespace__", print)() if usm_iface is not None: if not dpctl_available: raise RuntimeError( diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index db6fa72447..4accdd3ac0 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -56,11 +56,11 @@ def _asarray(data, xp, *args, **kwargs): if isinstance(data, tuple): result_data = [] for i in range(len(data)): - result_data.append(xp.asarray(data[i], *args, **kwargs)) + result_data.append(_asarray(data[i], xp, *args, **kwargs)) data = tuple(result_data) else: for i in range(len(data)): - data[i] = xp.asarray(data[i], *args, **kwargs) + data[i] = _asarray(data[i], xp, *args, **kwargs) return data From 1b45a240f74c714f853a3330ae2f3dacc816ffd6 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 4 Sep 2024 15:20:15 -0700 Subject: [PATCH 67/82] renamed wrapper for inputs support --- onedal/_device_offload.py | 6 +++---
.../spmd/basic_statistics/basic_statistics.py | 6 +++--- onedal/spmd/cluster/kmeans.py | 8 ++++---- onedal/spmd/covariance/covariance.py | 4 ++-- onedal/spmd/decomposition/pca.py | 4 ++-- onedal/spmd/linear_model/linear_model.py | 6 +++--- .../spmd/linear_model/logistic_regression.py | 10 +++++----- onedal/spmd/neighbors/neighbors.py | 20 +++++++++---------- sklearnex/cluster/k_means.py | 10 +++++----- sklearnex/linear_model/coordinate_descent.py | 14 ++++++------- sklearnex/linear_model/ridge.py | 8 ++++---- sklearnex/manifold/t_sne.py | 6 +++--- sklearnex/metrics/pairwise.py | 4 ++-- sklearnex/metrics/ranking.py | 4 ++-- sklearnex/model_selection/split.py | 6 ++++-- sklearnex/preview/linear_model/ridge.py | 8 ++++---- 16 files changed, 63 insertions(+), 61 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 3e26f4f1c4..b43135a158 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -168,10 +168,10 @@ def _run_on_device(func, obj=None, *args, **kwargs): return func(*args, **kwargs) -def support_array_api(freefunc=False, queue_param=True): +def support_input_format(freefunc=False, queue_param=True): """ - Handles Array API input. Converts output of decorated function - to input Array API format on the same device. + Converts and moves the output arrays of the decorated function + to match the input array type and device. Puts SYCLQueue from data to decorated function arguments. 
Parameters diff --git a/onedal/spmd/basic_statistics/basic_statistics.py b/onedal/spmd/basic_statistics/basic_statistics.py index 0676ffee66..8253aa6628 100644 --- a/onedal/spmd/basic_statistics/basic_statistics.py +++ b/onedal/spmd/basic_statistics/basic_statistics.py @@ -16,15 +16,15 @@ from onedal.basic_statistics import BasicStatistics as BasicStatistics_Batch -from ..._device_offload import support_array_api +from ..._device_offload import support_input_format from .._base import BaseEstimatorSPMD class BasicStatistics(BaseEstimatorSPMD, BasicStatistics_Batch): - @support_array_api() + @support_input_format() def compute(self, data, weights=None, queue=None): return super().compute(data, weights=weights, queue=queue) - @support_array_api() + @support_input_format() def fit(self, data, sample_weight=None, queue=None): return super().fit(data, sample_weight=sample_weight, queue=queue) diff --git a/onedal/spmd/cluster/kmeans.py b/onedal/spmd/cluster/kmeans.py index 5eadaded82..3f552a353b 100644 --- a/onedal/spmd/cluster/kmeans.py +++ b/onedal/spmd/cluster/kmeans.py @@ -18,7 +18,7 @@ from onedal.cluster import KMeansInit as KMeansInit_Batch from onedal.spmd.basic_statistics import BasicStatistics -from ..._device_offload import support_array_api +from ..._device_offload import support_input_format from .._base import BaseEstimatorSPMD @@ -37,15 +37,15 @@ def _get_basic_statistics_backend(self, result_options): def _get_kmeans_init(self, cluster_count, seed, algorithm): return KMeansInit(cluster_count=cluster_count, seed=seed, algorithm=algorithm) - @support_array_api() + @support_input_format() def fit(self, X, y=None, queue=None): return super().fit(X, queue=queue) - @support_array_api() + @support_input_format() def predict(self, X, queue=None): return super().predict(X, queue=queue) - @support_array_api() + @support_input_format() def fit_predict(self, X, y=None, queue=None): return super().fit_predict(X, queue=queue) diff --git 
a/onedal/spmd/covariance/covariance.py b/onedal/spmd/covariance/covariance.py index a1509c1bfa..fe746b0993 100644 --- a/onedal/spmd/covariance/covariance.py +++ b/onedal/spmd/covariance/covariance.py @@ -16,11 +16,11 @@ from onedal.covariance import EmpiricalCovariance as EmpiricalCovariance_Batch -from ..._device_offload import support_array_api +from ..._device_offload import support_input_format from .._base import BaseEstimatorSPMD class EmpiricalCovariance(BaseEstimatorSPMD, EmpiricalCovariance_Batch): - @support_array_api() + @support_input_format() def fit(self, X, y=None, queue=None): return super().fit(X, queue=queue) diff --git a/onedal/spmd/decomposition/pca.py b/onedal/spmd/decomposition/pca.py index 6b46fdc087..55f242f782 100644 --- a/onedal/spmd/decomposition/pca.py +++ b/onedal/spmd/decomposition/pca.py @@ -16,11 +16,11 @@ from onedal.decomposition.pca import PCA as PCABatch -from ..._device_offload import support_array_api +from ..._device_offload import support_input_format from .._base import BaseEstimatorSPMD class PCA(BaseEstimatorSPMD, PCABatch): - @support_array_api() + @support_input_format() def fit(self, X, y=None, queue=None): return super().fit(X, queue=queue) diff --git a/onedal/spmd/linear_model/linear_model.py b/onedal/spmd/linear_model/linear_model.py index 93848a6d41..11d9cbe0e8 100644 --- a/onedal/spmd/linear_model/linear_model.py +++ b/onedal/spmd/linear_model/linear_model.py @@ -16,15 +16,15 @@ from onedal.linear_model import LinearRegression as LinearRegression_Batch -from ..._device_offload import support_array_api +from ..._device_offload import support_input_format from .._base import BaseEstimatorSPMD class LinearRegression(BaseEstimatorSPMD, LinearRegression_Batch): - @support_array_api() + @support_input_format() def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) - @support_array_api() + @support_input_format() def predict(self, X, queue=None): return super().predict(X, queue=queue) diff --git 
a/onedal/spmd/linear_model/logistic_regression.py b/onedal/spmd/linear_model/logistic_regression.py index 0e052b3598..38529eaef7 100644 --- a/onedal/spmd/linear_model/logistic_regression.py +++ b/onedal/spmd/linear_model/logistic_regression.py @@ -16,23 +16,23 @@ from onedal.linear_model import LogisticRegression as LogisticRegression_Batch -from ..._device_offload import support_array_api +from ..._device_offload import support_input_format from .._base import BaseEstimatorSPMD class LogisticRegression(BaseEstimatorSPMD, LogisticRegression_Batch): - @support_array_api() + @support_input_format() def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) - @support_array_api() + @support_input_format() def predict(self, X, queue=None): return super().predict(X, queue=queue) - @support_array_api() + @support_input_format() def predict_proba(self, X, queue=None): return super().predict_proba(X, queue=queue) - @support_array_api() + @support_input_format() def predict_log_proba(self, X, queue=None): return super().predict_log_proba(X, queue=queue) diff --git a/onedal/spmd/neighbors/neighbors.py b/onedal/spmd/neighbors/neighbors.py index 878e6dadeb..87004e1a77 100644 --- a/onedal/spmd/neighbors/neighbors.py +++ b/onedal/spmd/neighbors/neighbors.py @@ -17,30 +17,30 @@ from onedal.neighbors import KNeighborsClassifier as KNeighborsClassifier_Batch from onedal.neighbors import KNeighborsRegressor as KNeighborsRegressor_Batch -from ..._device_offload import support_array_api +from ..._device_offload import support_input_format from .._base import BaseEstimatorSPMD class KNeighborsClassifier(BaseEstimatorSPMD, KNeighborsClassifier_Batch): - @support_array_api() + @support_input_format() def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) - @support_array_api() + @support_input_format() def predict(self, X, queue=None): return super().predict(X, queue=queue) - @support_array_api() + @support_input_format() def predict_proba(self, X, 
queue=None): raise NotImplementedError("predict_proba not supported in distributed mode.") - @support_array_api() + @support_input_format() def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): return super().kneighbors(X, n_neighbors, return_distance, queue=queue) class KNeighborsRegressor(BaseEstimatorSPMD, KNeighborsRegressor_Batch): - @support_array_api() + @support_input_format() def fit(self, X, y, queue=None): if queue is not None and queue.sycl_device.is_gpu: return super()._fit(X, y, queue=queue) @@ -50,11 +50,11 @@ def fit(self, X, y, queue=None): "CPU. Consider running on it on GPU." ) - @support_array_api() + @support_input_format() def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): return super().kneighbors(X, n_neighbors, return_distance, queue=queue) - @support_array_api() + @support_input_format() def predict(self, X, queue=None): return self._predict_gpu(X, queue=queue) @@ -66,10 +66,10 @@ def _get_onedal_params(self, X, y=None): class NearestNeighbors(BaseEstimatorSPMD): - @support_array_api() + @support_input_format() def fit(self, X, y, queue=None): return super().fit(X, y, queue=queue) - @support_array_api() + @support_input_format() def kneighbors(self, X=None, n_neighbors=None, return_distance=True, queue=None): return super().kneighbors(X, n_neighbors, return_distance, queue=queue) diff --git a/sklearnex/cluster/k_means.py b/sklearnex/cluster/k_means.py index 0c949d9d35..1d96ec323b 100755 --- a/sklearnex/cluster/k_means.py +++ b/sklearnex/cluster/k_means.py @@ -15,11 +15,11 @@ # =============================================================================== from daal4py.sklearn.cluster import KMeans -from onedal._device_offload import support_array_api +from onedal._device_offload import support_input_format # Note: `sklearnex.cluster.KMeans` only has functional # sycl GPU support. No GPU device will be offloaded. 
-KMeans.fit = support_array_api(queue_param=False)(KMeans.fit) -KMeans.fit_predict = support_array_api(queue_param=False)(KMeans.fit_predict) -KMeans.predict = support_array_api(queue_param=False)(KMeans.predict) -KMeans.score = support_array_api(queue_param=False)(KMeans.score) +KMeans.fit = support_input_format(queue_param=False)(KMeans.fit) +KMeans.fit_predict = support_input_format(queue_param=False)(KMeans.fit_predict) +KMeans.predict = support_input_format(queue_param=False)(KMeans.predict) +KMeans.score = support_input_format(queue_param=False)(KMeans.score) diff --git a/sklearnex/linear_model/coordinate_descent.py b/sklearnex/linear_model/coordinate_descent.py index bc113f38b1..abe594ad29 100644 --- a/sklearnex/linear_model/coordinate_descent.py +++ b/sklearnex/linear_model/coordinate_descent.py @@ -15,16 +15,16 @@ # =============================================================================== from daal4py.sklearn.linear_model import ElasticNet, Lasso -from onedal._device_offload import support_array_api +from onedal._device_offload import support_input_format # Note: `sklearnex.linear_model.ElasticNet` only has functional # sycl GPU support. No GPU device will be offloaded. -ElasticNet.fit = support_array_api(queue_param=False)(ElasticNet.fit) -ElasticNet.predict = support_array_api(queue_param=False)(ElasticNet.predict) -ElasticNet.score = support_array_api(queue_param=False)(ElasticNet.score) +ElasticNet.fit = support_input_format(queue_param=False)(ElasticNet.fit) +ElasticNet.predict = support_input_format(queue_param=False)(ElasticNet.predict) +ElasticNet.score = support_input_format(queue_param=False)(ElasticNet.score) # Note: `sklearnex.linear_model.Lasso` only has functional # sycl GPU support. No GPU device will be offloaded. 
-Lasso.fit = support_array_api(queue_param=False)(Lasso.fit) -Lasso.predict = support_array_api(queue_param=False)(Lasso.predict) -Lasso.score = support_array_api(queue_param=False)(Lasso.score) +Lasso.fit = support_input_format(queue_param=False)(Lasso.fit) +Lasso.predict = support_input_format(queue_param=False)(Lasso.predict) +Lasso.score = support_input_format(queue_param=False)(Lasso.score) diff --git a/sklearnex/linear_model/ridge.py b/sklearnex/linear_model/ridge.py index ab0d24d08a..f50c3446e6 100644 --- a/sklearnex/linear_model/ridge.py +++ b/sklearnex/linear_model/ridge.py @@ -15,10 +15,10 @@ # =============================================================================== from daal4py.sklearn.linear_model import Ridge -from onedal._device_offload import support_array_api +from onedal._device_offload import support_input_format # Note: `sklearnex.linear_model.Ridge` only has functional # sycl GPU support. No GPU device will be offloaded. -Ridge.fit = support_array_api(queue_param=False)(Ridge.fit) -Ridge.predict = support_array_api(queue_param=False)(Ridge.predict) -Ridge.score = support_array_api(queue_param=False)(Ridge.score) +Ridge.fit = support_input_format(queue_param=False)(Ridge.fit) +Ridge.predict = support_input_format(queue_param=False)(Ridge.predict) +Ridge.score = support_input_format(queue_param=False)(Ridge.score) diff --git a/sklearnex/manifold/t_sne.py b/sklearnex/manifold/t_sne.py index caced17934..0aa8d7df4f 100755 --- a/sklearnex/manifold/t_sne.py +++ b/sklearnex/manifold/t_sne.py @@ -15,7 +15,7 @@ # =============================================================================== from daal4py.sklearn.manifold import TSNE -from onedal._device_offload import support_array_api +from onedal._device_offload import support_input_format -TSNE.fit = support_array_api(queue_param=False)(TSNE.fit) -TSNE.fit_transform = support_array_api(queue_param=False)(TSNE.fit_transform) +TSNE.fit = support_input_format(queue_param=False)(TSNE.fit) 
+TSNE.fit_transform = support_input_format(queue_param=False)(TSNE.fit_transform) diff --git a/sklearnex/metrics/pairwise.py b/sklearnex/metrics/pairwise.py index 2997e301c4..8ad789dce1 100755 --- a/sklearnex/metrics/pairwise.py +++ b/sklearnex/metrics/pairwise.py @@ -15,8 +15,8 @@ # =============================================================================== from daal4py.sklearn.metrics import pairwise_distances -from onedal._device_offload import support_array_api +from onedal._device_offload import support_input_format -pairwise_distances = support_array_api(freefunc=True, queue_param=False)( +pairwise_distances = support_input_format(freefunc=True, queue_param=False)( pairwise_distances ) diff --git a/sklearnex/metrics/ranking.py b/sklearnex/metrics/ranking.py index 624be59f1b..753be6d0cd 100755 --- a/sklearnex/metrics/ranking.py +++ b/sklearnex/metrics/ranking.py @@ -15,6 +15,6 @@ # =============================================================================== from daal4py.sklearn.metrics import roc_auc_score -from onedal._device_offload import support_array_api +from onedal._device_offload import support_input_format -roc_auc_score = support_array_api(freefunc=True, queue_param=False)(roc_auc_score) +roc_auc_score = support_input_format(freefunc=True, queue_param=False)(roc_auc_score) diff --git a/sklearnex/model_selection/split.py b/sklearnex/model_selection/split.py index de70768a06..59153114b9 100755 --- a/sklearnex/model_selection/split.py +++ b/sklearnex/model_selection/split.py @@ -15,6 +15,8 @@ # =============================================================================== from daal4py.sklearn.model_selection import train_test_split -from onedal._device_offload import support_array_api +from onedal._device_offload import support_input_format -train_test_split = support_array_api(freefunc=True, queue_param=False)(train_test_split) +train_test_split = support_input_format(freefunc=True, queue_param=False)( + train_test_split +) diff --git 
a/sklearnex/preview/linear_model/ridge.py b/sklearnex/preview/linear_model/ridge.py index a2631d7331..a02fc9019e 100644 --- a/sklearnex/preview/linear_model/ridge.py +++ b/sklearnex/preview/linear_model/ridge.py @@ -408,11 +408,11 @@ def _save_attributes(self): else: from daal4py.sklearn.linear_model._ridge import Ridge - from onedal._device_offload import support_array_api + from onedal._device_offload import support_input_format - Ridge.fit = support_array_api(queue_param=False)(Ridge.fit) - Ridge.predict = support_array_api(queue_param=False)(Ridge.predict) - Ridge.score = support_array_api(queue_param=False)(Ridge.score) + Ridge.fit = support_input_format(queue_param=False)(Ridge.fit) + Ridge.predict = support_input_format(queue_param=False)(Ridge.predict) + Ridge.score = support_input_format(queue_param=False)(Ridge.score) logging.warning( "Preview Ridge requires oneDAL version >= 2024.6 but it was not found" From ac7288fe49ed84969fac911e0bfcf9f17a4f5f8a Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 5 Sep 2024 05:58:50 -0700 Subject: [PATCH 68/82] minor refactoring --- onedal/_device_offload.py | 25 +++++++++++++------------ sklearnex/_device_offload.py | 23 +++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index b43135a158..9ab2722257 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -187,24 +187,25 @@ def support_input_format(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): data = (*args, *kwargs.values()) - if len(data) == 0: - return _run_on_device(func, obj, *args, **kwargs) data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) if queue_param and not ( "queue" in hostkwargs and hostkwargs["queue"] is not None ): hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) - usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) 
- if usm_iface is not None: - result = _copy_to_usm(data_queue, result) - if dpnp_available and isinstance(args[0], dpnp.ndarray): - result = _convert_to_dpnp(result) - return result - input_array_api = getattr(data[0], "__array_namespace__", print)() - input_array_api_device = data[0].device if input_array_api else None - if input_array_api: - result = _asarray(result, input_array_api, device=input_array_api_device) + if len(data) > 0: + usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) + if usm_iface is not None: + result = _copy_to_usm(data_queue, result) + if dpnp_available and isinstance(args[0], dpnp.ndarray): + result = _convert_to_dpnp(result) + return result + input_array_api = getattr(data[0], "__array_namespace__", print)() + input_array_api_device = data[0].device if input_array_api else None + if input_array_api: + result = _asarray( + result, input_array_api, device=input_array_api_device + ) return result if freefunc: diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 0f10dd1225..81df44c3af 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -100,19 +100,18 @@ def wrap_output_data(func): @wraps(func) def wrapper(self, *args, **kwargs): data = (*args, *kwargs.values()) - if len(data) == 0: - return func(self, *args, **kwargs) - usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) result = func(self, *args, **kwargs) - if usm_iface is not None: - result = _copy_to_usm(usm_iface["syclobj"], result) - if dpnp_available and isinstance(data[0], dpnp.ndarray): - result = _convert_to_dpnp(result) - return result - input_array_api = getattr(data[0], "__array_namespace__", print)() - input_array_api_device = data[0].device if input_array_api else None - if input_array_api: - result = _asarray(result, input_array_api, device=input_array_api_device) + if len(data) > 0: + usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) + if usm_iface is not None: + result = 
_copy_to_usm(usm_iface["syclobj"], result) + if dpnp_available and isinstance(data[0], dpnp.ndarray): + result = _convert_to_dpnp(result) + return result + input_array_api = getattr(data[0], "__array_namespace__", print)() + input_array_api_device = data[0].device if input_array_api else None + if input_array_api: + result = _asarray(result, input_array_api, device=input_array_api_device) return result return wrapper From 3915066d7c3fcdb15cb3f26e5b894708d3b0711e Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 5 Sep 2024 06:26:03 -0700 Subject: [PATCH 69/82] fix refactoring --- onedal/_device_offload.py | 25 ++++++++++++------------- sklearnex/_device_offload.py | 2 +- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 9ab2722257..b43135a158 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -187,25 +187,24 @@ def support_input_format(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): data = (*args, *kwargs.values()) + if len(data) == 0: + return _run_on_device(func, obj, *args, **kwargs) data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) if queue_param and not ( "queue" in hostkwargs and hostkwargs["queue"] is not None ): hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) - if len(data) > 0: - usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) - if usm_iface is not None: - result = _copy_to_usm(data_queue, result) - if dpnp_available and isinstance(args[0], dpnp.ndarray): - result = _convert_to_dpnp(result) - return result - input_array_api = getattr(data[0], "__array_namespace__", print)() - input_array_api_device = data[0].device if input_array_api else None - if input_array_api: - result = _asarray( - result, input_array_api, device=input_array_api_device - ) + usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) + if usm_iface 
is not None: + result = _copy_to_usm(data_queue, result) + if dpnp_available and isinstance(args[0], dpnp.ndarray): + result = _convert_to_dpnp(result) + return result + input_array_api = getattr(data[0], "__array_namespace__", print)() + input_array_api_device = data[0].device if input_array_api else None + if input_array_api: + result = _asarray(result, input_array_api, device=input_array_api_device) return result if freefunc: diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 81df44c3af..cd61bc6b61 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -99,8 +99,8 @@ def wrap_output_data(func): @wraps(func) def wrapper(self, *args, **kwargs): - data = (*args, *kwargs.values()) result = func(self, *args, **kwargs) + data = (*args, *kwargs.values()) if len(data) > 0: usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) if usm_iface is not None: From 95fa66fe6f226ceff6047ced73d13418b2619e5f Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 5 Sep 2024 08:46:11 -0700 Subject: [PATCH 70/82] addressed test fails --- onedal/_device_offload.py | 10 +++++++--- sklearnex/_device_offload.py | 6 ++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index b43135a158..83b712e134 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -97,7 +97,9 @@ def _transfer_to_host(queue, *data): host_data = [] for item in data: usm_iface = getattr(item, "__sycl_usm_array_interface__", None) - array_api = getattr(item, "__array_namespace__", print)() + array_api = getattr(item, "__array_namespace__", None) + if array_api: + array_api = array_api() if usm_iface is not None: if not dpctl_available: raise RuntimeError( @@ -201,9 +203,11 @@ def wrapper_impl(obj, *args, **kwargs): if dpnp_available and isinstance(args[0], dpnp.ndarray): result = _convert_to_dpnp(result) return result - input_array_api = getattr(data[0], 
"__array_namespace__", print)() - input_array_api_device = data[0].device if input_array_api else None + input_array_api = getattr(data[0], "__array_namespace__", None) if input_array_api: + input_array_api = input_array_api() + input_array_api_device = data[0].device + # input_array_api_device = input_array_api.device result = _asarray(result, input_array_api, device=input_array_api_device) return result diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index cd61bc6b61..49e865a185 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -108,9 +108,11 @@ def wrapper(self, *args, **kwargs): if dpnp_available and isinstance(data[0], dpnp.ndarray): result = _convert_to_dpnp(result) return result - input_array_api = getattr(data[0], "__array_namespace__", print)() - input_array_api_device = data[0].device if input_array_api else None + input_array_api = getattr(data[0], "__array_namespace__", None) if input_array_api: + input_array_api = input_array_api() + input_array_api_device = data[0].device + # input_array_api_device = input_array_api.device result = _asarray(result, input_array_api, device=input_array_api_device) return result From 8a9e497dc7b481412ebd98697190b332137d50ec Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Thu, 5 Sep 2024 09:07:48 -0700 Subject: [PATCH 71/82] remove unnecessary comments --- onedal/_device_offload.py | 1 - sklearnex/_device_offload.py | 1 - 2 files changed, 2 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 83b712e134..03c0491508 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -207,7 +207,6 @@ def wrapper_impl(obj, *args, **kwargs): if input_array_api: input_array_api = input_array_api() input_array_api_device = data[0].device - # input_array_api_device = input_array_api.device result = _asarray(result, input_array_api, device=input_array_api_device) return result diff --git a/sklearnex/_device_offload.py 
b/sklearnex/_device_offload.py index 49e865a185..b3e601f840 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -112,7 +112,6 @@ def wrapper(self, *args, **kwargs): if input_array_api: input_array_api = input_array_api() input_array_api_device = data[0].device - # input_array_api_device = input_array_api.device result = _asarray(result, input_array_api, device=input_array_api_device) return result From 3ac9c82407aafcff714c3eac07d6781a46356887 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 6 Sep 2024 03:06:47 -0700 Subject: [PATCH 72/82] enabled transform_output check --- onedal/_device_offload.py | 15 ++++++++++----- sklearnex/_device_offload.py | 14 +++++++++----- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 03c0491508..2443b5c214 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -19,6 +19,7 @@ from functools import wraps import numpy as np +from sklearn import get_config from ._config import _get_config from .utils._array_api import _asarray, _is_numpy_namespace @@ -203,11 +204,15 @@ def wrapper_impl(obj, *args, **kwargs): if dpnp_available and isinstance(args[0], dpnp.ndarray): result = _convert_to_dpnp(result) return result - input_array_api = getattr(data[0], "__array_namespace__", None) - if input_array_api: - input_array_api = input_array_api() - input_array_api_device = data[0].device - result = _asarray(result, input_array_api, device=input_array_api_device) + config = get_config() + if not ("transform_output" in config and config["transform_output"]): + input_array_api = getattr(data[0], "__array_namespace__", None) + if input_array_api: + input_array_api = input_array_api() + input_array_api_device = data[0].device + result = _asarray( + result, input_array_api, device=input_array_api_device + ) return result if freefunc: diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 
b3e601f840..7d72a97bad 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -108,11 +108,15 @@ def wrapper(self, *args, **kwargs): if dpnp_available and isinstance(data[0], dpnp.ndarray): result = _convert_to_dpnp(result) return result - input_array_api = getattr(data[0], "__array_namespace__", None) - if input_array_api: - input_array_api = input_array_api() - input_array_api_device = data[0].device - result = _asarray(result, input_array_api, device=input_array_api_device) + config = get_config() + if not ("transform_output" in config and config["transform_output"]): + input_array_api = getattr(data[0], "__array_namespace__", None) + if input_array_api: + input_array_api = input_array_api() + input_array_api_device = data[0].device + result = _asarray( + result, input_array_api, device=input_array_api_device + ) return result return wrapper From aa2bf820cacf07cad0eae035aac6d9f4807b48a3 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 6 Sep 2024 07:13:53 -0700 Subject: [PATCH 73/82] update use of transform_output flag for input handlers --- onedal/_device_offload.py | 12 ++++++------ sklearnex/_device_offload.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 2443b5c214..65d0a30bef 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -198,14 +198,14 @@ def wrapper_impl(obj, *args, **kwargs): ): hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) - usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) - if usm_iface is not None: - result = _copy_to_usm(data_queue, result) - if dpnp_available and isinstance(args[0], dpnp.ndarray): - result = _convert_to_dpnp(result) - return result config = get_config() if not ("transform_output" in config and config["transform_output"]): + usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) + if usm_iface is not None: 
+ result = _copy_to_usm(data_queue, result) + if dpnp_available and isinstance(args[0], dpnp.ndarray): + result = _convert_to_dpnp(result) + return result input_array_api = getattr(data[0], "__array_namespace__", None) if input_array_api: input_array_api = input_array_api() diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 7d72a97bad..9b56bcad6a 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -102,14 +102,14 @@ def wrapper(self, *args, **kwargs): result = func(self, *args, **kwargs) data = (*args, *kwargs.values()) if len(data) > 0: - usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) - if usm_iface is not None: - result = _copy_to_usm(usm_iface["syclobj"], result) - if dpnp_available and isinstance(data[0], dpnp.ndarray): - result = _convert_to_dpnp(result) - return result config = get_config() if not ("transform_output" in config and config["transform_output"]): + usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) + if usm_iface is not None: + result = _copy_to_usm(usm_iface["syclobj"], result) + if dpnp_available and isinstance(data[0], dpnp.ndarray): + result = _convert_to_dpnp(result) + return result input_array_api = getattr(data[0], "__array_namespace__", None) if input_array_api: input_array_api = input_array_api() From 3539ee31fa9263963628bb2abc692d14b7b45f70 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 6 Sep 2024 10:34:33 -0700 Subject: [PATCH 74/82] reverted changes for test_incremental_pca.py resolved on #1998 and #1999 --- .../preview/decomposition/tests/test_incremental_pca.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearnex/preview/decomposition/tests/test_incremental_pca.py b/sklearnex/preview/decomposition/tests/test_incremental_pca.py index d0a9887a1b..67929bfac8 100644 --- a/sklearnex/preview/decomposition/tests/test_incremental_pca.py +++ b/sklearnex/preview/decomposition/tests/test_incremental_pca.py @@ 
-200,7 +200,7 @@ def test_sklearnex_partial_fit_on_gold_data(dataframe, queue, whiten, num_blocks X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) transformed_data = incpca.transform(X_df) - check_pca_on_gold_data(incpca, dtype, whiten, _as_numpy(transformed_data)) + check_pca_on_gold_data(incpca, dtype, whiten, transformed_data) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -217,7 +217,7 @@ def test_sklearnex_fit_on_gold_data(dataframe, queue, whiten, num_blocks, dtype) incpca.fit(X_df) transformed_data = incpca.transform(X_df) - check_pca_on_gold_data(incpca, dtype, whiten, _as_numpy(transformed_data)) + check_pca_on_gold_data(incpca, dtype, whiten, transformed_data) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -235,7 +235,7 @@ def test_sklearnex_fit_transform_on_gold_data( X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) transformed_data = incpca.fit_transform(X_df) - check_pca_on_gold_data(incpca, dtype, whiten, _as_numpy(transformed_data)) + check_pca_on_gold_data(incpca, dtype, whiten, transformed_data) @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) @@ -263,4 +263,4 @@ def test_sklearnex_partial_fit_on_random_data( X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) transformed_data = incpca.transform(X_df) - check_pca(incpca, dtype, whiten, X, _as_numpy(transformed_data)) + check_pca(incpca, dtype, whiten, X, transformed_data) From ad71cd0550cbb0d52e787f7a1a952ec73951d603 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Fri, 6 Sep 2024 11:47:32 -0700 Subject: [PATCH 75/82] Revert "update use of transform_output flag for input handlers" This reverts commit aa2bf820cacf07cad0eae035aac6d9f4807b48a3. 
--- onedal/_device_offload.py | 12 ++++++------ sklearnex/_device_offload.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 65d0a30bef..2443b5c214 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -198,14 +198,14 @@ def wrapper_impl(obj, *args, **kwargs): ): hostkwargs["queue"] = data_queue result = _run_on_device(func, obj, *hostargs, **hostkwargs) + usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) + if usm_iface is not None: + result = _copy_to_usm(data_queue, result) + if dpnp_available and isinstance(args[0], dpnp.ndarray): + result = _convert_to_dpnp(result) + return result config = get_config() if not ("transform_output" in config and config["transform_output"]): - usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) - if usm_iface is not None: - result = _copy_to_usm(data_queue, result) - if dpnp_available and isinstance(args[0], dpnp.ndarray): - result = _convert_to_dpnp(result) - return result input_array_api = getattr(data[0], "__array_namespace__", None) if input_array_api: input_array_api = input_array_api() diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 9b56bcad6a..7d72a97bad 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -102,14 +102,14 @@ def wrapper(self, *args, **kwargs): result = func(self, *args, **kwargs) data = (*args, *kwargs.values()) if len(data) > 0: + usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) + if usm_iface is not None: + result = _copy_to_usm(usm_iface["syclobj"], result) + if dpnp_available and isinstance(data[0], dpnp.ndarray): + result = _convert_to_dpnp(result) + return result config = get_config() if not ("transform_output" in config and config["transform_output"]): - usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) - if usm_iface is not None: - result = 
_copy_to_usm(usm_iface["syclobj"], result) - if dpnp_available and isinstance(data[0], dpnp.ndarray): - result = _convert_to_dpnp(result) - return result input_array_api = getattr(data[0], "__array_namespace__", None) if input_array_api: input_array_api = input_array_api() From 870b0beefc2fea0f8f9dc892a060e9aac7794f75 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 27 Aug 2024 01:27:23 -0700 Subject: [PATCH 76/82] ENH: DBSCAN via Array API DDBSCAN rewrite via Array API --- onedal/cluster/dbscan.py | 37 +++++++++++++++++++------------------ sklearnex/cluster/dbscan.py | 8 +++++++- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index f91325b65c..302d30a997 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -14,14 +14,14 @@ # limitations under the License. # =============================================================================== -import numpy as np +from sklearn.utils import check_array -from daal4py.sklearn._utils import get_dtype, make2d +from onedal.datatypes._data_conversion import get_dtype, make2d from ..common._base import BaseEstimator from ..common._mixin import ClusterMixin from ..datatypes import _convert_to_supported, from_table, to_table -from ..utils import _check_array +from ..utils._array_api import get_namespace class BaseDBSCAN(BaseEstimator, ClusterMixin): @@ -46,9 +46,9 @@ def __init__( self.p = p self.n_jobs = n_jobs - def _get_onedal_params(self, dtype=np.float32): + def _get_onedal_params(self, xp, dtype): return { - "fptype": "float" if dtype == np.float32 else "double", + "fptype": "float" if dtype == xp.float32 else "double", "method": "by_default", "min_observations": int(self.min_samples), "epsilon": float(self.eps), @@ -56,28 +56,30 @@ def _get_onedal_params(self, dtype=np.float32): "result_options": "core_observation_indices|responses", } - def _fit(self, X, y, sample_weight, module, queue): + def _fit(self, X, xp, 
is_array_api_compliant, y, sample_weight, queue): policy = self._get_policy(queue, X) - X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32]) + X = check_array(X, accept_sparse="csr", dtype=[xp.float64, xp.float32]) sample_weight = make2d(sample_weight) if sample_weight is not None else None X = make2d(X) - types = [np.float32, np.float64] + types = [xp.float32, xp.float64] if get_dtype(X) not in types: - X = X.astype(np.float64) + X = X.astype(xp.float64) X = _convert_to_supported(policy, X) dtype = get_dtype(X) - params = self._get_onedal_params(dtype) - result = module.compute(policy, params, to_table(X), to_table(sample_weight)) + params = self._get_onedal_params(xp, dtype) + result = self._get_backend("dbscan", "clustering", None).compute( + policy, params, to_table(X), to_table(sample_weight) + ) - self.labels_ = from_table(result.responses).ravel() + self.labels_ = from_table(result.responses).reshape(-1) if result.core_observation_indices is not None: self.core_sample_indices_ = from_table( result.core_observation_indices - ).ravel() + ).reshape(-1) else: - self.core_sample_indices_ = np.array([], dtype=np.intc) - self.components_ = np.take(X, self.core_sample_indices_, axis=0) + self.core_sample_indices_ = xp.array([], dtype=xp.int32) + self.components_ = xp.take(X, self.core_sample_indices_, axis=0) self.n_features_in_ = X.shape[1] return self @@ -105,6 +107,5 @@ def __init__( self.n_jobs = n_jobs def fit(self, X, y=None, sample_weight=None, queue=None): - return super()._fit( - X, y, sample_weight, self._get_backend("dbscan", "clustering", None), queue - ) + xp, is_array_api_compliant = get_namespace(X) + return super()._fit(X, xp, is_array_api_compliant, y, sample_weight, queue) diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index f8d080cfbe..fbd6ff9e8f 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -27,6 +27,7 @@ from .._device_offload import dispatch from .._utils import 
PatchingConditionsChain +from ..utils._array_api import get_namespace if sklearn_check_version("1.1") and not sklearn_check_version("1.2"): from sklearn.utils import check_scalar @@ -86,6 +87,7 @@ def __init__( def _onedal_fit(self, X, y, sample_weight=None, queue=None): if sklearn_check_version("1.0"): X = self._validate_data(X, force_all_finite=False) + xp, is_array_api_compliant = get_namespace(X) onedal_params = { "eps": self.eps, @@ -99,7 +101,9 @@ def _onedal_fit(self, X, y, sample_weight=None, queue=None): } self._onedal_estimator = self._onedal_dbscan(**onedal_params) - self._onedal_estimator.fit(X, y=y, sample_weight=sample_weight, queue=queue) + self._onedal_estimator._fit( + X, xp, is_array_api_compliant, y, sample_weight, queue=queue + ) self._save_attributes() def _onedal_supported(self, method_name, *data): @@ -173,6 +177,8 @@ def fit(self, X, y=None, sample_weight=None): if self.eps <= 0.0: raise ValueError(f"eps == {self.eps}, must be > 0.0.") + # TODO: + # should be checked for Array API inputs. 
if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X) dispatch( From 9bcbd4e8fd3e13590bb2129fc7dfa5e2efa0d50e Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Mon, 9 Sep 2024 00:01:43 -0700 Subject: [PATCH 77/82] backup local changes --- onedal/cluster/dbscan.py | 22 +++- onedal/datatypes/_data_conversion.py | 72 ++++++++---- onedal/utils/_array_api.py | 24 ++++ sklearnex/_device_offload.py | 34 ++++++ sklearnex/cluster/dbscan.py | 7 +- sklearnex/dispatcher.py | 31 ++++++ sklearnex/utils/_array_api.py | 30 ++++- sklearnex/utils/tests/test_array_api.py | 142 ++++++++++++++++++++++++ 8 files changed, 333 insertions(+), 29 deletions(-) create mode 100644 sklearnex/utils/tests/test_array_api.py diff --git a/onedal/cluster/dbscan.py b/onedal/cluster/dbscan.py index 302d30a997..99396bc185 100644 --- a/onedal/cluster/dbscan.py +++ b/onedal/cluster/dbscan.py @@ -61,21 +61,35 @@ def _fit(self, X, xp, is_array_api_compliant, y, sample_weight, queue): X = check_array(X, accept_sparse="csr", dtype=[xp.float64, xp.float32]) sample_weight = make2d(sample_weight) if sample_weight is not None else None X = make2d(X) + if xp: + X_device = X.device + # TODO: + # revice once again the flow. 
types = [xp.float32, xp.float64] if get_dtype(X) not in types: X = X.astype(xp.float64) - X = _convert_to_supported(policy, X) + X = _convert_to_supported(policy, X, xp) + sample_weight = ( + _convert_to_supported(policy, sample_weight, xp) + if sample_weight is not None + else None + ) dtype = get_dtype(X) params = self._get_onedal_params(xp, dtype) result = self._get_backend("dbscan", "clustering", None).compute( - policy, params, to_table(X), to_table(sample_weight) + policy, params, to_table(X, xp), to_table(sample_weight, xp) ) - self.labels_ = from_table(result.responses).reshape(-1) + self.labels_ = from_table( + result.responses, xp=xp, queue=queue, array_api_device=X_device + ).reshape(-1) if result.core_observation_indices is not None: self.core_sample_indices_ = from_table( - result.core_observation_indices + result.core_observation_indices, + xp=xp, + queue=queue, + array_api_device=X_device, ).reshape(-1) else: self.core_sample_indices_ = xp.array([], dtype=xp.int32) diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index d1dedba81c..42013ac5d5 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -18,48 +18,76 @@ import numpy as np -from daal4py.sklearn._utils import make2d +from daal4py.sklearn._utils import get_dtype +from daal4py.sklearn._utils import make2d as d4p_make2d from onedal import _backend, _is_dpc_backend from ..utils import _is_csr -try: - import dpctl - import dpctl.tensor as dpt +# from ..utils._array_api import get_namespace +from ..utils._array_api import _is_numpy_namespace - dpctl_available = dpctl.__version__ >= "0.14" -except ImportError: - dpctl_available = False +# TODO: +# move to proper module. 
+# TODO +# def make2d(arg, xp=None, is_array_api_compliant=None): +def make2d(arg, xp=None): + if xp and not _is_numpy_namespace(xp) and arg.ndim == 1: + return xp.reshape(arg, (arg.size, 1)) if arg.ndim == 1 else arg + # TODO: + # reimpl via is_array_api_compliant usage. + return d4p_make2d(arg) -def _apply_and_pass(func, *args): + +# TODO: +# remove such kind of func calls +def _apply_and_pass(func, *args, **kwargs): + # TODO: + # refactor. if len(args) == 1: return func(args[0]) - return tuple(map(func, args)) - + return tuple(map(func, args, kwargs)) -def from_table(*args): - return _apply_and_pass(_backend.from_table, *args) +def convert_one_from_table(arg, xp=None, queue=None, array_api_device=None): + # TODO: + # use `array_api_device`. + result = _backend.from_table(arg) + if xp: + if xp.__name__ in {"dpctl", "dpctl.tensor"}: + result = xp.asarray(arg, sycl_queue=queue) if queue else xp.asarray(arg) + elif not _is_numpy_namespace(xp): + results = xp.asarray(result) + return result -def convert_one_to_table(arg): - if dpctl_available: - if isinstance(arg, dpt.usm_ndarray): - return _backend.dpctl_to_table(arg) +def convert_one_to_table(arg, xp=None): if not _is_csr(arg): + if xp and not _is_numpy_namespace(xp): + arg = np.asarray(arg) + # TODO: + # Check. Probably should be removed from here + # Not really realted with converting to table. 
arg = make2d(arg) return _backend.to_table(arg) -def to_table(*args): - return _apply_and_pass(convert_one_to_table, *args) +def from_table(*args, xp=None, queue=None, array_api_device=None): + return _apply_and_pass(convert_one_from_table, *args) + + +def to_table(*args, xp=None): + return _apply_and_pass(convert_one_to_table, *args, xp=xp) if _is_dpc_backend: from ..common._policy import _HostInteropPolicy - def _convert_to_supported(policy, *data): + def _convert_to_supported(policy, *data, xp=None): + if xp is None: + xp = np + def func(x): return x @@ -71,13 +99,13 @@ def func(x): device = policy._queue.sycl_device def convert_or_pass(x): - if (x is not None) and (x.dtype == np.float64): + if (x is not None) and (x.dtype == xp.float64): warnings.warn( "Data will be converted into float32 from " "float64 because device does not support it", RuntimeWarning, ) - return x.astype(np.float32) + return x.astype(xp.float32) else: return x @@ -88,7 +116,7 @@ def convert_or_pass(x): else: - def _convert_to_supported(policy, *data): + def _convert_to_supported(policy, *data, xp=None): def func(x): return x diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 4accdd3ac0..cd477b5ccf 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -18,6 +18,8 @@ from collections.abc import Iterable +import numpy as np + try: from dpctl.tensor import usm_ndarray @@ -89,3 +91,25 @@ def _get_sycl_namespace(*arrays): raise ValueError(f"SYCL type not recognized: {sycl_type}") return sycl_type, None, False + + +def get_namespace(*arrays): + """Get namespace of arrays. + TBD. + Parameters + ---------- + *arrays : array objects + Array objects. + Returns + ------- + namespace : module + Namespace shared by array objects. + is_array_api : bool + True of the arrays are containers that implement the Array API spec. 
+ """ + sycl_type, xp, is_array_api_compliant = _get_sycl_namespace(*arrays) + + if sycl_type: + return xp, is_array_api_compliant + else: + return np, True diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 7d72a97bad..860899a0f5 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -61,6 +61,40 @@ def _get_backend(obj, queue, method_name, *data): raise RuntimeError("Device support is not implemented") +# def dispatch_with_array_api(obj, method_name, branches, xp, is_array_api_compliant, *args, **kwargs): +def dispatch_with_array_api( + obj, method_name, branches, xp, is_array_api_compliant, *args, **kwargs +): + q = _get_global_queue() + # if "array_api_support_sklearnex" in obj._get_tags() and obj._get_tags()["array_api_support_sklearnex"]: + + backend, q, patching_status = _get_backend(obj, q, method_name, *args) + + if backend == "onedal": + patching_status.write_log(queue=q) + return branches[backend](obj, *args, **kwargs, queue=q) + if backend == "sklearn": + if ( + "array_api_dispatch" in get_config() + and get_config()["array_api_dispatch"] + and "array_api_support" in obj._get_tags() + and obj._get_tags()["array_api_support"] + ): + # If `array_api_dispatch` enabled and array api is supported for the stock scikit-learn, + # then raw inputs are used for the fallback. 
+ patching_status.write_log() + return branches[backend](obj, *args, **kwargs) + else: + patching_status.write_log() + _, hostargs = _transfer_to_host(q, *args) + _, hostvalues = _transfer_to_host(q, *kwargs.values()) + hostkwargs = dict(zip(kwargs.keys(), hostvalues)) + return branches[backend](obj, *hostargs, **hostkwargs) + raise RuntimeError( + f"Undefined backend {backend} in " f"{obj.__class__.__name__}.{method_name}" + ) + + def dispatch(obj, method_name, branches, *args, **kwargs): q = _get_global_queue() q, hostargs = _transfer_to_host(q, *args) diff --git a/sklearnex/cluster/dbscan.py b/sklearnex/cluster/dbscan.py index fbd6ff9e8f..7b34d00567 100755 --- a/sklearnex/cluster/dbscan.py +++ b/sklearnex/cluster/dbscan.py @@ -25,7 +25,7 @@ from daal4py.sklearn._utils import sklearn_check_version from onedal.cluster import DBSCAN as onedal_DBSCAN -from .._device_offload import dispatch +from .._device_offload import dispatch, dispatch_with_array_api from .._utils import PatchingConditionsChain from ..utils._array_api import get_namespace @@ -181,7 +181,7 @@ def fit(self, X, y=None, sample_weight=None): # should be checked for Array API inputs. if sample_weight is not None: sample_weight = _check_sample_weight(sample_weight, X) - dispatch( + dispatch_with_array_api( self, "fit", { @@ -195,4 +195,7 @@ def fit(self, X, y=None, sample_weight=None): return self + def _more_tags(self): + return {"array_api_support_sklearnex": True} + fit.__doc__ = sklearn_DBSCAN.fit.__doc__ diff --git a/sklearnex/dispatcher.py b/sklearnex/dispatcher.py index a4a62556f6..7a9f000aaa 100644 --- a/sklearnex/dispatcher.py +++ b/sklearnex/dispatcher.py @@ -128,6 +128,11 @@ def get_patch_map_core(preview=False): from ._config import get_config as get_config_sklearnex from ._config import set_config as set_config_sklearnex + # TODO: + # check the version of skl. 
+ if sklearn_check_version("1.4"): + import sklearn.utils._array_api as _array_api_module + if sklearn_check_version("1.2.1"): from .utils.parallel import _FuncWrapper as _FuncWrapper_sklearnex else: @@ -165,6 +170,12 @@ def get_patch_map_core(preview=False): from .svm import NuSVC as NuSVC_sklearnex from .svm import NuSVR as NuSVR_sklearnex + # TODO: + # check the version of skl. + if sklearn_check_version("1.4"): + from .utils._array_api import _convert_to_numpy as _convert_to_numpy_sklearnex + from .utils._array_api import get_namespace as get_namespace_sklearnex + # DBSCAN mapping.pop("dbscan") mapping["dbscan"] = [[(cluster_module, "DBSCAN", DBSCAN_sklearnex), None]] @@ -440,6 +451,26 @@ def get_patch_map_core(preview=False): mapping["_funcwrapper"] = [ [(parallel_module, "_FuncWrapper", _FuncWrapper_sklearnex), None] ] + # TODO: + # check the version of skl. + if sklearn_check_version("1.4"): + # Necessary for array_api support + mapping["get_namespace"] = [ + [ + ( + _array_api_module, + "get_namespace", + get_namespace_sklearnex, + ), + None, + ] + ] + mapping["_convert_to_numpy"] = [ + [ + (_array_api_module, "_convert_to_numpy", _convert_to_numpy_sklearnex), + None, + ] + ] return mapping diff --git a/sklearnex/utils/_array_api.py b/sklearnex/utils/_array_api.py index bc30be5021..1ba0a51459 100644 --- a/sklearnex/utils/_array_api.py +++ b/sklearnex/utils/_array_api.py @@ -19,10 +19,38 @@ import numpy as np from daal4py.sklearn._utils import sklearn_check_version -from onedal.utils._array_api import _get_sycl_namespace +from onedal.utils._array_api import _asarray, _get_sycl_namespace +# TODO: +# check the version of skl. 
if sklearn_check_version("1.2"): from sklearn.utils._array_api import get_namespace as sklearn_get_namespace + from sklearn.utils._array_api import _convert_to_numpy as _sklearn_convert_to_numpy + +from .._device_offload import dpctl_available, dpnp_available + +if dpctl_available: + import dpctl.tensor as dpt + +if dpnp_available: + import dpnp + + +def _convert_to_numpy(array, xp): + """Convert X into a NumPy ndarray on the CPU.""" + xp_name = xp.__name__ + + # if dpctl_available and isinstance(array, dpctl.tensor): + if dpctl_available and xp_name in { + "dpctl.tensor", + }: + return dpt.to_numpy(array) + elif dpnp_available and isinstance(array, dpnp.ndarray): + return dpnp.asnumpy(array) + elif sklearn_check_version("1.2"): + return _sklearn_convert_to_numpy(array, xp) + else: + return _asarray(array, xp) def get_namespace(*arrays): diff --git a/sklearnex/utils/tests/test_array_api.py b/sklearnex/utils/tests/test_array_api.py new file mode 100644 index 0000000000..5e52122952 --- /dev/null +++ b/sklearnex/utils/tests/test_array_api.py @@ -0,0 +1,142 @@ +# ============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +import numpy as np +import pytest +from numpy.testing import assert_allclose + +from daal4py.sklearn._utils import sklearn_check_version +from onedal.tests.utils._dataframes_support import ( + _convert_to_dataframe, + get_dataframes_and_queues, +) + +array_api_dataframes_and_namespaces = { + "dpctl": "dpctl.tensor", +} + + +@pytest.mark.parametrize( + "dataframe,queue", + get_dataframes_and_queues(dataframe_filter_="numpy,dpctl", device_filter_="cpu,gpu"), +) +def test_get_namespace_with_config_context(dataframe, queue): + """Test get_namespace on NumPy ndarrays, DPCtl tensors.""" + from sklearnex import config_context + from sklearnex.utils._array_api import get_namespace + + # array_api_compat = pytest.importorskip("array_api_strict") + + X_np = np.asarray([[1, 2, 3]]) + X = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) + + with config_context(array_api_dispatch=True): + xp_out, is_array_api_compliant = get_namespace(X) + assert is_array_api_compliant + # assert xp_out is array_api_compat.numpy + + +@pytest.mark.skipif( + not sklearn_check_version("1.4"), + reason="array api dispatch requires sklearn 1.4 version", +) +@pytest.mark.parametrize( + "dataframe,queue", + get_dataframes_and_queues(dataframe_filter_="dpctl", device_filter_="cpu,gpu"), +) +def test_get_namespace_with_patching(dataframe, queue): + """Test get_namespace on NumPy ndarrays, DPCtl tensors + with `patch_sklearn` + """ + # array_api_compat = pytest.importorskip("array_api_strict") + + from sklearnex import patch_sklearn + + patch_sklearn() + + from sklearn import config_context + from sklearn.utils._array_api import get_namespace + + X_np = np.asarray([[1, 2, 3]]) + X = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) + + with config_context(array_api_dispatch=True): + xp_out, is_array_api_compliant = get_namespace(X) + assert is_array_api_compliant + assert 
xp_out.__name__ == array_api_dataframes_and_namespaces[dataframe] + + +@pytest.mark.skipif( + not sklearn_check_version("1.4"), + reason="array api dispatch requires sklearn 1.4 version", +) +@pytest.mark.parametrize( + "dataframe,queue", + get_dataframes_and_queues(dataframe_filter_="dpctl,dpnp", device_filter_="cpu,gpu"), +) +def test_convert_to_numpy_with_patching(dataframe, queue): + """Test get_namespace on NumPy ndarrays, DPCtl tensors + with `patch_sklearn` + """ + # array_api_compat = pytest.importorskip("array_api_strict") + + from sklearnex import patch_sklearn + + patch_sklearn() + + from sklearn import config_context + from sklearn.utils._array_api import _convert_to_numpy, get_namespace + + X_np = np.asarray([[1, 2, 3]]) + X = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) + xp, _ = get_namespace(X) + + with config_context(array_api_dispatch=True): + x_np = _convert_to_numpy(X, xp) + assert type(X_np) == type(x_np) + assert_allclose(X_np, x_np) + + +@pytest.mark.skipif( + not sklearn_check_version("1.4"), + reason="array api dispatch requires sklearn 1.4 version", +) +@pytest.mark.parametrize( + "dataframe,queue", + get_dataframes_and_queues(dataframe_filter_="dpctl", device_filter_="cpu,gpu"), +) +def test_check_array_with_patching(dataframe, queue): + """Test get_namespace on NumPy ndarrays, DPCtl tensors + with `patch_sklearn` + """ + # array_api_compat = pytest.importorskip("array_api_strict") + + from sklearnex import patch_sklearn + + patch_sklearn() + + from sklearn import config_context + from sklearn.utils import check_array + from sklearn.utils._array_api import _convert_to_numpy, get_namespace + + X_np = np.asarray([[1, 2, 3], [4, 5, 6]], dtype=np.float64) + xp, _ = get_namespace(X_np) + X_df = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) + + with config_context(array_api_dispatch=True): + X_df_res = check_array(X_df, accept_sparse="csr", dtype=[xp.float64, xp.float32]) + assert type(X_df) == 
type(X_df_res) + assert_allclose(_convert_to_numpy(X_df, xp), _convert_to_numpy(X_df_res, xp)) From 41d1efc9ca89ffcc790fed2dae47fd315db18d39 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Mon, 9 Sep 2024 07:53:51 -0700 Subject: [PATCH 78/82] minor refactoring --- onedal/_device_offload.py | 11 ++++------- sklearnex/_device_offload.py | 7 +++---- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/onedal/_device_offload.py b/onedal/_device_offload.py index 2443b5c214..0b00da8bc7 100644 --- a/onedal/_device_offload.py +++ b/onedal/_device_offload.py @@ -98,9 +98,7 @@ def _transfer_to_host(queue, *data): host_data = [] for item in data: usm_iface = getattr(item, "__sycl_usm_array_interface__", None) - array_api = getattr(item, "__array_namespace__", None) - if array_api: - array_api = array_api() + array_api = getattr(item, "__array_namespace__", lambda: None)() if usm_iface is not None: if not dpctl_available: raise RuntimeError( @@ -189,9 +187,9 @@ def support_input_format(freefunc=False, queue_param=True): def decorator(func): def wrapper_impl(obj, *args, **kwargs): - data = (*args, *kwargs.values()) - if len(data) == 0: + if len(args) == 0 and len(kwargs) == 0: return _run_on_device(func, obj, *args, **kwargs) + data = (*args, *kwargs.values()) data_queue, hostargs, hostkwargs = _get_host_inputs(*args, **kwargs) if queue_param and not ( "queue" in hostkwargs and hostkwargs["queue"] is not None @@ -206,9 +204,8 @@ def wrapper_impl(obj, *args, **kwargs): return result config = get_config() if not ("transform_output" in config and config["transform_output"]): - input_array_api = getattr(data[0], "__array_namespace__", None) + input_array_api = getattr(data[0], "__array_namespace__", lambda: None)() if input_array_api: - input_array_api = input_array_api() input_array_api_device = data[0].device result = _asarray( result, input_array_api, device=input_array_api_device diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index 
7d72a97bad..aa946287dc 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -100,8 +100,8 @@ def wrap_output_data(func): @wraps(func) def wrapper(self, *args, **kwargs): result = func(self, *args, **kwargs) - data = (*args, *kwargs.values()) - if len(data) > 0: + if len(args) == 0 and len(kwargs) == 0: + data = (*args, *kwargs.values()) usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) if usm_iface is not None: result = _copy_to_usm(usm_iface["syclobj"], result) @@ -110,9 +110,8 @@ def wrapper(self, *args, **kwargs): return result config = get_config() if not ("transform_output" in config and config["transform_output"]): - input_array_api = getattr(data[0], "__array_namespace__", None) + input_array_api = getattr(data[0], "__array_namespace__", lambda: None)() if input_array_api: - input_array_api = input_array_api() input_array_api_device = data[0].device result = _asarray( result, input_array_api, device=input_array_api_device From 52e92577a5d60239f1b14db8bef7dac92fbeeb57 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Mon, 9 Sep 2024 20:06:39 +0200 Subject: [PATCH 79/82] fixing --- sklearnex/_device_offload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearnex/_device_offload.py b/sklearnex/_device_offload.py index aa946287dc..06f97aa679 100644 --- a/sklearnex/_device_offload.py +++ b/sklearnex/_device_offload.py @@ -100,7 +100,7 @@ def wrap_output_data(func): @wraps(func) def wrapper(self, *args, **kwargs): result = func(self, *args, **kwargs) - if len(args) == 0 and len(kwargs) == 0: + if not (len(args) == 0 and len(kwargs) == 0): data = (*args, *kwargs.values()) usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None) if usm_iface is not None: From a09c3d4c156264c1ea4cc5c49e283ad7069874dd Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 10 Sep 2024 05:46:20 -0700 Subject: [PATCH 80/82] minor fix --- sklearnex/utils/_array_api.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/sklearnex/utils/_array_api.py b/sklearnex/utils/_array_api.py index 1ba0a51459..45c3e7e72a 100644 --- a/sklearnex/utils/_array_api.py +++ b/sklearnex/utils/_array_api.py @@ -27,7 +27,7 @@ from sklearn.utils._array_api import get_namespace as sklearn_get_namespace from sklearn.utils._array_api import _convert_to_numpy as _sklearn_convert_to_numpy -from .._device_offload import dpctl_available, dpnp_available +from onedal._device_offload import dpctl_available, dpnp_available if dpctl_available: import dpctl.tensor as dpt From f95197f9fd6566d26b5eebe7aad2a7bf91f3177c Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Tue, 1 Oct 2024 06:57:59 -0700 Subject: [PATCH 81/82] added array-api-compat as test dep --- requirements-test.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-test.txt b/requirements-test.txt index d9de92c2da..8f49d5516d 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -12,3 +12,4 @@ lightgbm==4.5.0 catboost==1.2.7 ; python_version < '3.11' # TODO: Remove 3.11 condition when catboost supports numpy 2.0 shap==0.46.0 array-api-strict==2.0.1 +array-api-compat==1.8.0 From d17141fdd786069d6e26ddecc64ed7d5a87f5254 Mon Sep 17 00:00:00 2001 From: Samir Nasibli Date: Wed, 9 Oct 2024 14:59:04 +0200 Subject: [PATCH 82/82] Update requirements-test.txt --- requirements-test.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements-test.txt b/requirements-test.txt index 847e9cc748..d5af4e380b 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -13,4 +13,3 @@ catboost==1.2.7 ; python_version < '3.11' # TODO: Remove 3.11 condition when cat shap==0.46.0 array-api-compat==1.9 array-api-strict==2.0.1 -array-api-compat==1.8.0