Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: dpnp interop for sklearnex #1374

Merged
merged 10 commits into from
Jul 26, 2023
1 change: 1 addition & 0 deletions .ci/pipeline/build-and-test-lnx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ steps:
bash .ci/scripts/setup_sklearn.sh $(SKLEARN_VERSION)
pip install --upgrade -r requirements-test.txt -r requirements-test-optional.txt
pip install $(python .ci/scripts/get_compatible_scipy_version.py)
if [ $(echo $(PYTHON_VERSION) | grep '3.8\|3.9\|3.10') ]; then conda install -q -y -c intel dpnp; fi
ethanglaser marked this conversation as resolved.
Show resolved Hide resolved
pip list
displayName: 'Install testing requirements'
- script: |
Expand Down
58 changes: 58 additions & 0 deletions examples/sklearnex/knn_bf_classification_dpnp_batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# ===============================================================================
# Copyright 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===============================================================================

# sklearnex kNN example for GPU offloading with DPNP ndarray:
# python ./knn_bf_classification_dpnp_batch.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# python ./knn_bf_classification_dpnp_batch.py.py
# python ./knn_bf_classification_dpnp_batch.py

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch!


import dpctl
import dpnp
import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from sklearnex.neighbors import KNeighborsClassifier

# Build a reproducible synthetic classification dataset.
features, labels = make_classification(
    n_samples=1000, n_features=4, n_informative=2, n_redundant=0, random_state=0, shuffle=False
)

# Hold out a third of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.33, random_state=42
)

# A single SYCL queue is shared by every DPNP ndarray created below so that
# all of them are allocated on the same GPU device.
gpu_queue = dpctl.SyclQueue("gpu")

# Convert the training data and the test features into device-USM DPNP
# ndarrays; y_test is left untouched.
dpnp_X_train, dpnp_y_train, dpnp_X_test = (
    dpnp.asarray(host_array, usm_type="device", sycl_queue=gpu_queue)
    for host_array in (X_train, y_train, X_test)
)

knn_mdl = KNeighborsClassifier(
algorithm="brute", n_neighbors=20, weights="uniform", p=2, metric="minkowski"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose weights="uniform", p=2, metric="minkowski" correspond to the Euclidean metric and the normal mode of classification, which are the defaults.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be patched for both the spmd and batch examples. Let's do it in a separate PR.

)
# Fit the classifier on the DPNP training arrays created above.
knn_mdl.fit(dpnp_X_train, dpnp_y_train)

# Predict labels for the DPNP test features.
y_predict = knn_mdl.predict(dpnp_X_test)

# This is the single-device batch example, so the banner must not describe
# the run as "Distributed" (that wording belongs to the SPMD examples).
print("Brute Force kNN classification results:")
print("Ground truth (first 5 observations):\n{}".format(y_test[:5]))
print("Classification results (first 5 observations):\n{}".format(y_predict[:5]))
# y_test was never converted to a DPNP array, so the predictions are turned
# into a NumPy array via asnumpy() before being passed to accuracy_score.
print("Accuracy (2 classes): {}\n".format(accuracy_score(y_test, y_predict.asnumpy())))
ethanglaser marked this conversation as resolved.
Show resolved Hide resolved
print("Are predicted results on GPU: {}".format(y_predict.sycl_device.is_gpu))
53 changes: 53 additions & 0 deletions examples/sklearnex/random_forest_classifier_dpctl_batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# ===============================================================================
# Copyright 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===============================================================================

# sklearnex RF example for GPU offloading with DPCtl tensor:
# python ./random_forest_classifier_dpctl_batch.py

import dpctl
import dpctl.tensor as dpt
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

from sklearnex.preview.ensemble import RandomForestClassifier

# Every dpctl tensor below is allocated through this one queue so that all of
# the data ends up on the same GPU device.
gpu_queue = dpctl.SyclQueue("gpu")

# Build a reproducible synthetic classification dataset.
features, labels = make_classification(
    n_samples=1000, n_features=4, n_informative=2, n_redundant=0, random_state=0, shuffle=False
)

# Hold out a third of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.33, random_state=42
)


def _to_device(host_array):
    """Convert ``host_array`` to a device-USM dpctl tensor on the shared queue."""
    return dpt.asarray(host_array, usm_type="device", sycl_queue=gpu_queue)


dpt_X_train = _to_device(X_train)
dpt_y_train = _to_device(y_train)
dpt_X_test = _to_device(X_test)

# Train a shallow forest on the dpctl tensors, then predict on the test set.
forest = RandomForestClassifier(max_depth=2, random_state=0)
forest = forest.fit(dpt_X_train, dpt_y_train)

predictions = forest.predict(dpt_X_test)

# Report a small sample of the results and where the predictions reside.
ground_truth_head = y_test[:5]
predictions_head = predictions[:5]
predictions_on_gpu = predictions.sycl_device.is_gpu

print("Random Forest classification results:")
print("Ground truth (first 5 observations):\n{}".format(ground_truth_head))
print("Classification results (first 5 observations):\n{}".format(predictions_head))
print("Are predicted results on GPU: {}".format(predictions_on_gpu))
46 changes: 46 additions & 0 deletions examples/sklearnex/random_forest_regressor_dpnp_batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# ===============================================================================
# Copyright 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===============================================================================

# sklearnex RF example for GPU offloading with DPNP ndarray:
# python ./random_forest_regressor_dpnp_batch.py

import dpnp
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

from sklearnex.preview.ensemble import RandomForestRegressor

# Device string handed to every dpnp.asarray call below, so that all arrays
# are created for the same device.
device_filter = "gpu:0"

# Build a reproducible synthetic regression dataset.
features, targets = make_regression(
    n_samples=1000,
    n_features=4,
    n_informative=2,
    random_state=0,
    shuffle=False,
)

# Hold out a third of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.33, random_state=42
)

# Convert the training data and the test features to DPNP ndarrays on the
# chosen device; y_test is left untouched.
dpnp_X_train, dpnp_y_train, dpnp_X_test = (
    dpnp.asarray(host_array, device=device_filter)
    for host_array in (X_train, y_train, X_test)
)

# Train a shallow forest on the DPNP arrays, then predict on the test set.
forest = RandomForestRegressor(max_depth=2, random_state=0)
forest = forest.fit(dpnp_X_train, dpnp_y_train)

predictions = forest.predict(dpnp_X_test)

# Report a small sample of the results and where the predictions reside.
ground_truth_head = y_test[:5]
predictions_head = predictions[:5]
predictions_on_gpu = predictions.sycl_device.is_gpu

print("Random Forest regression results:")
print("Ground truth (first 5 observations):\n{}".format(ground_truth_head))
print("Regression results (first 5 observations):\n{}".format(predictions_head))
print("Are predicted results on GPU: {}".format(predictions_on_gpu))
17 changes: 13 additions & 4 deletions sklearnex/_device_offload.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@

import numpy as np

from ._config import get_config
from ._utils import get_patch_message

try:
from dpctl import SyclQueue
from dpctl.memory import MemoryUSMDevice, as_usm_memory
Expand All @@ -32,6 +29,16 @@
except ImportError:
dpctl_available = False

try:
import dpnp

dpnp_available = True
except ImportError:
dpnp_available = False

from ._config import get_config
from ._utils import get_patch_message

oneapi_is_available = "daal4py.oneapi" in sys.modules
if oneapi_is_available:
from daal4py.oneapi import _get_device_name_sycl_ctxt, _get_sycl_ctxt_params
Expand Down Expand Up @@ -197,7 +204,9 @@ def wrapper(self, *args, **kwargs):
usm_iface = getattr(data[0], "__sycl_usm_array_interface__", None)
result = func(self, *args, **kwargs)
if usm_iface is not None:
return _copy_to_usm(usm_iface["syclobj"], result)
result = _copy_to_usm(usm_iface["syclobj"], result)
if dpnp_available and isinstance(data[0], dpnp.ndarray):
result = dpnp.array(result, copy=False)
Comment on lines +207 to +209
Copy link
Contributor Author

@samir-nasibli samir-nasibli Jul 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This and the related dpnp import are the only functional changes in this file; the rest is formatting.

return result

return wrapper
Loading