From b1a505fe857b0f6b7907c4305efc8acb0d313b81 Mon Sep 17 00:00:00 2001 From: Anatoly Volkov <117643568+avolkov-intel@users.noreply.github.com> Date: Tue, 30 Apr 2024 12:37:33 +0200 Subject: [PATCH 01/65] Add sparsity support for logloss function primitive (#2694) * Initial implementation of sparse gemm primitive * 1) clang-format 2) Temporary workaround: commented out the use of to get rid of the link error * Revert formatting in dispatcher.hpp * Move sparse_matrix_handle_impl into detail namespace. Fix build error. * Fix for the previous commit * Update MKL FPK microlibs versions * Copyright update to UXL * Add sparse gemv primitive and test * Revert the workaround code in sparse matrix handle * clang-format * Update MKL microlibs version * Revert the change for MKL FPK for CPU * Update dpcpp compiler version to 2024.0.3 in public CI * Fix APT package name for dpcpp compiler * Fix APT package name for dpcpp compiler * Add sparse_blas into bazel BUILD file in backend/primitives folder * Add consts in set_csr_data API * Add test on sparse matrix handle functionality * chore(deps): update suzuki-shunsuke/github-action-renovate-config-validator action to v1 (#2610) * chore(deps): update suzuki-shunsuke/github-action-renovate-config-validator action to v1 * update renovate.json config as recommended --------- Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: ethanglaser * fix: distributed online covariance samples fix (#2677) * chore(deps): update suzuki-shunsuke/github-action-renovate-config-validator action to v1.0.1 (#2678) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> * chore(deps): update ubuntu:22.04 docker digest to 77906da (#2683) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> * Add inner_iterations_count result option to LogisticRegression (#2682) * Initial commit * Minor * Remove redundant wait and throw * Initial commit * Add naive sparse test * Substitute check_val function with IS_CLOSE macros * Update tests to achieve better precision * Remove redundant fil * Minor * Minor * Minor refactoring * Fix optimizers test * Refactor tests, minor fixes, update copyrights * Add comments --------- Co-authored-by: Victoriya Fedotova Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> Co-authored-by: ethanglaser Co-authored-by: Aleksandr Solovev Co-authored-by: Anatoly Volkov --- .../algo/basic_statistics/test/fixture.hpp | 5 +- .../logistic_regression/detail/optimizer.cpp | 6 +- .../backend/primitives/objective_function.hpp | 1 + .../primitives/objective_function/BUILD | 1 + .../primitives/objective_function/logloss.hpp | 105 +-- .../objective_function/logloss_dpc.cpp | 666 ++++++------ .../objective_function/logloss_functors.hpp | 121 ++++ .../logloss_functors_dpc.cpp | 536 ++++++++++++++ .../objective_function/test/fixture.hpp | 226 ++++-- .../objective_function/test/logloss_dpc.cpp | 71 +- .../test/logloss_perf_dpc.cpp | 2 +- .../test/logloss_spmd_dpc.cpp | 15 +- .../objective_function/test/spmd_fixture.hpp | 26 +- .../optimizers/test/newton_cg_dpc.cpp | 2 +- .../backend/primitives/sparse_blas/handle.hpp | 4 +- .../sparse_blas/test/handle_dpc.cpp | 36 + .../dal/test/engine/csr_table_builder.hpp | 14 +- 17 files changed, 1154 insertions(+), 683 deletions(-) create mode 100644 cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp create mode 100644 cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors_dpc.cpp
create mode 100644 cpp/oneapi/dal/backend/primitives/sparse_blas/test/handle_dpc.cpp diff --git a/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp b/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp index eee7eed8c41..3ba569a6e5a 100644 --- a/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp +++ b/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp @@ -107,7 +107,8 @@ class basic_statistics_test : public te::crtp_algo_fixture { check_for_exception_for_non_requested_results(compute_mode, compute_result); } - void csr_general_checks(const te::csr_table_builder& data, bs::result_option_id compute_mode) { + void csr_general_checks(const te::csr_table_builder<>& data, + bs::result_option_id compute_mode) { const auto desc = bs::descriptor{}.set_result_options( compute_mode); @@ -121,7 +122,7 @@ class basic_statistics_test : public te::crtp_algo_fixture { // TODO: Fix DAAL code. On big datasets there is an error in computing. // To reproduce it remove this check from test case in batch.cpp - bool not_cpu_friendly(const te::csr_table_builder& data) { + bool not_cpu_friendly(const te::csr_table_builder<>& data) { auto policy = this->get_policy(); return (data.row_count_ > 100 || data.column_count_ > 100) && policy.is_cpu(); } diff --git a/cpp/oneapi/dal/algo/logistic_regression/detail/optimizer.cpp b/cpp/oneapi/dal/algo/logistic_regression/detail/optimizer.cpp index b56df6bf0e9..c051591e8ec 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/detail/optimizer.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/detail/optimizer.cpp @@ -37,7 +37,7 @@ class newton_cg_optimizer_impl : public optimizer_impl { tol_(tol), inner_iter_(0) {} - optimizer_type get_optimizer_type() override { + optimizer_type get_optimizer_type() final { return optimizer_type::newton_cg; } @@ -45,12 +45,12 @@ class newton_cg_optimizer_impl : public optimizer_impl { return tol_; } - std::int64_t get_max_iter() override { + std::int64_t get_max_iter() final { return max_iter_; } // this parameter is set after minimize function was called - std::int64_t get_inner_iter() override { + std::int64_t get_inner_iter() final { return inner_iter_; } diff --git a/cpp/oneapi/dal/backend/primitives/objective_function.hpp b/cpp/oneapi/dal/backend/primitives/objective_function.hpp index 6803d825954..013de5b0c98 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function.hpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function.hpp @@ -17,3 +17,4 @@ #pragma once #include "oneapi/dal/backend/primitives/objective_function/logloss.hpp" +#include "oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp" diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/BUILD b/cpp/oneapi/dal/backend/primitives/objective_function/BUILD index 91609568c7f..6d183debdcb 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/BUILD +++ b/cpp/oneapi/dal/backend/primitives/objective_function/BUILD @@ -11,6 +11,7 @@ dal_module( "@onedal//cpp/oneapi/dal/backend/primitives:common", "@onedal//cpp/oneapi/dal/backend/primitives:blas", "@onedal//cpp/oneapi/dal/backend/primitives/optimizers", + "@onedal//cpp/oneapi/dal/backend/primitives:sparse_blas", ], ) diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/logloss.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/logloss.hpp index 9a7513d4dde..28a7fa9212c 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/logloss.hpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function/logloss.hpp @@ -1,5 +1,5 @@ 
/******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ #include "oneapi/dal/backend/primitives/optimizers/common.hpp" #include "oneapi/dal/table/common.hpp" #include "oneapi/dal/backend/communicator.hpp" +#include "oneapi/dal/backend/primitives/sparse_blas/handle.hpp" namespace oneapi::dal::backend::primitives { @@ -32,6 +33,14 @@ sycl::event compute_probabilities(sycl::queue& q, bool fit_intercept = true, const event_vector& deps = {}); +template +sycl::event compute_probabilities_sparse(sycl::queue& q, + const ndview& parameters, + sparse_matrix_handle& sp_handler, + ndview& probabilities, + bool fit_intercept = true, + const event_vector& deps = {}); + template sycl::event compute_logloss(sycl::queue& q, const ndview& labels, @@ -50,6 +59,16 @@ sycl::event compute_logloss_with_der(sycl::queue& q, bool fit_intercept = true, const event_vector& deps = {}); +template +sycl::event compute_logloss_with_der_sparse(sycl::queue& q, + sparse_matrix_handle& sp_handler, + const ndview& labels, + const ndview& probabilities, + ndview& out, + ndview& out_derivative, + bool fit_intercept = true, + const event_vector& deps = {}); + template sycl::event compute_derivative(sycl::queue& q, const ndview& data, @@ -104,88 +123,4 @@ sycl::event compute_raw_hessian(sycl::queue& q, ndview& out_hessian, const event_vector& deps = {}); -using comm_t = backend::communicator; - -template -class logloss_hessian_product : public base_matrix_operator { -public: - logloss_hessian_product(sycl::queue& q, - const table& data, - Float L2 = Float(0), - bool fit_intercept = true, - std::int64_t bsz = -1); - logloss_hessian_product(sycl::queue& q, - comm_t comm, - const table& data, - Float L2 = Float(0), - bool fit_intercept = true, - std::int64_t bsz = -1); - sycl::event operator()(const ndview& vec, - ndview& out, - const event_vector& deps) final; - ndview& get_raw_hessian(); - -private: - sycl::event compute_with_fit_intercept(const ndview& vec, - ndview& out, - const event_vector& deps); - sycl::event compute_without_fit_intercept(const ndview& vec, - ndview& out, - const event_vector& deps); - - sycl::queue q_; - comm_t comm_; - const table data_; - Float L2_; - bool fit_intercept_; - ndarray raw_hessian_; - ndarray buffer_; - ndarray tmp_gpu_; - const std::int64_t n_; - const std::int64_t p_; - const std::int64_t bsz_; -}; - -template -class logloss_function : public base_function { -public: - logloss_function(sycl::queue queue, - const table& data, - const ndview& labels, - Float L2 = 0.0, - bool fit_intercept = true, - std::int64_t bsz = -1); - logloss_function(sycl::queue queue, - comm_t comm, - const table& data, - const ndview& labels, - Float L2 = 0.0, - bool fit_intercept = true, - std::int64_t bsz = -1); - Float get_value() final; - ndview& get_gradient() final; - base_matrix_operator& get_hessian_product() final; - - event_vector update_x(const ndview& x, - bool need_hessp = false, - const event_vector& deps = {}) final; - -private: - sycl::queue q_; - comm_t comm_; - const table data_; - const ndview labels_; - const std::int64_t n_; - const std::int64_t p_; - Float L2_; - bool fit_intercept_; - const std::int64_t bsz_; - ndarray probabilities_; - ndarray gradient_; - ndarray buffer_; - logloss_hessian_product hessp_; - const std::int64_t 
dimension_; - Float value_; -}; - } // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/logloss_dpc.cpp b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_dpc.cpp index a288239f1d2..e3dce105dbc 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/logloss_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_dpc.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include "oneapi/dal/backend/primitives/blas/gemv.hpp" #include "oneapi/dal/backend/primitives/element_wise.hpp" #include "oneapi/dal/detail/profiler.hpp" +#include "oneapi/dal/backend/primitives/sparse_blas.hpp" namespace oneapi::dal::backend::primitives { @@ -40,7 +41,7 @@ sycl::event compute_probabilities(sycl::queue& q, ONEDAL_ASSERT(parameters.get_dimension(0) == (fit_intercept ? p + 1 : p)); ONEDAL_ASSERT(probabilities.get_dimension(0) == n); - auto fill_event = fill(q, probabilities, Float(1), {}); + auto fill_event = fill(q, probabilities, Float(1), deps); using oneapi::dal::backend::operator+; Float w0 = fit_intercept ? parameters.get_slice(0, 1).at_device(q, 0l) : 0; // Poor performance @@ -72,6 +73,58 @@ }); } +template +sycl::event compute_probabilities_sparse(sycl::queue& q, + const ndview& parameters, + sparse_matrix_handle& sp_handler, + ndview& probabilities, + bool fit_intercept, + const event_vector& deps) { + ONEDAL_ASSERT(probabilities.has_mutable_data()); + ONEDAL_PROFILER_TASK(compute_probabilities_sparse, q); + + const std::int64_t n = probabilities.get_dimension(0); + const std::int64_t p = parameters.get_dimension(0) - (fit_intercept ? 1 : 0); + + auto fill_event = fill(q, probabilities, Float(1), deps); + Float w0 = fit_intercept ? parameters.get_slice(0, 1).at_device(q, 0l) : 0; // Poor performance + ndview param_suf = fit_intercept ? parameters.get_slice(1, p + 1) : parameters; + + sycl::event gemv_event; + { + gemv_event = gemv(q, + transpose::nontrans, + sp_handler, + param_suf, + probabilities, + Float(1), + w0, + { fill_event }); + // To ensure sparse blas kernel stability + gemv_event.wait_and_throw(); + } + + auto* const prob_ptr = probabilities.get_mutable_data(); + + const Float bottom = sizeof(Float) == 4 ?
1e-7 : 1e-15; + const Float top = Float(1.0) - bottom; + // Log Loss is undefined for p = 0 and p = 1 so probabilities are clipped into [eps, 1 - eps] + + return q.submit([&](sycl::handler& cgh) { + cgh.depends_on(gemv_event); + const auto range = make_range_1d(n); + cgh.parallel_for(range, [=](sycl::id<1> idx) { + prob_ptr[idx] = 1 / (1 + sycl::exp(-prob_ptr[idx])); + if (prob_ptr[idx] < bottom) { + prob_ptr[idx] = bottom; + } + if (prob_ptr[idx] > top) { + prob_ptr[idx] = top; + } + }); + }); +} + template sycl::event compute_logloss(sycl::queue& q, const ndview& labels, @@ -108,6 +161,92 @@ sycl::event compute_logloss(sycl::queue& q, return loss_event; } +template +sycl::event compute_logloss_with_der_sparse(sycl::queue& q, + sparse_matrix_handle& sp_handler, + const ndview& labels, + const ndview& probabilities, + ndview& out, + ndview& out_derivative, + bool fit_intercept, + const event_vector& deps) { + ONEDAL_PROFILER_TASK(compute_logloss_with_grad_sparse, q); + + const std::int64_t n = labels.get_dimension(0); + const std::int64_t p = out_derivative.get_dimension(0) - (fit_intercept ? 1 : 0); + + ONEDAL_ASSERT(labels.has_data()); + ONEDAL_ASSERT(probabilities.has_data()); + ONEDAL_ASSERT(out.has_mutable_data()); + ONEDAL_ASSERT(out_derivative.has_mutable_data()); + ONEDAL_ASSERT(out.get_dimension(0) == 1); + ONEDAL_ASSERT(probabilities.get_dimension(0) == n); + + auto derivative_object = ndarray::empty(q, { n }, sycl::usm::alloc::device); + + auto* const der_obj_ptr = derivative_object.get_mutable_data(); + const auto* const proba_ptr = probabilities.get_data(); + const auto* const labels_ptr = labels.get_data(); + auto* const out_ptr = out.get_mutable_data(); + auto* const out_derivative_ptr = out_derivative.get_mutable_data(); + + auto loss_event = q.submit([&](sycl::handler& cgh) { + using oneapi::dal::backend::operator+; + using sycl::reduction; + + cgh.depends_on(deps); + auto sum_reduction_logloss = reduction(out_ptr, sycl::plus<>()); + const auto wg_size = propose_wg_size(q); + const auto range = make_multiple_nd_range_1d(n, wg_size); + + cgh.parallel_for(range, sum_reduction_logloss, [=](sycl::nd_item<1> id, auto& sum_logloss) { + auto idx = id.get_group_linear_id() * wg_size + id.get_local_linear_id(); + if (idx >= std::size_t(n)) + return; + const Float prob = proba_ptr[idx]; + const float label = labels_ptr[idx]; + sum_logloss += -label * sycl::log(prob) - (1 - label) * sycl::log(1 - prob); + der_obj_ptr[idx] = prob - label; + }); + }); + sycl::event derw0_event = sycl::event{}; + if (fit_intercept) { + derw0_event = q.submit([&](sycl::handler& cgh) { + using oneapi::dal::backend::operator+; + using sycl::reduction; + + cgh.depends_on(deps + loss_event); + auto sum_reduction_derivative_w0 = reduction(out_derivative_ptr, sycl::plus<>()); + const auto wg_size = propose_wg_size(q); + const auto range = make_multiple_nd_range_1d(n, wg_size); + + cgh.parallel_for(range, + sum_reduction_derivative_w0, + [=](sycl::nd_item<1> id, auto& sum_dw0) { + auto idx = + id.get_group_linear_id() * wg_size + id.get_local_linear_id(); + if (idx >= std::size_t(n)) + return; + sum_dw0 += der_obj_ptr[idx]; + }); + }); + } + + auto out_der_suffix = fit_intercept ? 
out_derivative.get_slice(1, p + 1) : out_derivative; + sycl::event gemv_event; + { + gemv_event = gemv(q, + transpose::trans, + sp_handler, + derivative_object, + out_der_suffix, + Float(1), + Float(0), + { loss_event, derw0_event }); + } + return gemv_event; +} + template sycl::event compute_logloss_with_der(sycl::queue& q, const ndview& data, @@ -469,452 +608,83 @@ sycl::event compute_raw_hessian(sycl::queue& q, return element_wise(q, kernel, probabilities, Float(0), out_hessian, deps); } -std::int64_t get_block_size(std::int64_t n, std::int64_t p) { - constexpr std::int64_t max_alloc_size = 1 << 21; - return p > max_alloc_size ? 512 : max_alloc_size / p; -} - -template -logloss_hessian_product::logloss_hessian_product(sycl::queue& q, - const table& data, - Float L2, - bool fit_intercept, - std::int64_t bsz) - : q_(q), - data_(data), - L2_(L2), - fit_intercept_(fit_intercept), - n_(data.get_row_count()), - p_(data.get_column_count()), - bsz_(bsz == -1 ? get_block_size(n_, p_) : bsz) { - raw_hessian_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - buffer_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - tmp_gpu_ = ndarray::empty(q_, { p_ + 1 }, sycl::usm::alloc::device); -} - -template -logloss_hessian_product::logloss_hessian_product(sycl::queue& q, - comm_t comm, - const table& data, - Float L2, - bool fit_intercept, - std::int64_t bsz) - : q_(q), - comm_(comm), - data_(data), - L2_(L2), - fit_intercept_(fit_intercept), - n_(data.get_row_count()), - p_(data.get_column_count()), - bsz_(bsz == -1 ? get_block_size(n_, p_) : bsz) { - raw_hessian_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - buffer_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - tmp_gpu_ = ndarray::empty(q_, { p_ + 1 }, sycl::usm::alloc::device); -} - -template -ndview& logloss_hessian_product::get_raw_hessian() { - return raw_hessian_; -} - -template -sycl::event logloss_hessian_product::compute_with_fit_intercept(const ndview& vec, - ndview& out, - const event_vector& deps) { - ONEDAL_PROFILER_TASK(compute_hessp_with_fit_intercept, q_); - auto* const tmp_ptr = tmp_gpu_.get_mutable_data(); - ONEDAL_ASSERT(vec.get_dimension(0) == p_ + 1); - ONEDAL_ASSERT(out.get_dimension(0) == p_ + 1); - auto fill_buffer_event = fill(q_, buffer_, Float(1), deps); - auto out_suf = out.get_slice(1, p_ + 1); - auto tmp_suf = tmp_gpu_.slice(1, p_); - auto out_bias = out.get_slice(0, 1); - auto vec_suf = vec.get_slice(1, p_ + 1); - ndview tmp_ndview = tmp_gpu_; - - sycl::event fill_out_event = fill(q_, out, Float(0), deps); - - Float v0 = vec.at_device(q_, 0, deps); - - const uniform_blocking blocking(n_, bsz_); - - row_accessor data_accessor(data_); - event_vector last_iter_deps = { fill_buffer_event, fill_out_event }; - - for (std::int64_t b = 0; b < blocking.get_block_count(); ++b) { - const auto last = blocking.get_block_end_index(b); - const auto first = blocking.get_block_start_index(b); - const auto length = last - first; - auto x_rows = data_accessor.pull(q_, { first, last }, sycl::usm::alloc::device); - auto x_nd = pr::ndarray::wrap(x_rows, { length, p_ }); - auto buffer_batch = buffer_.slice(first, length); - sycl::event event_xv = gemv(q_, x_nd, vec_suf, buffer_batch, Float(1), v0, last_iter_deps); - event_xv.wait_and_throw(); // Without this line gemv does not work correctly - - auto* const buffer_ptr = buffer_batch.get_mutable_data(); - const auto* const hess_ptr = raw_hessian_.get_data() + first; - - auto fill_tmp_event = fill(q_, tmp_gpu_, Float(0), last_iter_deps); - - sycl::event 
event_dxv = q_.submit([&](sycl::handler& cgh) { - cgh.depends_on({ event_xv, fill_tmp_event }); - const auto range = make_range_1d(length); - auto sum_reduction = sycl::reduction(tmp_ptr, sycl::plus<>()); - cgh.parallel_for(range, sum_reduction, [=](sycl::id<1> idx, auto& sum_v0) { - buffer_ptr[idx] = buffer_ptr[idx] * hess_ptr[idx]; - sum_v0 += buffer_ptr[idx]; - }); - }); - - sycl::event event_xtdxv = - gemv(q_, x_nd.t(), buffer_batch, tmp_suf, Float(1), Float(0), { event_dxv }); - event_xtdxv.wait_and_throw(); // Without this line gemv does not work correctly - - sycl::event update_result_e = - element_wise(q_, sycl::plus<>(), out, tmp_ndview, out, { event_xtdxv }); - - last_iter_deps = { update_result_e }; - } - - if (comm_.get_rank_count() > 1) { - sycl::event::wait_and_throw(last_iter_deps); - { - ONEDAL_PROFILER_TASK(hessp_allreduce); - auto hessp_arr = dal::array::wrap(q_, out.get_mutable_data(), out.get_count()); - comm_.allreduce(hessp_arr).wait(); - } - } - - const Float regularization_factor = L2_; - - const auto kernel_regularization = [=](const Float a, const Float param) { - return a + param * regularization_factor; - }; - - auto add_regularization_event = - element_wise(q_, kernel_regularization, out_suf, vec_suf, out_suf, last_iter_deps); - return add_regularization_event; -} - -template -sycl::event logloss_hessian_product::compute_without_fit_intercept( - const ndview& vec, - ndview& out, - const event_vector& deps) { - ONEDAL_PROFILER_TASK(compute_hessp_without_fit_intercept, q_); - ONEDAL_ASSERT(vec.get_dimension(0) == p_); - ONEDAL_ASSERT(out.get_dimension(0) == p_); - - sycl::event fill_out_event = fill(q_, out, Float(0), deps); - - const uniform_blocking blocking(n_, bsz_); - - ndview tmp_ndview = tmp_gpu_.slice(0, p_); - - row_accessor data_accessor(data_); - event_vector last_iter_deps = { fill_out_event }; - - for (std::int64_t b = 0; b < blocking.get_block_count(); ++b) { - const auto last = blocking.get_block_end_index(b); - const auto first = blocking.get_block_start_index(b); - const auto length = last - first; - ONEDAL_ASSERT(0l < length); - auto x_rows = data_accessor.pull(q_, { first, last }, sycl::usm::alloc::device); - auto x_nd = pr::ndarray::wrap(x_rows, { length, p_ }); - ndview buffer_batch = buffer_.slice(first, length); - ndview hess_batch = raw_hessian_.slice(first, length); - - sycl::event event_xv = - gemv(q_, x_nd, vec, buffer_batch, Float(1), Float(0), last_iter_deps); - event_xv.wait_and_throw(); // Without this line gemv does not work correctly - - constexpr sycl::multiplies kernel_mul{}; - auto event_dxv = - element_wise(q_, kernel_mul, buffer_batch, hess_batch, buffer_batch, { event_xv }); - - auto fill_tmp_event = fill(q_, tmp_ndview, Float(0), last_iter_deps); - - sycl::event event_xtdxv = gemv(q_, - x_nd.t(), - buffer_batch, - tmp_ndview, - Float(1), - Float(0), - { event_dxv, fill_tmp_event }); - event_xtdxv.wait_and_throw(); // Without this line gemv does not work correctly - - sycl::event update_grad_e = - element_wise(q_, sycl::plus<>(), out, tmp_ndview, out, { event_xtdxv }); - last_iter_deps = { update_grad_e }; - } - - if (comm_.get_rank_count() > 1) { - { - ONEDAL_PROFILER_TASK(hessp_allreduce); - auto hessp_arr = dal::array::wrap(q_, - out.get_mutable_data(), - out.get_count(), - last_iter_deps); - comm_.allreduce(hessp_arr).wait(); - } - } - - const Float regularization_factor = L2_; - - const auto kernel_regularization = [=](const Float a, const Float param) { - return a + param * regularization_factor; - }; - - auto 
add_regularization_event = - element_wise(q_, kernel_regularization, out, vec, out, last_iter_deps); - - return add_regularization_event; -} - -template -sycl::event logloss_hessian_product::operator()(const ndview& vec, - ndview& out, - const event_vector& deps) { - if (fit_intercept_) { - return compute_with_fit_intercept(vec, out, deps); - } - else { - return compute_without_fit_intercept(vec, out, deps); - } -} - -template -logloss_function::logloss_function(sycl::queue q, - const table& data, - const ndview& labels, - Float L2, - bool fit_intercept, - std::int64_t bsz) - : q_(q), - data_(data), - labels_(labels), - n_(data.get_row_count()), - p_(data.get_column_count()), - L2_(L2), - fit_intercept_(fit_intercept), - bsz_(bsz == -1 ? get_block_size(n_, p_) : bsz), - hessp_(q, data, L2, fit_intercept, bsz_), - dimension_(fit_intercept ? p_ + 1 : p_) { - ONEDAL_ASSERT(labels.get_dimension(0) == n_); - probabilities_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - gradient_ = ndarray::empty(q_, { dimension_ }, sycl::usm::alloc::device); - buffer_ = ndarray::empty(q_, { p_ + 2 }, sycl::usm::alloc::device); -} - -template -logloss_function::logloss_function(sycl::queue q, - comm_t comm, - const table& data, - const ndview& labels, - Float L2, - bool fit_intercept, - std::int64_t bsz) - : q_(q), - comm_(comm), - data_(data), - labels_(labels), - n_(data.get_row_count()), - p_(data.get_column_count()), - L2_(L2), - fit_intercept_(fit_intercept), - bsz_(bsz == -1 ? get_block_size(n_, p_) : bsz), - hessp_(q, comm, data, L2, fit_intercept, bsz_), - dimension_(fit_intercept ? p_ + 1 : p_) { - ONEDAL_ASSERT(labels.get_dimension(0) == n_); - probabilities_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); - gradient_ = ndarray::empty(q_, { dimension_ }, sycl::usm::alloc::device); - buffer_ = ndarray::empty(q_, { p_ + 2 }, sycl::usm::alloc::device); -} - -template -event_vector logloss_function::update_x(const ndview& x, - bool need_hessp, - const event_vector& deps) { - ONEDAL_PROFILER_TASK(logloss_function_update_weights, q_); - using dal::backend::operator+; - value_ = 0; - auto fill_event = fill(q_, gradient_, Float(0), deps); - const uniform_blocking blocking(n_, bsz_); - - event_vector last_iter_e = { fill_event }; - - ndview grad_ndview = gradient_; - ndview grad_batch = buffer_.slice(1, dimension_); - ndview loss_batch = buffer_.slice(0, 1); - - ndview raw_hessian = hessp_.get_raw_hessian(); - - for (std::int64_t b = 0; b < blocking.get_block_count(); ++b) { - const auto first = blocking.get_block_start_index(b); - const auto last = blocking.get_block_end_index(b); - const std::int64_t cursize = last - first; - ONEDAL_ASSERT(0l < cursize); - - const auto data_rows = - row_accessor(data_).pull(q_, { first, last }, sycl::usm::alloc::device); - const auto data_batch = ndarray::wrap(data_rows, { cursize, p_ }); - const auto labels_batch = labels_.get_slice(first, first + cursize); - auto prob_batch = probabilities_.slice(first, cursize); - sycl::event prob_e = - compute_probabilities(q_, x, data_batch, prob_batch, fit_intercept_, last_iter_e); - - constexpr Float zero(0); - - auto fill_buffer_e = fill(q_, buffer_, zero, last_iter_e); - - sycl::event compute_e = compute_logloss_with_der(q_, - data_batch, - labels_batch, - prob_batch, - loss_batch, - grad_batch, - fit_intercept_, - { fill_buffer_e, prob_e }); - - sycl::event update_grad_e = - element_wise(q_, sycl::plus<>(), grad_ndview, grad_batch, grad_ndview, { compute_e }); - - value_ += loss_batch.at_device(q_, 0, { compute_e 
}); - - last_iter_e = { update_grad_e }; - - if (need_hessp) { - auto raw_hessian_batch = raw_hessian.get_slice(first, first + cursize); - auto hess_e = compute_raw_hessian(q_, prob_batch, raw_hessian_batch, { prob_e }); - last_iter_e = last_iter_e + hess_e; - } - - // TODO: Delete this wait_and_throw - // ensure that while event is running in the background data is not overwritten - wait_or_pass(last_iter_e).wait_and_throw(); - } - - if (comm_.get_rank_count() > 1) { - { - ONEDAL_PROFILER_TASK(gradient_allreduce); - auto gradient_arr = dal::array::wrap(q_, - gradient_.get_mutable_data(), - gradient_.get_count(), - last_iter_e); - comm_.allreduce(gradient_arr).wait(); - } - { - ONEDAL_PROFILER_TASK(value_allreduce); - comm_.allreduce(value_).wait(); - } - } - - if (L2_ > 0) { - auto fill_loss_e = fill(q_, loss_batch, Float(0), { last_iter_e }); - auto loss_ptr = loss_batch.get_mutable_data(); - auto grad_ptr = gradient_.get_mutable_data(); - auto w_ptr = x.get_data(); - Float regularization_factor = L2_; - - auto regularization_e = q_.submit([&](sycl::handler& cgh) { - cgh.depends_on(last_iter_e + fill_loss_e); - const auto range = make_range_1d(p_); - const std::int64_t st_id = fit_intercept_; - auto sum_reduction = sycl::reduction(loss_ptr, sycl::plus<>()); - cgh.parallel_for(range, sum_reduction, [=](sycl::id<1> idx, auto& sum_v0) { - const Float param = w_ptr[st_id + idx]; - grad_ptr[st_id + idx] += regularization_factor * param; - sum_v0 += regularization_factor * param * param / 2; - }); - }); - - value_ += loss_batch.at_device(q_, 0, { regularization_e }); - - last_iter_e = { regularization_e }; - } - - return last_iter_e; -} - -template -Float logloss_function::get_value() { - return value_; -} -template -ndview& logloss_function::get_gradient() { - return gradient_; -} - -template -base_matrix_operator& logloss_function::get_hessian_product() { - return hessp_; -} - -#define INSTANTIATE(F) \ - template sycl::event compute_probabilities(sycl::queue&, \ - const ndview&, \ - const ndview&, \ - ndview&, \ - bool, \ - const event_vector&); \ - template sycl::event compute_logloss(sycl::queue&, \ - const ndview&, \ - const ndview&, \ - ndview&, \ - bool, \ - const event_vector&); \ - template sycl::event compute_logloss_with_der(sycl::queue&, \ - const ndview&, \ - const ndview&, \ - const ndview&, \ - ndview&, \ - ndview&, \ - bool, \ - const event_vector&); \ - template sycl::event compute_derivative(sycl::queue&, \ - const ndview&, \ - const ndview&, \ - const ndview&, \ - ndview&, \ - bool, \ - const event_vector&); \ - template sycl::event add_regularization_loss(sycl::queue&, \ - const ndview&, \ - ndview&, \ - F, \ - F, \ - bool, \ - const event_vector&); \ - template sycl::event add_regularization_gradient_loss(sycl::queue&, \ - const ndview&, \ - ndview&, \ - ndview&, \ - F, \ - F, \ - bool, \ - const event_vector&); \ - template sycl::event add_regularization_gradient(sycl::queue&, \ - const ndview&, \ - ndview&, \ - F, \ - F, \ - bool, \ - const event_vector&); \ - template sycl::event compute_hessian(sycl::queue&, \ - const ndview&, \ - const ndview&, \ - const ndview&, \ - ndview&, \ - const F, \ - const F, \ - bool, \ - const event_vector&); \ - template sycl::event compute_raw_hessian(sycl::queue&, \ - const ndview&, \ - ndview&, \ - const event_vector&); \ - template class logloss_hessian_product; \ - template class logloss_function; +#define INSTANTIATE(F) \ + template sycl::event compute_probabilities(sycl::queue&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + 
bool, \ + const event_vector&); \ + template sycl::event compute_probabilities_sparse(sycl::queue&, \ + const ndview&, \ + sparse_matrix_handle&, \ + ndview&, \ + bool, \ + const event_vector&); \ + template sycl::event compute_logloss(sycl::queue&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + bool, \ + const event_vector&); \ + template sycl::event compute_logloss_with_der(sycl::queue&, \ + const ndview&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + ndview&, \ + bool, \ + const event_vector&); \ + template sycl::event compute_logloss_with_der_sparse(sycl::queue&, \ + sparse_matrix_handle&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + ndview&, \ + bool, \ + const event_vector&); \ + template sycl::event compute_derivative(sycl::queue&, \ + const ndview&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + bool, \ + const event_vector&); \ + template sycl::event add_regularization_loss(sycl::queue&, \ + const ndview&, \ + ndview&, \ + F, \ + F, \ + bool, \ + const event_vector&); \ + template sycl::event add_regularization_gradient_loss(sycl::queue&, \ + const ndview&, \ + ndview&, \ + ndview&, \ + F, \ + F, \ + bool, \ + const event_vector&); \ + template sycl::event add_regularization_gradient(sycl::queue&, \ + const ndview&, \ + ndview&, \ + F, \ + F, \ + bool, \ + const event_vector&); \ + template sycl::event compute_hessian(sycl::queue&, \ + const ndview&, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + const F, \ + const F, \ + bool, \ + const event_vector&); \ + template sycl::event compute_raw_hessian(sycl::queue&, \ + const ndview&, \ + ndview&, \ + const event_vector&); INSTANTIATE(float); INSTANTIATE(double); diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp new file mode 100644 index 00000000000..b00a788a324 --- /dev/null +++ b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp @@ -0,0 +1,121 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/backend/primitives/utils.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include "oneapi/dal/backend/primitives/optimizers/common.hpp" +#include "oneapi/dal/table/common.hpp" +#include "oneapi/dal/backend/communicator.hpp" +#include "oneapi/dal/backend/primitives/sparse_blas/handle.hpp" + +namespace oneapi::dal::backend::primitives { + +using comm_t = backend::communicator; + +template +class logloss_hessian_product : public base_matrix_operator { + friend dal::detail::pimpl_accessor; + +public: + logloss_hessian_product(sycl::queue& q, + const table& data, + Float L2 = Float(0), + bool fit_intercept = true, + std::int64_t bsz = -1); + logloss_hessian_product(sycl::queue& q, + comm_t comm, + const table& data, + Float L2 = Float(0), + bool fit_intercept = true, + std::int64_t bsz = -1); + sycl::event operator()(const ndview& vec, + ndview& out, + const event_vector& deps) final; + ndview& get_raw_hessian(); + +private: + void reserve_memory(); + + sycl::event compute_with_fit_intercept(const ndview& vec, + ndview& out, + const event_vector& deps); + sycl::event compute_without_fit_intercept(const ndview& vec, + ndview& out, + const event_vector& deps); + sycl::queue& q_; + comm_t comm_; + const table data_; + dal::detail::pimpl sp_handle_; + ndarray raw_hessian_; + ndarray buffer_; + ndarray tmp_gpu_; + const std::int64_t n_; + const std::int64_t p_; + Float L2_; + bool fit_intercept_; + const std::int64_t bsz_; +}; + +template +class logloss_function : public base_function { + friend dal::detail::pimpl_accessor; + +public: + logloss_function(sycl::queue& queue, + const table& data, + const ndview& labels, + Float L2 = 0.0, + bool fit_intercept = true, + std::int64_t bsz = -1); + logloss_function(sycl::queue& queue, + comm_t comm, + const table& data, + const ndview& labels, + Float L2 = 0.0, + bool fit_intercept = true, + std::int64_t bsz = -1); + Float get_value() final; + ndview& get_gradient() final; + base_matrix_operator& get_hessian_product() final; + + event_vector update_x(const ndview& x, + bool need_hessp = false, + const event_vector& deps = {}) final; + +private: + void reserve_memory(); + + sycl::queue& q_; + comm_t comm_; + const table data_; + dal::detail::pimpl sp_handle_; + const ndview labels_; + ndarray probabilities_; + ndarray gradient_; + ndarray buffer_; + const std::int64_t n_; + const std::int64_t p_; + Float L2_; + bool fit_intercept_; + const std::int64_t bsz_; + const std::int64_t dimension_; + Float value_; + logloss_hessian_product hessp_; +}; + +} // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors_dpc.cpp b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors_dpc.cpp new file mode 100644 index 00000000000..4fc3f16270f --- /dev/null +++ b/cpp/oneapi/dal/backend/primitives/objective_function/logloss_functors_dpc.cpp @@ -0,0 +1,536 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/backend/primitives/objective_function/logloss.hpp" +#include "oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp" +#include "oneapi/dal/backend/primitives/blas/gemv.hpp" +#include "oneapi/dal/backend/primitives/element_wise.hpp" +#include "oneapi/dal/detail/profiler.hpp" +#include "oneapi/dal/backend/primitives/sparse_blas.hpp" + +namespace oneapi::dal::backend::primitives { + +namespace pr = dal::backend::primitives; + +std::int64_t get_block_size(std::int64_t n, std::int64_t p) { + constexpr std::int64_t max_alloc_size = 1 << 21; + return p > max_alloc_size ? 512 : max_alloc_size / p; +} + +template +void logloss_hessian_product::reserve_memory() { + raw_hessian_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); + buffer_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); + tmp_gpu_ = ndarray::empty(q_, { p_ + 1 }, sycl::usm::alloc::device); + if (data_.get_kind() == dal::csr_table::kind()) { + sp_handle_.reset(new sparse_matrix_handle(q_)); + set_csr_data(q_, *sp_handle_, static_cast(data_)); + } +} + +template +logloss_hessian_product::logloss_hessian_product(sycl::queue& q, + const table& data, + Float L2, + bool fit_intercept, + std::int64_t bsz) + : q_(q), + data_(data), + n_(data.get_row_count()), + p_(data.get_column_count()), + L2_(L2), + fit_intercept_(fit_intercept), + bsz_(bsz == -1 ? get_block_size(n_, p_) : bsz) { + this->reserve_memory(); +} + +template +logloss_hessian_product::logloss_hessian_product(sycl::queue& q, + comm_t comm, + const table& data, + Float L2, + bool fit_intercept, + std::int64_t bsz) + : q_(q), + comm_(comm), + data_(data), + n_(data.get_row_count()), + p_(data.get_column_count()), + L2_(L2), + fit_intercept_(fit_intercept), + bsz_(bsz == -1 ? 
get_block_size(n_, p_) : bsz) { + this->reserve_memory(); +} + +template +ndview& logloss_hessian_product::get_raw_hessian() { + return raw_hessian_; +} + +template +sycl::event logloss_hessian_product::compute_with_fit_intercept(const ndview& vec, + ndview& out, + const event_vector& deps) { + ONEDAL_PROFILER_TASK(compute_hessp_with_fit_intercept, q_); + auto* const tmp_ptr = tmp_gpu_.get_mutable_data(); + ONEDAL_ASSERT(vec.get_dimension(0) == p_ + 1); + ONEDAL_ASSERT(out.get_dimension(0) == p_ + 1); + auto fill_buffer_event = fill(q_, buffer_, Float(1), deps); + auto out_suf = out.get_slice(1, p_ + 1); + auto tmp_suf = tmp_gpu_.slice(1, p_); + auto out_bias = out.get_slice(0, 1); + auto vec_suf = vec.get_slice(1, p_ + 1); + ndview tmp_ndview = tmp_gpu_; + + sycl::event fill_out_event = fill(q_, out, Float(0), deps); + + const Float v0 = vec.at_device(q_, 0, deps); + event_vector last_iter_deps = { fill_buffer_event, fill_out_event }; + + if (data_.get_kind() == dal::csr_table::kind()) { + const auto* const hess_ptr = raw_hessian_.get_data(); + auto* const out_ptr = out.get_mutable_data(); + auto* const buffer_ptr = buffer_.get_mutable_data(); + sycl::event event_xv; + { + event_xv = gemv(q_, + transpose::nontrans, + *sp_handle_, + vec_suf, + buffer_, + Float(1), + v0, + last_iter_deps); + // to ensure sparse blas kernel stability + event_xv.wait_and_throw(); + } + + sycl::event event_dxv = q_.submit([&](sycl::handler& cgh) { + cgh.depends_on({ event_xv }); + const auto range = make_range_1d(n_); + auto sum_reduction = sycl::reduction(out_ptr, sycl::plus<>()); + cgh.parallel_for(range, sum_reduction, [=](sycl::id<1> idx, auto& sum_v0) { + buffer_ptr[idx] = buffer_ptr[idx] * hess_ptr[idx]; + sum_v0 += buffer_ptr[idx]; + }); + }); + sycl::event event_xtdxv; + { + event_xtdxv = gemv(q_, + transpose::trans, + *sp_handle_, + buffer_, + out_suf, + Float(1), + Float(0), + { event_dxv }); + // To ensure sparse blas kernel stability + event_xtdxv.wait_and_throw(); + } + last_iter_deps = { event_xtdxv }; + } + else { + const uniform_blocking blocking(n_, bsz_); + row_accessor data_accessor(data_); + + for (std::int64_t b = 0; b < blocking.get_block_count(); ++b) { + const auto last = blocking.get_block_end_index(b); + const auto first = blocking.get_block_start_index(b); + const auto length = last - first; + auto x_rows = data_accessor.pull(q_, { first, last }, sycl::usm::alloc::device); + auto x_nd = pr::ndarray::wrap(x_rows, { length, p_ }); + auto buffer_batch = buffer_.slice(first, length); + sycl::event event_xv = + gemv(q_, x_nd, vec_suf, buffer_batch, Float(1), v0, last_iter_deps); + event_xv.wait_and_throw(); // Without this line gemv does not work correctly + + auto* const buffer_ptr = buffer_batch.get_mutable_data(); + const auto* const hess_ptr = raw_hessian_.get_data() + first; + + auto fill_tmp_event = fill(q_, tmp_gpu_, Float(0), last_iter_deps); + + sycl::event event_dxv = q_.submit([&](sycl::handler& cgh) { + cgh.depends_on({ event_xv, fill_tmp_event }); + const auto range = make_range_1d(length); + auto sum_reduction = sycl::reduction(tmp_ptr, sycl::plus<>()); + cgh.parallel_for(range, sum_reduction, [=](sycl::id<1> idx, auto& sum_v0) { + buffer_ptr[idx] = buffer_ptr[idx] * hess_ptr[idx]; + sum_v0 += buffer_ptr[idx]; + }); + }); + + sycl::event event_xtdxv = + gemv(q_, x_nd.t(), buffer_batch, tmp_suf, Float(1), Float(0), { event_dxv }); + event_xtdxv.wait_and_throw(); // Without this line gemv does not work correctly + + sycl::event update_result_e = + element_wise(q_, 
sycl::plus<>(), out, tmp_ndview, out, { event_xtdxv }); + + last_iter_deps = { update_result_e }; + } + } + + if (comm_.get_rank_count() > 1) { + sycl::event::wait_and_throw(last_iter_deps); + { + ONEDAL_PROFILER_TASK(hessp_allreduce); + auto hessp_arr = dal::array::wrap(q_, out.get_mutable_data(), out.get_count()); + comm_.allreduce(hessp_arr).wait(); + } + } + + const Float regularization_factor = L2_; + + const auto kernel_regularization = [=](const Float a, const Float param) { + return a + param * regularization_factor; + }; + + auto add_regularization_event = + element_wise(q_, kernel_regularization, out_suf, vec_suf, out_suf, last_iter_deps); + return add_regularization_event; +} + +template +sycl::event logloss_hessian_product::compute_without_fit_intercept( + const ndview& vec, + ndview& out, + const event_vector& deps) { + ONEDAL_PROFILER_TASK(compute_hessp_without_fit_intercept, q_); + ONEDAL_ASSERT(vec.get_dimension(0) == p_); + ONEDAL_ASSERT(out.get_dimension(0) == p_); + + ndview buffer_view_ = buffer_; + ndview hess_view_ = raw_hessian_; + + sycl::event fill_out_event = fill(q_, out, Float(0), deps); + + event_vector last_iter_deps = { fill_out_event }; + + if (data_.get_kind() == dal::csr_table::kind()) { + sycl::event event_xv = gemv(q_, + transpose::nontrans, + *sp_handle_, + vec, + buffer_, + Float(1), + Float(0), + last_iter_deps); + event_xv.wait_and_throw(); // Without this line gemv does not work correctly + + constexpr sycl::multiplies kernel_mul{}; + auto event_dxv = + element_wise(q_, kernel_mul, buffer_view_, hess_view_, buffer_view_, { event_xv }); + + sycl::event event_xtdxv = gemv(q_, + transpose::trans, + *sp_handle_, + buffer_, + out, + Float(1), + Float(0), + { event_dxv }); + event_xtdxv.wait_and_throw(); // Without this line gemv does not work correctly + + last_iter_deps = { event_xtdxv }; + } + else { + const uniform_blocking blocking(n_, bsz_); + ndview tmp_ndview = tmp_gpu_.slice(0, p_); + row_accessor data_accessor(data_); + + for (std::int64_t b = 0; b < blocking.get_block_count(); ++b) { + const auto last = blocking.get_block_end_index(b); + const auto first = blocking.get_block_start_index(b); + const auto length = last - first; + ONEDAL_ASSERT(0l < length); + auto x_rows = data_accessor.pull(q_, { first, last }, sycl::usm::alloc::device); + auto x_nd = pr::ndarray::wrap(x_rows, { length, p_ }); + ndview buffer_batch = buffer_.slice(first, length); + ndview hess_batch = raw_hessian_.slice(first, length); + + sycl::event event_xv = + gemv(q_, x_nd, vec, buffer_batch, Float(1), Float(0), last_iter_deps); + event_xv.wait_and_throw(); // Without this line gemv does not work correctly + + constexpr sycl::multiplies kernel_mul{}; + auto event_dxv = + element_wise(q_, kernel_mul, buffer_batch, hess_batch, buffer_batch, { event_xv }); + + auto fill_tmp_event = fill(q_, tmp_ndview, Float(0), last_iter_deps); + + sycl::event event_xtdxv = gemv(q_, + x_nd.t(), + buffer_batch, + tmp_ndview, + Float(1), + Float(0), + { event_dxv, fill_tmp_event }); + event_xtdxv.wait_and_throw(); // Without this line gemv does not work correctly + + sycl::event update_grad_e = + element_wise(q_, sycl::plus<>(), out, tmp_ndview, out, { event_xtdxv }); + last_iter_deps = { update_grad_e }; + } + } + + if (comm_.get_rank_count() > 1) { + { + ONEDAL_PROFILER_TASK(hessp_allreduce); + auto hessp_arr = dal::array::wrap(q_, + out.get_mutable_data(), + out.get_count(), + last_iter_deps); + comm_.allreduce(hessp_arr).wait(); + } + } + + const Float regularization_factor = L2_; + + const 
auto kernel_regularization = [=](const Float a, const Float param) { + return a + param * regularization_factor; + }; + + auto add_regularization_event = + element_wise(q_, kernel_regularization, out, vec, out, last_iter_deps); + + return add_regularization_event; +} + +template +sycl::event logloss_hessian_product::operator()(const ndview& vec, + ndview& out, + const event_vector& deps) { + if (fit_intercept_) { + return compute_with_fit_intercept(vec, out, deps); + } + else { + return compute_without_fit_intercept(vec, out, deps); + } +} + +template +void logloss_function::reserve_memory() { + probabilities_ = ndarray::empty(q_, { n_ }, sycl::usm::alloc::device); + gradient_ = ndarray::empty(q_, { dimension_ }, sycl::usm::alloc::device); + buffer_ = ndarray::empty(q_, { p_ + 2 }, sycl::usm::alloc::device); + if (data_.get_kind() == dal::csr_table::kind()) { + sp_handle_.reset(new sparse_matrix_handle(q_)); + set_csr_data(q_, *sp_handle_, static_cast(data_)); + } +} + +template +logloss_function::logloss_function(sycl::queue& q, + const table& data, + const ndview& labels, + Float L2, + bool fit_intercept, + std::int64_t bsz) + : q_(q), + data_(data), + labels_(labels), + n_(data.get_row_count()), + p_(data.get_column_count()), + L2_(L2), + fit_intercept_(fit_intercept), + bsz_(bsz == -1l ? get_block_size(n_, p_) : bsz), + dimension_(fit_intercept ? p_ + 1 : p_), + hessp_(q, data, L2, fit_intercept, bsz_) { + ONEDAL_ASSERT(labels.get_dimension(0) == n_); + this->reserve_memory(); +} + +template +logloss_function::logloss_function(sycl::queue& q, + comm_t comm, + const table& data, + const ndview& labels, + Float L2, + bool fit_intercept, + std::int64_t bsz) + : q_(q), + comm_(comm), + data_(data), + labels_(labels), + n_(data.get_row_count()), + p_(data.get_column_count()), + L2_(L2), + fit_intercept_(fit_intercept), + bsz_(bsz == -1 ? get_block_size(n_, p_) : bsz), + dimension_(fit_intercept ? 
p_ + 1 : p_), + hessp_(q, comm, data, L2, fit_intercept, bsz_) { + ONEDAL_ASSERT(labels.get_dimension(0) == n_); + this->reserve_memory(); +} + +template +event_vector logloss_function::update_x(const ndview& x, + bool need_hessp, + const event_vector& deps) { + ONEDAL_PROFILER_TASK(logloss_function_update_weights, q_); + using dal::backend::operator+; + value_ = 0; + auto fill_event = fill(q_, gradient_, Float(0), deps); + ndview grad_ndview = gradient_; + ndview raw_hessian = hessp_.get_raw_hessian(); + ndview loss_batch = buffer_.slice(0, 1); + event_vector last_iter_e = { fill_event }; + constexpr Float zero(0); + + if (data_.get_kind() == dal::csr_table::kind()) { + auto prob_e = compute_probabilities_sparse(q_, + x, + *sp_handle_, + probabilities_, + fit_intercept_, + { fill_event }); + + auto fill_loss_e = fill(q_, loss_batch, zero, deps); + + sycl::event compute_e = compute_logloss_with_der_sparse(q_, + *sp_handle_, + labels_, + probabilities_, + loss_batch, + grad_ndview, + fit_intercept_, + { fill_loss_e, prob_e }); + + value_ = loss_batch.at_device(q_, 0, { compute_e }); + + last_iter_e = { compute_e }; + + if (need_hessp) { + auto hess_e = compute_raw_hessian(q_, probabilities_, raw_hessian, { prob_e }); + last_iter_e = last_iter_e + hess_e; + } + } + else { + const uniform_blocking blocking(n_, bsz_); + ndview grad_batch = buffer_.slice(1, dimension_); + + for (std::int64_t b = 0; b < blocking.get_block_count(); ++b) { + const auto first = blocking.get_block_start_index(b); + const auto last = blocking.get_block_end_index(b); + const std::int64_t cursize = last - first; + ONEDAL_ASSERT(0l < cursize); + + const auto data_rows = row_accessor(data_).pull(q_, + { first, last }, + sycl::usm::alloc::device); + const auto data_batch = ndarray::wrap(data_rows, { cursize, p_ }); + const auto labels_batch = labels_.get_slice(first, first + cursize); + auto prob_batch = probabilities_.slice(first, cursize); + sycl::event prob_e = + compute_probabilities(q_, x, data_batch, prob_batch, fit_intercept_, last_iter_e); + + auto fill_buffer_e = fill(q_, buffer_, zero, last_iter_e); + + sycl::event compute_e = compute_logloss_with_der(q_, + data_batch, + labels_batch, + prob_batch, + loss_batch, + grad_batch, + fit_intercept_, + { fill_buffer_e, prob_e }); + + sycl::event update_grad_e = element_wise(q_, + sycl::plus<>(), + grad_ndview, + grad_batch, + grad_ndview, + { compute_e }); + + value_ += loss_batch.at_device(q_, 0, { compute_e }); + + last_iter_e = { update_grad_e }; + + if (need_hessp) { + auto raw_hessian_batch = raw_hessian.get_slice(first, first + cursize); + auto hess_e = compute_raw_hessian(q_, prob_batch, raw_hessian_batch, { prob_e }); + last_iter_e = last_iter_e + hess_e; + } + + // TODO: Delete this wait_and_throw + // ensure that while event is running in the background data is not overwritten + wait_or_pass(last_iter_e).wait_and_throw(); + } + } + if (comm_.get_rank_count() > 1) { + { + ONEDAL_PROFILER_TASK(gradient_allreduce); + auto gradient_arr = dal::array::wrap(q_, + gradient_.get_mutable_data(), + gradient_.get_count(), + last_iter_e); + comm_.allreduce(gradient_arr).wait(); + } + { + ONEDAL_PROFILER_TASK(value_allreduce); + comm_.allreduce(value_).wait(); + } + } + + if (L2_ > 0) { + auto fill_loss_e = fill(q_, loss_batch, Float(0), { last_iter_e }); + auto loss_ptr = loss_batch.get_mutable_data(); + auto grad_ptr = gradient_.get_mutable_data(); + auto w_ptr = x.get_data(); + Float regularization_factor = L2_; + + auto regularization_e = q_.submit([&](sycl::handler& 
cgh) { + cgh.depends_on(last_iter_e + fill_loss_e); + const auto range = make_range_1d(p_); + const std::int64_t st_id = fit_intercept_; + auto sum_reduction = sycl::reduction(loss_ptr, sycl::plus<>()); + cgh.parallel_for(range, sum_reduction, [=](sycl::id<1> idx, auto& sum_v0) { + const Float param = w_ptr[st_id + idx]; + grad_ptr[st_id + idx] += regularization_factor * param; + sum_v0 += regularization_factor * param * param / 2; + }); + }); + + value_ += loss_batch.at_device(q_, 0, { regularization_e }); + + last_iter_e = { regularization_e }; + } + + return last_iter_e; +} + +template +Float logloss_function::get_value() { + return value_; +} +template +ndview& logloss_function::get_gradient() { + return gradient_; +} + +template +base_matrix_operator& logloss_function::get_hessian_product() { + return hessp_; +} + +#define INSTANTIATE_FUNCTORS(F) \ + template class logloss_hessian_product; \ + template class logloss_function; + +INSTANTIATE_FUNCTORS(float) +INSTANTIATE_FUNCTORS(double) + +} // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp index 39cae7db796..fabe919b34e 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function/test/fixture.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,39 +17,39 @@ #include #include "oneapi/dal/backend/primitives/objective_function/logloss.hpp" +#include "oneapi/dal/backend/primitives/objective_function/logloss_functors.hpp" #include "oneapi/dal/test/engine/common.hpp" #include "oneapi/dal/test/engine/fixtures.hpp" +#include "oneapi/dal/test/engine/csr_table_builder.hpp" #include "oneapi/dal/table/row_accessor.hpp" +#include "oneapi/dal/table/csr_accessor.hpp" #include "oneapi/dal/detail/debug.hpp" #include "oneapi/dal/backend/primitives/rng/rng_engine.hpp" namespace oneapi::dal::backend::primitives::test { -using oneapi::dal::detail::operator<<; - namespace te = dal::test::engine; -template -struct order_tag { - static constexpr ndorder value = order; +template +struct fit_intercept_tag { + static constexpr bool value = fit_intercept; }; -using c_order = order_tag; -using f_order = order_tag; +using use_fit_intercept = fit_intercept_tag; +using no_fit_intercept = fit_intercept_tag; + +using logloss_types = COMBINE_TYPES((float, double), (use_fit_intercept, no_fit_intercept)); + +#define IS_CLOSE(ftype, real, expected, rtol, atol) \ + REQUIRE(abs(real - expected) < atol); \ + REQUIRE(abs(real - expected) / std::max(std::abs(expected), (ftype)1.0) < rtol); template -class logloss_test : public te::float_algo_fixture { +class logloss_test : public te::float_algo_fixture> { public: - using float_t = Param; - - void check_val(const float_t real, - const float_t expected, - const float_t rtol, - const float_t atol) { - REQUIRE(abs(real - expected) < atol); - REQUIRE(abs(real - expected) / std::max(std::abs(expected), (float_t)1.0) < rtol); - } + using float_t = std::tuple_element_t<0, Param>; + bool fit_intercept_ = std::tuple_element_t<1, Param>::value; void generate_input(std::int64_t n = -1, std::int64_t p = -1) { if (n == -1 || p == -1) { @@ -60,7 
+60,6 @@ class logloss_test : public te::float_algo_fixture { this->n_ = n; this->p_ = p; } - const auto dataframe = GENERATE_DATAFRAME(te::dataframe_builder{ n_, p_ }.fill_uniform(-0.5, 0.5)); const auto parameters = @@ -77,6 +76,32 @@ class logloss_test : public te::float_algo_fixture { } } + void generate_sparse_input(std::int64_t n = -1, std::int64_t p = -1) { + if (n == -1 || p == -1) { + this->n_ = GENERATE(7, 827, 13, 216); + this->p_ = GENERATE(4, 17, 41, 256); + } + else { + this->n_ = n; + this->p_ = p; + } + + auto builder = te::csr_table_builder(n_, p_, 0.3, sparse_indexing::zero_based); + this->data_ = builder.build_csr_table(this->get_policy()); + this->dense_data_ = builder.build_dense_table(); + + const auto parameters = + GENERATE_DATAFRAME(te::dataframe_builder{ 1, p_ + 1 }.fill_uniform(-1, 1)); + this->params_ = parameters.get_table(this->get_homogen_table_id()); + this->labels_ = + ndarray::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host); + std::srand(2007 + n_); + auto* const ptr_lab = this->labels_.get_mutable_data(); + for (std::int64_t i = 0; i < n_; ++i) { + ptr_lab[i] = std::rand() % 2; + } + } + void run_test(const float_t L1 = 0, const float_t L2 = 0, bool fit_intercept = true, @@ -93,6 +118,57 @@ class logloss_test : public te::float_algo_fixture { SUCCEED(); } + void run_sparse_test(const float_t L2 = 0, bool fit_intercept = true) { + constexpr float_t rtol = sizeof(float_t) > 4 ? 1e-6 : 5e-4; + constexpr float_t atol = sizeof(float_t) > 4 ? 1e-6 : 1e-1; + + REQUIRE(this->data_.get_kind() == csr_table::kind()); + + auto data_array = row_accessor{ this->dense_data_ }.pull(this->get_queue()); + auto data_host = ndarray::wrap(data_array.get_data(), { n_, p_ }); + + std::int64_t dim = fit_intercept ? this->p_ + 1 : this->p_; + auto param_array = row_accessor{ this->params_ }.pull(this->get_queue()); + auto params_host = ndarray::wrap(param_array.get_data(), { dim }); + auto params_gpu = params_host.to_device(this->get_queue()); + auto labels_gpu = this->labels_.to_device(this->get_queue()); + + float_t gth_logloss = + naive_logloss(data_host, params_host, this->labels_, float_t(0), L2, fit_intercept); + + auto gth_probs = + ndarray::empty(this->get_queue(), { n_ }, sycl::usm::alloc::host); + naive_probabilities(data_host, params_host, this->labels_, gth_probs, fit_intercept); + + auto gth_gradient = + ndarray::empty(this->get_queue(), { dim }, sycl::usm::alloc::host); + naive_derivative(data_host, + gth_probs, + params_host, + this->labels_, + gth_gradient, + float_t(0), + L2, + fit_intercept); + + auto gth_hessian = ndarray::empty(this->get_queue(), + { p_ + 1, p_ + 1 }, + sycl::usm::alloc::host); + naive_hessian(data_host, gth_probs, gth_hessian, L2, fit_intercept); + + test_functors(data_, + labels_gpu, + params_gpu, + gth_gradient, + gth_hessian, + gth_logloss, + L2, + fit_intercept, + false, + rtol, + atol); + } + void test_gold_input(bool fit_intercept = true) { constexpr std::int64_t n = 5; constexpr std::int64_t p = 3; @@ -181,7 +257,7 @@ class logloss_test : public te::float_algo_fixture { logloss_reg_event.wait_and_throw(); const float_t val_logloss1 = out_logloss.to_host(this->get_queue(), {}).at(0); - check_val(val_logloss1, logloss, rtol, atol); + IS_CLOSE(float_t, val_logloss1, logloss, rtol, atol); auto fill_event = fill(this->get_queue(), out_logloss, float_t(0), {}); auto [out_derivative, out_der_e] = @@ -206,7 +282,7 @@ class logloss_test : public te::float_algo_fixture { auto out_derivative_host = 
out_derivative.to_host(this->get_queue()); const float_t val_logloss2 = out_logloss.to_host(this->get_queue(), {}).at(0); - check_val(val_logloss2, logloss, rtol, atol); + IS_CLOSE(float_t, val_logloss2, logloss, rtol, atol); auto [out_derivative2, out_der_e2] = ndarray::zeros(this->get_queue(), { dim }, sycl::usm::alloc::device); @@ -265,29 +341,17 @@ class logloss_test : public te::float_algo_fixture { atol); if (L1 == 0) { - std::int64_t bsz = -1; - if (batch_test) { - bsz = GENERATE(4, 8, 16, 20, 37, 512); - } - // logloss_function has different regularization so we need to multiply it by 2 to allign with other implementations - auto functor = logloss_function(this->get_queue(), - data_, - labels_gpu, - L2 * 2, - fit_intercept, - bsz); - auto set_point_event = functor.update_x(params_gpu, true, {}); - wait_or_pass(set_point_event).wait_and_throw(); - - check_val(logloss, functor.get_value(), rtol, atol); - auto grad_func = functor.get_gradient(); - auto grad_func_host = grad_func.to_host(this->get_queue()); - std::int64_t dim = fit_intercept ? p + 1 : p; - for (std::int64_t i = 0; i < dim; ++i) { - check_val(out_derivative_host.at(i), grad_func_host.at(i), rtol, atol); - } - base_matrix_operator& hessp = functor.get_hessian_product(); - test_hessian_product(hessian_host, hessp, fit_intercept, L2, rtol, atol); + test_functors(data_, + labels_gpu, + params_gpu, + out_derivative_host, + hessian_host, + logloss, + L2, + fit_intercept, + batch_test, + rtol, + atol); } } @@ -339,15 +403,16 @@ class logloss_test : public te::float_algo_fixture { return logloss; } - double naive_logloss(const ndview& data_host, - const ndview& params_host, - const ndview& labels_host, - const float_t L1, - const float_t L2, - bool fit_intercept) { + float_t naive_logloss(const ndview& data_host, + const ndview& params_host, + const ndview& labels_host, + float_t L1, + float_t L2, + bool fit_intercept) { const std::int64_t n = data_host.get_dimension(0); const std::int64_t p = data_host.get_dimension(1); + // We use double for gth computation to achieve better precision double logloss = 0; std::int64_t st = fit_intercept; for (std::int64_t i = 0; i < n; ++i) { @@ -358,7 +423,10 @@ class logloss_test : public te::float_algo_fixture { if (fit_intercept) { pred += (double)params_host.at(0); } - logloss += std::log(1 + std::exp(-(2 * labels_host.at(i) - 1) * pred)); + // We cast argument to float_t to ensure correct clipping + double prob = clip_prob(float_t(1.0) / (float_t)(1 + std::exp(-pred))); + logloss -= + labels_host.at(i) * std::log(prob) + (1 - labels_host.at(i)) * std::log(1 - prob); } for (std::int64_t i = 0; i < p; ++i) { logloss += L1 * abs(params_host.at(i + st)); @@ -376,14 +444,14 @@ class logloss_test : public te::float_algo_fixture { const std::int64_t p = data.get_dimension(1); std::int64_t st_ind = fit_intercept; for (std::int64_t i = 0; i < n; ++i) { - float_t pred = 0; + double pred = 0; for (std::int64_t j = 0; j < p; ++j) { pred += params.at(j + st_ind) * data.at(i, j); } if (fit_intercept) { pred += params.at(0); } - out_prob.at(i) = float_t(1) / (1 + std::exp(-pred)); + out_prob.at(i) = clip_prob((double)1 / (1 + std::exp(-pred))); } } @@ -470,7 +538,7 @@ class logloss_test : public te::float_algo_fixture { fit_intercept); for (std::int64_t i = 0; i < dim; ++i) { - check_val(out_derivative.at(i), derivative.at(i), rtol, atol); + IS_CLOSE(float_t, out_derivative.at(i), derivative.at(i), rtol, atol); } } @@ -489,7 +557,7 @@ class logloss_test : public te::float_algo_fixture { for 
(std::int64_t i = 0; i <= p; ++i) { for (std::int64_t j = 0; j <= p; ++j) { - check_val(out_hessian.at(i, j), hessian.at(i, j), rtol, atol); + IS_CLOSE(float_t, out_hessian.at(i, j), hessian.at(i, j), rtol, atol); } } } @@ -519,20 +587,62 @@ class logloss_test : public te::float_algo_fixture { auto out_vector_host = out_vector.to_host(this->get_queue()); const std::int64_t st = fit_intercept ? 0 : 1; + // We use double for gth computations to achieve better precision for (std::int64_t i = st; i < p + 1; ++i) { - float_t correct = 0; + double correct = 0; for (std::int64_t j = st; j < p + 1; ++j) { - correct += vec_host.at(j - st) * hessian_host.at(i, j); + correct += static_cast(vec_host.at(j - st)) * + static_cast(hessian_host.at(i, j)); } - check_val(out_vector_host.at(i - st), correct, rtol, atol); + IS_CLOSE(float_t, out_vector_host.at(i - st), (float_t)correct, rtol, atol); } } } + void test_functors(table& data, + ndview& labels_gpu, + ndview& params_gpu, + ndview& gth_grad, + ndview& gth_hessian, + float_t gth_logloss, + const float_t L2 = 0, + bool fit_intercept = true, + bool batch_test = false, + const float_t rtol = 1e-3, + const float_t atol = 1e-3) { + const std::int64_t p = gth_hessian.get_dimension(0) - 1; + std::int64_t bsz = -1; + if (batch_test) { + bsz = GENERATE(4, 8, 16, 20, 37, 512); + } + // logloss_function has different regularization so we need to multiply it by 2 to align with other implementations + + auto functor = logloss_function(this->get_queue(), + data, + labels_gpu, + L2 * 2, + fit_intercept, + bsz); + auto set_point_event = functor.update_x(params_gpu, true, {}); + wait_or_pass(set_point_event).wait_and_throw(); + + IS_CLOSE(float_t, gth_logloss, functor.get_value(), rtol, atol); + auto grad_func = functor.get_gradient(); + auto grad_func_host = grad_func.to_host(this->get_queue()); + std::int64_t dim = fit_intercept ? p + 1 : p; + + for (std::int64_t i = 0; i < dim; ++i) { + IS_CLOSE(float_t, gth_grad.at(i), grad_func_host.at(i), rtol, atol); + } + base_matrix_operator& hessp = functor.get_hessian_product(); + test_hessian_product(gth_hessian, hessp, fit_intercept, L2, rtol, atol); + } + protected: std::int64_t n_; std::int64_t p_; table data_; + table dense_data_; table params_; ndarray labels_; }; diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_dpc.cpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_dpc.cpp index b0ba99ac85a..1bd51dfc14b 100644 --- a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_dpc.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,86 +18,45 @@ namespace oneapi::dal::backend::primitives::test { -TEMPLATE_TEST_M(logloss_test, "gold input test - double", "[logloss]", double) { +TEMPLATE_LIST_TEST_M(logloss_test, "gold input test", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); - this->test_gold_input(); + this->test_gold_input(this->fit_intercept_); } -TEMPLATE_TEST_M(logloss_test, "gold input test - double - no fit_intercept", "[logloss]", double) { - SKIP_IF(this->not_float64_friendly()); - SKIP_IF(this->get_policy().is_cpu()); - this->test_gold_input(false); -} - -TEMPLATE_TEST_M(logloss_test, "gold input test - float", "[logloss]", float) { - SKIP_IF(this->get_policy().is_cpu()); - this->test_gold_input(); -} - -TEMPLATE_TEST_M(logloss_test, "gold input test - float - no fit intercept", "[logloss]", float) { - SKIP_IF(this->get_policy().is_cpu()); - this->test_gold_input(false); -} - -TEMPLATE_TEST_M(logloss_test, "test random input - double without L1", "[logloss]", double) { +TEMPLATE_LIST_TEST_M(logloss_test, "test random input without L1", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); this->generate_input(); - this->run_test(0.0, 1.3); + this->run_test(0.0f, 1.3f, this->fit_intercept_); } -TEMPLATE_TEST_M(logloss_test, - "test random input - double without L1 - no fit intercept", - "[logloss]", - double) { +TEMPLATE_LIST_TEST_M(logloss_test, "batch test", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); this->generate_input(); - this->run_test(0.0, 1.3, false); + this->run_test(0.0f, 1.3f, this->fit_intercept_, true); } -TEMPLATE_TEST_M(logloss_test, "batch test - double", "[logloss]", double) { +TEMPLATE_LIST_TEST_M(logloss_test, "test random input with L1", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); this->generate_input(); - this->run_test(0.0, 1.3, true, true); + this->run_test(0.4f, 1.3f, this->fit_intercept_); } -TEMPLATE_TEST_M(logloss_test, "batch test - double - no fit intercept", "[logloss]", double) { +TEMPLATE_LIST_TEST_M(logloss_test, "sparse data test without L2", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); - this->generate_input(); - this->run_test(0.0, 1.3, false, true); + this->generate_sparse_input(); + this->run_sparse_test(0.0f, this->fit_intercept_); } -TEMPLATE_TEST_M(logloss_test, "test random input - double with L1", "[logloss]", double) { +TEMPLATE_LIST_TEST_M(logloss_test, "sparse data test", "[logloss]", logloss_types) { SKIP_IF(this->not_float64_friendly()); SKIP_IF(this->get_policy().is_cpu()); - this->generate_input(); - this->run_test(0.4, 1.3); -} - -TEMPLATE_TEST_M(logloss_test, - "test random input - double with L1 -- no fit intercept", - "[logloss]", - double) { - SKIP_IF(this->not_float64_friendly()); - SKIP_IF(this->get_policy().is_cpu()); - this->generate_input(); - this->run_test(0.4, 1.3, false); -} - -TEMPLATE_TEST_M(logloss_test, "test random input - float", "[logloss]", float) { - SKIP_IF(this->get_policy().is_cpu()); - this->generate_input(); - this->run_test(0.4, 1.3); -} - -TEMPLATE_TEST_M(logloss_test, "test random input - float - no fit intercept", "[logloss]", float) { - SKIP_IF(this->get_policy().is_cpu()); - this->generate_input(); - this->run_test(0.4, 1.3, false); + this->generate_sparse_input(); + this->run_sparse_test(1.3f, this->fit_intercept_); } } 
// namespace oneapi::dal::backend::primitives::test
diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_perf_dpc.cpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_perf_dpc.cpp
index cdb3b7ddd5c..6d76198ed78 100644
--- a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_perf_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_perf_dpc.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023 Intel Corporation
+* Copyright contributors to the oneDAL project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_spmd_dpc.cpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_spmd_dpc.cpp
index 203e406736f..d86d583e354 100644
--- a/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_spmd_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/objective_function/test/logloss_spmd_dpc.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023 Intel Corporation
+* Copyright contributors to the oneDAL project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -18,15 +18,12 @@
 namespace oneapi::dal::backend::primitives::test {
 
-TEMPLATE_TEST_M(logloss_spmd_test, "spmd test - double", "[logloss spmd]", double) {
-    SKIP_IF(this->not_float64_friendly());
-    SKIP_IF(this->get_policy().is_cpu());
-    this->generate_input();
-    this->run_spmd(-1, 1.0, true);
-    this->run_spmd(-1, 1.0, false);
-}
+using logloss_spmd_types = COMBINE_TYPES((float, double), (use_fit_intercept));
 
-TEMPLATE_TEST_M(logloss_spmd_test, "spmd test - float", "[logloss spmd]", float) {
+TEMPLATE_LIST_TEST_M(logloss_spmd_test,
+                     "spmd test",
+                     "[logloss spmd]",
+                     logloss_spmd_types) {
     SKIP_IF(this->not_float64_friendly());
     SKIP_IF(this->get_policy().is_cpu());
     this->generate_input();
diff --git a/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp b/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp
index a9369ac619e..e902dd452e1 100644
--- a/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp
+++ b/cpp/oneapi/dal/backend/primitives/objective_function/test/spmd_fixture.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2023 Intel Corporation
+* Copyright contributors to the oneDAL project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -26,7 +26,9 @@ namespace de = dal::detail;
 template
 class logloss_spmd_test : public logloss_test {
 public:
-    using float_t = Param;
+    using float_t = std::tuple_element_t<0, Param>;
+    bool fit_intercept_ = std::tuple_element_t<1, Param>::value;
+    // using float_t = Param;
     using comm_t = te::thread_communicator;
 
     std::vector>>
@@ -54,7 +56,7 @@ class logloss_spmd_test : public logloss_test {
         return result;
     }
 
-    std::vector> get_functors(comm_t comm,
+    std::vector> get_functors(comm_t& comm,
                               std::int64_t thr_cnt,
                               table data,
                               ndview& labels,
@@ -138,12 +140,12 @@ class logloss_spmd_test : public logloss_test {
         this->naive_probabilities(data_host, params_host, this->labels_, probs_gth, fit_intercept);
 
-        double logloss_gth = this->naive_logloss(data_host,
-                                                 params_host,
-                                                 this->labels_,
-                                                 float_t(0.0),
-                                                 float_t(L2),
-                                                 fit_intercept);
+        float_t logloss_gth = this->naive_logloss(data_host,
+                                                  params_host,
+                                                  this->labels_,
+                                                  float_t(0.0),
+                                                  float_t(L2),
+                                                  fit_intercept);
         this->naive_derivative(data_host,
                                probs_gth,
                                params_host,
@@ -153,13 +155,13 @@ class logloss_spmd_test : public logloss_test {
                                float_t(L2),
                                fit_intercept);
         for (std::int64_t k = 0; k < thr_cnt; ++k) {
-            this->check_val(std::get<0>(results[k]), logloss_gth, rtol, atol);
+            IS_CLOSE(float_t, std::get<0>(results[k]), logloss_gth, rtol, atol);
         }
 
         for (int k = 0; k < thr_cnt; ++k) {
             auto grad_host = std::get<1>(results[k]).to_host(this->get_queue());
             for (int j = 0; j < dim; ++j) {
-                this->check_val(grad_host.at(j), grad_gth.at(j), rtol, atol);
+                IS_CLOSE(float_t, grad_host.at(j), grad_gth.at(j), rtol, atol);
             }
         }
 
@@ -179,7 +181,7 @@ class logloss_spmd_test : public logloss_test {
             for (std::int64_t k = 0; k < thr_cnt; ++k) {
                 auto hessp_host = std::get<2>(results[k])[ij].to_host(this->get_queue());
                 for (std::int64_t j = 0; j < dim; ++j) {
-                    this->check_val(hessp_host.at(j), hessp_gth.at(j), rtol, atol);
+                    IS_CLOSE(float_t, hessp_host.at(j), hessp_gth.at(j), rtol, atol);
                 }
             }
         }
diff --git a/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp b/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp
index 914bda60f1f..62dd0140e28 100644
--- a/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/optimizers/test/newton_cg_dpc.cpp
@@ -25,7 +25,7 @@
 #include "oneapi/dal/backend/primitives/rng/rng_engine.hpp"
 #include
 
-#include "oneapi/dal/backend/primitives/objective_function/logloss.hpp"
+#include "oneapi/dal/backend/primitives/objective_function.hpp"
 
 namespace oneapi::dal::backend::primitives::test {
 
diff --git a/cpp/oneapi/dal/backend/primitives/sparse_blas/handle.hpp b/cpp/oneapi/dal/backend/primitives/sparse_blas/handle.hpp
index 69ede40e9ff..86de2e8a4af 100644
--- a/cpp/oneapi/dal/backend/primitives/sparse_blas/handle.hpp
+++ b/cpp/oneapi/dal/backend/primitives/sparse_blas/handle.hpp
@@ -27,13 +27,13 @@ namespace oneapi::dal::backend::primitives {
 
 /// Handle that is used to store the information about the data in sparse format
 class sparse_matrix_handle {
-    friend detail::pimpl_accessor;
+    friend dal::detail::pimpl_accessor;
 
 public:
     sparse_matrix_handle(sycl::queue& queue);
 
 private:
-    detail::pimpl impl_;
+    dal::detail::pimpl impl_;
 };
 
 #endif // ONEDAL_DATA_PARALLEL
diff --git a/cpp/oneapi/dal/backend/primitives/sparse_blas/test/handle_dpc.cpp b/cpp/oneapi/dal/backend/primitives/sparse_blas/test/handle_dpc.cpp
new file mode 100644
index 00000000000..f9f32a54e89
--- /dev/null
+++ b/cpp/oneapi/dal/backend/primitives/sparse_blas/test/handle_dpc.cpp
@@ -0,0
+1,36 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/backend/primitives/sparse_blas.hpp" +#include "oneapi/dal/test/engine/common.hpp" + +namespace oneapi::dal::backend::primitives::test { + +TEST("can construct sparse matrix handle") { + DECLARE_TEST_POLICY(policy); + // DPC++ Sparse BLAS from micro MKL libs is not supported on CPU + SKIP_IF(policy.is_cpu()); + + try { + sparse_matrix_handle h(policy.get_queue()); + } + catch (...) { + REQUIRE(false); + } + SUCCEED(); +} + +} // namespace oneapi::dal::backend::primitives::test diff --git a/cpp/oneapi/dal/test/engine/csr_table_builder.hpp b/cpp/oneapi/dal/test/engine/csr_table_builder.hpp index 2e4656f388c..6c542fb106b 100644 --- a/cpp/oneapi/dal/test/engine/csr_table_builder.hpp +++ b/cpp/oneapi/dal/test/engine/csr_table_builder.hpp @@ -19,7 +19,8 @@ namespace oneapi::dal::test::engine { -csr_table copy_data_to_csr(const dal::array& data, +template +csr_table copy_data_to_csr(const dal::array& data, const dal::array& column_indices, const dal::array& row_offsets, const sparse_indexing indexing, @@ -29,7 +30,7 @@ csr_table copy_data_to_csr(const dal::array& data, auto data_ptr = data.get_data(); auto col_indices_ptr = column_indices.get_data(); auto nnz_count = row_offs_ptr[row_count] - row_offs_ptr[0]; - const auto copied_data = dal::array::empty(nnz_count); + const auto copied_data = dal::array::empty(nnz_count); const auto copied_col_indices = dal::array::empty(nnz_count); const auto copied_row_offsets = dal::array::empty(row_count + 1); @@ -51,8 +52,9 @@ csr_table copy_data_to_csr(const dal::array& data, } #ifdef ONEDAL_DATA_PARALLEL +template csr_table copy_data_to_csr(sycl::queue& queue, - const dal::array& data, + const dal::array& data, const dal::array& column_indices, const dal::array& row_offsets, const sparse_indexing indexing, @@ -60,12 +62,12 @@ csr_table copy_data_to_csr(sycl::queue& queue, const std::int64_t row_count) { auto row_offs_ptr = row_offsets.get_data(); auto nnz_count = row_offs_ptr[row_count] - row_offs_ptr[0]; - const auto copied_data = dal::array::empty(queue, nnz_count, sycl::usm::alloc::device); + const auto copied_data = dal::array::empty(queue, nnz_count, sycl::usm::alloc::device); const auto copied_col_indices = dal::array::empty(queue, nnz_count, sycl::usm::alloc::device); const auto copied_row_offsets = dal::array::empty(queue, row_count + 1, sycl::usm::alloc::device); - auto data_event = queue.copy(data.get_data(), copied_data.get_mutable_data(), nnz_count); + auto data_event = queue.copy(data.get_data(), copied_data.get_mutable_data(), nnz_count); auto col_indices_event = queue.copy(column_indices.get_data(), copied_col_indices.get_mutable_data(), nnz_count); @@ -84,8 +86,8 @@ csr_table copy_data_to_csr(sycl::queue& queue, /** * Generates random CSR table based on 
inputs */ +template struct csr_table_builder { - using Float = float; std::int64_t row_count_, column_count_; float nonzero_fraction_; sparse_indexing indexing_; From 8a44abcacdf56215a2249f1e8b4422004c25c181 Mon Sep 17 00:00:00 2001 From: Maria Petrova Date: Tue, 30 Apr 2024 13:41:45 +0200 Subject: [PATCH 02/65] Fix a misprint in Quality Metrics for Multi-class docs (#2701) --- .../quality_metrics/default/for-multi-class-classification.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/daal/algorithms/quality_metrics/default/for-multi-class-classification.rst b/docs/source/daal/algorithms/quality_metrics/default/for-multi-class-classification.rst index 6b3177557a7..58d5157d3c4 100644 --- a/docs/source/daal/algorithms/quality_metrics/default/for-multi-class-classification.rst +++ b/docs/source/daal/algorithms/quality_metrics/default/for-multi-class-classification.rst @@ -47,7 +47,7 @@ Further definitions use the following notations: * - :math:`\text{fp}_i` - false positive - the number of observations that were incorrectly assigned to the class :math:`C_1` - * - :math:`\text{fn_i}` + * - :math:`\text{fn}_i` - false negative - the number of observations that were not recognized as belonging to the class :math:`C_1` From 693898ea0cf1536fdd9401ed3a20d306516e43ca Mon Sep 17 00:00:00 2001 From: Aleksandr Solovev Date: Tue, 30 Apr 2024 14:50:25 +0200 Subject: [PATCH 03/65] fix: online distr PCA samples (#2754) --- samples/oneapi/dpc/ccl/sources/pca_online_distr_ccl.cpp | 8 ++++---- samples/oneapi/dpc/mpi/sources/pca_online_distr_mpi.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samples/oneapi/dpc/ccl/sources/pca_online_distr_ccl.cpp b/samples/oneapi/dpc/ccl/sources/pca_online_distr_ccl.cpp index c33c899581b..68046122b8d 100644 --- a/samples/oneapi/dpc/ccl/sources/pca_online_distr_ccl.cpp +++ b/samples/oneapi/dpc/ccl/sources/pca_online_distr_ccl.cpp @@ -33,7 +33,7 @@ namespace dal = oneapi::dal; void run(sycl::queue& queue) { const auto data_file_name = get_data_path("data/pca_normalized.csv"); - + const std::int64_t nBlocks = 10; const auto data = dal::read(queue, dal::csv::data_source{ data_file_name }); const auto pca_desc = dal::pca::descriptor{}; @@ -45,12 +45,12 @@ void run(sycl::queue& queue) { auto input_vec = split_table_by_rows(queue, data, rank_count); auto input_blocks = split_table_by_rows(queue, input_vec[rank_id], nBlocks); - dal::covariance::partial_train_result<> partial_result; + dal::pca::partial_train_result<> partial_result; for (std::int64_t i = 0; i < nBlocks; i++) { - partial_result = dal::partial_train(queue, cov_desc, partial_result, input_blocks[i]); + partial_result = dal::partial_train(queue, pca_desc, partial_result, input_blocks[i]); } - const auto result = dal::preview::finalize_train(comm, cov_desc, partial_result); + const auto result = dal::preview::finalize_train(comm, pca_desc, partial_result); if (comm.get_rank() == 0) { std::cout << "Eigenvectors:\n" << result.get_eigenvectors() << std::endl; diff --git a/samples/oneapi/dpc/mpi/sources/pca_online_distr_mpi.cpp b/samples/oneapi/dpc/mpi/sources/pca_online_distr_mpi.cpp index 12d37b0a69c..2b94c5f9d97 100644 --- a/samples/oneapi/dpc/mpi/sources/pca_online_distr_mpi.cpp +++ b/samples/oneapi/dpc/mpi/sources/pca_online_distr_mpi.cpp @@ -33,7 +33,7 @@ namespace dal = oneapi::dal; void run(sycl::queue& queue) { const auto data_file_name = get_data_path("data/pca_normalized.csv"); - + const std::int64_t nBlocks = 10; const auto data = dal::read(queue, 
dal::csv::data_source{ data_file_name }); const auto pca_desc = dal::pca::descriptor{}; @@ -45,12 +45,12 @@ void run(sycl::queue& queue) { auto input_vec = split_table_by_rows(queue, data, rank_count); auto input_blocks = split_table_by_rows(queue, input_vec[rank_id], nBlocks); - dal::covariance::partial_train_result<> partial_result; + dal::pca::partial_train_result<> partial_result; for (std::int64_t i = 0; i < nBlocks; i++) { - partial_result = dal::partial_train(queue, cov_desc, partial_result, input_blocks[i]); + partial_result = dal::partial_train(queue, pca_desc, partial_result, input_blocks[i]); } - const auto result = dal::preview::finalize_train(comm, cov_desc, partial_result); + const auto result = dal::preview::finalize_train(comm, pca_desc, partial_result); if (comm.get_rank() == 0) { std::cout << "Eigenvectors:\n" << result.get_eigenvectors() << std::endl; From 8e7fc7ac6a48f80b2fa31100102e1a4b81db6848 Mon Sep 17 00:00:00 2001 From: ethanglaser <42726565+ethanglaser@users.noreply.github.com> Date: Tue, 30 Apr 2024 06:23:17 -0700 Subject: [PATCH 04/65] FIX: distributed knn double sqrt bug (#2733) * FIX: distributed knn double sqrt bug * fixed * Update cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc.hpp * Update cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc.hpp --- .../knn/backend/gpu/infer_kernel_impl_dpc.hpp | 16 ++++++++-------- .../backend/gpu/infer_kernel_impl_dpc_distr.hpp | 16 ++-------------- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc.hpp b/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc.hpp index 9a31ef369ae..1ba10bf8737 100644 --- a/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc.hpp +++ b/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc.hpp @@ -286,12 +286,12 @@ class knn_callback { const auto& [first, last] = bnds; ONEDAL_ASSERT(last > first); - auto& queue = this->queue_; bk::event_vector ndeps{ deps.cbegin(), deps.cend() }; - auto sq_event = copy_with_sqrt(queue, inp_dts, inp_dts, deps); - if (this->compute_sqrt_) - ndeps.push_back(sq_event); + if (this->compute_sqrt_) { + auto sqrt_event = copy_with_sqrt(this->queue_, inp_dts, inp_dts, deps); + ndeps.push_back(sqrt_event); + } auto out_rps = this->responses_.get_slice(first, last); ONEDAL_ASSERT((last - first) == out_rps.get_count()); @@ -310,12 +310,12 @@ class knn_callback { const auto& [first, last] = bnds; ONEDAL_ASSERT(last > first); - auto& queue = this->queue_; bk::event_vector ndeps{ deps.cbegin(), deps.cend() }; - auto sq_event = copy_with_sqrt(queue, inp_dts, inp_dts, deps); - if (this->compute_sqrt_) - ndeps.push_back(sq_event); + if (this->compute_sqrt_) { + auto sqrt_event = copy_with_sqrt(this->queue_, inp_dts, inp_dts, deps); + ndeps.push_back(sqrt_event); + } auto out_rps = this->responses_.get_slice(first, last); ONEDAL_ASSERT((last - first) == out_rps.get_count()); diff --git a/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc_distr.hpp b/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc_distr.hpp index e67d555616a..daf3caa9187 100644 --- a/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc_distr.hpp +++ b/cpp/oneapi/dal/algo/knn/backend/gpu/infer_kernel_impl_dpc_distr.hpp @@ -347,16 +347,10 @@ class knn_callback_distr { const auto& [first, last] = bnds; ONEDAL_ASSERT(last > first); - auto& queue = this->queue_; - - bk::event_vector ndeps{ deps.cbegin(), deps.cend() }; - auto sq_event = copy_with_sqrt(queue, inp_dts, inp_dts, deps); - if 
(this->compute_sqrt_) - ndeps.push_back(sq_event); auto out_rps = this->responses_.get_slice(first, last); ONEDAL_ASSERT((last - first) == out_rps.get_count()); - return (*(this->distance_voting_))(tmp_rps, inp_dts, out_rps, ndeps); + return (*(this->distance_voting_))(tmp_rps, inp_dts, out_rps, deps); } template > @@ -371,16 +365,10 @@ class knn_callback_distr { const auto& [first, last] = bnds; ONEDAL_ASSERT(last > first); - auto& queue = this->queue_; - - bk::event_vector ndeps{ deps.cbegin(), deps.cend() }; - auto sq_event = copy_with_sqrt(queue, inp_dts, inp_dts, deps); - if (this->compute_sqrt_) - ndeps.push_back(sq_event); auto out_rps = this->responses_.get_slice(first, last); ONEDAL_ASSERT((last - first) == out_rps.get_count()); - return (*(this->distance_regression_))(tmp_rps, inp_dts, out_rps, ndeps); + return (*(this->distance_regression_))(tmp_rps, inp_dts, out_rps, deps); } sycl::event output_responses(const std::pair& bnds, From e1f633db952601b738d7b6d27828848408dd52c3 Mon Sep 17 00:00:00 2001 From: Aleksei Khomenko Date: Wed, 1 May 2024 10:29:18 +0200 Subject: [PATCH 05/65] chore: add OpenSSF Scorecard GitHub Action (#2753) --- .github/workflows/openssf-scorecard.yml | 37 +++++++++++++++++++++++++ README.md | 2 +- 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/openssf-scorecard.yml diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml new file mode 100644 index 00000000000..b02da1d0610 --- /dev/null +++ b/.github/workflows/openssf-scorecard.yml @@ -0,0 +1,37 @@ +name: OpenSSF Scorecard +on: + # For Branch-Protection check. Only the default branch is supported. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection + branch_protection_rule: + # To guarantee Maintained check is occasionally updated. See + # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained + schedule: + - cron: '0 21 * * 5' + push: + branches: [ "main" ] + +# Declare default permissions as read only. +permissions: read-all + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-latest + permissions: + # Needed to upload the results to code-scanning dashboard. + security-events: write + # Needed to publish results and get a badge (see publish_results below). 
+      id-token: write
+
+    steps:
+      - name: "Checkout code"
+        uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3
+        with:
+          persist-credentials: false
+
+      - name: "Run analysis"
+        uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1
+        with:
+          results_file: results.sarif
+          results_format: sarif
+          publish_results: true
diff --git a/README.md b/README.md
index e90f3028d47..25de32bd284 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@
 [Installation](#installation)   |   [Documentation](#documentation)   |   [Support](#support)   |   [Examples](#examples)   |   [How to Contribute](CONTRIBUTING.md)    
 
-[![Build Status](https://dev.azure.com/daal/DAAL/_apis/build/status/oneapi-src.oneDAL?branchName=main)](https://dev.azure.com/daal/DAAL/_build/latest?definitionId=5&branchName=main) [![License](https://img.shields.io/github/license/oneapi-src/oneDAL.svg)](https://github.com/oneapi-src/oneDAL/blob/main/LICENSE) [![Join the community on GitHub Discussions](https://badgen.net/badge/join%20the%20discussion/on%20github/black?icon=github)](https://github.com/oneapi-src/oneDAL/discussions)
+[![Build Status](https://dev.azure.com/daal/DAAL/_apis/build/status/oneapi-src.oneDAL?branchName=main)](https://dev.azure.com/daal/DAAL/_build/latest?definitionId=5&branchName=main) [![License](https://img.shields.io/github/license/oneapi-src/oneDAL.svg)](https://github.com/oneapi-src/oneDAL/blob/main/LICENSE) [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8859/badge)](https://www.bestpractices.dev/projects/8859) [![Join the community on GitHub Discussions](https://badgen.net/badge/join%20the%20discussion/on%20github/black?icon=github)](https://github.com/oneapi-src/oneDAL/discussions)
 
 oneAPI Data Analytics Library (oneDAL) is a powerful machine learning library that helps you accelerate big data analysis at all stages: **preprocessing**, **transformation**, **analysis**, **modeling**, **validation**, and **decision making**.

From 38d1ebf32130ad3a56442894885d2f36331a628d Mon Sep 17 00:00:00 2001
From: Keeran Rothenfusser <141222236+keeranroth@users.noreply.github.com>
Date: Thu, 2 May 2024 18:47:41 +0200
Subject: [PATCH 06/65] CI fixes in OpenBLAS builds (#2762)

The riscv64 and x86_64 OpenBLAS builds have been failing. For the riscv64 build, the location of the qemu debian package changed. We update the URL from which it is fetched and make sure that failing to obtain the right QEMU causes an error in CI.

For the x86_64 build, illegal instructions are being reported, which I can't reproduce locally. My intuition is that there is something wrong with the cached build, so we update the cache key for the build. The naming in the key now matches that of the aarch64 and riscv64 builds.
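In shell terms, the fail-fast behaviour comes from the errexit and pipefail options. A minimal sketch of the pattern (variable names here are illustrative, not the exact helper from the script):

    set -eo pipefail                 # abort on any failing command, including inside pipelines
    wget "${qemu_package_url}"       # a download failure (e.g. an HTTP 404) now fails the CI job
    sudo dpkg -i "${qemu_package}"   # so does a broken or missing package
    set +eo pipefail                 # restore the default behaviour for the rest of the script

The actual change to the install helper follows in the diff.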
--- .ci/env/apt.sh | 6 ++++-- .ci/pipeline/ci.yml | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.ci/env/apt.sh b/.ci/env/apt.sh index 4b8c2edb80c..a9aefe5ecae 100755 --- a/.ci/env/apt.sh +++ b/.ci/env/apt.sh @@ -63,10 +63,12 @@ function install_qemu_emulation_apt { } function install_qemu_emulation_deb { - qemu_deb=qemu-user-static_8.2.2+ds-2+b1_amd64.deb - wget http://ftp.de.debian.org/debian/pool/main/q/qemu/${qemu_deb} + qemu_deb=qemu-user-static_8.2.3+ds-2_amd64.deb + set -eo pipefail + wget http://ftp.debian.org/debian/pool/main/q/qemu/${qemu_deb} sudo dpkg -i ${qemu_deb} sudo systemctl restart systemd-binfmt.service + set +eo pipefail } function install_llvm_version { diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index 9b33760202d..02fd1c90f87 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -363,7 +363,7 @@ jobs: displayName: 'System info' - task: Cache@2 inputs: - key: '"gnu" | "x86_64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)"' + key: '"gcc" | "x86_64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)"' path: $(OPENBLAS_CACHE_DIR) cacheHitVar: OPENBLAS_RESTORED - script: | From 764c1ba213a2f83340c33f1ccb2f6a71e9f7ea63 Mon Sep 17 00:00:00 2001 From: Mariia Vtiurina Date: Mon, 6 May 2024 10:58:38 -0400 Subject: [PATCH 07/65] update version to 2024.5.0 (#2730) --- deploy/pkg-config/pkg-config.tpl | 2 +- dev/bazel/config/config.bzl | 2 +- docs/doxygen/doxygen_conf_cpp.txt | 2 +- docs/doxygen/doxygen_conf_cpp_examples.txt | 2 +- docs/doxygen/doxygen_conf_cpp_web.txt | 2 +- makefile.ver | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/deploy/pkg-config/pkg-config.tpl b/deploy/pkg-config/pkg-config.tpl index 520bde0a9bf..ad6dfefe07c 100755 --- a/deploy/pkg-config/pkg-config.tpl +++ b/deploy/pkg-config/pkg-config.tpl @@ -22,7 +22,7 @@ includedir=${{prefix}}/include #info Name: oneDAL Description: Intel(R) oneAPI Data Analytics Library -Version: 2024.4 +Version: 2024.5 URL: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onedal.html #Link line Libs: {libs} diff --git a/dev/bazel/config/config.bzl b/dev/bazel/config/config.bzl index 658e9c0704a..e096130f837 100644 --- a/dev/bazel/config/config.bzl +++ b/dev/bazel/config/config.bzl @@ -210,7 +210,7 @@ def _declare_onedal_config_impl(repo_ctx): substitutions = { "%{auto_cpu}": auto_cpu, "%{version_major}": "2024", - "%{version_minor}": "4", + "%{version_minor}": "5", "%{version_update}": "0", "%{version_build}": utils.datestamp(repo_ctx), "%{version_buildrev}": "work", diff --git a/docs/doxygen/doxygen_conf_cpp.txt b/docs/doxygen/doxygen_conf_cpp.txt index 9b71b554edc..e3851064cfa 100644 --- a/docs/doxygen/doxygen_conf_cpp.txt +++ b/docs/doxygen/doxygen_conf_cpp.txt @@ -38,7 +38,7 @@ PROJECT_NAME = "C++ API Reference for Intel(R) oneAPI Data Analytics L # could be handy for archiving the generated documentation or if some version # control system is used. 
-PROJECT_NUMBER = "2024.4" +PROJECT_NUMBER = "2024.5" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/docs/doxygen/doxygen_conf_cpp_examples.txt b/docs/doxygen/doxygen_conf_cpp_examples.txt index 5eb3161865b..124e9c67e91 100644 --- a/docs/doxygen/doxygen_conf_cpp_examples.txt +++ b/docs/doxygen/doxygen_conf_cpp_examples.txt @@ -38,7 +38,7 @@ PROJECT_NAME = "C++ API Reference for Intel(R) oneAPI Data Analytics L # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "2024.4" +PROJECT_NUMBER = "2024.5" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/docs/doxygen/doxygen_conf_cpp_web.txt b/docs/doxygen/doxygen_conf_cpp_web.txt index 7e06b3c224f..24f72f030aa 100644 --- a/docs/doxygen/doxygen_conf_cpp_web.txt +++ b/docs/doxygen/doxygen_conf_cpp_web.txt @@ -38,7 +38,7 @@ PROJECT_NAME = "C++ API Reference for Intel(R) oneAPI Data Analytics L # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "2024.4" +PROJECT_NUMBER = "2024.5" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/makefile.ver b/makefile.ver index 45b346d284f..38352e3d048 100644 --- a/makefile.ver +++ b/makefile.ver @@ -15,7 +15,7 @@ #=============================================================================== MAJOR = 2024 -MINOR = 4 +MINOR = 5 UPDATE = 0 BUILD = $(shell date +'%Y%m%d') STATUS = P From 21bc18334105dc8cb6dabe05f0d92b2e5271f4f2 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Tue, 7 May 2024 10:20:16 +0200 Subject: [PATCH 08/65] [enh] add functors for infinite and NaN checking in DAL (#2670) * Update functors.hpp * Update functors.hpp * clang-formatting * switch to logical_or * save some CI time * fix math in isinf functor * Update functors.hpp * Apply suggestions from code review Co-authored-by: Victoriya Fedotova * Update functors.hpp * Update functors.hpp * Follow type rules * add isinfornan, logical_or and isinf to testing * Update functors.hpp * Update functors.hpp * add instantiations| * formatting * isnan -> isinf * remove duplication * add missing INSTANTIATE_FLOAT calls * broken tests * fix naming issues * code factor doesn't like me * float -> float_t * clang-formatting and making consistent * fix error in tests * fixes for bazel testing * formatting * fixing issues in bazel testing * switch to this * clang-format * spelling error * fix issues with available testing * get_data? * infinity -> infinity() * remove get_data * another attempt * fix testing issues * clang format * naming update * using GENERATE * mistake ifnan -> infnan * ifnan -> infnan * add infval/maxval to CAPTURE * attempts at fixing bazel test failures * find out which place * fix ternary argument * is override_init misnamed?? 
* return to stride
* test modification
* use fill_diag due to striding
* accommodate weirdness in the dataframe generation
* float change
* checking cw
* another try
* updates
* unused variable
* tests now pass, readd inf at 0
* formatting
* Update reduction_rm_random_dpc.cpp
* Update reduction_rm_random_dpc.cpp
* clang formatting
* formatting change
* change ordering
* fix

---------

Co-authored-by: Victoriya Fedotova
---
 .../backend/primitives/reduction/functors.hpp |  39 +++++
 .../primitives/reduction/reduction_1d_dpc.cpp |   3 +
 .../primitives/reduction/reduction_dpc.cpp    |   3 +
 .../reduction/reduction_rm_cw_atomic_dpc.cpp  |   3 +
 .../reduction_rm_cw_blocking_dpc.cpp          |   3 +
 .../reduction/reduction_rm_cw_naive_dpc.cpp   |   3 +
 .../reduction_rm_cw_naive_local_dpc.cpp       |   3 +
 .../reduction/reduction_rm_cw_wrapper_dpc.cpp |   3 +
 .../reduction_rm_rw_blocking_dpc.cpp          |   3 +
 .../reduction/reduction_rm_rw_narrow_dpc.cpp  |   3 +
 .../reduction/reduction_rm_rw_wide_dpc.cpp    |   3 +
 .../reduction/reduction_rm_rw_wrapper_dpc.cpp |   3 +
 .../reduction/test/reduction_1d_dpc.cpp       |  76 +++++++++-
 .../reduction/test/reduction_dpc.cpp          | 103 +++++++++++++-
 .../test/reduction_rm_random_dpc.cpp          | 134 +++++++++++++++++-
 .../test/reduction_rm_uniform_dpc.cpp         |  23 ++-
 16 files changed, 400 insertions(+), 8 deletions(-)

diff --git a/cpp/oneapi/dal/backend/primitives/reduction/functors.hpp b/cpp/oneapi/dal/backend/primitives/reduction/functors.hpp
index 0da7340d14c..bccb6ade072 100644
--- a/cpp/oneapi/dal/backend/primitives/reduction/functors.hpp
+++ b/cpp/oneapi/dal/backend/primitives/reduction/functors.hpp
@@ -51,6 +51,30 @@ struct square {
     }
 };
 
+template
+struct isinfornan {
+    using tag_t = reduce_unary_op_tag;
+    bool operator()(const T& arg) const {
+#ifdef ONEDAL_DATA_PARALLEL
+        return static_cast(sycl::isinf(arg) || sycl::isnan(arg));
+#else
+        return static_cast(isinf(arg) || (arg != arg));
+#endif
+    }
+};
+
+template
+struct isinf {
+    using tag_t = reduce_unary_op_tag;
+    bool operator()(const T& arg) const {
+#ifdef ONEDAL_DATA_PARALLEL
+        return static_cast(sycl::isinf(arg));
+#else
+        return static_cast(isinf(arg));
+#endif
+    }
+};
+
 struct reduce_binary_op_tag;
 
 template
@@ -99,6 +123,21 @@ struct min {
     }
 };
 
+template
+struct logical_or {
+    using tag_t = reduce_binary_op_tag;
+    constexpr static inline T init_value = false;
+#ifdef ONEDAL_DATA_PARALLEL
+    constexpr static inline sycl::logical_or native{};
+#else
+    constexpr static inline std::logical_or native{};
+#endif
+    T operator()(const T& a, const T& b) const {
+        return native(a, b);
+    }
+};
+
 template
 constexpr bool is_typed_sum_op_v = std::is_same_v, BinaryOp>;
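Taken together, the new functors compose as a map-then-reduce: the unary op flags each element, and the binary op folds the flags starting from init_value. A minimal host-side sketch of that composition (a plain loop standing in for the reduction primitives instantiated below; template parameters are spelled out here even though the diff context elides them, and the functors above are assumed to be in scope):

    // Sketch only: returns true if any element is +/-inf or NaN.
    // Assumes <cstdint> plus the isinfornan/logical_or definitions above.
    template <typename T>
    bool contains_non_finite(const T* data, std::int64_t count) {
        isinfornan<T> unary;   // maps each element to a bool flag
        logical_or<T> binary;  // folds the flags; init_value is false
        T acc = logical_or<T>::init_value;
        for (std::int64_t i = 0; i < count; ++i)
            acc = binary(acc, static_cast<T>(unary(data[i])));
        return static_cast<bool>(acc);
    }

Note that the accumulator stays in the floating-point type T, which matches how the primitives below are instantiated with a floating-point result buffer.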
diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_1d_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_1d_dpc.cpp
index 152121afdcb..b4d6d73c629 100644
--- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_1d_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_1d_dpc.cpp
@@ -88,6 +88,9 @@ INSTANTIATE_FLOAT(sum, identity)
 INSTANTIATE_FLOAT(sum, abs)
 INSTANTIATE_FLOAT(sum, square)
+INSTANTIATE_FLOAT(logical_or, isinfornan)
+INSTANTIATE_FLOAT(logical_or, isinf)
+
 #undef INSTANTIATE_FLOAT
 #undef INSTANTIATE_LAYOUT
diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_dpc.cpp
index 7e1251cb915..da2ac3f13e2 100644
--- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_dpc.cpp
@@ -215,6 +215,9 @@ INSTANTIATE_FLOAT(sum, identity)
 INSTANTIATE_FLOAT(sum, abs)
 INSTANTIATE_FLOAT(sum, square)
+INSTANTIATE_FLOAT(logical_or, isinfornan)
+INSTANTIATE_FLOAT(logical_or, isinf)
+
 #undef INSTANTIATE_FLOAT
 #undef INSTANTIATE_LAYOUT
diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_atomic_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_atomic_dpc.cpp
index 05d19df5f45..a373b161911 100644
--- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_atomic_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_atomic_dpc.cpp
@@ -196,6 +196,9 @@ INSTANTIATE_FLOAT(sum, identity)
 INSTANTIATE_FLOAT(sum, abs)
 INSTANTIATE_FLOAT(sum, square)
+INSTANTIATE_FLOAT(logical_or, isinfornan)
+INSTANTIATE_FLOAT(logical_or, isinf)
+
 #undef INSTANTIATE_FLOAT
 #undef INSTANTIATE
diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_blocking_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_blocking_dpc.cpp
index 4c5b93598d7..75414e4d98d 100644
--- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_blocking_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_blocking_dpc.cpp
@@ -116,6 +116,9 @@ INSTANTIATE_FLOAT(sum, identity)
 INSTANTIATE_FLOAT(sum, abs)
 INSTANTIATE_FLOAT(sum, square)
+INSTANTIATE_FLOAT(logical_or, isinfornan)
+INSTANTIATE_FLOAT(logical_or, isinf)
+
 #undef INSTANTIATE_FLOAT
 #undef INSTANTIATE
diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_dpc.cpp
index 2d4420a9232..8449211779c 100644
--- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_dpc.cpp
@@ -157,6 +157,9 @@ INSTANTIATE_FLOAT(sum, identity)
 INSTANTIATE_FLOAT(sum, abs)
 INSTANTIATE_FLOAT(sum, square)
+INSTANTIATE_FLOAT(logical_or, isinfornan)
+INSTANTIATE_FLOAT(logical_or, isinf)
+
 #undef INSTANTIATE_FLOAT
 #undef INSTANTIATE
diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_local_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_local_dpc.cpp
index 2e9efed192a..20a31d62ea6 100644
--- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_local_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_naive_local_dpc.cpp
@@ -196,6 +196,9 @@ INSTANTIATE_FLOAT(sum, identity)
 INSTANTIATE_FLOAT(sum, abs)
 INSTANTIATE_FLOAT(sum, square)
+INSTANTIATE_FLOAT(logical_or, isinfornan)
+INSTANTIATE_FLOAT(logical_or, isinf)
+
 #undef INSTANTIATE_FLOAT
 #undef INSTANTIATE
diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_wrapper_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_wrapper_dpc.cpp
index b6d25e479c7..71fb1d21b5e 100644
--- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_wrapper_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_cw_wrapper_dpc.cpp
@@ -136,6 +136,9 @@ INSTANTIATE_FLOAT(sum, identity)
 INSTANTIATE_FLOAT(sum, abs)
 INSTANTIATE_FLOAT(sum, square)
+INSTANTIATE_FLOAT(logical_or, isinfornan)
+INSTANTIATE_FLOAT(logical_or, isinf)
+
 #undef INSTANTIATE_FLOAT
 #undef INSTANTIATE
diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_blocking_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_blocking_dpc.cpp
index 96cd9d05da9..25f2befe449 100644
--- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_blocking_dpc.cpp
+++
b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_blocking_dpc.cpp @@ -117,6 +117,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT } // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_narrow_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_narrow_dpc.cpp index d99775b9ad4..8e39c040400 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_narrow_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_narrow_dpc.cpp @@ -159,6 +159,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT #undef INSTANTIATE diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wide_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wide_dpc.cpp index f6e35aaa896..39b106357a6 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wide_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wide_dpc.cpp @@ -156,6 +156,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT } // namespace oneapi::dal::backend::primitives diff --git a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wrapper_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wrapper_dpc.cpp index 057899731f5..e0e8c7f40d9 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wrapper_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/reduction_rm_rw_wrapper_dpc.cpp @@ -132,6 +132,9 @@ INSTANTIATE_FLOAT(sum, identity) INSTANTIATE_FLOAT(sum, abs) INSTANTIATE_FLOAT(sum, square) +INSTANTIATE_FLOAT(logical_or, isinfornan) +INSTANTIATE_FLOAT(logical_or, isinf) + #undef INSTANTIATE_FLOAT #undef INSTANTIATE diff --git a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_1d_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_1d_dpc.cpp index f2d645ce742..1e9b76e7548 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_1d_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_1d_dpc.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include "oneapi/dal/test/engine/common.hpp" @@ -34,6 +35,11 @@ namespace pr = oneapi::dal::backend::primitives; using reduction_types = std::tuple, square>, std::tuple, square>>; +using finiteness_types = std::tuple, identity>, + std::tuple, identity>, + std::tuple, isinfornan>, + std::tuple, isinfornan>>; + template class reduction_test_random_1d : public te::float_algo_fixture> { public: @@ -87,7 +93,7 @@ class reduction_test_random_1d : public te::float_algo_fixturetest_1d_reduce(); } +template +class infinite_sum_test_random_1d : public reduction_test_random_1d { +public: + using float_t = std::tuple_element_t<0, Param>; + using binary_t = std::tuple_element_t<1, Param>; + using unary_t = std::tuple_element_t<2, Param>; + + void generate(bool maxval) { + this->n_ = GENERATE(17, 999, 1, 5, 1001); + CAPTURE(this->n_, maxval); + generate_input(maxval); + } + + void generate_input(bool maxval) { + double mininp = 0.9 * (double)maxval * 
std::numeric_limits::max() - 1.0f; + double maxinp = (double)maxval * std::numeric_limits::max(); + const auto train_dataframe = + GENERATE_DATAFRAME(te::dataframe_builder{ 1, this->n_ }.fill_uniform(mininp, maxinp)); + this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id()); + } +}; + +TEMPLATE_LIST_TEST_M(infinite_sum_test_random_1d, + "Randomly filled array with infinite sum", + "[reduction][1d][small]", + finiteness_types) { + SKIP_IF(this->not_float64_friendly()); + + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + this->test_1d_reduce(); +} + +template +class single_infinite_test_random_1d : public reduction_test_random_1d { +public: + using float_t = std::tuple_element_t<0, Param>; + using binary_t = std::tuple_element_t<1, Param>; + using unary_t = std::tuple_element_t<2, Param>; + + void generate(bool infval) { + this->n_ = GENERATE(17, 999, 1, 5, 1001); + CAPTURE(this->n_, infval); + generate_input(infval); + } + + void generate_input(bool infval) { + const auto train_dataframe = + GENERATE_DATAFRAME(te::dataframe_builder{ 1, this->n_ }.fill_uniform(-0.2, 0.5)); + auto train_data = train_dataframe.get_array().get_mutable_data(); + // train_data is a float ndarray + train_data[5] = infval ? std::numeric_limits::infinity() + : std::numeric_limits::quiet_NaN(); + this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id()); + } +}; + +TEMPLATE_LIST_TEST_M(single_infinite_test_random_1d, + "Randomly filled array with a single inf or nan", + "[reduction][1d][small]", + finiteness_types) { + SKIP_IF(this->not_float64_friendly()); + + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + this->test_1d_reduce(); +} + } // namespace oneapi::dal::backend::primitives::test diff --git a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp index a233517ef78..da3def6c6b3 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include "oneapi/dal/test/engine/common.hpp" @@ -34,6 +35,11 @@ namespace pr = oneapi::dal::backend::primitives; using reduction_types = std::tuple, square>, std::tuple, square>>; +using finiteness_types = std::tuple, identity>, + std::tuple, identity>, + std::tuple, isinfornan>, + std::tuple, isinfornan>>; + template class reduction_test_random : public te::float_algo_fixture> { public: @@ -262,7 +268,7 @@ class reduction_test_random : public te::float_algo_fixturetest_cm_rw_reduce(); } +template +class infinite_sum_test_random : public reduction_test_random { +public: + using float_t = std::tuple_element_t<0, Param>; + using binary_t = std::tuple_element_t<1, Param>; + using unary_t = std::tuple_element_t<2, Param>; + + void generate(bool maxval) { + this->height_ = GENERATE(17, 999, 1, 5, 1001); + this->width_ = GENERATE(7, 707, 1, 251, 5); + this->override_init_ = true; // poorly named variable + CAPTURE(this->override_init_, this->width_, this->height_, maxval); + generate_input(maxval); + this->generate_offset(); + } + + void generate_input(bool maxval) { + double mininp = 0.9 * (double)maxval * std::numeric_limits::max() - 1.0f; + double maxinp = (double)maxval * std::numeric_limits::max(); + const auto train_dataframe = GENERATE_DATAFRAME( + te::dataframe_builder{ this->height_, this->width_ }.fill_uniform(mininp, maxinp)); + this->input_table_ = 
train_dataframe.get_table(this->get_homogen_table_id());
+    }
+};
+
+TEMPLATE_LIST_TEST_M(infinite_sum_test_random,
+                     "Randomly filled reduction with infinite sum",
+                     "[reduction][rm][small]",
+                     finiteness_types) {
+    SKIP_IF(this->not_float64_friendly());
+    const bool use_infnan = GENERATE(0, 1);
+    this->generate(use_infnan);
+    SECTION("Reduce Row-Major by Rows") {
+        this->test_rm_rw_reduce();
+    }
+    SECTION("Reduce Row-Major by Cols") {
+        this->test_rm_cw_reduce();
+    }
+    SECTION("Reduce Col-Major by Rows") {
+        this->test_cm_cw_reduce();
+    }
+    SECTION("Reduce Col-Major by Cols") {
+        this->test_cm_rw_reduce();
+    }
+}
+
+template
+class single_infinite_test_random : public reduction_test_random {
+public:
+    using float_t = std::tuple_element_t<0, Param>;
+    using binary_t = std::tuple_element_t<1, Param>;
+    using unary_t = std::tuple_element_t<2, Param>;
+
+    void generate(bool infval) {
+        this->height_ = GENERATE(17, 999, 1, 5, 1001);
+        this->width_ = GENERATE(7, 707, 1, 251, 5);
+        this->override_init_ = true; // poorly named variable
+        CAPTURE(this->override_init_, this->width_, this->height_, infval);
+        generate_input(infval);
+        this->generate_offset();
+    }
+
+    void generate_input(bool infval) {
+        const auto train_dataframe = GENERATE_DATAFRAME(
+            te::dataframe_builder{ this->height_, this->width_ }.fill_uniform(-3.0, 4.0));
+        auto train_data = train_dataframe.get_array().get_mutable_data();
+
+        // train_data is a float array
+        train_data[5] = infval ? std::numeric_limits::infinity()
+                               : std::numeric_limits::quiet_NaN();
+        this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id());
+    }
+};
+
+TEMPLATE_LIST_TEST_M(single_infinite_test_random,
+                     "Randomly filled reduction with single inf or nan",
+                     "[reduction][rm][small]",
+                     finiteness_types) {
+    SKIP_IF(this->not_float64_friendly());
+    const bool use_infnan = GENERATE(0, 1);
+    this->generate(use_infnan);
+    SECTION("Reduce Row-Major by Rows") {
+        this->test_rm_rw_reduce();
+    }
+    SECTION("Reduce Row-Major by Cols") {
+        this->test_rm_cw_reduce();
+    }
+    SECTION("Reduce Col-Major by Rows") {
+        this->test_cm_cw_reduce();
+    }
+    SECTION("Reduce Col-Major by Cols") {
+        this->test_cm_rw_reduce();
+    }
+}
+
 } // namespace oneapi::dal::backend::primitives::test
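A quick numeric aside on why the infinite-sum generator above works: when its maxval flag is set, the inputs are drawn from roughly [0.9 * max, max] of the element type, so adding even two of them overflows to +inf under IEEE-754 arithmetic. A self-contained sketch of that assumption, shown for float:

    #include <cassert>
    #include <cmath>
    #include <limits>

    int main() {
        // Two values near the float maximum cannot be represented when summed,
        // so the result saturates to +inf, which is exactly what the
        // infinite-sum tests above rely on to produce a non-finite reduction.
        const float near_max = 0.9f * std::numeric_limits<float>::max();
        assert(std::isinf(near_max + near_max));
        return 0;
    }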
diff --git a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp
index 3d7ca3ce8f3..b443a284eac 100644
--- a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp
@@ -16,6 +16,7 @@
 #include
 #include
+#include
 #include
 
 #include "oneapi/dal/test/engine/common.hpp"
@@ -45,6 +46,13 @@ using reduction_types = std::tuple, identity>,
                                     std::tuple, identity>,
                                     std::tuple, identity>>;
 
+using finiteness_types = std::tuple, identity>,
+                         std::tuple, identity>,
+                         std::tuple, isinfornan>,
+                         std::tuple, isinf>,
+                         std::tuple, isinfornan>,
+                         std::tuple, isinf>>;
+
 template
 class reduction_rm_test_random : public te::float_algo_fixture> {
 public:
@@ -98,7 +106,8 @@ class reduction_rm_test_random : public te::float_algo_fixture
         auto res = ndarray::full(width_, binary_.init_value);
         auto* res_ptr = res.get_mutable_data();
         for (std::int64_t j = 0; j < height_; ++j) {
-            const auto row_acc = row_accessor{ input_table_ }.pull({ j, j + 1 });
+            //input_table_ is a float ndarray
+            const auto row_acc = row_accessor{ input_table_ }.pull({ j, j + 1 });
             for (std::int64_t i = 0; i < width_; ++i) {
                 const auto val = row_acc[i];
                 res_ptr[i] = binary_(res_ptr[i], unary_(val));
@@ -111,7 +120,7 @@ class reduction_rm_test_random : public te::float_algo_fixture
         auto res = ndarray::full(height_, binary_.init_value);
         auto* res_ptr = res.get_mutable_data();
         for (std::int64_t j = 0; j < height_; ++j) {
-            const auto row_acc = row_accessor{ input_table_ }.pull({ j, j + 1 });
+            const auto row_acc = row_accessor{ input_table_ }.pull({ j, j + 1 });
             for (std::int64_t i = 0; i < width_; ++i) {
                 const auto val = row_acc[i];
                 res_ptr[j] = binary_(res_ptr[j], unary_(val));
@@ -127,7 +136,7 @@ class reduction_rm_test_random : public te::float_algo_fixture
         this->test_raw_cw_reduce_wrapper();
     }
 
+template
+class infinite_sum_rm_test_random : public reduction_rm_test_random {
+public:
+    using float_t = std::tuple_element_t<0, Param>;
+    using binary_t = std::tuple_element_t<1, Param>;
+    using unary_t = std::tuple_element_t<2, Param>;
+
+    void generate(bool maxval) {
+        this->width_ = GENERATE(7, 707, 5);
+        this->stride_ = GENERATE(707, 812, 1024);
+        this->height_ = GENERATE(17, 999, 1, 1001);
+        CAPTURE(this->width_, this->stride_, this->height_, maxval);
+        generate_input(maxval);
+    }
+
+    void generate_input(bool maxval) {
+        float mininp = 0.9 * (float)maxval * std::numeric_limits::max() - 1.0f;
+        float maxinp = (float)maxval * std::numeric_limits::max();
+        const auto train_dataframe = GENERATE_DATAFRAME(
+            te::dataframe_builder{ this->height_, this->stride_ }.fill_uniform(mininp, maxinp));
+        this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id());
+    }
+};
+
+TEMPLATE_LIST_TEST_M(infinite_sum_rm_test_random,
+                     "Randomly filled Row-Major Row-Wise reduction with infinite sum",
+                     "[reduction][rm][small]",
+                     finiteness_types) {
+    SKIP_IF(this->not_float64_friendly());
+
+    const bool use_infnan = GENERATE(0, 1);
+    this->generate(use_infnan);
+    SKIP_IF(this->should_be_skipped());
+    this->test_raw_rw_reduce_wide();
+    this->test_raw_rw_reduce_narrow();
+    this->test_raw_rw_reduce_wrapper();
+}
+
+TEMPLATE_LIST_TEST_M(infinite_sum_rm_test_random,
+                     "Randomly filled Row-Major Col-Wise reduction with infinite sum",
+                     "[reduction][rm][small]",
+                     finiteness_types) {
+    SKIP_IF(this->not_float64_friendly());
+
+    const bool use_infnan = GENERATE(0, 1);
+    this->generate(use_infnan);
+    SKIP_IF(this->should_be_skipped());
+    this->test_raw_cw_reduce_naive();
+    this->test_raw_cw_reduce_atomic();
+    this->test_raw_cw_reduce_wrapper();
+}
+
+template
+class single_infinite_rm_test_random : public reduction_rm_test_random {
+public:
+    using float_t = std::tuple_element_t<0, Param>;
+    using binary_t = std::tuple_element_t<1, Param>;
+    using unary_t = std::tuple_element_t<2, Param>;
+    void generate(bool infval) {
+        this->width_ = GENERATE(7, 707, 5);
+        this->stride_ = GENERATE(707, 812, 1024);
+        this->height_ = GENERATE(17, 999, 1, 1001);
+        CAPTURE(this->width_, this->stride_, this->height_, infval);
+        generate_input(infval);
+    }
+
+    void generate_input(bool infval) {
+        float infinp = infval ?
std::numeric_limits<float>::infinity() + : std::numeric_limits<float>::quiet_NaN(); + const auto train_dataframe = GENERATE_DATAFRAME( + te::dataframe_builder{ this->height_, this->stride_ }.fill_uniform(-0.2, 0.5)); + + // train_data is a float ndarray + auto train_data = train_dataframe.get_array().get_mutable_data(); + train_data[0] = infinp; + this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id()); + // the single non-finite value at train_data[0] is enough to exercise the finiteness checks + } +}; + +TEMPLATE_LIST_TEST_M(single_infinite_rm_test_random, + "Randomly filled Row-Major Row-Wise reduction with single inf or nan", + "[reduction][rm][small]", + finiteness_types) { + SKIP_IF(this->not_float64_friendly()); + + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + SKIP_IF(this->should_be_skipped()); + this->test_raw_rw_reduce_wide(); + this->test_raw_rw_reduce_narrow(); + this->test_raw_rw_reduce_wrapper(); +} + +TEMPLATE_LIST_TEST_M(single_infinite_rm_test_random, + "Randomly filled Row-Major Col-Wise reduction with single inf or nan", + "[reduction][rm][small]", + finiteness_types) { + SKIP_IF(this->not_float64_friendly()); + + const bool use_infnan = GENERATE(0, 1); + this->generate(use_infnan); + SKIP_IF(this->should_be_skipped()); + SECTION("Reduce Naive") { + this->test_raw_cw_reduce_naive(); + } + // Investigation into atomic reduction discrepancies ongoing + //SECTION("Reduce Atomic") { + // this->test_raw_cw_reduce_atomic(); + //} + SECTION("Reduce Wrapper") { + this->test_raw_cw_reduce_wrapper(); + } +} + } // namespace oneapi::dal::backend::primitives::test diff --git a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_uniform_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_uniform_dpc.cpp index 69e1c7d2879..330fe1c46b4 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_uniform_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_uniform_dpc.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include "oneapi/dal/test/engine/common.hpp" @@ -39,7 +40,11 @@ using reduction_types = std::tuple, identity std::tuple, abs>, std::tuple, identity>, std::tuple, square>, - std::tuple, abs>>; + std::tuple, abs>, + std::tuple, isinfornan>, + std::tuple, isinf>, + std::tuple, isinfornan>, + std::tuple, isinf>>; template class reduction_rm_test_uniform : public te::float_algo_fixture> { @@ -119,6 +124,14 @@ class reduction_rm_test_uniform : public te::float_algo_fixture, binary_t>) { + if (std::is_same_v, unary_t>) { + return static_cast(std::isinf(arg_)); + } + if (std::is_same_v, unary_t>) { + return static_cast(std::isinf(arg_) || std::isnan(arg_)); + } + } ONEDAL_ASSERT(false); return 0; } @@ -157,6 +170,14 @@ class reduction_rm_test_uniform : public te::float_algo_fixture, binary_t>) { + if (std::is_same_v, unary_t>) { + return static_cast(std::isinf(arg_)); + } + if (std::is_same_v, unary_t>) { + return static_cast(std::isinf(arg_) || std::isnan(arg_)); + } + } ONEDAL_ASSERT(false); return 0; } From b95dd51d3bd491df4f0219d10566bb0eea9522be Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 7 May 2024 23:53:14 +0100 Subject: [PATCH 09/65] chore(deps): update dependency jinja2 to v3.1.4 [security] (#2765) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 741d74a508f..b2584e5a1a9
100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -10,7 +10,7 @@ idna==3.7 imagesize==1.4.1 importlib-metadata==7.0.0 importlib-resources==6.1.1 -Jinja2==3.1.3 +Jinja2==3.1.4 lxml==5.1.0 MarkupSafe==2.1.3 packaging==24.0 From 6f56ddb079cfb4a425daffd4ffa216b86ab89517 Mon Sep 17 00:00:00 2001 From: Victoriya Fedotova Date: Wed, 8 May 2024 10:29:18 +0200 Subject: [PATCH 10/65] Fix coverity 'invalid iterator' failure (#2766) --- cpp/oneapi/dal/detail/cpu_info_impl.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cpp/oneapi/dal/detail/cpu_info_impl.cpp b/cpp/oneapi/dal/detail/cpu_info_impl.cpp index 9d5194e2f3b..e15df79169f 100644 --- a/cpp/oneapi/dal/detail/cpu_info_impl.cpp +++ b/cpp/oneapi/dal/detail/cpu_info_impl.cpp @@ -15,6 +15,7 @@ *******************************************************************************/ #include "oneapi/dal/detail/cpu_info_impl.hpp" +#include "oneapi/dal/detail/error_messages.hpp" #include @@ -52,11 +53,19 @@ std::string to_string(cpu_extension extension) { } cpu_vendor cpu_info_impl::get_cpu_vendor() const { - return std::any_cast(info_.find("vendor")->second); + const auto entry = info_.find("vendor"); + if (entry == info_.end()) { + throw invalid_argument{ error_messages::invalid_key() }; + } + return std::any_cast(entry->second); } cpu_extension cpu_info_impl::get_top_cpu_extension() const { - return std::any_cast(info_.find("top_cpu_extension")->second); + const auto entry = info_.find("top_cpu_extension"); + if (entry == info_.end()) { + throw invalid_argument{ error_messages::invalid_key() }; + } + return std::any_cast(entry->second); } std::string cpu_info_impl::dump() const { From a26d8523d30c5fdcb382e64c28c9dc584b99dc80 Mon Sep 17 00:00:00 2001 From: Anatoly Volkov <117643568+avolkov-intel@users.noreply.github.com> Date: Wed, 8 May 2024 14:53:12 +0200 Subject: [PATCH 11/65] Add sparsity support for LogisticRegression algorithm (#2755) * Add sparsity support for logistic regression algorithm * Update copyrights, add new exception type, remove comments * Update docs * Remove redundant copyright changes * Minor * Minor * Minor --- .../backend/cpu/infer_kernel_dense_batch.cpp | 13 ++ .../backend/cpu/train_kernel_dense_batch.cpp | 14 ++ .../logistic_regression/backend/gpu/BUILD | 1 + .../backend/gpu/infer_kernel_sparse_dpc.cpp | 138 +++++++++++++++ .../backend/gpu/train_kernel_common.hpp | 34 ++++ .../backend/gpu/train_kernel_common_dpc.cpp | 128 ++++++++++++++ .../gpu/train_kernel_dense_batch_dpc.cpp | 92 ++-------- .../backend/gpu/train_kernel_sparse_dpc.cpp | 91 ++++++++++ .../dal/algo/logistic_regression/common.hpp | 8 +- .../detail/infer_ops_dpc.cpp | 3 + .../detail/train_ops_dpc.cpp | 3 + .../parameters/cpu/train_parameters.cpp | 13 ++ .../parameters/gpu/train_parameters_dpc.cpp | 13 ++ .../algo/logistic_regression/test/fixture.hpp | 159 ++++++++++++------ .../logistic_regression/test/spmd_dpc.cpp | 4 +- .../logistic_regression/test/spmd_fixture.hpp | 10 +- cpp/oneapi/dal/detail/error_messages.cpp | 2 + cpp/oneapi/dal/detail/error_messages.hpp | 1 + .../algorithms/logistic-regression/index.rst | 8 + 19 files changed, 598 insertions(+), 137 deletions(-) create mode 100644 cpp/oneapi/dal/algo/logistic_regression/backend/gpu/infer_kernel_sparse_dpc.cpp create mode 100644 cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp create mode 100644 cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common_dpc.cpp create mode 100644 
cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_sparse_dpc.cpp diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/infer_kernel_dense_batch.cpp b/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/infer_kernel_dense_batch.cpp index 0bc8ddb219d..88f04248e38 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/infer_kernel_dense_batch.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/infer_kernel_dense_batch.cpp @@ -34,7 +34,20 @@ struct infer_kernel_cpu { } }; +template +struct infer_kernel_cpu { + infer_result operator()(const context_cpu& ctx, + const detail::descriptor_base& desc, + const infer_input& input) const { + throw unimplemented( + dal::detail::error_messages::log_reg_sparse_method_is_not_implemented_for_cpu()); + } +}; + template struct infer_kernel_cpu; template struct infer_kernel_cpu; +template struct infer_kernel_cpu; +template struct infer_kernel_cpu; + } // namespace oneapi::dal::logistic_regression::backend diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/train_kernel_dense_batch.cpp b/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/train_kernel_dense_batch.cpp index e5728534b1d..b2f7dc0e951 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/train_kernel_dense_batch.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/cpu/train_kernel_dense_batch.cpp @@ -32,7 +32,21 @@ struct train_kernel_cpu { } }; +template +struct train_kernel_cpu { + train_result operator()(const context_cpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const train_input& input) const { + throw unimplemented( + dal::detail::error_messages::log_reg_sparse_method_is_not_implemented_for_cpu()); + } +}; + template struct train_kernel_cpu; template struct train_kernel_cpu; +template struct train_kernel_cpu; +template struct train_kernel_cpu; + } // namespace oneapi::dal::logistic_regression::backend diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/BUILD b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/BUILD index c22cb27010c..4d9e7679fe7 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/BUILD +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/BUILD @@ -13,6 +13,7 @@ dal_module( "@onedal//cpp/oneapi/dal/backend/primitives:lapack", "@onedal//cpp/oneapi/dal/backend/primitives:objective_function", "@onedal//cpp/oneapi/dal/backend/primitives:optimizers", + "@onedal//cpp/oneapi/dal/backend/primitives:sparse_blas", "@onedal//cpp/oneapi/dal/algo/logistic_regression:core", ], ) diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/infer_kernel_sparse_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/infer_kernel_sparse_dpc.cpp new file mode 100644 index 00000000000..51fab5a66e1 --- /dev/null +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/infer_kernel_sparse_dpc.cpp @@ -0,0 +1,138 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/logistic_regression/backend/model_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/gpu/infer_kernel.hpp" +#include "oneapi/dal/algo/logistic_regression/common.hpp" +#include "oneapi/dal/algo/logistic_regression/train_types.hpp" + +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/backend/interop/common.hpp" +#include "oneapi/dal/backend/interop/common_dpc.hpp" +#include "oneapi/dal/backend/interop/error_converter.hpp" +#include "oneapi/dal/backend/interop/table_conversion.hpp" +#include "oneapi/dal/backend/primitives/blas.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include "oneapi/dal/backend/primitives/ndindexer.hpp" +#include "oneapi/dal/backend/primitives/objective_function.hpp" +#include "oneapi/dal/backend/primitives/sparse_blas.hpp" + +#include "oneapi/dal/detail/profiler.hpp" +#include "oneapi/dal/table/csr_accessor.hpp" + +namespace oneapi::dal::logistic_regression::backend { + +using dal::backend::context_gpu; + +namespace be = dal::backend; +namespace pr = be::primitives; +namespace interop = dal::backend::interop; + +template +static infer_result call_dal_kernel(const context_gpu& ctx, + const detail::descriptor_base& desc, + const table& infer, + const model& m) { + auto queue = ctx.get_queue(); + ONEDAL_PROFILER_TASK(logreg_infer_kernel, queue); + + constexpr auto alloc = sycl::usm::alloc::device; + + const auto& betas = m.get_packed_coefficients(); + + const auto sample_count = infer.get_row_count(); + const auto feature_count = infer.get_column_count(); + const bool fit_intercept = desc.get_compute_intercept(); + ONEDAL_ASSERT((feature_count + 1) == betas.get_column_count()); + ONEDAL_ASSERT(1 == betas.get_row_count()); + ONEDAL_ASSERT(infer.get_kind() == dal::csr_table::kind()); + + pr::ndarray params = pr::table2ndarray_1d(queue, betas, alloc); + pr::ndview params_suf = fit_intercept ? 
params : params.slice(1, feature_count); + + pr::ndarray probs = pr::ndarray::empty(queue, { sample_count }, alloc); + pr::ndarray responses = + pr::ndarray::empty(queue, { sample_count }, alloc); + + auto [csr_data, column_indices, row_offsets] = + csr_accessor(static_cast(infer)) + .pull(queue, { 0, -1 }, sparse_indexing::zero_based); + + auto csr_data_gpu = + pr::ndarray::wrap(csr_data.get_data(), csr_data.get_count()).to_device(queue); + auto column_indices_gpu = + pr::ndarray::wrap(column_indices.get_data(), column_indices.get_count()) + .to_device(queue); + auto row_offsets_gpu = + pr::ndarray::wrap(row_offsets.get_data(), row_offsets.get_count()) + .to_device(queue); + + table data_gpu = csr_table::wrap(queue, + csr_data_gpu.get_data(), + column_indices_gpu.get_data(), + row_offsets_gpu.get_data(), + sample_count, + feature_count, + sparse_indexing::zero_based); + + dal::backend::primitives::sparse_matrix_handle sp_handle(queue); + set_csr_data(queue, sp_handle, static_cast(data_gpu)); + + sycl::event probabilities_event = + compute_probabilities_sparse(queue, params_suf, sp_handle, probs, fit_intercept, {}); + + const auto* const prob_ptr = probs.get_data(); + auto* const resp_ptr = responses.get_mutable_data(); + + auto fill_resp_event = queue.submit([&](sycl::handler& cgh) { + cgh.depends_on(probabilities_event); + const auto range = be::make_range_1d(sample_count); + cgh.parallel_for(range, [=](sycl::id<1> idx) { + constexpr Float half = 0.5f; + resp_ptr[idx] = prob_ptr[idx] < half ? 0 : 1; + }); + }); + + auto resp_table = + homogen_table::wrap(responses.flatten(queue, { fill_resp_event }), sample_count, 1); + auto prob_table = + homogen_table::wrap(probs.flatten(queue, { probabilities_event }), sample_count, 1); + + auto result = infer_result().set_responses(resp_table).set_probabilities(prob_table); + + return result; +} + +template +static infer_result infer(const context_gpu& ctx, + const detail::descriptor_base& desc, + const infer_input& input) { + return call_dal_kernel(ctx, desc, input.get_data(), input.get_model()); +} + +template +struct infer_kernel_gpu { + infer_result operator()(const context_gpu& ctx, + const detail::descriptor_base& desc, + const infer_input& input) const { + return infer(ctx, desc, input); + } +}; + +template struct infer_kernel_gpu; +template struct infer_kernel_gpu; + +} // namespace oneapi::dal::logistic_regression::backend diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp new file mode 100644 index 00000000000..baf648c12dc --- /dev/null +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp @@ -0,0 +1,34 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/logistic_regression/common.hpp" + +#include "oneapi/dal/detail/common.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/algo/logistic_regression/train_types.hpp" + +namespace oneapi::dal::logistic_regression::backend { + +using dal::backend::context_gpu; + +template +train_result call_dal_kernel(const context_gpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const table& data, + const table& resp); + +} // namespace oneapi::dal::logistic_regression::backend diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common_dpc.cpp new file mode 100644 index 00000000000..d409ce3816d --- /dev/null +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common_dpc.cpp @@ -0,0 +1,128 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/model_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/optimizer_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/common.hpp" +#include "oneapi/dal/algo/logistic_regression/train_types.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include "oneapi/dal/backend/primitives/lapack.hpp" +#include "oneapi/dal/backend/primitives/utils.hpp" +#include "oneapi/dal/backend/primitives/objective_function.hpp" +#include "oneapi/dal/backend/primitives/optimizers.hpp" +#include "oneapi/dal/detail/profiler.hpp" +#include "oneapi/dal/detail/common.hpp" + +namespace oneapi::dal::logistic_regression::backend { + +using dal::backend::context_gpu; + +namespace be = dal::backend; +namespace pr = be::primitives; + +template +train_result call_dal_kernel(const context_gpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const table& data, + const table& resp) { + using dal::detail::check_mul_overflow; + + auto queue = ctx.get_queue(); + + ONEDAL_PROFILER_TASK(log_reg_train_kernel, queue); + + using model_t = model; + using model_impl_t = detail::model_impl; + + auto opt_impl = detail::get_optimizer_impl(desc); + + if (!opt_impl) { + throw internal_error{ dal::detail::error_messages::unknown_optimizer() }; + } + + const auto sample_count = data.get_row_count(); + const auto feature_count = data.get_column_count(); + ONEDAL_ASSERT(sample_count == resp.get_row_count()); + const auto responses_nd = + pr::table2ndarray_1d(queue, resp, sycl::usm::alloc::device); + + 
const std::int64_t bsize = params.get_gpu_macro_block(); + + const Float l2 = Float(1.0) / desc.get_inverse_regularization(); + const bool fit_intercept = desc.get_compute_intercept(); + + auto& comm = ctx.get_communicator(); + + pr::logloss_function loss_func = + pr::logloss_function(queue, comm, data, responses_nd, l2, fit_intercept, bsize); + + auto [x, fill_event] = + pr::ndarray::zeros(queue, { feature_count + 1 }, sycl::usm::alloc::device); + + pr::ndview x_suf = fit_intercept ? x : x.slice(1, feature_count); + + auto [train_event, iter_num] = opt_impl->minimize(queue, loss_func, x_suf, { fill_event }); + + auto all_coefs = homogen_table::wrap(x.flatten(queue, { train_event }), 1, feature_count + 1); + + const auto model_impl = std::make_shared(all_coefs); + const auto model = dal::detail::make_private(model_impl); + + const auto options = desc.get_result_options(); + auto result = train_result().set_model(model).set_result_options(options); + + if (options.test(result_options::intercept)) { + ONEDAL_ASSERT(fit_intercept); + table intercept_table = + homogen_table::wrap(x.slice(0, 1).flatten(queue, { train_event }), 1, 1); + result.set_intercept(intercept_table); + } + + if (options.test(result_options::coefficients)) { + auto coefs_array = x.slice(1, feature_count).flatten(queue, { train_event }); + auto coefs_table = homogen_table::wrap(coefs_array, 1, feature_count); + result.set_coefficients(coefs_table); + } + + if (options.test(result_options::iterations_count)) { + result.set_iterations_count(iter_num); + } + + if (options.test(result_options::inner_iterations_count)) { + result.set_inner_iterations_count(opt_impl->get_inner_iter()); + } + + return result; +} + +template train_result call_dal_kernel( + const context_gpu&, + const detail::descriptor_base&, + const detail::train_parameters&, + const table&, + const table&); +template train_result call_dal_kernel( + const context_gpu&, + const detail::descriptor_base&, + const detail::train_parameters&, + const table&, + const table&); + +} // namespace oneapi::dal::logistic_regression::backend diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_dense_batch_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_dense_batch_dpc.cpp index e24cdb02539..778e3330841 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_dense_batch_dpc.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_dense_batch_dpc.cpp @@ -31,6 +31,7 @@ #include "oneapi/dal/backend/primitives/objective_function.hpp" #include "oneapi/dal/backend/primitives/optimizers.hpp" #include "oneapi/dal/algo/logistic_regression/backend/optimizer_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp" namespace oneapi::dal::logistic_regression::backend { @@ -39,93 +40,20 @@ using dal::backend::context_gpu; namespace be = dal::backend; namespace pr = be::primitives; -template -static train_result call_dal_kernel(const context_gpu& ctx, - const detail::descriptor_base& desc, - const detail::train_parameters& params, - const table& data, - const table& resp) { - using dal::detail::check_mul_overflow; - - auto queue = ctx.get_queue(); - - ONEDAL_PROFILER_TASK(log_reg_train_kernel, queue); - - using model_t = model; - using model_impl_t = detail::model_impl; - - auto opt_impl = detail::get_optimizer_impl(desc); - - if (!opt_impl) { - throw internal_error{ dal::detail::error_messages::unknown_optimizer() }; - } - - const auto sample_count = 
data.get_row_count(); - const auto feature_count = data.get_column_count(); - ONEDAL_ASSERT(sample_count == resp.get_row_count()); - const auto responses_nd = - pr::table2ndarray_1d(queue, resp, sycl::usm::alloc::device); - - const std::int64_t bsize = params.get_gpu_macro_block(); - - const Float l2 = Float(1.0) / desc.get_inverse_regularization(); - const bool fit_intercept = desc.get_compute_intercept(); - - // TODO: add check if the dataset can be moved to gpu - // Move data to gpu - pr::ndarray data_nd = pr::table2ndarray(queue, data, sycl::usm::alloc::device); - table data_gpu = homogen_table::wrap(data_nd.flatten(queue, {}), sample_count, feature_count); - - auto& comm = ctx.get_communicator(); - - pr::logloss_function loss_func = - pr::logloss_function(queue, comm, data_gpu, responses_nd, l2, fit_intercept, bsize); - - auto [x, fill_event] = - pr::ndarray::zeros(queue, { feature_count + 1 }, sycl::usm::alloc::device); - - pr::ndview x_suf = fit_intercept ? x : x.slice(1, feature_count); - - auto [train_event, iter_num] = opt_impl->minimize(queue, loss_func, x_suf, { fill_event }); - - auto all_coefs = homogen_table::wrap(x.flatten(queue, { train_event }), 1, feature_count + 1); - - const auto model_impl = std::make_shared(all_coefs); - const auto model = dal::detail::make_private(model_impl); - - const auto options = desc.get_result_options(); - auto result = train_result().set_model(model).set_result_options(options); - - if (options.test(result_options::intercept)) { - ONEDAL_ASSERT(fit_intercept); - table intercept_table = - homogen_table::wrap(x.slice(0, 1).flatten(queue, { train_event }), 1, 1); - result.set_intercept(intercept_table); - } - - if (options.test(result_options::coefficients)) { - auto coefs_array = x.slice(1, feature_count).flatten(queue, { train_event }); - auto coefs_table = homogen_table::wrap(coefs_array, 1, feature_count); - result.set_coefficients(coefs_table); - } - - if (options.test(result_options::iterations_count)) { - result.set_iterations_count(iter_num); - } - - if (options.test(result_options::inner_iterations_count)) { - result.set_inner_iterations_count(opt_impl->get_inner_iter()); - } - - return result; -} - template static train_result train(const context_gpu& ctx, const detail::descriptor_base& desc, const detail::train_parameters& params, const train_input& input) { - return call_dal_kernel(ctx, desc, params, input.get_data(), input.get_responses()); + // TODO: add check if the dataset can be moved to gpu + // Move data to gpu + const auto sample_count = input.get_data().get_row_count(); + const auto feature_count = input.get_data().get_column_count(); + auto queue = ctx.get_queue(); + pr::ndarray data_nd = + pr::table2ndarray(queue, input.get_data(), sycl::usm::alloc::device); + table data_gpu = homogen_table::wrap(data_nd.flatten(queue, {}), sample_count, feature_count); + return call_dal_kernel(ctx, desc, params, data_gpu, input.get_responses()); } template diff --git a/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_sparse_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_sparse_dpc.cpp new file mode 100644 index 00000000000..6b19c32c37d --- /dev/null +++ b/cpp/oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_sparse_dpc.cpp @@ -0,0 +1,91 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in 
compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/detail/profiler.hpp" + +#include "oneapi/dal/detail/common.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" +#include "oneapi/dal/backend/primitives/lapack.hpp" +#include "oneapi/dal/backend/primitives/utils.hpp" + +#include "oneapi/dal/table/csr_accessor.hpp" + +#include "oneapi/dal/algo/logistic_regression/common.hpp" +#include "oneapi/dal/algo/logistic_regression/train_types.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/model_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel.hpp" +#include "oneapi/dal/backend/primitives/objective_function.hpp" +#include "oneapi/dal/backend/primitives/optimizers.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/optimizer_impl.hpp" +#include "oneapi/dal/algo/logistic_regression/backend/gpu/train_kernel_common.hpp" + +namespace oneapi::dal::logistic_regression::backend { + +using dal::backend::context_gpu; + +namespace be = dal::backend; +namespace pr = be::primitives; + +template +static train_result train(const context_gpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const train_input& input) { + // TODO: add check if the dataset can be moved to gpu + // Move data to gpu + const auto sample_count = input.get_data().get_row_count(); + const auto feature_count = input.get_data().get_column_count(); + auto queue = ctx.get_queue(); + + auto [csr_data, column_indices, row_offsets] = + csr_accessor(static_cast(input.get_data())) + .pull(queue, { 0, -1 }, sparse_indexing::zero_based); + + auto csr_data_gpu = + pr::ndarray::wrap(csr_data.get_data(), csr_data.get_count()).to_device(queue); + auto column_indices_gpu = + pr::ndarray::wrap(column_indices.get_data(), column_indices.get_count()) + .to_device(queue); + auto row_offsets_gpu = + pr::ndarray::wrap(row_offsets.get_data(), row_offsets.get_count()) + .to_device(queue); + + table data_gpu = csr_table::wrap(queue, + csr_data_gpu.get_data(), + column_indices_gpu.get_data(), + row_offsets_gpu.get_data(), + sample_count, + feature_count, + sparse_indexing::zero_based); + + return call_dal_kernel(ctx, desc, params, data_gpu, input.get_responses()); +} + +template +struct train_kernel_gpu { + train_result operator()(const context_gpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const train_input& input) const { + return train(ctx, desc, params, input); + } +}; + +template struct train_kernel_gpu; +template struct train_kernel_gpu; + +} // namespace oneapi::dal::logistic_regression::backend diff --git a/cpp/oneapi/dal/algo/logistic_regression/common.hpp b/cpp/oneapi/dal/algo/logistic_regression/common.hpp index b817b5b1101..664705d260c 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/common.hpp +++ b/cpp/oneapi/dal/algo/logistic_regression/common.hpp @@ -44,12 +44,15 @@ namespace method { namespace v1 { /// Tag-type that denotes :ref:`dense_batch ` computational method. 
struct dense_batch {}; +/// Tag-type that denotes :ref:`sparse ` computational method. +struct sparse {}; /// Alias tag-type for the dense_batch method using by_default = dense_batch; } // namespace v1 using v1::dense_batch; +using v1::sparse; using v1::by_default; } // namespace method @@ -105,7 +108,8 @@ template constexpr bool is_valid_float_v = dal::detail::is_one_of_v; template -constexpr bool is_valid_method_v = dal::detail::is_one_of_v; +constexpr bool is_valid_method_v = + dal::detail::is_one_of_v; template constexpr bool is_valid_task_v = dal::detail::is_one_of_v; @@ -168,7 +172,7 @@ namespace v1 { /// intermediate computations. Can be :expr:`float` or /// :expr:`double`. /// @tparam Method Tag-type that specifies an implementation of algorithm. Can -/// be :expr:`method::dense_batch`. +/// be :expr:`method::dense_batch` or :expr:`method::sparse`. /// @tparam Task Tag-type that specifies type of the problem to solve. Can /// be :expr:`task::classification`. /// @tparam Optimizer The descriptor of the optimizer used for minimization. Can diff --git a/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops_dpc.cpp index e28a5b722b2..9bb1d7c35ad 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops_dpc.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops_dpc.cpp @@ -43,5 +43,8 @@ struct infer_ops_dispatcher { INSTANTIATE(float, method::dense_batch, task::classification) INSTANTIATE(double, method::dense_batch, task::classification) +INSTANTIATE(float, method::sparse, task::classification) +INSTANTIATE(double, method::sparse, task::classification) + } // namespace v1 } // namespace oneapi::dal::logistic_regression::detail diff --git a/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops_dpc.cpp index 603f509578c..8ea686e5ebd 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops_dpc.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops_dpc.cpp @@ -70,5 +70,8 @@ struct train_ops_dispatcher { INSTANTIATE(float, method::dense_batch, task::classification) INSTANTIATE(double, method::dense_batch, task::classification) +INSTANTIATE(float, method::sparse, task::classification) +INSTANTIATE(double, method::sparse, task::classification) + } // namespace v1 } // namespace oneapi::dal::logistic_regression::detail diff --git a/cpp/oneapi/dal/algo/logistic_regression/parameters/cpu/train_parameters.cpp b/cpp/oneapi/dal/algo/logistic_regression/parameters/cpu/train_parameters.cpp index 96e7d8e0ddb..523fbba91a0 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/parameters/cpu/train_parameters.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/parameters/cpu/train_parameters.cpp @@ -55,9 +55,22 @@ struct train_parameters_cpu { } }; +template +struct train_parameters_cpu { + using params_t = detail::train_parameters; + params_t operator()(const context_cpu& ctx, + const detail::descriptor_base& desc, + const train_input& input) const { + return params_t{}; + } +}; + template struct ONEDAL_EXPORT train_parameters_cpu; template struct ONEDAL_EXPORT train_parameters_cpu; +template struct ONEDAL_EXPORT train_parameters_cpu; +template struct ONEDAL_EXPORT train_parameters_cpu; + } // namespace oneapi::dal::logistic_regression::parameters diff --git a/cpp/oneapi/dal/algo/logistic_regression/parameters/gpu/train_parameters_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/parameters/gpu/train_parameters_dpc.cpp 
index f2537443c68..d83f8a2888e 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/parameters/gpu/train_parameters_dpc.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/parameters/gpu/train_parameters_dpc.cpp @@ -55,9 +55,22 @@ struct train_parameters_gpu { } }; +template +struct train_parameters_gpu { + using params_t = detail::train_parameters; + params_t operator()(const context_gpu& ctx, + const detail::descriptor_base& desc, + const train_input& input) const { + return params_t{}; + } +}; + template struct ONEDAL_EXPORT train_parameters_gpu; template struct ONEDAL_EXPORT train_parameters_gpu; +template struct ONEDAL_EXPORT train_parameters_gpu; +template struct ONEDAL_EXPORT train_parameters_gpu; + } // namespace oneapi::dal::logistic_regression::parameters diff --git a/cpp/oneapi/dal/algo/logistic_regression/test/fixture.hpp b/cpp/oneapi/dal/algo/logistic_regression/test/fixture.hpp index 83092265519..d5e41f4c31c 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/test/fixture.hpp +++ b/cpp/oneapi/dal/algo/logistic_regression/test/fixture.hpp @@ -28,6 +28,7 @@ #include "oneapi/dal/test/engine/fixtures.hpp" #include "oneapi/dal/test/engine/math.hpp" +#include "oneapi/dal/test/engine/csr_table_builder.hpp" namespace oneapi::dal::logistic_regression::test { @@ -55,6 +56,10 @@ class log_reg_test : public te::crtp_algo_fixture { return static_cast(this); } + bool is_sparse() { + return std::is_same_v; + } + auto get_descriptor(double tol = 1e-4, std::int64_t maxiter = 100) const { result_option_id resopts = result_options::coefficients; @@ -67,7 +72,9 @@ class log_reg_test : public te::crtp_algo_fixture { .set_optimizer(optimizer_desc); } - virtual void gen_dimensions(std::int64_t n = -1, std::int64_t p = -1) { + virtual void gen_dimensions(std::int64_t n = -1, + std::int64_t p = -1, + double train_size_coef = 0.7) { if (n == -1 || p == -1) { this->n_ = GENERATE(100, 200, 1000, 10000, 50000); this->p_ = GENERATE(10, 20, 30); @@ -76,6 +83,8 @@ class log_reg_test : public te::crtp_algo_fixture { this->n_ = n; this->p_ = p; } + this->train_size_ = (this->n_ * train_size_coef); + this->test_size_ = this->n_ - this->train_size_; } float_t predict_proba(float_t* ptr, float_t* params_ptr, float_t intercept) { @@ -84,7 +93,7 @@ class log_reg_test : public te::crtp_algo_fixture { val += ptr[j] * params_ptr[j]; } val += intercept; - return float_t(1) / (1 + std::exp(-val)); + return float_t(1.0) / (1.0 + std::exp(-val)); } void gen_input(bool fit_intercept = true, double C = 1.0, std::int64_t seed = 2007) { @@ -93,31 +102,73 @@ class log_reg_test : public te::crtp_algo_fixture { std::int64_t dim = fit_intercept_ ? 
p_ + 1 : p_; - X_host_ = array::zeros(n_ * p_); - auto* x_ptr = X_host_.get_mutable_data(); + std::mt19937 rnd(seed + n_ + p_); - y_host_ = array::zeros(n_); - auto* y_ptr = y_host_.get_mutable_data(); + std::uniform_real_distribution<> dis_params(-3.0, 3.0); - params_host_ = array::zeros(dim); - auto* params_ptr = params_host_.get_mutable_data(); + if (this->is_sparse()) { + auto builder_train = + te::csr_table_builder(train_size_, p_, 0.5, sparse_indexing::zero_based); + this->X_train_ = builder_train.build_csr_table(this->get_policy()); - std::mt19937 rnd(seed + n_ + p_); - std::uniform_real_distribution<> dis_data(-10.0, 10.0); - std::uniform_real_distribution<> dis_params(-3.0, 3.0); + auto builder_test = + te::csr_table_builder(test_size_, p_, 0.5, sparse_indexing::zero_based); + this->X_test_ = builder_test.build_csr_table(this->get_policy()); + + table dense_train = builder_train.build_dense_table(); + table dense_test = builder_test.build_dense_table(); - for (std::int64_t i = 0; i < n_; ++i) { - for (std::int64_t j = 0; j < p_; ++j) { - *(x_ptr + i * p_ + j) = dis_data(rnd); + X_train_host_ = row_accessor(dense_train) + .pull(this->get_queue(), { 0, -1 }, sycl::usm::alloc::host); + X_test_host_ = row_accessor(dense_test) + .pull(this->get_queue(), { 0, -1 }, sycl::usm::alloc::host); + } + else { + std::uniform_real_distribution<> dis_data(-10.0, 10.0); + X_train_host_ = array::zeros(train_size_ * p_); + X_test_host_ = array::zeros(test_size_ * p_); + + auto* x_ptr = X_train_host_.get_mutable_data(); + for (std::int64_t i = 0; i < train_size_; ++i) { + for (std::int64_t j = 0; j < p_; ++j) { + *(x_ptr + i * p_ + j) = dis_data(rnd); + } + } + x_ptr = X_test_host_.get_mutable_data(); + for (std::int64_t i = 0; i < test_size_; ++i) { + for (std::int64_t j = 0; j < p_; ++j) { + *(x_ptr + i * p_ + j) = dis_data(rnd); + } } + X_train_ = + homogen_table::wrap(X_train_host_.get_mutable_data(), train_size_, p_); + X_test_ = homogen_table::wrap(X_test_host_.get_mutable_data(), test_size_, p_); } + auto* x_ptr = X_train_host_.get_mutable_data(); + y_train_host_ = array::zeros(train_size_); + auto* y_ptr = y_train_host_.get_mutable_data(); + + params_host_ = array::zeros(dim); + auto* params_ptr = params_host_.get_mutable_data(); + for (std::int64_t i = 0; i < dim; ++i) { *(params_ptr + i) = dis_params(rnd); } constexpr float_t half = 0.5; - for (std::int64_t i = 0; i < n_; ++i) { + for (std::int64_t i = 0; i < train_size_; ++i) { + float_t val = predict_proba(x_ptr + i * p_, + params_ptr + (std::int64_t)fit_intercept_, + fit_intercept_ ? *params_ptr : 0); + y_ptr[i] = bool(val < half); + } + + x_ptr = X_test_host_.get_mutable_data(); + y_test_host_ = array::zeros(test_size_); + y_ptr = y_test_host_.get_mutable_data(); + + for (std::int64_t i = 0; i < test_size_; ++i) { float_t val = predict_proba(x_ptr + i * p_, params_ptr + (std::int64_t)fit_intercept_, fit_intercept_ ? 
*params_ptr : 0); @@ -126,18 +177,12 @@ class log_reg_test : public te::crtp_algo_fixture { } void run_test(double tol = 1e-4, std::int64_t maxiter = 100) { - std::int64_t train_size = n_ * 0.7; - std::int64_t test_size = n_ - train_size; - - table X_train = homogen_table::wrap(X_host_.get_mutable_data(), train_size, p_); - table X_test = homogen_table::wrap(X_host_.get_mutable_data() + train_size * p_, - test_size, - p_); table y_train = - homogen_table::wrap(y_host_.get_mutable_data(), train_size, 1); - + homogen_table::wrap(y_train_host_.get_mutable_data(), train_size_, 1); + table y_table = + homogen_table::wrap(y_test_host_.get_mutable_data(), test_size_, 1); const auto desc = this->get_descriptor(tol, maxiter); - const auto train_res = this->train(desc, X_train, y_train); + const auto train_res = this->train(desc, X_train_, y_train); table intercept; array bias_host; if (fit_intercept_) { @@ -150,7 +195,7 @@ class log_reg_test : public te::crtp_algo_fixture { std::int64_t train_acc = 0; std::int64_t test_acc = 0; - const auto infer_res = this->infer(desc, X_test, train_res.get_model()); + const auto infer_res = this->infer(desc, X_test_, train_res.get_model()); table resp_table = infer_res.get_responses(); auto resp_host = row_accessor(resp_table).pull({ 0, -1 }); @@ -158,38 +203,43 @@ class log_reg_test : public te::crtp_algo_fixture { table prob_table = infer_res.get_probabilities(); auto prob_host = row_accessor(prob_table).pull({ 0, -1 }); - for (std::int64_t i = 0; i < n_; ++i) { - float_t val = predict_proba(X_host_.get_mutable_data() + i * p_, + for (std::int64_t i = 0; i < train_size_; ++i) { + float_t val = predict_proba(X_train_host_.get_mutable_data() + i * p_, coefs_host.get_mutable_data(), fit_intercept_ ? *bias_host.get_mutable_data() : 0); - std::int32_t resp = 0; - if (val >= 0.5) { - resp = 1; - } - if (resp == *(y_host_.get_mutable_data() + i)) { - bool is_train = i < train_size; - train_acc += std::int64_t(is_train); - test_acc += std::int64_t(!is_train); - } - if (i >= train_size) { - REQUIRE(abs(val - *(prob_host.get_mutable_data() + i - train_size)) < 1e-5); - REQUIRE(*(resp_host.get_mutable_data() + i - train_size) == resp); + std::int32_t resp = val >= 0.5 ? 1 : 0; + if (resp == *(y_train_host_.get_mutable_data() + i)) { + train_acc += 1; } } + std::int64_t acc_algo = 0; - for (std::int64_t i = 0; i < test_size; ++i) { - if (*(resp_host.get_mutable_data() + i) == - *(y_host_.get_mutable_data() + train_size + i)) { + for (std::int64_t i = 0; i < test_size_; ++i) { + float_t val = predict_proba(X_test_host_.get_mutable_data() + i * p_, + coefs_host.get_mutable_data(), + fit_intercept_ ? *bias_host.get_mutable_data() : 0); + std::int32_t resp = val >= 0.5 ? 1 : 0; + if (resp == *(y_test_host_.get_mutable_data() + i)) { + test_acc += 1; + } + + REQUIRE(abs(val - *(prob_host.get_mutable_data() + i)) < 1e-5); + REQUIRE(*(resp_host.get_mutable_data() + i) == resp); + + if (*(resp_host.get_mutable_data() + i) == *(y_test_host_.get_mutable_data() + i)) { acc_algo++; } } float_t min_train_acc = 0.95; float_t min_test_acc = n_ < 500 ? 
0.7 : 0.85; + if (this->is_sparse() && n_ < 500) { + min_test_acc = 0.65; + } - REQUIRE(train_size * min_train_acc < train_acc); - REQUIRE(test_size * min_test_acc < test_acc); - REQUIRE(test_size * min_test_acc < acc_algo); + REQUIRE(train_size_ * min_train_acc < train_acc); + REQUIRE(test_size_ * min_test_acc < test_acc); + REQUIRE(test_size_ * min_test_acc < acc_algo); REQUIRE(test_acc == acc_algo); } @@ -198,14 +248,23 @@ class log_reg_test : public te::crtp_algo_fixture { double C_ = 1.0; std::int64_t n_ = 0; std::int64_t p_ = 0; - array X_host_; + std::int64_t train_size_ = 0; + std::int64_t test_size_ = 0; + array X_train_host_; + array X_test_host_; array params_host_; - array y_host_; - array resp_; + array y_train_host_; + array y_test_host_; + + array resp_train_; + array resp_test_; + table X_train_; + table X_test_; }; using log_reg_types = COMBINE_TYPES((float, double), - (logistic_regression::method::dense_batch), + (logistic_regression::method::dense_batch, + logistic_regression::method::sparse), (logistic_regression::task::classification)); } // namespace oneapi::dal::logistic_regression::test diff --git a/cpp/oneapi/dal/algo/logistic_regression/test/spmd_dpc.cpp b/cpp/oneapi/dal/algo/logistic_regression/test/spmd_dpc.cpp index 0e61b8056cb..c1ecf0b1adc 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/test/spmd_dpc.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/test/spmd_dpc.cpp @@ -21,7 +21,7 @@ namespace oneapi::dal::logistic_regression::test { TEMPLATE_LIST_TEST_M(log_reg_spmd_test, "LogReg common flow - fit intercept", "[lr][spmd]", - log_reg_types) { + log_reg_spmd_types) { SKIP_IF(this->get_policy().is_cpu()); SKIP_IF(this->not_float64_friendly()); @@ -35,7 +35,7 @@ TEMPLATE_LIST_TEST_M(log_reg_spmd_test, TEMPLATE_LIST_TEST_M(log_reg_spmd_test, "LogReg common flow - no fit intercept", "[lr][spmd]", - log_reg_types) { + log_reg_spmd_types) { SKIP_IF(this->get_policy().is_cpu()); SKIP_IF(this->not_float64_friendly()); diff --git a/cpp/oneapi/dal/algo/logistic_regression/test/spmd_fixture.hpp b/cpp/oneapi/dal/algo/logistic_regression/test/spmd_fixture.hpp index 120d4258750..89b7859b7cd 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/test/spmd_fixture.hpp +++ b/cpp/oneapi/dal/algo/logistic_regression/test/spmd_fixture.hpp @@ -37,7 +37,9 @@ class log_reg_spmd_test : public log_reg_testn_ = GENERATE(50, 99); this->p_ = GENERATE(3, 10); @@ -46,6 +48,8 @@ class log_reg_spmd_test : public log_reg_testn_ = n; this->p_ = p; } + this->train_size_ = (this->n_ * train_size_coef); + this->test_size_ = this->n_ - this->train_size_; } template @@ -82,4 +86,8 @@ class log_reg_spmd_test : public log_reg_test`. +.. _logreg_t_math_sparse: + +Training Method: *sparse* +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using this method, you can train a Logistic Regression model on sparse data. Provide the matrix of feature vectors as a +sparse table. For more information about sparse tables, see :ref:`Compressed Sparse Rows (CSR) Table: `. +
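To make the new method concrete, a minimal training sketch for the sparse path could look as follows. This is an illustration only, not part of the patch: it assumes a DPC++ build with a GPU queue (the CPU kernels above deliberately throw for method::sparse), pre-built inputs x_train (a CSR table) and y_train (a one-column response table), and the helper name train_sparse is hypothetical.

#include <sycl/sycl.hpp>
#include "oneapi/dal/algo/logistic_regression.hpp"
#include "oneapi/dal/train.hpp"

namespace dal = oneapi::dal;
namespace lr = dal::logistic_regression;

// Hypothetical helper: trains a binary classifier on CSR data via method::sparse.
lr::model<> train_sparse(sycl::queue& q, const dal::table& x_train, const dal::table& y_train) {
    // compute_intercept = true and inverse regularization C = 1.0, mirroring the fixture above.
    const auto desc =
        lr::descriptor<float, lr::method::sparse, lr::task::classification>(true, 1.0)
            .set_result_options(lr::result_options::coefficients | lr::result_options::intercept);
    const auto result = dal::train(q, desc, x_train, y_train);
    return result.get_model();
}
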
.. _logreg_i_math: Inference From b992d6c84c7950892d729183cf6fcee2b202021e Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 8 May 2024 16:03:44 +0200 Subject: [PATCH 12/65] attempt at fixing bazel test issues (#2769) --- .../primitives/reduction/test/reduction_dpc.cpp | 10 ++++++---- .../reduction/test/reduction_rm_random_dpc.cpp | 16 ++++++++-------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp index da3def6c6b3..cc0c26ceee0 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_dpc.cpp @@ -307,8 +307,8 @@ class infinite_sum_test_random : public reduction_test_random { } void generate_input(bool maxval) { - double mininp = 0.9 * (double)maxval * std::numeric_limits<double>::max() - 1.0f; - double maxinp = (double)maxval * std::numeric_limits<double>::max(); + float mininp = 0.9 * (float)maxval * std::numeric_limits<float>::max() - 1.0f; + float maxinp = (float)maxval * std::numeric_limits<float>::max(); const auto train_dataframe = GENERATE_DATAFRAME( te::dataframe_builder{ this->height_, this->width_ }.fill_uniform(mininp, maxinp)); this->input_table_ = train_dataframe.get_table(this->get_homogen_table_id()); @@ -319,7 +319,8 @@ TEMPLATE_LIST_TEST_M(infinite_sum_test_random, "Randomly filled reduction with infinite sum", "[reduction][rm][small]", finiteness_types) { - SKIP_IF(this->not_float64_friendly()); + // Temporary workaround: skip tests on architectures that do not support native float64 + SKIP_IF(!this->get_policy().has_native_float64()); const bool use_infnan = GENERATE(0, 1); this->generate(use_infnan); SECTION("Reduce Row-Major by Rows") { @@ -368,7 +369,8 @@ TEMPLATE_LIST_TEST_M(single_infinite_test_random, "Randomly filled reduction with single inf or nan", "[reduction][rm][small]", finiteness_types) { - SKIP_IF(this->not_float64_friendly()); + // Temporary workaround: skip tests on architectures that do not support native float64 + SKIP_IF(!this->get_policy().has_native_float64()); const bool use_infnan = GENERATE(0, 1); this->generate(use_infnan); SECTION("Reduce Row-Major by Rows") { diff --git a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp index b443a284eac..f6e719d8fdb 100644 --- a/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/reduction/test/reduction_rm_random_dpc.cpp @@ -312,8 +312,8 @@ TEMPLATE_LIST_TEST_M(infinite_sum_rm_test_random, "Randomly filled Row-Major Row-Wise reduction with infinite sum", "[reduction][rm][small]", finiteness_types) { - SKIP_IF(this->not_float64_friendly()); - + // Temporary workaround: skip tests on architectures that do not support native float64 + SKIP_IF(!this->get_policy().has_native_float64()); const bool use_infnan = GENERATE(0, 1); this->generate(use_infnan); SKIP_IF(this->should_be_skipped()); @@ -326,8 +326,8 @@ TEMPLATE_LIST_TEST_M(infinite_sum_rm_test_random, "Randomly filled Row-Major Col-Wise reduction with infinite sum", "[reduction][rm][small]", finiteness_types) { - SKIP_IF(this->not_float64_friendly()); - + // Temporary workaround: skip tests on architectures that do not support native float64 + SKIP_IF(!this->get_policy().has_native_float64()); const bool use_infnan = GENERATE(0, 1); this->generate(use_infnan);
SKIP_IF(this->should_be_skipped()); @@ -368,8 +368,8 @@ TEMPLATE_LIST_TEST_M(single_infinite_rm_test_random, "Randomly filled Row-Major Row-Wise reduction with single inf or nan", "[reduction][rm][small]", finiteness_types) { - SKIP_IF(this->not_float64_friendly()); - + // Temporary workaround: skip tests on architectures that do not support native float64 + SKIP_IF(!this->get_policy().has_native_float64()); const bool use_infnan = GENERATE(0, 1); this->generate(use_infnan); SKIP_IF(this->should_be_skipped()); @@ -382,8 +382,8 @@ TEMPLATE_LIST_TEST_M(single_infinite_rm_test_random, "Randomly filled Row-Major Col-Wise reduction with single inf or nan", "[reduction][rm][small]", finiteness_types) { - SKIP_IF(this->not_float64_friendly()); - + // Temporary workaround: skip tests on architectures that do not support native float64 + SKIP_IF(!this->get_policy().has_native_float64()); const bool use_infnan = GENERATE(0, 1); this->generate(use_infnan); SKIP_IF(this->should_be_skipped()); From bcefbe66a0c28d74def4f5c5e7b30d2e839b8d6b Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 8 May 2024 15:47:11 +0100 Subject: [PATCH 13/65] chore(deps): update dependency catch2 to v3.6.0 (#2764) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- WORKSPACE | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 48cfa890abe..7020d6b5bae 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -108,9 +108,9 @@ onedal_repo( http_archive( name = "catch2", - url = "https://github.com/catchorg/Catch2/archive/v3.5.4.tar.gz", - sha256 = "b7754b711242c167d8f60b890695347f90a1ebc95949a045385114165d606dbb", - strip_prefix = "Catch2-3.5.4", + url = "https://github.com/catchorg/Catch2/archive/v3.6.0.tar.gz", + sha256 = "485932259a75c7c6b72d4b874242c489ea5155d17efa345eb8cc72159f49f356", + strip_prefix = "Catch2-3.6.0", ) http_archive( From 3d9e1f9243648bde74873811c1a85f7c109a62fb Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Fri, 10 May 2024 07:22:13 +0200 Subject: [PATCH 14/65] Update openssf-scorecard.yml (#2773) --- .github/workflows/openssf-scorecard.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index b02da1d0610..ed3e7cbd046 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -16,6 +16,7 @@ permissions: read-all jobs: analysis: name: Scorecard analysis + if: github.repository == 'oneapi-src/oneDAL' runs-on: ubuntu-latest permissions: # Needed to upload the results to code-scanning dashboard. 
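As a plain-C++ reference for the semantics that the finiteness reduction tests above rely on (a standalone sketch under the assumption that the logical_or binary functor pairs with the isinf/isinfornan unaries; it is not code from either patch):

#include <cmath>
#include <vector>

// logical_or reduction over the isinfornan unary: true iff any element is inf or nan.
bool reduce_isinfornan(const std::vector<float>& data) {
    bool acc = false; // init_value of the logical_or binary functor
    for (float v : data) {
        acc = acc || std::isinf(v) || std::isnan(v);
    }
    return acc;
}

// The isinf variant flags infinities only, so a lone quiet NaN must not trip it.
bool reduce_isinf(const std::vector<float>& data) {
    bool acc = false;
    for (float v : data) {
        acc = acc || std::isinf(v);
    }
    return acc;
}
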
From ea49c384161303cf7510681503dfa079bd9a401a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 10 May 2024 11:36:07 +0100 Subject: [PATCH 15/65] chore(deps): update ossf/scorecard-action action to v2.3.3 (#2774) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index ed3e7cbd046..75b88c49be9 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -31,7 +31,7 @@ jobs: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 + uses: ossf/scorecard-action@dc50aa9510b46c811795eb24b2f1ba02a914e534 # v2.3.3 with: results_file: results.sarif results_format: sarif From 940621298804aa5e2220b0457b2e793250e23cd1 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 10 May 2024 11:43:42 +0100 Subject: [PATCH 16/65] chore(deps): update actions/checkout action to v4.1.5 (#2761) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index 75b88c49be9..d640c74b50f 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -26,7 +26,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f # v4.1.3 + uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 with: persist-credentials: false From b44775071094330fc6d744cb31bb54fcc66fbb2a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 10 May 2024 11:44:50 +0100 Subject: [PATCH 17/65] chore(deps): update actions/checkout digest to 0ad4b8f (#2771) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/docker-validation-ci.yml | 2 +- .github/workflows/docker-validation-nightly.yml | 2 +- .github/workflows/renovate-validation.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-validation-ci.yml b/.github/workflows/docker-validation-ci.yml index ce4c16c8bbc..ab4455e51d0 100644 --- a/.github/workflows/docker-validation-ci.yml +++ b/.github/workflows/docker-validation-ci.yml @@ -17,6 +17,6 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Build docker image run: docker build . --file dev/docker/onedal-dev.Dockerfile --tag onedal-dev:latest diff --git a/.github/workflows/docker-validation-nightly.yml b/.github/workflows/docker-validation-nightly.yml index ecaab5ed10f..8794f9963b9 100644 --- a/.github/workflows/docker-validation-nightly.yml +++ b/.github/workflows/docker-validation-nightly.yml @@ -30,7 +30,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Build docker image run: docker build . 
--file dev/docker/onedal-dev.Dockerfile --tag onedal-dev:latest - name: Building oneDAL diff --git a/.github/workflows/renovate-validation.yml b/.github/workflows/renovate-validation.yml index 3241cb94564..41b5666c90d 100644 --- a/.github/workflows/renovate-validation.yml +++ b/.github/workflows/renovate-validation.yml @@ -23,7 +23,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 - name: Validate uses: suzuki-shunsuke/github-action-renovate-config-validator@v1.0.1 with: From f7f2754adf3cf450047c569c0fa1df4481f4c4cf Mon Sep 17 00:00:00 2001 From: Aleksei Khomenko Date: Thu, 16 May 2024 18:42:19 +0200 Subject: [PATCH 18/65] docs: update `README.md` with UXL identity (#2784) --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 25de32bd284..8cd4e957622 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ * limitations under the License. *******************************************************************************/--> -# oneAPI Data Analytics Library +# oneAPI Data Analytics Library [Installation](#installation)   |   [Documentation](#documentation)   |   [Support](#support)   |   [Examples](#examples)   |   [How to Contribute](CONTRIBUTING.md)    @@ -25,7 +25,7 @@ oneAPI Data Analytics Library (oneDAL) is a powerful machine learning library th The library implements classical machine learning algorithms. The boost in their performance is achieved by leveraging the capabilities of Intel® hardware. -oneDAL is part of [oneAPI](https://oneapi.io). The current branch implements version 1.1 of oneAPI Specification. +oneDAL is part of the [UXL Foundation](http://www.uxlfoundation.org) and is an implementation of the [oneAPI specification](https://spec.oneapi.io) for the oneDAL component. ## Usage @@ -93,6 +93,15 @@ oneDAL K-Means fit, strong scaling result | oneDAL K-Means fit, weak scaling res >*Technical details: FPType: float32; HW: Intel Xeon Processor E5-2698 v3 @2.3GHz, 2 sockets, 16 cores per socket; SW: Intel® DAAL (2019.3), MPI4Py (3.0.0), Intel® Distribution Of Python (IDP) 3.6.8; Details available in the article https://arxiv.org/abs/1909.11822* +## Governance + +The oneDAL project is governed by the UXL Foundation, and you can get involved in this project in multiple ways. It is possible to join the [AI Special Interest Group (SIG)](https://github.com/uxlfoundation/foundation/tree/main/ai) meetings, where the group discusses and demonstrates work using this project. Members can also join the Open Source and Specification Working Group meetings. + +You can also join the mailing lists for the [UXL Foundation](https://lists.uxlfoundation.org/g/main/subgroups) to be informed when meetings are happening and receive the latest information and discussions. + +You can contribute to this project and also contribute to the specification for this project; read the [CONTRIBUTING](CONTRIBUTING.md) page for more information.
+ + ## Support Ask questions and engage in discussions with oneDAL developers, contributors, and other users through the following channels: From fb35edcab66829769b31951e03ca411659983fec Mon Sep 17 00:00:00 2001 From: Robert Cohn Date: Thu, 16 May 2024 21:30:37 -0400 Subject: [PATCH 19/65] slack notification when PR is labelled RFC (#2778) * slack notification when PR is labelled RFC Signed-off-by: Robert Cohn * chore(workflows): add slack notification for RFCs --------- Signed-off-by: Robert Cohn Co-authored-by: homksei --- .github/workflows/slack-pr-notification.yml | 27 +++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/workflows/slack-pr-notification.yml diff --git a/.github/workflows/slack-pr-notification.yml b/.github/workflows/slack-pr-notification.yml new file mode 100644 index 00000000000..09e1c854ada --- /dev/null +++ b/.github/workflows/slack-pr-notification.yml @@ -0,0 +1,27 @@ +name: Slack PR Notification +on: + # use pull_request_target to run on PRs from forks and have access to secrets + pull_request_target: + types: [labeled] + +env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + channel: "onedal" + +permissions: + pull-requests: read + +jobs: + rfc: + name: RFC Notification + runs-on: ubuntu-latest + # Trigger when labeling a PR with "RFC" + if: | + github.event.action == 'labeled' && + contains(toJson(github.event.pull_request.labels.*.name), '"RFC"') + steps: + - name: Notify Slack + uses: slackapi/slack-github-action@70cd7be8e40a46e8b0eced40b0de447bdb42f68e # v1.26.0 + with: + channel-id: ${{ env.channel }} + slack-message: "${{ github.actor }} posted an RFC: ${{ github.event.pull_request.title }}. URL: ${{ github.event.pull_request.html_url }}" From 5c3a22c37804ed80065d056211d5eb5c59b33b9c Mon Sep 17 00:00:00 2001 From: Victoriya Fedotova Date: Fri, 17 May 2024 10:04:40 +0200 Subject: [PATCH 20/65] Add system related parameters to performance related algorithms' parameters (#2724) * Implemented the 'system_parameters' class that holds system-related performance parameters. * All the performance-related hyperparameter classes, such as 'covariance::detail::compute_parameters', are now derived from the system_parameters class because in the future those system-related parameters might be chosen on a per-algorithm basis.
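For illustration, a minimal sketch of how the new class could be exercised; the harness below is not part of this patch and assumes nothing beyond the public interface introduced in the diffs that follow (system_parameters, dump(), get_top_enabled_cpu_extension(), get_max_number_of_threads()):

#include <cstdint>
#include <iostream>

#include "oneapi/dal/detail/parameters/system_parameters.hpp"

int main() {
    // Software-enabled parameters are gathered once, at construction time.
    oneapi::dal::detail::system_parameters params;

    // Host-related getters introduced by this patch.
    const auto extension = params.get_top_enabled_cpu_extension();
    const std::uint32_t threads = params.get_max_number_of_threads();

    // dump() renders every stored parameter as "name : value; ".
    std::cout << params.dump() << std::endl;

    return (threads > 0 && extension != oneapi::dal::detail::cpu_extension::none) ? 0 : 1;
}

In a SYCL-enabled build (ONEDAL_DATA_PARALLEL), the dump(queue) overload additionally reports the device-side max_workgroup_size.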
--- cpp/oneapi/dal/BUILD | 3 + cpp/oneapi/dal/algo/covariance/BUILD | 2 + .../dal/algo/covariance/compute_types.hpp | 3 +- .../covariance/test/compute_parameters.cpp | 25 +++++ cpp/oneapi/dal/algo/decision_forest/BUILD | 2 + .../dal/algo/decision_forest/infer_types.hpp | 3 +- cpp/oneapi/dal/algo/linear_regression/BUILD | 2 + .../algo/linear_regression/train_types.hpp | 3 +- cpp/oneapi/dal/detail/cpu.cpp | 6 +- cpp/oneapi/dal/detail/cpu.hpp | 1 + cpp/oneapi/dal/detail/cpu_info.cpp | 12 +-- cpp/oneapi/dal/detail/cpu_info_impl.cpp | 15 +-- cpp/oneapi/dal/detail/cpu_info_impl.hpp | 4 +- cpp/oneapi/dal/detail/parameters/BUILD | 15 +++ .../detail/parameters/system_parameters.cpp | 52 +++++++++++ .../detail/parameters/system_parameters.hpp | 70 ++++++++++++++ .../parameters/system_parameters_impl.cpp | 93 +++++++++++++++++++ .../parameters/system_parameters_impl.hpp | 56 +++++++++++ makefile | 24 ++--- 19 files changed, 361 insertions(+), 30 deletions(-) create mode 100644 cpp/oneapi/dal/detail/parameters/BUILD create mode 100644 cpp/oneapi/dal/detail/parameters/system_parameters.cpp create mode 100644 cpp/oneapi/dal/detail/parameters/system_parameters.hpp create mode 100644 cpp/oneapi/dal/detail/parameters/system_parameters_impl.cpp create mode 100644 cpp/oneapi/dal/detail/parameters/system_parameters_impl.hpp diff --git a/cpp/oneapi/dal/BUILD b/cpp/oneapi/dal/BUILD index 0bd5a48269d..ff6d770cc7c 100644 --- a/cpp/oneapi/dal/BUILD +++ b/cpp/oneapi/dal/BUILD @@ -65,6 +65,7 @@ dal_public_includes( ":optional", "@onedal//cpp/oneapi/dal/detail/mpi", "@onedal//cpp/oneapi/dal/detail/ccl", + "@onedal//cpp/oneapi/dal/detail/parameters", "@onedal//cpp/oneapi/dal/algo:parameters", ], ) @@ -84,6 +85,7 @@ dal_static_lib( dal_deps = [ ":static", "@onedal//cpp/oneapi/dal/algo:parameters", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) @@ -102,6 +104,7 @@ dal_dynamic_lib( dal_deps = [ ":dynamic", "@onedal//cpp/oneapi/dal/algo:parameters", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) diff --git a/cpp/oneapi/dal/algo/covariance/BUILD b/cpp/oneapi/dal/algo/covariance/BUILD index 2770e6fc9aa..3a5f16a283e 100644 --- a/cpp/oneapi/dal/algo/covariance/BUILD +++ b/cpp/oneapi/dal/algo/covariance/BUILD @@ -9,6 +9,7 @@ dal_module( auto = True, dal_deps = [ "@onedal//cpp/oneapi/dal:core", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) @@ -16,6 +17,7 @@ dal_module( name = "parameters", dal_deps = [ "@onedal//cpp/oneapi/dal/algo/covariance/parameters", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) diff --git a/cpp/oneapi/dal/algo/covariance/compute_types.hpp b/cpp/oneapi/dal/algo/covariance/compute_types.hpp index 5800272ffaf..33f7e3d73e0 100644 --- a/cpp/oneapi/dal/algo/covariance/compute_types.hpp +++ b/cpp/oneapi/dal/algo/covariance/compute_types.hpp @@ -17,6 +17,7 @@ #pragma once #include "oneapi/dal/algo/covariance/common.hpp" +#include "oneapi/dal/detail/parameters/system_parameters.hpp" namespace oneapi::dal::covariance { @@ -35,7 +36,7 @@ template class partial_compute_result_impl; template -class compute_parameters : public base { +class compute_parameters : public dal::detail::system_parameters { public: explicit compute_parameters(); compute_parameters(compute_parameters&&) = default; diff --git a/cpp/oneapi/dal/algo/covariance/test/compute_parameters.cpp b/cpp/oneapi/dal/algo/covariance/test/compute_parameters.cpp index 4c9a13d0baf..6563b7f26c4 100644 --- a/cpp/oneapi/dal/algo/covariance/test/compute_parameters.cpp +++ b/cpp/oneapi/dal/algo/covariance/test/compute_parameters.cpp @@ -88,4 
+88,29 @@ TEMPLATE_LIST_TEST_M(covariance_params_test, this->general_checks(input, input_data_table_id); } +TEST("can dump system-related parameters") { + detail::compute_parameters hp{}; + std::string hp_dump; +#ifdef ONEDAL_DATA_PARALLEL + DECLARE_TEST_POLICY(policy); + auto& q = policy.get_queue(); + hp_dump = hp.dump(q); +#else + hp_dump = hp.dump(); +#endif + std::cout << "System-related parameters: " << hp_dump << std::endl; + REQUIRE(hp_dump.size() > 0); +} + +TEST("can retrieve system-related parameters") { + detail::compute_parameters hp{}; + REQUIRE(static_cast(hp.get_top_enabled_cpu_extension()) >= 0); + REQUIRE(hp.get_max_number_of_threads() > 0); +#ifdef ONEDAL_DATA_PARALLEL + DECLARE_TEST_POLICY(policy); + auto& q = policy.get_queue(); + REQUIRE(hp.get_max_workgroup_size(q) > 0); +#endif +} + } // namespace oneapi::dal::covariance::test diff --git a/cpp/oneapi/dal/algo/decision_forest/BUILD b/cpp/oneapi/dal/algo/decision_forest/BUILD index 159f9351ef9..0a47587af06 100644 --- a/cpp/oneapi/dal/algo/decision_forest/BUILD +++ b/cpp/oneapi/dal/algo/decision_forest/BUILD @@ -10,6 +10,7 @@ dal_module( dal_deps = [ "@onedal//cpp/oneapi/dal:core", "@onedal//cpp/oneapi/dal/algo/decision_forest/backend:model_impl", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) @@ -17,6 +18,7 @@ dal_module( name = "parameters", dal_deps = [ "@onedal//cpp/oneapi/dal/algo/decision_forest/parameters", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) diff --git a/cpp/oneapi/dal/algo/decision_forest/infer_types.hpp b/cpp/oneapi/dal/algo/decision_forest/infer_types.hpp index fcb45b1dd45..ab962b4831d 100644 --- a/cpp/oneapi/dal/algo/decision_forest/infer_types.hpp +++ b/cpp/oneapi/dal/algo/decision_forest/infer_types.hpp @@ -18,6 +18,7 @@ #pragma once #include "oneapi/dal/algo/decision_forest/common.hpp" +#include "oneapi/dal/detail/parameters/system_parameters.hpp" namespace oneapi::dal::decision_forest { @@ -33,7 +34,7 @@ template struct infer_parameters_impl; template -class infer_parameters : public base { +class infer_parameters : public dal::detail::system_parameters { public: explicit infer_parameters(); infer_parameters(infer_parameters&&) = default; diff --git a/cpp/oneapi/dal/algo/linear_regression/BUILD b/cpp/oneapi/dal/algo/linear_regression/BUILD index e6ab83d5f19..58a3ebf1ea4 100644 --- a/cpp/oneapi/dal/algo/linear_regression/BUILD +++ b/cpp/oneapi/dal/algo/linear_regression/BUILD @@ -10,6 +10,7 @@ dal_module( dal_deps = [ "@onedal//cpp/oneapi/dal:core", "@onedal//cpp/oneapi/dal/algo/linear_regression/backend:model_impl", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) @@ -17,6 +18,7 @@ dal_module( name = "parameters", dal_deps = [ "@onedal//cpp/oneapi/dal/algo/linear_regression/parameters", + "@onedal//cpp/oneapi/dal/detail/parameters", ], ) diff --git a/cpp/oneapi/dal/algo/linear_regression/train_types.hpp b/cpp/oneapi/dal/algo/linear_regression/train_types.hpp index d0b748333c3..cc96db9ccfb 100644 --- a/cpp/oneapi/dal/algo/linear_regression/train_types.hpp +++ b/cpp/oneapi/dal/algo/linear_regression/train_types.hpp @@ -17,6 +17,7 @@ #pragma once #include "oneapi/dal/algo/linear_regression/common.hpp" +#include "oneapi/dal/detail/parameters/system_parameters.hpp" namespace oneapi::dal::linear_regression { @@ -35,7 +36,7 @@ template class partial_train_result_impl; template -class train_parameters : public base { +class train_parameters : public dal::detail::system_parameters { public: explicit train_parameters(); train_parameters(train_parameters&&) = default; diff --git 
a/cpp/oneapi/dal/detail/cpu.cpp b/cpp/oneapi/dal/detail/cpu.cpp index 8aa1b3ce531..1369da2d231 100644 --- a/cpp/oneapi/dal/detail/cpu.cpp +++ b/cpp/oneapi/dal/detail/cpu.cpp @@ -20,7 +20,8 @@ namespace oneapi::dal::detail { namespace v1 { -inline constexpr cpu_extension from_daal_cpu_type(daal::CpuType cpu) { +cpu_extension from_daal_cpu_type(int cpu_type) { + daal::CpuType cpu = static_cast<daal::CpuType>(cpu_type); switch (cpu) { #if defined(TARGET_X86_64) case daal::sse2: return cpu_extension::sse2; @@ -35,6 +36,7 @@ inline constexpr cpu_extension from_daal_cpu_type(daal::CpuType cpu) { } return cpu_extension::none; } + cpu_extension detect_top_cpu_extension() { if (!__daal_serv_cpu_extensions_available()) { #if defined(TARGET_X86_64) @@ -45,7 +47,7 @@ cpu_extension detect_top_cpu_extension() { return detail::cpu_extension::rv64; #endif } - const auto daal_cpu = (daal::CpuType)__daal_serv_cpu_detect(0); + const auto daal_cpu = __daal_serv_cpu_detect(0); return from_daal_cpu_type(daal_cpu); } diff --git a/cpp/oneapi/dal/detail/cpu.hpp b/cpp/oneapi/dal/detail/cpu.hpp index e2bae4a3566..7b6e282e006 100644 --- a/cpp/oneapi/dal/detail/cpu.hpp +++ b/cpp/oneapi/dal/detail/cpu.hpp @@ -50,6 +50,7 @@ enum class cpu_extension : uint64_t { #endif }; +cpu_extension from_daal_cpu_type(int); cpu_extension detect_top_cpu_extension(); } // namespace v1 diff --git a/cpp/oneapi/dal/detail/cpu_info.cpp b/cpp/oneapi/dal/detail/cpu_info.cpp index 67ae32e9cac..78e879e1920 100644 --- a/cpp/oneapi/dal/detail/cpu_info.cpp +++ b/cpp/oneapi/dal/detail/cpu_info.cpp @@ -31,21 +31,21 @@ namespace v1 { cpu_info::cpu_info() { #if defined(TARGET_X86_64) - impl_ = detail::pimpl<cpu_info_iface>(new cpu_info_x86()); + impl_ = detail::pimpl<cpu_info_iface>(std::make_unique<cpu_info_x86>()); #elif defined(TARGET_ARM) - impl_ = detail::pimpl<cpu_info_iface>(new cpu_info_arm()); + impl_ = detail::pimpl<cpu_info_iface>(std::make_unique<cpu_info_arm>()); #elif defined(TARGET_RISCV64) - impl_ = detail::pimpl<cpu_info_iface>(new cpu_info_riscv64()); + impl_ = detail::pimpl<cpu_info_iface>(std::make_unique<cpu_info_riscv64>()); #endif } cpu_info::cpu_info(const cpu_extension cpu_extension_) { #if defined(TARGET_X86_64) - impl_ = detail::pimpl<cpu_info_iface>(new cpu_info_x86(cpu_extension_)); + impl_ = detail::pimpl<cpu_info_iface>(std::make_unique<cpu_info_x86>(cpu_extension_)); #elif defined(TARGET_ARM) - impl_ = detail::pimpl<cpu_info_iface>(new cpu_info_arm(cpu_extension_)); + impl_ = detail::pimpl<cpu_info_iface>(std::make_unique<cpu_info_arm>(cpu_extension_)); #elif defined(TARGET_RISCV64) - impl_ = detail::pimpl<cpu_info_iface>(new cpu_info_riscv64(cpu_extension_)); + impl_ = detail::pimpl<cpu_info_iface>(std::make_unique<cpu_info_riscv64>(cpu_extension_)); #endif } diff --git a/cpp/oneapi/dal/detail/cpu_info_impl.cpp b/cpp/oneapi/dal/detail/cpu_info_impl.cpp index e15df79169f..8af80578a0c 100644 --- a/cpp/oneapi/dal/detail/cpu_info_impl.cpp +++ b/cpp/oneapi/dal/detail/cpu_info_impl.cpp @@ -69,22 +69,22 @@ cpu_extension cpu_info_impl::get_top_cpu_extension() const { } std::string cpu_info_impl::dump() const { - std::stringstream ss; - for (auto it = info_.begin(); it != info_.end(); ++it) { - ss << it->first << " : "; - print_any(it->second, ss); + std::ostringstream ss; + for (auto const& [name, value] : info_) { + ss << name << " : "; + print_any(value, ss); ss << "; "; } return std::move(ss).str(); } template <typename T> -void cpu_info_impl::print(const std::any& value, std::stringstream& ss) const { +void cpu_info_impl::print(const std::any& value, std::ostringstream& ss) const { T typed_value = std::any_cast<T>(value); ss << to_string(typed_value); } -void cpu_info_impl::print_any(const std::any& value, std::stringstream& ss) const { +void cpu_info_impl::print_any(const std::any& value, std::ostringstream& ss) const { const
std::type_info& ti = value.type(); if (ti == typeid(cpu_extension)) { print(value, ss); @@ -92,6 +92,9 @@ void cpu_info_impl::print_any(const std::any& value, std::stringstream& ss) cons else if (ti == typeid(cpu_vendor)) { print(value, ss); } + else { + throw unimplemented{ dal::detail::error_messages::unsupported_data_type() }; + } } } // namespace v1 diff --git a/cpp/oneapi/dal/detail/cpu_info_impl.hpp b/cpp/oneapi/dal/detail/cpu_info_impl.hpp index 2f5c7ea711f..76b395b316c 100644 --- a/cpp/oneapi/dal/detail/cpu_info_impl.hpp +++ b/cpp/oneapi/dal/detail/cpu_info_impl.hpp @@ -40,9 +40,9 @@ class cpu_info_impl : public cpu_info_iface { std::map info_; template - void print(const std::any& value, std::stringstream& ss) const; + void print(const std::any& value, std::ostringstream& ss) const; - void print_any(const std::any& value, std::stringstream& ss) const; + void print_any(const std::any& value, std::ostringstream& ss) const; }; } // namespace v1 diff --git a/cpp/oneapi/dal/detail/parameters/BUILD b/cpp/oneapi/dal/detail/parameters/BUILD new file mode 100644 index 00000000000..e0d6c7373ca --- /dev/null +++ b/cpp/oneapi/dal/detail/parameters/BUILD @@ -0,0 +1,15 @@ +load( + "@onedal//dev/bazel:dal.bzl", + "dal_module", + "dal_test_suite", +) + +package(default_visibility = ["//visibility:public"]) + +dal_module( + name = "parameters", + auto = True, + dal_deps = [ + "@onedal//cpp/oneapi/dal:common", + ] +) diff --git a/cpp/oneapi/dal/detail/parameters/system_parameters.cpp b/cpp/oneapi/dal/detail/parameters/system_parameters.cpp new file mode 100644 index 00000000000..8216564d939 --- /dev/null +++ b/cpp/oneapi/dal/detail/parameters/system_parameters.cpp @@ -0,0 +1,52 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/detail/parameters/system_parameters.hpp" + +namespace oneapi::dal { + +namespace detail { + +system_parameters::system_parameters() + : impl_(detail::pimpl(std::make_unique())) { +} + +cpu_extension system_parameters::get_top_enabled_cpu_extension() const { + return impl_->get_top_enabled_cpu_extension(); +} + +std::uint32_t system_parameters::get_max_number_of_threads() const { + return impl_->get_max_number_of_threads(); +} + +std::string system_parameters::dump() const { + return impl_->dump(); +} + +#ifdef ONEDAL_DATA_PARALLEL + +std::uint32_t system_parameters::get_max_workgroup_size(sycl::queue& queue) const { + return impl_->get_max_workgroup_size(queue); +} + +std::string system_parameters::dump(sycl::queue& queue) const { + return impl_->dump(queue); +} + +#endif + +} // namespace detail +} // namespace oneapi::dal diff --git a/cpp/oneapi/dal/detail/parameters/system_parameters.hpp b/cpp/oneapi/dal/detail/parameters/system_parameters.hpp new file mode 100644 index 00000000000..b173edbfbf7 --- /dev/null +++ b/cpp/oneapi/dal/detail/parameters/system_parameters.hpp @@ -0,0 +1,70 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include + +#include "oneapi/dal/detail/common.hpp" +#include "oneapi/dal/detail/parameters/system_parameters_impl.hpp" + +namespace oneapi::dal { + +namespace detail { + +/// Stores system-related parameters that affect the performance of the algorithms. +/// Those parameters can differ from the `get_global_context().get_cpu_info()`. +/// +/// `cpu_info` reports the parameters available in hardware, where `system_parameters` +/// are the software-enabled parameters that can differ from `cpu_info`. +class system_parameters : public base { +public: + /// Creates a new default `system_parameters` instance. + explicit system_parameters(); + + /// Host related parameters. + + /// Top enabled CPU instruction set. + cpu_extension get_top_enabled_cpu_extension() const; + + /// Maximal number of threads available to the algorithm. + std::uint32_t get_max_number_of_threads() const; + +#ifdef ONEDAL_DATA_PARALLEL + /// Device related parameters. + + /// Maximal SYCL workgroup size on the device. + /// + /// @param queue The SYCL* queue object + std::uint32_t get_max_workgroup_size(sycl::queue& queue) const; +#endif + + /// Logs host parameters in the format: name_1: value_1; ... ; name_N: value_N. + std::string dump() const; + +#ifdef ONEDAL_DATA_PARALLEL + /// Logs host and device parameters in the format: name_1: value_1; ... ; name_N: value_N. 
+ /// + /// @param queue The SYCL* queue object + std::string dump(sycl::queue& queue) const; +#endif + +private: + detail::pimpl impl_; +}; + +} // namespace detail +} // namespace oneapi::dal diff --git a/cpp/oneapi/dal/detail/parameters/system_parameters_impl.cpp b/cpp/oneapi/dal/detail/parameters/system_parameters_impl.cpp new file mode 100644 index 00000000000..14219a45226 --- /dev/null +++ b/cpp/oneapi/dal/detail/parameters/system_parameters_impl.cpp @@ -0,0 +1,93 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/backend/common.hpp" +#include "oneapi/dal/detail/cpu_info_impl.hpp" +#include "oneapi/dal/detail/error_messages.hpp" +#include "oneapi/dal/detail/parameters/system_parameters_impl.hpp" +#include +#include + +#include + +namespace oneapi::dal::detail { +namespace v1 { + +system_parameters_impl::system_parameters_impl() { + using daal::services::Environment; + Environment* env = Environment::getInstance(); + sys_info_["top_enabled_cpu_extension"] = + from_daal_cpu_type(DAAL_KERNEL_BUILD_MAX_INSTRUCTION_SET_ID); + sys_info_["max_number_of_threads"] = static_cast(env->getNumberOfThreads()); +} + +cpu_extension system_parameters_impl::get_top_enabled_cpu_extension() const { + const auto entry = sys_info_.find("top_enabled_cpu_extension"); + if (entry == sys_info_.end()) { + throw invalid_argument{ error_messages::invalid_key() }; + } + return std::any_cast(entry->second); +} + +std::uint32_t system_parameters_impl::get_max_number_of_threads() const { + const auto entry = sys_info_.find("max_number_of_threads"); + if (entry == sys_info_.end()) { + throw invalid_argument{ error_messages::invalid_key() }; + } + return std::any_cast(entry->second); +} + +void system_parameters_impl::print_any(const std::any& value, std::ostringstream& ss) const { + const std::type_info& ti = value.type(); + if (ti == typeid(cpu_extension)) { + ss << to_string(std::any_cast(value)); + } + else if (ti == typeid(std::uint32_t)) { + ss << std::any_cast(value); + } + else { + throw unimplemented{ dal::detail::error_messages::unsupported_data_type() }; + } +} + +std::string system_parameters_impl::dump() const { + std::ostringstream ss; + for (auto const& [name, value] : sys_info_) { + ss << name << " : "; + print_any(value, ss); + ss << "; "; + } + return std::move(ss).str(); +} + +#ifdef ONEDAL_DATA_PARALLEL + +std::uint32_t system_parameters_impl::get_max_workgroup_size(sycl::queue& queue) const { + return dal::backend::device_max_wg_size(queue); +} + +std::string system_parameters_impl::dump(sycl::queue& queue) const { + std::ostringstream ss; + ss << "max_workgroup_size" + << " : " << get_max_workgroup_size(queue) << "; "; + ss << dump(); + return std::move(ss).str(); +} + +#endif + +} // namespace v1 +} // namespace oneapi::dal::detail diff --git 
a/cpp/oneapi/dal/detail/parameters/system_parameters_impl.hpp b/cpp/oneapi/dal/detail/parameters/system_parameters_impl.hpp new file mode 100644 index 00000000000..57da24d7bc6 --- /dev/null +++ b/cpp/oneapi/dal/detail/parameters/system_parameters_impl.hpp @@ -0,0 +1,56 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/detail/cpu.hpp" + +#ifdef ONEDAL_DATA_PARALLEL +#include +#endif + +#include +#include + +namespace oneapi::dal::detail { +namespace v1 { + +class system_parameters_impl { +public: + explicit system_parameters_impl(); + + cpu_extension get_top_enabled_cpu_extension() const; + std::uint32_t get_max_number_of_threads() const; + +#ifdef ONEDAL_DATA_PARALLEL + std::uint32_t get_max_workgroup_size(sycl::queue& queue) const; +#endif + + std::string dump() const; + +#ifdef ONEDAL_DATA_PARALLEL + std::string dump(sycl::queue& queue) const; +#endif + +private: + std::map sys_info_; + + void print_any(const std::any& value, std::ostringstream& ss) const; +}; + +} // namespace v1 +using v1::system_parameters_impl; +} // namespace oneapi::dal::detail diff --git a/makefile b/makefile index 1ca3cd701f7..1006bfb00aa 100644 --- a/makefile +++ b/makefile @@ -563,14 +563,16 @@ ONEAPI.srcdirs.base := $(ONEAPI.srcdir) \ $(addprefix $(ONEAPI.srcdir)/io/, $(ONEAPI.IO)) ONEAPI.srcdirs.detail := $(foreach x,$(ONEAPI.srcdirs.base),$(shell find $x -maxdepth 1 -type d -name detail)) ONEAPI.srcdirs.backend := $(foreach x,$(ONEAPI.srcdirs.base),$(shell find $x -maxdepth 1 -type d -name backend)) -ONEAPI.srcdirs.parameters := $(foreach x,$(ONEAPI.srcdirs.base),$(shell find $x -maxdepth 1 -type d -name parameters)) +ONEAPI.srcdirs.parameters := $(ONEAPI.srcdir)/detail/parameters \ + $(foreach x,$(ONEAPI.srcdirs.base),$(shell find $x -maxdepth 1 -type d -name parameters)) ONEAPI.srcdirs := $(ONEAPI.srcdirs.base) $(ONEAPI.srcdirs.detail) $(ONEAPI.srcdirs.backend) $(ONEAPI.srcdirs.parameters) -ONEAPI.srcs.all.exclude := ! -path "*_test.*" ! -path "*/test/*" +ONEAPI.srcs.all.exclude := ! -path "*_test.*" ! -path "*/test/*" ! -path "*/detail/parameters/*" +ONEAPI.srcs.parameters.exclude := ! -path "*_test.*" ! 
-path "*/test/*" ONEAPI.srcs.all := $(foreach x,$(ONEAPI.srcdirs.base),$(shell find $x -maxdepth 1 -type f -name "*.cpp" $(ONEAPI.srcs.all.exclude))) \ $(foreach x,$(ONEAPI.srcdirs.detail),$(shell find $x -type f -name "*.cpp" $(ONEAPI.srcs.all.exclude))) \ $(foreach x,$(ONEAPI.srcdirs.backend),$(shell find $x -type f -name "*.cpp" $(ONEAPI.srcs.all.exclude))) \ - $(foreach x,$(ONEAPI.srcdirs.parameters),$(shell find $x -type f -name "*.cpp" $(ONEAPI.srcs.all.exclude))) + $(foreach x,$(ONEAPI.srcdirs.parameters),$(shell find $x -type f -name "*.cpp" $(ONEAPI.srcs.parameters.exclude))) ONEAPI.srcs.all := $(ONEAPI.srcs.all:./%=%) ONEAPI.srcs.dpc := $(filter %_dpc.cpp,$(ONEAPI.srcs.all)) ONEAPI.srcs := $(filter-out %_dpc.cpp,$(ONEAPI.srcs.all)) @@ -705,14 +707,14 @@ $(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-DEBC) $(-EHsc) $(pedantic.op $(eval $(call update_copt_from_dispatcher_tag,$(ONEAPI.objs_y.dpc),.dpcpp)) # Filtering parameter files -PARAMETERS.objs_a.filtered := $(filter %parameters.$(o),$(ONEAPI.objs_a)) -ONEAPI.objs_a.filtered := $(filter-out %parameters.$(o),$(ONEAPI.objs_a)) -PARAMETERS.objs_y.filtered := $(filter %parameters.$(o),$(ONEAPI.objs_y)) -ONEAPI.objs_y.filtered := $(filter-out %parameters.$(o),$(ONEAPI.objs_y)) -PARAMETERS.objs_a.dpc.filtered := $(filter %parameters.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_a.dpc)) -ONEAPI.objs_a.dpc.filtered := $(filter-out %parameters.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_a.dpc)) -PARAMETERS.objs_y.dpc.filtered := $(filter %parameters.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_y.dpc)) -ONEAPI.objs_y.dpc.filtered := $(filter-out %parameters.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_y.dpc)) +PARAMETERS.objs_a.filtered := $(filter %parameters.$(o) %parameters_impl.$(o),$(ONEAPI.objs_a)) +ONEAPI.objs_a.filtered := $(filter-out %parameters.$(o) %parameters_impl.$(o),$(ONEAPI.objs_a)) +PARAMETERS.objs_y.filtered := $(filter %parameters.$(o) %parameters_impl.$(o),$(ONEAPI.objs_y)) +ONEAPI.objs_y.filtered := $(filter-out %parameters.$(o) %parameters_impl.$(o),$(ONEAPI.objs_y)) +PARAMETERS.objs_a.dpc.filtered := $(filter %parameters.$(o) %parameters_impl.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_a.dpc)) +ONEAPI.objs_a.dpc.filtered := $(filter-out %parameters.$(o) %parameters_impl.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_a.dpc)) +PARAMETERS.objs_y.dpc.filtered := $(filter %parameters.$(o) %parameters_impl.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_y.dpc)) +ONEAPI.objs_y.dpc.filtered := $(filter-out %parameters.$(o) %parameters_impl.$(o) %parameters_dpc.$(o),$(ONEAPI.objs_y.dpc)) # Actual compilation $(foreach x,$(ONEAPI.objs_a.filtered),$(eval $(call .ONEAPI.compile,$x,$(ONEAPI.tmpdir_a),C))) From 908955d2f1de26389b0c647cfafdb92da52e22b4 Mon Sep 17 00:00:00 2001 From: Dhanus M Lal <73832063+DhanusML@users.noreply.github.com> Date: Fri, 17 May 2024 13:51:03 +0530 Subject: [PATCH 21/65] Bugfix in daal::data_management::features::internal::getIndexNumType function (#2776) getIndexNumType is used by other functions that perform type conversions. This function was disabled on non-x86_64 devices (when __x86_64__ was not defined). The conditions in the macro were modified to enable it on aarch64 and riscv64 as well. This change fixes the following examples that were failing on aarch64: 1. assoc_rules_apriori_batch 2. cd_dense_batch 3. cor_dense_distr 4. cor_dense_online 5. cov_dense_distr 6. cov_dense_online 7. elastic_net_dense_batch 8. lasso_reg_dense_batch 9. low_order_moms_csr_distr 10. low_order_moms_csr_online 11. low_order_moms_dense_distr 12.
low_order_moms_dense_online 13. pivoted_qr_dense_batch Signed-off-by: Dhanus M Lal --- .../data_management/features/internal/helpers.h | 3 ++- examples/daal/cpp/target_excludes.cmake | 14 -------------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/cpp/daal/include/data_management/features/internal/helpers.h b/cpp/daal/include/data_management/features/internal/helpers.h index 6826da1588b..f26ccfa8284 100644 --- a/cpp/daal/include/data_management/features/internal/helpers.h +++ b/cpp/daal/include/data_management/features/internal/helpers.h @@ -1,6 +1,7 @@ /* file: helpers.h */ /******************************************************************************* * Copyright 2014 Intel Corporation +* Copyright contributors to the oneDAL project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -225,7 +226,7 @@ inline IndexNumType getIndexNumType() } #endif -#if !(defined(_WIN32) || defined(_WIN64)) && defined(__x86_64__) +#if !(defined(_WIN32) || defined(_WIN64)) && (defined(__x86_64__) || defined(TARGET_ARM) || defined(TARGET_RISCV64)) template <> inline IndexNumType getIndexNumType() { diff --git a/examples/daal/cpp/target_excludes.cmake b/examples/daal/cpp/target_excludes.cmake index 2b338146cee..52947f3cbe9 100644 --- a/examples/daal/cpp/target_excludes.cmake +++ b/examples/daal/cpp/target_excludes.cmake @@ -42,30 +42,16 @@ elseif((CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") AND (CMAKE_C_COMPILER MATCHES "gcc")) set(EXCLUDE_LIST ${EXCLUDE_LIST} - "assoc_rules_apriori_batch" - "cd_dense_batch" "cholesky_dense_batch" "cor_csr_distr" "cor_csr_online" - "cor_dense_distr" - "cor_dense_online" "cov_csr_distr" "cov_csr_online" - "cov_dense_distr" - "cov_dense_online" - "elastic_net_dense_batch" "enable_thread_pinning" - "lasso_reg_dense_batch" "lin_reg_metrics_dense_batch" "lin_reg_qr_dense_batch" "lin_reg_qr_dense_online" - "low_order_moms_csr_distr" - "low_order_moms_csr_online" - "low_order_moms_dense_distr" - "low_order_moms_dense_online" "out_detect_mult_dense_batch" - "pca_metrics_dense_batch" - "pivoted_qr_dense_batch" ) elseif((CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64") AND (CMAKE_C_COMPILER MATCHES "clang")) From d6f4dc362d89f4061e36b620d8bf42e02614a9d2 Mon Sep 17 00:00:00 2001 From: Khalil Date: Tue, 21 May 2024 15:04:38 +0200 Subject: [PATCH 22/65] Ridge Regression support in oneapi (#2743) --- ...ssion_train_dense_normeq_batch_fpt_cpu.cpp | 2 +- ...ssion_train_dense_normeq_batch_fpt_cpu.cpp | 2 +- ...sion_train_dense_normeq_online_fpt_cpu.cpp | 2 +- .../algo/linear_regression/backend/cpu/BUILD | 1 + .../cpu/finalize_train_kernel_norm_eq.cpp | 72 +++++++---- .../cpu/partial_train_kernel_norm_eq.cpp | 8 +- .../backend/cpu/train_kernel_norm_eq.cpp | 76 ++++++----- .../gpu/finalize_train_kernel_norm_eq_dpc.cpp | 19 ++- .../linear_regression/backend/gpu/misc.hpp | 66 ++++++++++ .../gpu/partial_train_kernel_norm_eq_dpc.cpp | 16 ++- .../backend/gpu/train_kernel_norm_eq_dpc.cpp | 18 ++- .../dal/algo/linear_regression/common.cpp | 12 ++ .../dal/algo/linear_regression/common.hpp | 20 +++ .../dal/algo/linear_regression/test/batch.cpp | 10 +- .../algo/linear_regression/test/fixture.hpp | 122 ++++++++++++++---- .../algo/linear_regression/test/online.cpp | 10 +- .../dal/algo/linear_regression/test/spmd.cpp | 2 +- .../test/train_parameters.cpp | 2 +- 18 files changed, 357 insertions(+), 103 deletions(-) create mode 100644 cpp/oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp diff --git 
a/cpp/daal/src/algorithms/linear_regression/linear_regression_train_dense_normeq_batch_fpt_cpu.cpp b/cpp/daal/src/algorithms/linear_regression/linear_regression_train_dense_normeq_batch_fpt_cpu.cpp index ef9ab58c256..bb0a22d089e 100644 --- a/cpp/daal/src/algorithms/linear_regression/linear_regression_train_dense_normeq_batch_fpt_cpu.cpp +++ b/cpp/daal/src/algorithms/linear_regression/linear_regression_train_dense_normeq_batch_fpt_cpu.cpp @@ -39,7 +39,7 @@ template class BatchContainer; } namespace internal { -template class BatchKernel; +template class DAAL_EXPORT BatchKernel; } } // namespace training } // namespace linear_regression diff --git a/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_batch_fpt_cpu.cpp b/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_batch_fpt_cpu.cpp index 2c71f4d64a0..e1ed3085861 100644 --- a/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_batch_fpt_cpu.cpp +++ b/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_batch_fpt_cpu.cpp @@ -41,7 +41,7 @@ template class BatchContainer; namespace internal { -template class BatchKernel; +template class DAAL_EXPORT BatchKernel; } // namespace internal } // namespace training diff --git a/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_online_fpt_cpu.cpp b/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_online_fpt_cpu.cpp index 867f3a23b56..c82553c834a 100644 --- a/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_online_fpt_cpu.cpp +++ b/cpp/daal/src/algorithms/ridge_regression/ridge_regression_train_dense_normeq_online_fpt_cpu.cpp @@ -40,7 +40,7 @@ template class OnlineContainer; namespace internal { -template class OnlineKernel; +template class DAAL_EXPORT OnlineKernel; } // namespace internal } // namespace training diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/BUILD b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/BUILD index 55adfee47a9..7bd3d6e679d 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/BUILD +++ b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/BUILD @@ -15,5 +15,6 @@ dal_module( "@onedal//cpp/daal:core", "@onedal//cpp/daal/src/algorithms/linear_model:kernel", "@onedal//cpp/daal/src/algorithms/linear_regression:kernel", + "@onedal//cpp/daal/src/algorithms/ridge_regression:kernel" ], ) diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp index 5540641d8fd..88b1c58ccc4 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp @@ -16,6 +16,7 @@ #include #include +#include #include "oneapi/dal/backend/interop/common.hpp" #include "oneapi/dal/backend/interop/error_converter.hpp" @@ -37,21 +38,26 @@ namespace be = dal::backend; namespace pr = be::primitives; namespace interop = dal::backend::interop; namespace daal_lr = daal::algorithms::linear_regression; +namespace daal_rr = daal::algorithms::ridge_regression; -using daal_hyperparameters_t = daal_lr::internal::Hyperparameter; +using daal_lr_hyperparameters_t = daal_lr::internal::Hyperparameter; -constexpr auto daal_method = daal_lr::training::normEqDense; +constexpr auto daal_lr_method = daal_lr::training::normEqDense; +constexpr auto daal_rr_method = 
daal_rr::training::normEqDense; template -using online_kernel_t = daal_lr::training::internal::OnlineKernel; +using online_lr_kernel_t = daal_lr::training::internal::OnlineKernel; + +template +using online_rr_kernel_t = daal_rr::training::internal::OnlineKernel; template -static daal_hyperparameters_t convert_parameters(const detail::train_parameters& params) { +static daal_lr_hyperparameters_t convert_parameters(const detail::train_parameters& params) { using daal_lr::internal::HyperparameterId; const std::int64_t block = params.get_cpu_macro_block(); - daal_hyperparameters_t daal_hyperparameter; + daal_lr_hyperparameters_t daal_hyperparameter; auto status = daal_hyperparameter.set(HyperparameterId::denseUpdateStepBlockSize, block); interop::status_to_exception(status); @@ -68,36 +74,58 @@ static train_result call_daal_kernel(const context_cpu& ctx, using model_t = model; using model_impl_t = detail::model_impl; - const bool beta = desc.get_compute_intercept(); + const bool compute_intercept = desc.get_compute_intercept(); const auto response_count = input.get_partial_xty().get_row_count(); const auto ext_feature_count = input.get_partial_xty().get_column_count(); - const auto feature_count = ext_feature_count - beta; + const auto feature_count = ext_feature_count - compute_intercept; const auto betas_size = check_mul_overflow(response_count, feature_count + 1); auto betas_arr = array::zeros(betas_size); - const daal_hyperparameters_t& hp = convert_parameters(params); - auto xtx_daal_table = interop::convert_to_daal_table(input.get_partial_xtx()); auto xty_daal_table = interop::convert_to_daal_table(input.get_partial_xty()); auto betas_daal_table = interop::convert_to_daal_homogen_table(betas_arr, response_count, feature_count + 1); - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return online_kernel_t().finalizeCompute(*xtx_daal_table, - *xty_daal_table, - *xtx_daal_table, - *xty_daal_table, - *betas_daal_table, - beta, - &hp); - }); - - interop::status_to_exception(status); + double alpha = desc.get_alpha(); + if (alpha != 0.0) { + auto ridge_matrix_array = array::full(1, static_cast(alpha)); + auto ridge_matrix = interop::convert_to_daal_homogen_table(ridge_matrix_array, 1, 1); + + { + const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + constexpr auto cpu_type = interop::to_daal_cpu_type::value; + return online_rr_kernel_t().finalizeCompute(*xtx_daal_table, + *xty_daal_table, + *xtx_daal_table, + *xty_daal_table, + *betas_daal_table, + compute_intercept, + *ridge_matrix); + }); + + interop::status_to_exception(status); + } + } + else { + const daal_lr_hyperparameters_t& hp = convert_parameters(params); + + { + const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + constexpr auto cpu_type = interop::to_daal_cpu_type::value; + return online_lr_kernel_t().finalizeCompute(*xtx_daal_table, + *xty_daal_table, + *xtx_daal_table, + *xty_daal_table, + *betas_daal_table, + compute_intercept, + &hp); + }); + + interop::status_to_exception(status); + } } auto betas_table = homogen_table::wrap(betas_arr, response_count, feature_count + 1); diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/partial_train_kernel_norm_eq.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/partial_train_kernel_norm_eq.cpp index 7cac1aa47b7..d5d9f61003c 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/partial_train_kernel_norm_eq.cpp +++ 
b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/partial_train_kernel_norm_eq.cpp @@ -62,14 +62,14 @@ static partial_train_result call_daal_kernel(const context_cpu& ctx, const partial_train_input& input) { using dal::detail::check_mul_overflow; - const bool beta = desc.get_compute_intercept(); + const bool compute_intercept = desc.get_compute_intercept(); const auto feature_count = input.get_data().get_column_count(); const auto response_count = input.get_responses().get_column_count(); const daal_hyperparameters_t& hp = convert_parameters(params); - const auto ext_feature_count = feature_count + beta; + const auto ext_feature_count = feature_count + compute_intercept; const bool has_xtx_data = input.get_prev().get_partial_xtx().has_data(); if (has_xtx_data) { @@ -85,7 +85,7 @@ static partial_train_result call_daal_kernel(const context_cpu& ctx, *y_daal_table, *daal_xtx, *daal_xty, - beta, + compute_intercept, &hp); interop::status_to_exception(status); @@ -117,7 +117,7 @@ static partial_train_result call_daal_kernel(const context_cpu& ctx, *y_daal_table, *xtx_daal_table, *xty_daal_table, - beta, + compute_intercept, &hp); interop::status_to_exception(status); diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/train_kernel_norm_eq.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/train_kernel_norm_eq.cpp index dbea53a33f6..0e6e1f8cd10 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/train_kernel_norm_eq.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/train_kernel_norm_eq.cpp @@ -16,6 +16,7 @@ #include #include +#include #include "oneapi/dal/backend/interop/common.hpp" #include "oneapi/dal/backend/interop/error_converter.hpp" @@ -39,21 +40,26 @@ namespace be = dal::backend; namespace pr = be::primitives; namespace interop = dal::backend::interop; namespace daal_lr = daal::algorithms::linear_regression; +namespace daal_rr = daal::algorithms::ridge_regression; -using daal_hyperparameters_t = daal_lr::internal::Hyperparameter; +using daal_lr_hyperparameters_t = daal_lr::internal::Hyperparameter; -constexpr auto daal_method = daal_lr::training::normEqDense; +constexpr auto daal_lr_method = daal_lr::training::normEqDense; +constexpr auto daal_rr_method = daal_rr::training::normEqDense; template -using online_kernel_t = daal_lr::training::internal::OnlineKernel; +using batch_lr_kernel_t = daal_lr::training::internal::BatchKernel; + +template +using batch_rr_kernel_t = daal_rr::training::internal::BatchKernel; template -static daal_hyperparameters_t convert_parameters(const detail::train_parameters& params) { +static daal_lr_hyperparameters_t convert_parameters(const detail::train_parameters& params) { using daal_lr::internal::HyperparameterId; const std::int64_t block = params.get_cpu_macro_block(); - daal_hyperparameters_t daal_hyperparameter; + daal_lr_hyperparameters_t daal_hyperparameter; auto status = daal_hyperparameter.set(HyperparameterId::denseUpdateStepBlockSize, block); interop::status_to_exception(status); @@ -97,33 +103,41 @@ static train_result call_daal_kernel(const context_cpu& ctx, auto x_daal_table = interop::convert_to_daal_table(data); auto y_daal_table = interop::convert_to_daal_table(resp); - const daal_hyperparameters_t& hp = convert_parameters(params); - - { - const auto status = interop::call_daal_kernel(ctx, - *x_daal_table, - *y_daal_table, - *xtx_daal_table, - *xty_daal_table, - intp, - &hp); - - interop::status_to_exception(status); + double alpha = desc.get_alpha(); + if (alpha != 0.0) { + auto ridge_matrix_array = 
array::full(1, static_cast(alpha)); + auto ridge_matrix = interop::convert_to_daal_homogen_table(ridge_matrix_array, 1, 1); + + { + const auto status = + interop::call_daal_kernel(ctx, + *x_daal_table, + *y_daal_table, + *xtx_daal_table, + *xty_daal_table, + *betas_daal_table, + intp, + *ridge_matrix); + + interop::status_to_exception(status); + } } - - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return online_kernel_t().finalizeCompute(*xtx_daal_table, - *xty_daal_table, - *xtx_daal_table, - *xty_daal_table, - *betas_daal_table, - intp, - &hp); - }); - - interop::status_to_exception(status); + else { + const daal_lr_hyperparameters_t& hp = convert_parameters(params); + + { + const auto status = + interop::call_daal_kernel(ctx, + *x_daal_table, + *y_daal_table, + *xtx_daal_table, + *xty_daal_table, + *betas_daal_table, + intp, + &hp); + + interop::status_to_exception(status); + } } auto betas_table = homogen_table::wrap(betas_arr, response_count, feature_count + 1); diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp index 733bb46b0b3..d3431663249 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp @@ -27,6 +27,7 @@ #include "oneapi/dal/algo/linear_regression/backend/model_impl.hpp" #include "oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel.hpp" #include "oneapi/dal/algo/linear_regression/backend/gpu/update_kernel.hpp" +#include "oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp" namespace oneapi::dal::linear_regression::backend { @@ -47,14 +48,14 @@ static train_result call_dal_kernel(const context_gpu& ctx, auto& queue = ctx.get_queue(); - const bool beta = desc.get_compute_intercept(); + const bool compute_intercept = desc.get_compute_intercept(); constexpr auto uplo = pr::mkl::uplo::upper; constexpr auto alloc = sycl::usm::alloc::device; const auto response_count = input.get_partial_xty().get_row_count(); const auto ext_feature_count = input.get_partial_xty().get_column_count(); - const auto feature_count = ext_feature_count - beta; + const auto feature_count = ext_feature_count - compute_intercept; const pr::ndshape<2> xtx_shape{ ext_feature_count, ext_feature_count }; @@ -69,9 +70,21 @@ static train_result call_dal_kernel(const context_gpu& ctx, const auto betas_size = check_mul_overflow(response_count, feature_count + 1); auto betas_arr = array::zeros(queue, betas_size, alloc); + double alpha = desc.get_alpha(); + sycl::event ridge_event; + if (alpha != 0.0) { + ridge_event = add_ridge_penalty(queue, xtx_nd, compute_intercept, alpha); + } + auto nxtx = pr::ndarray::empty(queue, xtx_shape, alloc); auto nxty = pr::ndview::wrap_mutable(betas_arr, betas_shape); - auto solve_event = pr::solve_system(queue, beta, xtx_nd, xty_nd, nxtx, nxty, {}); + auto solve_event = pr::solve_system(queue, + compute_intercept, + xtx_nd, + xty_nd, + nxtx, + nxty, + { ridge_event }); sycl::event::wait_and_throw({ solve_event }); auto betas = homogen_table::wrap(betas_arr, response_count, feature_count + 1); diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp new file mode 100644 index 00000000000..5ad5ba647ec --- /dev/null +++ 
b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp @@ -0,0 +1,66 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/detail/profiler.hpp" +#include "oneapi/dal/backend/primitives/ndarray.hpp" + +namespace oneapi::dal::linear_regression::backend { + +#ifdef ONEDAL_DATA_PARALLEL + +using alloc = sycl::usm::alloc; +namespace bk = dal::backend; +namespace pr = dal::backend::primitives; + +/// Adds ridge penalty to the diagonal elements of the xtx matrix + +/// +/// @tparam Float Floating-point type used to perform computations +/// +/// @param[in] q The SYCL queue +/// @param[in] xtx The input matrix to which the ridge penalty is added +/// @param[in] compute_intercept Flag indicating whether the intercept term is used in the matrix, extending it with extra dimension if true +/// @param[in] alpha The regularization parameter +/// @param[in] deps Events indicating the availability of the `xtx` for reading or writing +/// +/// @return A SYCL event indicating the availability of the matrix for reading and writing +template <typename Float> +sycl::event add_ridge_penalty(sycl::queue& q, + const pr::ndarray<Float, 2>& xtx, + bool compute_intercept, + Float alpha, + const bk::event_vector& deps = {}) { + ONEDAL_ASSERT(xtx.has_mutable_data()); + ONEDAL_ASSERT(bk::is_known_usm(q, xtx.get_mutable_data())); + ONEDAL_ASSERT(xtx.get_dimension(0) == xtx.get_dimension(1)); + + Float* xtx_ptr = xtx.get_mutable_data(); + std::int64_t feature_count = xtx.get_dimension(0); + std::int64_t original_feature_count = feature_count - compute_intercept; + + return q.submit([&](sycl::handler& cgh) { + const auto range = bk::make_range_1d(original_feature_count); + cgh.depends_on(deps); + std::int64_t step = feature_count + 1; + cgh.parallel_for(range, [=](sycl::id<1> idx) { + xtx_ptr[idx * step] += alpha; + }); + }); +} + +} // namespace oneapi::dal::linear_regression::backend + +#endif // ONEDAL_DATA_PARALLEL
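For reference only (not part of the patch): with the intercept stored as the trailing dimension, add_ridge_penalty turns the normal equations (X^T X) beta = X^T y into (X^T X + alpha * I') beta = X^T y, where I' is the identity matrix with a zero in the intercept position. A host-side C++ sketch of the same update, assuming a dense row-major n x n matrix (the name add_ridge_penalty_host is hypothetical):

// Host-side sketch of the device kernel above: add `alpha` to each
// penalized diagonal element of an (n x n) row-major matrix, leaving the
// trailing intercept dimension unpenalized.
#include <cstdint>
#include <vector>

void add_ridge_penalty_host(std::vector<double>& xtx,
                            std::int64_t n,
                            bool compute_intercept,
                            double alpha) {
    const std::int64_t penalized = n - (compute_intercept ? 1 : 0);
    const std::int64_t step = n + 1; // distance between consecutive diagonal elements
    for (std::int64_t i = 0; i < penalized; ++i) {
        xtx[i * step] += alpha;
    }
}

diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/partial_train_kernel_norm_eq_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/partial_train_kernel_norm_eq_dpc.cpp index dff0548afe4..a9aa7c373e4 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/partial_train_kernel_norm_eq_dpc.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/partial_train_kernel_norm_eq_dpc.cpp @@ -45,11 +45,11 @@ static partial_train_result call_dal_kernel(const context_gpu& ctx, constexpr auto alloc = sycl::usm::alloc::device; - const bool beta = desc.get_compute_intercept(); + const bool compute_intercept = desc.get_compute_intercept(); const auto feature_count = input.get_data().get_column_count(); const auto response_count = input.get_responses().get_column_count(); - const std::int64_t ext_feature_count = feature_count + beta; + const std::int64_t ext_feature_count = feature_count +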
compute_intercept; const pr::ndshape<2> xty_shape{ response_count, ext_feature_count }; const pr::ndshape<2> xtx_shape{ ext_feature_count, ext_feature_count }; @@ -74,8 +74,10 @@ static partial_train_result call_dal_kernel(const context_gpu& ctx, input_.get_partial_xty(), sycl::usm::alloc::device); auto copy_xty_event = copy(queue, xty, xty_nd, { fill_xty_event }); - auto last_xtx_event = update_xtx(queue, beta, data_nd, xtx, { copy_xtx_event }); - auto last_xty_event = update_xty(queue, beta, data_nd, res_nd, xty, { copy_xty_event }); + auto last_xtx_event = + update_xtx(queue, compute_intercept, data_nd, xtx, { copy_xtx_event }); + auto last_xty_event = + update_xty(queue, compute_intercept, data_nd, res_nd, xty, { copy_xty_event }); result.set_partial_xtx(homogen_table::wrap(xtx.flatten(queue, { last_xtx_event }), ext_feature_count, @@ -97,8 +99,10 @@ static partial_train_result call_dal_kernel(const context_gpu& ctx, auto [xtx, fill_xtx_event] = pr::ndarray::zeros(queue, xtx_shape, alloc); - auto last_xty_event = update_xty(queue, beta, data_nd, res_nd, xty, { fill_xty_event }); - auto last_xtx_event = update_xtx(queue, beta, data_nd, xtx, { fill_xtx_event }); + auto last_xty_event = + update_xty(queue, compute_intercept, data_nd, res_nd, xty, { fill_xty_event }); + auto last_xtx_event = + update_xtx(queue, compute_intercept, data_nd, xtx, { fill_xtx_event }); result.set_partial_xtx(homogen_table::wrap(xtx.flatten(queue, { last_xtx_event }), ext_feature_count, diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp index bf0cd04c00e..25b08aa7710 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp @@ -29,6 +29,7 @@ #include "oneapi/dal/algo/linear_regression/backend/model_impl.hpp" #include "oneapi/dal/algo/linear_regression/backend/gpu/train_kernel.hpp" #include "oneapi/dal/algo/linear_regression/backend/gpu/update_kernel.hpp" +#include "oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp" namespace oneapi::dal::linear_regression::backend { @@ -62,8 +63,8 @@ static train_result call_dal_kernel(const context_gpu& ctx, const auto feature_count = data.get_column_count(); const auto response_count = resp.get_column_count(); ONEDAL_ASSERT(sample_count == resp.get_row_count()); - const bool beta = desc.get_compute_intercept(); - const std::int64_t ext_feature_count = feature_count + beta; + const bool compute_intercept = desc.get_compute_intercept(); + const std::int64_t ext_feature_count = feature_count + compute_intercept; const auto betas_size = check_mul_overflow(response_count, feature_count + 1); auto betas_arr = array::zeros(queue, betas_size, alloc); @@ -95,8 +96,8 @@ static train_result call_dal_kernel(const context_gpu& ctx, auto y_arr = y_accessor.pull(queue, { first, last }, alloc); auto y = pr::ndview::wrap(y_arr.get_data(), { length, response_count }); - last_xty_event = update_xty(queue, beta, x, y, xty, { last_xty_event }); - last_xtx_event = update_xtx(queue, beta, x, xtx, { last_xtx_event }); + last_xty_event = update_xty(queue, compute_intercept, x, y, xty, { last_xty_event }); + last_xtx_event = update_xtx(queue, compute_intercept, x, xtx, { last_xtx_event }); // We keep the latest slice of data up to date because of pimpl - // it virtually extend lifetime of pulled arrays @@ -105,6 +106,12 @@ static train_result 
call_dal_kernel(const context_gpu& ctx, const be::event_vector solve_deps{ last_xty_event, last_xtx_event }; + double alpha = desc.get_alpha(); + if (alpha != 0.0) { + last_xtx_event = + add_ridge_penalty(queue, xtx, compute_intercept, alpha, { last_xtx_event }); + } + auto& comm = ctx.get_communicator(); if (comm.get_rank_count() > 1) { sycl::event::wait_and_throw(solve_deps); @@ -122,7 +129,8 @@ static train_result call_dal_kernel(const context_gpu& ctx, auto nxtx = pr::ndarray::empty(queue, xtx_shape, alloc); auto nxty = pr::ndview::wrap_mutable(betas_arr, betas_shape); - auto solve_event = pr::solve_system(queue, beta, xtx, xty, nxtx, nxty, solve_deps); + auto solve_event = + pr::solve_system(queue, compute_intercept, xtx, xty, nxtx, nxty, solve_deps); sycl::event::wait_and_throw({ solve_event }); auto betas = homogen_table::wrap(betas_arr, response_count, feature_count + 1); diff --git a/cpp/oneapi/dal/algo/linear_regression/common.cpp b/cpp/oneapi/dal/algo/linear_regression/common.cpp index 70fd04f221e..949898f3524 100644 --- a/cpp/oneapi/dal/algo/linear_regression/common.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/common.cpp @@ -42,6 +42,8 @@ class descriptor_impl : public base { explicit descriptor_impl() = default; bool compute_intercept = true; + double alpha = 0; + result_option_id result_options = get_default_result_options(); }; @@ -81,6 +83,16 @@ void descriptor_base::set_compute_intercept_impl(bool compute_intercept) { impl_->compute_intercept = compute_intercept; } +template +double descriptor_base::get_alpha() const { + return impl_->alpha; +} + +template +void descriptor_base::set_alpha_impl(double value) { + impl_->alpha = value; +} + template class ONEDAL_EXPORT descriptor_base; } // namespace v1 diff --git a/cpp/oneapi/dal/algo/linear_regression/common.hpp b/cpp/oneapi/dal/algo/linear_regression/common.hpp index 633e919f1bb..57d597a984d 100644 --- a/cpp/oneapi/dal/algo/linear_regression/common.hpp +++ b/cpp/oneapi/dal/algo/linear_regression/common.hpp @@ -112,10 +112,12 @@ class descriptor_base : public base { descriptor_base(bool compute_intercept); bool get_compute_intercept() const; + double get_alpha() const; result_option_id get_result_options() const; protected: void set_compute_intercept_impl(bool compute_intercept); + void set_alpha_impl(double alpha); void set_result_options_impl(const result_option_id& value); private: @@ -165,6 +167,14 @@ class descriptor : public detail::descriptor_base { /// Creates a new instance of the class with default parameters explicit descriptor() : base_t(true) {} + explicit descriptor(bool compute_intercept, double alpha) : base_t(compute_intercept) { + set_alpha(alpha); + } + + explicit descriptor(double alpha) : base_t(true) { + set_alpha(alpha); + } + /// Defines should intercept be taken into consideration. bool get_compute_intercept() const { return base_t::get_compute_intercept(); @@ -175,6 +185,16 @@ class descriptor : public detail::descriptor_base { return *this; } + /// Defines regularization term alpha used in Ridge Regression + double get_alpha() const { + return base_t::get_alpha(); + } + + auto& set_alpha(double value) { + base_t::set_alpha_impl(value); + return *this; + } + /// Choose which results should be computed and returned. 
result_option_id get_result_options() const { return base_t::get_result_options(); diff --git a/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp b/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp index 270b34b9ddc..00ec7babbb9 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp @@ -47,7 +47,15 @@ TEMPLATE_LIST_TEST_M(lr_batch_test, "LR common flow", "[lr][batch]", lr_types) { this->generate(777); - this->run_and_check(); + this->run_and_check_linear(); +} + +TEMPLATE_LIST_TEST_M(lr_batch_test, "RR common flow", "[rr][batch]", lr_types) { + SKIP_IF(this->not_float64_friendly()); + + this->generate(777); + + this->run_and_check_ridge(); } } // namespace oneapi::dal::linear_regression::test diff --git a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp index a8994a7c704..aedf0165454 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp @@ -123,6 +123,17 @@ class lr_test : public te::crtp_algo_fixture { return result; } + double generate_alpha(std::int64_t seed) const { + std::mt19937 gen(seed); + + double alpha_min = 1; + double alpha_max = 5; + + std::uniform_real_distribution dist(alpha_min, alpha_max); + + return dist(gen); + } + void check_table_dimensions(const table& x_train, const table& y_train, const table& x_test, @@ -144,13 +155,14 @@ class lr_test : public te::crtp_algo_fixture { this->bias_ = std::move(bias); this->beta_ = std::move(beta); + this->alpha_ = generate_alpha(seed); } - auto get_descriptor() const { + auto get_descriptor(double alpha = 0.0) const { result_option_id resopts = result_options::coefficients; if (this->intercept_) resopts = resopts | result_options::intercept; - return linear_regression::descriptor(intercept_) + return linear_regression::descriptor(intercept_, alpha) .set_result_options(resopts); } @@ -191,7 +203,25 @@ class lr_test : public te::crtp_algo_fixture { } } - void run_and_check(std::int64_t seed = 888, double tol = 1e-2) { + void check_coefficient_shrinkage(const table& lr_coeffs, + const table& rr_coeffs, + double tol = 1e-3) { + row_accessor lr_acc(lr_coeffs); + row_accessor rr_acc(rr_coeffs); + const auto lr_arr = lr_acc.pull({ 0, -1 }); + const auto rr_arr = rr_acc.pull({ 0, -1 }); + + double lr_norm_squared = 0, rr_norm_squared = 0; + for (std::int64_t i = 0; i < lr_arr.get_count(); ++i) { + lr_norm_squared += lr_arr[i] * lr_arr[i]; + rr_norm_squared += rr_arr[i] * rr_arr[i]; + } + + REQUIRE(rr_norm_squared <= lr_norm_squared + tol); + } + + std::tuple prepare_inputs(std::int64_t seed = 888, + double tol = 1e-2) { using namespace ::oneapi::dal::detail; std::mt19937 meta_gen(seed); @@ -214,6 +244,29 @@ class lr_test : public te::crtp_algo_fixture { auto y_test = compute_responses(this->beta_, this->bias_, x_test); check_table_dimensions(x_train, y_train, x_test, y_test); + return { x_train, y_train, x_test, y_test }; + } + + void run_and_check_ridge(std::int64_t seed = 888, double tol = 1e-2) { + table x_train, y_train, x_test, y_test; + std::tie(x_train, y_train, x_test, y_test) = prepare_inputs(seed, tol); + + const auto linear_desc = this->get_descriptor(); + const auto linear_train_res = this->train(linear_desc, x_train, y_train); + + const auto ridge_desc = this->get_descriptor(this->alpha_); + const auto ridge_train_res = this->train(ridge_desc, x_train, y_train); + + SECTION("Checking coefficient shrinkage") { + 
this->check_coefficient_shrinkage(linear_train_res.get_coefficients(), + ridge_train_res.get_coefficients(), + tol); + } + } + + void run_and_check_linear(std::int64_t seed = 888, double tol = 1e-2) { + table x_train, y_train, x_test, y_test; + std::tie(x_train, y_train, x_test, y_test) = prepare_inputs(seed, tol); const auto desc = this->get_descriptor(); const auto train_res = this->train(desc, x_train, y_train); @@ -234,6 +287,7 @@ class lr_test : public te::crtp_algo_fixture { check_if_close(infer_res.get_responses(), y_test, tol); } } + template std::vector split_table_by_rows(const dal::table& t, std::int64_t split_count) { ONEDAL_ASSERT(0l < split_count); @@ -259,31 +313,12 @@ class lr_test : public te::crtp_algo_fixture { return result; } - void run_and_check_online(std::int64_t nBlocks) { - using namespace ::oneapi::dal::detail; + void run_and_check_linear_online(std::int64_t nBlocks) { std::int64_t seed = 888; double tol = 1e-2; - - std::mt19937 meta_gen(seed); - const std::int64_t train_seed = meta_gen(); - const auto train_dataframe = GENERATE_DATAFRAME( - te::dataframe_builder{ this->s_count_, this->f_count_ }.fill_uniform(-5.5, - 3.5, - train_seed)); - auto x_train = train_dataframe.get_table(this->get_homogen_table_id()); - - const std::int64_t test_seed = meta_gen(); - const auto test_dataframe = GENERATE_DATAFRAME( - te::dataframe_builder{ this->t_count_, this->f_count_ }.fill_uniform(-3.5, - 5.5, - test_seed)); - auto x_test = test_dataframe.get_table(this->get_homogen_table_id()); - - auto y_train = compute_responses(this->beta_, this->bias_, x_train); - auto y_test = compute_responses(this->beta_, this->bias_, x_test); - - check_table_dimensions(x_train, y_train, x_test, y_test); + table x_train, y_train, x_test, y_test; + std::tie(x_train, y_train, x_test, y_test) = prepare_inputs(seed, tol); const auto desc = this->get_descriptor(); dal::linear_regression::partial_train_result<> partial_result; @@ -312,8 +347,45 @@ class lr_test : public te::crtp_algo_fixture { } } + void run_and_check_ridge_online(std::int64_t nBlocks) { + std::int64_t seed = 888; + double tol = 1e-2; + table x_train, y_train, x_test, y_test; + std::tie(x_train, y_train, x_test, y_test) = prepare_inputs(seed, tol); + + auto input_table_x = split_table_by_rows(x_train, nBlocks); + auto input_table_y = split_table_by_rows(y_train, nBlocks); + + const auto linear_desc = this->get_descriptor(); + dal::linear_regression::partial_train_result<> linear_partial_result; + for (std::int64_t i = 0; i < nBlocks; i++) { + linear_partial_result = this->partial_train(linear_desc, + linear_partial_result, + input_table_x[i], + input_table_y[i]); + } + auto linear_train_res = this->finalize_train(linear_desc, linear_partial_result); + + const auto ridge_desc = this->get_descriptor(this->alpha_); + dal::linear_regression::partial_train_result<> ridge_partial_result; + for (std::int64_t i = 0; i < nBlocks; i++) { + ridge_partial_result = this->partial_train(ridge_desc, + ridge_partial_result, + input_table_x[i], + input_table_y[i]); + } + auto ridge_train_res = this->finalize_train(ridge_desc, ridge_partial_result); + + SECTION("Checking coefficient shrinkage") { + this->check_coefficient_shrinkage(linear_train_res.get_coefficients(), + ridge_train_res.get_coefficients(), + tol); + } + } + protected: bool intercept_ = true; + float_t alpha_; std::int64_t t_count_; std::int64_t s_count_; std::int64_t f_count_; diff --git a/cpp/oneapi/dal/algo/linear_regression/test/online.cpp 
b/cpp/oneapi/dal/algo/linear_regression/test/online.cpp index 2724768491b..c16e1c06f26 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/online.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/online.cpp @@ -47,7 +47,15 @@ TEMPLATE_LIST_TEST_M(lr_online_test, "LR common flow", "[lr][online]", lr_types) this->generate(777); const int64_t nBlocks = GENERATE(1, 3, 5, 8); - this->run_and_check_online(nBlocks); + this->run_and_check_linear_online(nBlocks); +} + +TEMPLATE_LIST_TEST_M(lr_online_test, "RR common flow", "[rr][online]", lr_types) { + SKIP_IF(this->not_float64_friendly()); + this->generate(777); + const int64_t nBlocks = GENERATE(1, 3, 5, 8); + + this->run_and_check_ridge_online(nBlocks); } } // namespace oneapi::dal::linear_regression::test diff --git a/cpp/oneapi/dal/algo/linear_regression/test/spmd.cpp b/cpp/oneapi/dal/algo/linear_regression/test/spmd.cpp index d0cca4e943c..62223f03fdd 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/spmd.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/spmd.cpp @@ -25,7 +25,7 @@ TEMPLATE_LIST_TEST_M(lr_spmd_test, "LR common flow", "[lr][spmd]", lr_types) { this->generate(777); this->set_rank_count(GENERATE(2, 3)); - this->run_and_check(); + this->run_and_check_linear(); } } // namespace oneapi::dal::linear_regression::test diff --git a/cpp/oneapi/dal/algo/linear_regression/test/train_parameters.cpp b/cpp/oneapi/dal/algo/linear_regression/test/train_parameters.cpp index 48f9ead5d3a..835b8ecc1b4 100644 --- a/cpp/oneapi/dal/algo/linear_regression/test/train_parameters.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/test/train_parameters.cpp @@ -89,7 +89,7 @@ TEMPLATE_LIST_TEST_M(lr_train_params_test, "LR train params", "[lr][train][param this->generate(999); this->generate_parameters(); - this->run_and_check(); + this->run_and_check_linear(); } } // namespace oneapi::dal::linear_regression::test From 99570f8f4147491857567e6cb6b475a58f54e7fe Mon Sep 17 00:00:00 2001 From: Alexander Andreev Date: Wed, 22 May 2024 14:49:31 +0100 Subject: [PATCH 23/65] Fix usage of sequential execution branches (#2783) * Fix usage of sequential execution branches * clang-format-14 specific formatting fix * Completely remove unnecessary TBB layer flag --- cpp/daal/BUILD | 1 - .../linear_model_train_normeq_merge_impl.i | 18 +- .../src/threading/service_thread_pinner.cpp | 122 ++--- cpp/daal/src/threading/threading.cpp | 448 ++++++++---------- makefile | 3 - 5 files changed, 244 insertions(+), 348 deletions(-) mode change 100755 => 100644 cpp/daal/src/threading/service_thread_pinner.cpp diff --git a/cpp/daal/BUILD b/cpp/daal/BUILD index 7463af41c55..4f15e2b0c3a 100644 --- a/cpp/daal/BUILD +++ b/cpp/daal/BUILD @@ -143,7 +143,6 @@ daal_module( name = "threading_tbb", srcs = glob(["src/threading/**/*.cpp"]), local_defines = [ - "__DO_TBB_LAYER__", "__TBB_NO_IMPLICIT_LINKAGE", "__TBB_LEGACY_MODE", "TBB_SUPPRESS_DEPRECATED_MESSAGES", diff --git a/cpp/daal/src/algorithms/linear_model/linear_model_train_normeq_merge_impl.i b/cpp/daal/src/algorithms/linear_model/linear_model_train_normeq_merge_impl.i index e7e7ac56f31..e76c65cd094 100644 --- a/cpp/daal/src/algorithms/linear_model/linear_model_train_normeq_merge_impl.i +++ b/cpp/daal/src/algorithms/linear_model/linear_model_train_normeq_merge_impl.i @@ -43,22 +43,6 @@ using namespace daal::data_management; using namespace daal::internal; using namespace daal::services::internal; -template -void conditional_threader_for(bool condition, size_t n, size_t threadsRequest, const F & processIteration) -{ - if 
(condition) - { - daal::threader_for(n, threadsRequest, processIteration); - } - else - { - for (size_t i = 0; i < n; i++) - { - processIteration(i); - } - } -} - template Status MergeKernel::merge(const NumericTable & partialTable, algorithmFPType * result, bool threadingCondition) { @@ -69,7 +53,7 @@ Status MergeKernel::merge(const NumericTable & partialTabl algorithmFPType * partialResult = const_cast(block.get()); size_t resultSize = nRows * partialTable.getNumberOfColumns(); - conditional_threader_for(threadingCondition, resultSize, resultSize, [=](size_t i) { result[i] += partialResult[i]; }); + daal::conditional_threader_for(threadingCondition, resultSize, [=](size_t i) { result[i] += partialResult[i]; }); return Status(); } diff --git a/cpp/daal/src/threading/service_thread_pinner.cpp b/cpp/daal/src/threading/service_thread_pinner.cpp old mode 100755 new mode 100644 index 069a163c0a5..d6e878f5b20 --- a/cpp/daal/src/threading/service_thread_pinner.cpp +++ b/cpp/daal/src/threading/service_thread_pinner.cpp @@ -27,39 +27,37 @@ #include "services/daal_memory.h" #include "src/threading/threading.h" - #if defined(__DO_TBB_LAYER__) - - #define USE_TASK_ARENA_CURRENT_SLOT 1 - #define LOG_PINNING 1 - #define TBB_PREVIEW_TASK_ARENA 1 - #define TBB_PREVIEW_LOCAL_OBSERVER 1 - - #include "tbb/tbb.h" - #include - #include - #include - #include - #include - #include - #include "services/daal_atomic_int.h" + #define USE_TASK_ARENA_CURRENT_SLOT 1 + #define LOG_PINNING 1 + #define TBB_PREVIEW_TASK_ARENA 1 + #define TBB_PREVIEW_LOCAL_OBSERVER 1 + + #include "tbb/tbb.h" + #include + #include + #include + #include + #include + #include + #include "services/daal_atomic_int.h" using namespace daal::services; - #if defined(_WIN32) || defined(_WIN64) - #include - #define __PINNER_WINDOWS__ + #if defined(_WIN32) || defined(_WIN64) + #include + #define __PINNER_WINDOWS__ - #if defined(_WIN64) - #define MASK_WIDTH 64 - #else - #define MASK_WIDTH 32 - #endif + #if defined(_WIN64) + #define MASK_WIDTH 64 + #else + #define MASK_WIDTH 32 + #endif - #else // LINUX - #include - #define __PINNER_LINUX__ + #else // LINUX + #include + #define __PINNER_LINUX__ - #ifdef __FreeBSD__ - #include + #ifdef __FreeBSD__ + #include cpu_set_t * __sched_cpualloc(size_t count) { @@ -73,25 +71,25 @@ int sched_getaffinity(pid_t pid, size_t cpusetsize, cpu_set_t * mask) { return cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, pid == 0 ? 
-1 : pid, cpusetsize, mask); } - #endif - #endif + #endif + struct cpu_mask_t { int status; - #if defined(_WIN32) || defined(_WIN64) + #if defined(_WIN32) || defined(_WIN64) GROUP_AFFINITY ga; - #else + #else int ncpus; int bit_parts_size; cpu_set_t * cpu_set; - #endif + #endif cpu_mask_t() { status = 0; - #if defined __PINNER_LINUX__ + #if defined __PINNER_LINUX__ ncpus = 0; bit_parts_size = 0; @@ -113,10 +111,10 @@ struct cpu_mask_t } if (cpu_set == NULL) - #else // defined __PINNER_WINDOWS__ + #else // defined __PINNER_WINDOWS__ bool retval = GetThreadGroupAffinity(GetCurrentThread(), &ga); if (!retval) - #endif + #endif { status--; } @@ -128,13 +126,13 @@ struct cpu_mask_t { if (status == 0) { - #if defined __PINNER_LINUX__ + #if defined __PINNER_LINUX__ int err = pthread_getaffinity_np(pthread_self(), bit_parts_size, cpu_set); if (err) - #else // defined __PINNER_WINDOWS__ + #else // defined __PINNER_WINDOWS__ bool retval = GetThreadGroupAffinity(GetCurrentThread(), &ga); if (!retval) - #endif + #endif { status--; } @@ -147,15 +145,15 @@ struct cpu_mask_t { if (status == 0) { - #if defined __PINNER_LINUX__ + #if defined __PINNER_LINUX__ int err = pthread_setaffinity_np(pthread_self(), bit_parts_size, cpu_set); if (err) - #else // defined __PINNER_WINDOWS__ + #else // defined __PINNER_WINDOWS__ bool retval = SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL); if (!retval) - #endif + #endif { status--; } @@ -168,13 +166,13 @@ struct cpu_mask_t { if (status == 0) { - #if defined __PINNER_LINUX__ + #if defined __PINNER_LINUX__ CPU_ZERO_S(bit_parts_size, cpu_set); CPU_SET_S(cpu_idx, bit_parts_size, cpu_set); - #else // defined __PINNER_WINDOWS__ + #else // defined __PINNER_WINDOWS__ ga.Group = cpu_idx / MASK_WIDTH; ga.Mask = cpu_idx % MASK_WIDTH; - #endif + #endif } return status; @@ -184,12 +182,12 @@ struct cpu_mask_t ~cpu_mask_t() { - #if defined __PINNER_LINUX__ + #if defined __PINNER_LINUX__ if (cpu_set != NULL) { CPU_FREE(cpu_set); } - #endif + #endif return; } // ~cpu_mask_t() @@ -388,34 +386,4 @@ DAAL_EXPORT void _thread_pinner_on_scheduler_exit(bool p) IMPL->on_scheduler_exit(p); } - #else /* if __DO_TBB_LAYER__ is not defined */ - -DAAL_EXPORT void * _getThreadPinner(bool create_pinner, void (*read_topo)(int &, int &, int &, int **), void (*deleter)(void *)) -{ - return NULL; -} - -DAAL_EXPORT void _thread_pinner_thread_pinner_init(void (*f)(int &, int &, int &, int **), void (*deleter)(void *)) {} -DAAL_EXPORT void _thread_pinner_execute(daal::services::internal::thread_pinner_task_t & task) -{ - task(); -} -DAAL_EXPORT bool _thread_pinner_get_pinning() -{ - return false; -} -DAAL_EXPORT bool _thread_pinner_set_pinning(bool p) -{ - return true; -} -DAAL_EXPORT int _thread_pinner_get_status() -{ - return 0; -} - -DAAL_EXPORT void _thread_pinner_on_scheduler_entry(bool p) {} -DAAL_EXPORT void _thread_pinner_on_scheduler_exit(bool p) {} - - #endif /* if __DO_TBB_LAYER__ is not defined */ - #endif /* #if !defined (DAAL_THREAD_PINNING_DISABLED) */ diff --git a/cpp/daal/src/threading/threading.cpp b/cpp/daal/src/threading/threading.cpp index 7fa0127a5ab..8af63a075be 100644 --- a/cpp/daal/src/threading/threading.cpp +++ b/cpp/daal/src/threading/threading.cpp @@ -23,61 +23,46 @@ #include "src/threading/threading.h" #include "services/daal_memory.h" +#include "src/algorithms/service_qsort.h" -#if defined(__DO_TBB_LAYER__) - #define TBB_PREVIEW_GLOBAL_CONTROL 1 - #define TBB_PREVIEW_TASK_ARENA 1 +#define TBB_PREVIEW_GLOBAL_CONTROL 1 +#define TBB_PREVIEW_TASK_ARENA 1 - #include // 
malloc and free - #include - #include - #include - #include - #include - #include "services/daal_atomic_int.h" +#include // malloc and free +#include +#include +#include +#include +#include +#include "services/daal_atomic_int.h" - #if defined(TBB_INTERFACE_VERSION) && TBB_INTERFACE_VERSION >= 12002 - #include - #endif +#if defined(TBB_INTERFACE_VERSION) && TBB_INTERFACE_VERSION >= 12002 + #include +#endif using namespace daal::services; -#else - #include "src/externals/service_service.h" - #include "src/algorithms/service_qsort.h" -#endif DAAL_EXPORT void * _threaded_scalable_malloc(const size_t size, const size_t alignment) { -#if defined(__DO_TBB_LAYER__) return scalable_aligned_malloc(size, alignment); -#else - return daal::internal::Service<>::serv_malloc(size, alignment); -#endif } DAAL_EXPORT void _threaded_scalable_free(void * ptr) { -#if defined(__DO_TBB_LAYER__) scalable_aligned_free(ptr); -#else - daal::internal::Service<>::serv_free(ptr); -#endif } DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& globalControl) { -#if defined(__DO_TBB_LAYER__) if (globalControl) { delete reinterpret_cast(globalControl); globalControl = nullptr; } -#endif } DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalControl) { -#if defined(__DO_TBB_LAYER__) static tbb::spin_mutex mt; tbb::spin_mutex::scoped_lock lock(mt); if (numThreads != 0) @@ -87,180 +72,209 @@ DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalCo daal::threader_env()->setNumberOfThreads(numThreads); return numThreads; } -#endif daal::threader_env()->setNumberOfThreads(1); return 1; } DAAL_EXPORT void _daal_threader_for(int n, int threads_request, const void * a, daal::functype func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { + int i; + for (i = r.begin(); i < r.end(); i++) + { + func(i, a); + } + }); + } + else + { int i; - for (i = r.begin(); i < r.end(); i++) + for (i = 0; i < n; i++) { func(i, a); } - }); -#elif defined(__DO_SEQ_LAYER__) - int i; - for (i = 0; i < n; i++) - { - func(i, a); } -#endif } DAAL_EXPORT void _daal_threader_for_int64(int64_t n, const void * a, daal::functype_int64 func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { + int64_t i; + for (i = r.begin(); i < r.end(); i++) + { + func(i, a); + } + }); + } + else + { int64_t i; - for (i = r.begin(); i < r.end(); i++) + for (i = 0; i < n; i++) { func(i, a); } - }); -#elif defined(__DO_SEQ_LAYER__) - int64_t i; - for (i = 0; i < n; i++) - { - func(i, a); } -#endif } DAAL_EXPORT void _daal_threader_for_blocked_size(size_t n, size_t block, const void * a, daal::functype_blocked_size func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for(tbb::blocked_range(0ul, n, block), [=](tbb::blocked_range r) -> void { return func(r.begin(), r.end(), a); }); -#elif defined(__DO_SEQ_LAYER__) - func(0ul, n, a); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_for(tbb::blocked_range(0ul, n, block), + [=](tbb::blocked_range r) -> void { return func(r.begin(), r.end(), a); }); + } + else + { + func(0ul, n, a); + } } DAAL_EXPORT void _daal_threader_for_simple(int n, int threads_request, const void 
* a, daal::functype func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for( - tbb::blocked_range(0, n, 1), - [&](tbb::blocked_range r) { - int i; - for (i = r.begin(); i < r.end(); i++) - { - func(i, a); - } - }, - tbb::simple_partitioner {}); -#elif defined(__DO_SEQ_LAYER__) - int i; - for (i = 0; i < n; i++) + if (daal::threader_env()->getNumberOfThreads() > 1) { - func(i, a); + tbb::parallel_for( + tbb::blocked_range(0, n, 1), + [&](tbb::blocked_range r) { + int i; + for (i = r.begin(); i < r.end(); i++) + { + func(i, a); + } + }, + tbb::simple_partitioner {}); + } + else + { + int i; + for (i = 0; i < n; i++) + { + func(i, a); + } } -#endif } DAAL_EXPORT void _daal_threader_for_int32ptr(const int * begin, const int * end, const void * a, daal::functype_int32ptr func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for(tbb::blocked_range(begin, end, 1), [&](tbb::blocked_range r) { + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_for(tbb::blocked_range(begin, end, 1), [&](tbb::blocked_range r) { + const int * i; + for (i = r.begin(); i != r.end(); i++) + { + func(i, a); + } + }); + } + else + { const int * i; - for (i = r.begin(); i != r.end(); i++) + for (i = begin; i != end; ++i) { func(i, a); } - }); -#elif defined(__DO_SEQ_LAYER__) - const int * i; - for (i = begin; i != end; ++i) - { - func(i, a); } -#endif } DAAL_EXPORT int64_t _daal_parallel_reduce_int32_int64(int32_t n, int64_t init, const void * a, daal::loop_functype_int32_int64 loop_func, const void * b, daal::reduction_functype_int64 reduction_func) { -#if defined(__DO_TBB_LAYER__) - return tbb::parallel_reduce( - tbb::blocked_range(0, n), init, - [&](const tbb::blocked_range & r, int64_t value_for_reduce) { return loop_func(r.begin(), r.end(), value_for_reduce, a); }, - [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::auto_partitioner {}); - -#elif defined(__DO_SEQ_LAYER__) - int64_t value_for_reduce = init; - return loop_func(0, n, value_for_reduce, a); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + return tbb::parallel_reduce( + tbb::blocked_range(0, n), init, + [&](const tbb::blocked_range & r, int64_t value_for_reduce) { return loop_func(r.begin(), r.end(), value_for_reduce, a); }, + [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::auto_partitioner {}); + } + else + { + int64_t value_for_reduce = init; + return loop_func(0, n, value_for_reduce, a); + } } DAAL_EXPORT int64_t _daal_parallel_reduce_int32_int64_simple(int32_t n, int64_t init, const void * a, daal::loop_functype_int32_int64 loop_func, const void * b, daal::reduction_functype_int64 reduction_func) { -#if defined(__DO_TBB_LAYER__) - return tbb::parallel_reduce( - tbb::blocked_range(0, n), init, - [&](const tbb::blocked_range & r, int64_t value_for_reduce) { return loop_func(r.begin(), r.end(), value_for_reduce, a); }, - [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::simple_partitioner {}); - -#elif defined(__DO_SEQ_LAYER__) - int64_t value_for_reduce = init; - return loop_func(0, n, value_for_reduce, a); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + return tbb::parallel_reduce( + tbb::blocked_range(0, n), init, + [&](const tbb::blocked_range & r, int64_t value_for_reduce) { return loop_func(r.begin(), r.end(), value_for_reduce, a); }, + [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::simple_partitioner {}); + } + else + { + int64_t value_for_reduce = init; + return loop_func(0, n, value_for_reduce, a); + } } 
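// [Editorial sketch] The dispatch pattern introduced throughout this file - run
// the TBB body only when more than one thread is configured, otherwise run a
// plain sequential body - could be factored into a small helper like this
// (illustrative only, not part of this patch):
//
//   template <typename ParallelBody, typename SequentialBody>
//   inline void run_threaded_or_sequential(ParallelBody && parallel, SequentialBody && sequential)
//   {
//       if (daal::threader_env()->getNumberOfThreads() > 1)
//           parallel();
//       else
//           sequential();
//   }
//
// e.g. run_threaded_or_sequential([&] { tbb::parallel_sort(begin_p, end_p); },
//                                 [&] { daal::algorithms::internal::qSort(end_p - begin_p, begin_p); });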
DAAL_EXPORT int64_t _daal_parallel_reduce_int32ptr_int64_simple(const int32_t * begin, const int32_t * end, int64_t init, const void * a, daal::loop_functype_int32ptr_int64 loop_func, const void * b, daal::reduction_functype_int64 reduction_func) { -#if defined(__DO_TBB_LAYER__) - return tbb::parallel_reduce( - tbb::blocked_range(begin, end), init, - [&](const tbb::blocked_range & r, int64_t value_for_reduce) { return loop_func(r.begin(), r.end(), value_for_reduce, a); }, - [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::simple_partitioner {}); - -#elif defined(__DO_SEQ_LAYER__) - int64_t value_for_reduce = init; - return loop_func(begin, end, value_for_reduce, a); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + return tbb::parallel_reduce( + tbb::blocked_range(begin, end), init, + [&](const tbb::blocked_range & r, int64_t value_for_reduce) { + return loop_func(r.begin(), r.end(), value_for_reduce, a); + }, + [&](int64_t x, int64_t y) { return reduction_func(x, y, b); }, tbb::simple_partitioner {}); + } + else + { + int64_t value_for_reduce = init; + return loop_func(begin, end, value_for_reduce, a); + } } DAAL_EXPORT void _daal_static_threader_for(size_t n, const void * a, daal::functype_static func) { -#if defined(__DO_TBB_LAYER__) - const size_t nthreads = _daal_threader_get_max_threads(); - const size_t nblocks_per_thread = n / nthreads + !!(n % nthreads); - - tbb::parallel_for( - tbb::blocked_range(0, nthreads, 1), - [&](tbb::blocked_range r) { - const size_t tid = r.begin(); - const size_t begin = tid * nblocks_per_thread; - const size_t end = n < begin + nblocks_per_thread ? n : begin + nblocks_per_thread; - - for (size_t i = begin; i < end; ++i) - { - func(i, tid, a); - } - }, - tbb::static_partitioner()); -#elif defined(__DO_SEQ_LAYER__) - for (size_t i = 0; i < n; i++) + if (daal::threader_env()->getNumberOfThreads() > 1) { - func(i, 0, a); + const size_t nthreads = _daal_threader_get_max_threads(); + const size_t nblocks_per_thread = n / nthreads + !!(n % nthreads); + + tbb::parallel_for( + tbb::blocked_range(0, nthreads, 1), + [&](tbb::blocked_range r) { + const size_t tid = r.begin(); + const size_t begin = tid * nblocks_per_thread; + const size_t end = n < begin + nblocks_per_thread ? 
n : begin + nblocks_per_thread; + + for (size_t i = begin; i < end; ++i) + { + func(i, tid, a); + } + }, + tbb::static_partitioner()); + } + else + { + for (size_t i = 0; i < n; i++) + { + func(i, 0, a); + } } -#endif } template DAAL_EXPORT void _daal_parallel_sort_template(F * begin_p, F * end_p) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_sort(begin_p, end_p); -#elif defined(__DO_SEQ_LAYER__) - daal::algorithms::internal::qSort(end_p - begin_p, begin_p); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_sort(begin_p, end_p); + } + else + { + daal::algorithms::internal::qSort(end_p - begin_p, begin_p); + } } #define DAAL_PARALLEL_SORT_IMPL(TYPE, NAMESUFFIX) \ @@ -279,124 +293,109 @@ DAAL_PARALLEL_SORT_IMPL(daal::IdxValType, pair_fp64_uint64) DAAL_EXPORT void _daal_threader_for_blocked(int n, int threads_request, const void * a, daal::functype2 func) { -#if defined(__DO_TBB_LAYER__) - tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { func(r.begin(), r.end() - r.begin(), a); }); -#elif defined(__DO_SEQ_LAYER__) - func(0, n, a); -#endif + if (daal::threader_env()->getNumberOfThreads() > 1) + { + tbb::parallel_for(tbb::blocked_range(0, n, 1), [&](tbb::blocked_range r) { func(r.begin(), r.end() - r.begin(), a); }); + } + else + { + func(0, n, a); + } } DAAL_EXPORT void _daal_threader_for_optional(int n, int threads_request, const void * a, daal::functype func) { -#if defined(__DO_TBB_LAYER__) - if (_daal_is_in_parallel()) + if (daal::threader_env()->getNumberOfThreads() > 1) { - int i; - for (i = 0; i < n; i++) + if (_daal_is_in_parallel()) { - func(i, a); + int i; + for (i = 0; i < n; i++) + { + func(i, a); + } + } + else + { + _daal_threader_for(n, threads_request, a, func); } } else { _daal_threader_for(n, threads_request, a, func); } -#elif defined(__DO_SEQ_LAYER__) - _daal_threader_for(n, threads_request, a, func); -#endif } DAAL_EXPORT void _daal_threader_for_break(int n, int threads_request, const void * a, daal::functype_break func) { -#if defined(__DO_TBB_LAYER__) - tbb::task_group_context context; - tbb::parallel_for( - tbb::blocked_range(0, n, 1), - [&](tbb::blocked_range r) { - int i; - for (i = r.begin(); i < r.end(); ++i) - { - bool needBreak = false; - func(i, needBreak, a); - if (needBreak) context.cancel_group_execution(); - } - }, - context); -#elif defined(__DO_SEQ_LAYER__) - int i; - for (i = 0; i < n; ++i) + if (daal::threader_env()->getNumberOfThreads() > 1) { - bool needBreak = false; - func(i, needBreak, a); - if (needBreak) break; + tbb::task_group_context context; + tbb::parallel_for( + tbb::blocked_range(0, n, 1), + [&](tbb::blocked_range r) { + int i; + for (i = r.begin(); i < r.end(); ++i) + { + bool needBreak = false; + func(i, needBreak, a); + if (needBreak) context.cancel_group_execution(); + } + }, + context); + } + else + { + int i; + for (i = 0; i < n; ++i) + { + bool needBreak = false; + func(i, needBreak, a); + if (needBreak) break; + } } -#endif } DAAL_EXPORT int _daal_threader_get_max_threads() { -#if defined(__DO_TBB_LAYER__) return tbb::this_task_arena::max_concurrency(); -#elif defined(__DO_SEQ_LAYER__) - return 1; -#endif } DAAL_EXPORT int _daal_threader_get_current_thread_index() { -#if defined(__DO_TBB_LAYER__) return tbb::this_task_arena::current_thread_index(); -#elif defined(__DO_SEQ_LAYER__) - return 0; -#endif } DAAL_EXPORT void * _daal_get_tls_ptr(void * a, daal::tls_functype func) { -#if defined(__DO_TBB_LAYER__) tbb::enumerable_thread_specific * p = new 
tbb::enumerable_thread_specific([=]() -> void * { return func(a); }); return (void *)p; -#elif defined(__DO_SEQ_LAYER__) - return func(a); -#endif } DAAL_EXPORT void _daal_del_tls_ptr(void * tlsPtr) { -#if defined(__DO_TBB_LAYER__) tbb::enumerable_thread_specific * p = static_cast *>(tlsPtr); delete p; -#elif defined(__DO_SEQ_LAYER__) -#endif } DAAL_EXPORT void * _daal_get_tls_local(void * tlsPtr) { -#if defined(__DO_TBB_LAYER__) tbb::enumerable_thread_specific * p = static_cast *>(tlsPtr); return p->local(); -#elif defined(__DO_SEQ_LAYER__) - return tlsPtr; -#endif } DAAL_EXPORT void _daal_reduce_tls(void * tlsPtr, void * a, daal::tls_reduce_functype func) { -#if defined(__DO_TBB_LAYER__) tbb::enumerable_thread_specific * p = static_cast *>(tlsPtr); for (auto it = p->begin(); it != p->end(); ++it) { func((*it), a); } -#elif defined(__DO_SEQ_LAYER__) - func(tlsPtr, a); -#endif } DAAL_EXPORT void _daal_parallel_reduce_tls(void * tlsPtr, void * a, daal::tls_reduce_functype func) { -#if defined(__DO_TBB_LAYER__) size_t n = 0; tbb::enumerable_thread_specific * p = static_cast *>(tlsPtr); @@ -416,51 +415,34 @@ DAAL_EXPORT void _daal_parallel_reduce_tls(void * tlsPtr, void * a, daal::tls_re ::free(aDataPtr); } } -#elif defined(__DO_SEQ_LAYER__) - func(tlsPtr, a); -#endif } DAAL_EXPORT void * _daal_new_mutex() { -#if defined(__DO_TBB_LAYER__) return new tbb::spin_mutex(); -#elif defined(__DO_SEQ_LAYER__) - return NULL; -#endif } DAAL_EXPORT void _daal_lock_mutex(void * mutexPtr) { -#if defined(__DO_TBB_LAYER__) static_cast(mutexPtr)->lock(); -#endif } DAAL_EXPORT void _daal_unlock_mutex(void * mutexPtr) { -#if defined(__DO_TBB_LAYER__) static_cast(mutexPtr)->unlock(); -#endif } DAAL_EXPORT void _daal_del_mutex(void * mutexPtr) { -#if defined(__DO_TBB_LAYER__) delete static_cast(mutexPtr); -#endif } DAAL_EXPORT bool _daal_is_in_parallel() { -#if defined(__DO_TBB_LAYER__) - #if defined(TBB_INTERFACE_VERSION) && TBB_INTERFACE_VERSION >= 12002 +#if defined(TBB_INTERFACE_VERSION) && TBB_INTERFACE_VERSION >= 12002 return tbb::task::current_context() != nullptr; - #else - return tbb::task::self().state() == tbb::task::executing; - #endif #else - return false; + return tbb::task::self().state() == tbb::task::executing; #endif } @@ -470,7 +452,6 @@ DAAL_EXPORT void * _daal_threader_env() return &env; } -#if defined(__DO_TBB_LAYER__) template //Returns an index of the first element in the range[ar, ar + n) that is not less than(i.e.greater or equal to) value. 
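// [Editorial example] For ar = { 1, 3, 3, 5 } and n = 4:
//   lower_bound(4, ar, 3) == 1  (first element >= 3),
//   lower_bound(4, ar, 4) == 3  (first element >= 4 is the 5),
//   lower_bound(4, ar, 6) == 4  (no such element, so one past the end).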
size_t lower_bound(size_t n, const T * ar, const Key & value) @@ -630,19 +611,19 @@ class Collection size_t _capacity; }; - #if _WIN32 || _WIN64 +#if _WIN32 || _WIN64 typedef DWORD ThreadId; ThreadId getCurrentThreadId() { return ::GetCurrentThreadId(); } - #else +#else typedef pthread_t ThreadId; ThreadId getCurrentThreadId() { return pthread_self(); } - #endif // _WIN32||_WIN64 +#endif // _WIN32||_WIN64 class LocalStorage { @@ -826,38 +807,5 @@ DAAL_EXPORT void _daal_wait_task_group(void * taskGroupPtr) ((tbb::task_group *)taskGroupPtr)->wait(); } -#else -DAAL_EXPORT void * _daal_get_ls_ptr(void * a, daal::tls_functype func) -{ - return func(a); -} - -DAAL_EXPORT void * _daal_get_ls_local(void * lsPtr) -{ - return lsPtr; -} - -DAAL_EXPORT void _daal_reduce_ls(void * lsPtr, void * a, daal::tls_reduce_functype func) -{ - func(lsPtr, a); -} - -DAAL_EXPORT void _daal_del_ls_ptr(void * lsPtr) {} - -DAAL_EXPORT void _daal_release_ls_local(void * lsPtr, void * p) {} - -DAAL_EXPORT void * _daal_new_task_group() -{ - return nullptr; -} - -DAAL_EXPORT void _daal_del_task_group(void * taskGroupPtr) {} - -DAAL_EXPORT void _daal_run_task_group(void * taskGroupPtr, daal::task * task) {} - -DAAL_EXPORT void _daal_wait_task_group(void * taskGroupPtr) {} - -#endif - namespace daal {} diff --git a/makefile b/makefile index 1006bfb00aa..96081ea0354 100644 --- a/makefile +++ b/makefile @@ -489,7 +489,6 @@ $(CORE.objs_a): COPT += -D__TBB_NO_IMPLICIT_LINKAGE -DDAAL_NOTHROW_EXCEPTIONS \ -DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 -D_ENABLE_ATOMIC_ALIGNMENT_FIX \ $(if $(CHECK_DLL_SIG),-DDAAL_CHECK_DLL_SIG) $(CORE.objs_a): COPT += @$(CORE.tmpdir_a)/inc_a_folders.txt -$(filter %threading.$o, $(CORE.objs_a)): COPT += -D__DO_TBB_LAYER__ $(eval $(call append_uarch_copt,$(CORE.objs_a))) @@ -500,7 +499,6 @@ $(CORE.objs_y): COPT += -D__DAAL_IMPLEMENTATION \ -DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 -D_ENABLE_ATOMIC_ALIGNMENT_FIX \ $(if $(CHECK_DLL_SIG),-DDAAL_CHECK_DLL_SIG) $(CORE.objs_y): COPT += @$(CORE.tmpdir_y)/inc_y_folders.txt -$(filter %threading.$o, $(CORE.objs_y)): COPT += -D__DO_TBB_LAYER__ $(eval $(call append_uarch_copt,$(CORE.objs_y))) @@ -844,7 +842,6 @@ THR_TBB.objs := $(THR_TBB.objs_a) $(THR_TBB.objs_y) THR.objs := $(THR.objs_a) $(THR.objs_y) $(THR.objs): COPT += $(-fPIC) $(-cxx11) $(-Zl) $(-DEBC) -DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 -D_ENABLE_ATOMIC_ALIGNMENT_FIX -$(THR_TBB.objs): COPT += -D__DO_TBB_LAYER__ $(THR.objs_a): $(THR.tmpdir_a)/thr_inc_a_folders.txt $(THR.objs_a): COPT += @$(THR.tmpdir_a)/thr_inc_a_folders.txt From f06b35aa99ddb1f06de7f7b36ccffeb7b3d66ebc Mon Sep 17 00:00:00 2001 From: Victoriya Fedotova Date: Fri, 24 May 2024 13:09:19 +0200 Subject: [PATCH 24/65] Improve SYCL kernels in CSR table (#2794) --- cpp/oneapi/dal/table/backend/csr_kernels.cpp | 62 ++++++++------------ 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/cpp/oneapi/dal/table/backend/csr_kernels.cpp b/cpp/oneapi/dal/table/backend/csr_kernels.cpp index 58a73b74549..8e5aef236b2 100644 --- a/cpp/oneapi/dal/table/backend/csr_kernels.cpp +++ b/cpp/oneapi/dal/table/backend/csr_kernels.cpp @@ -408,6 +408,7 @@ bool is_sorted(sycl::queue& queue, // number of pairs of the subsequent elements in the data array that are sorted in desccending order, // i.e. for which data[i] > data[i + 1] is true. 
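// [Editorial example] This reduces "is the array sorted?" to "is the number of
// descending adjacent pairs zero?". E.g. for data = { 0, 2, 1 } the pair (2, 1)
// is descending, so the count is 1 and the array is reported as not sorted.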
std::int64_t count_descending_pairs = 0L;
+
     sycl::buffer count_buf(&count_descending_pairs, sycl::range<1>(1));
 
     // count the number of pairs of the subsequent elements in the data array that are sorted
@@ -418,10 +419,9 @@ bool is_sorted(sycl::queue& queue,
         auto count_descending_reduction =
             sycl::reduction(count_buf, cgh, sycl::ext::oneapi::plus());
 
-        cgh.parallel_for(sycl::nd_range<1>{ count - 1, 1 },
+        cgh.parallel_for(sycl::range<1>{ dal::detail::integral_cast(count - 1) },
                          count_descending_reduction,
-                         [=](sycl::nd_item<1> idx, auto& count_descending) {
-                             const auto i = idx.get_global_id(0);
+                         [=](sycl::id<1> i, auto& count_descending) {
                              if (data[i] > data[i + 1])
                                  count_descending.combine(1);
                          });
@@ -485,39 +485,29 @@ out_of_bound_type check_bounds(const array& arr,
     sycl::buffer count_lt_buf(&count_lt_min, sycl::range<1>(1));
     sycl::buffer count_gt_buf(&count_gt_max, sycl::range<1>(1));
 
-    // count the number of elements which are less than min_vaule using sycl::reduction
-    auto event_count_lt_min = queue.submit([&](sycl::handler& cgh) {
-        cgh.depends_on(dependencies);
-        auto count_lt_reduction =
-            sycl::reduction(count_lt_buf, cgh, sycl::ext::oneapi::plus());
-
-        cgh.parallel_for(sycl::nd_range<1>{ count, 1 },
-                         count_lt_reduction,
-                         [=](sycl::nd_item<1> idx, auto& count_lt) {
-                             const auto i = idx.get_global_id(0);
-                             if (data[i] < min_value) {
-                                 count_lt.combine(1);
-                             }
-                         });
-    });
-
-    // count the number of elements which are greater than max_vaule using sycl::reduction
-    auto event_count_gt_max = queue.submit([&](sycl::handler& cgh) {
-        cgh.depends_on(dependencies);
-        auto count_gt_reduction =
-            sycl::reduction(count_gt_buf, cgh, sycl::ext::oneapi::plus());
-
-        cgh.parallel_for(sycl::nd_range<1>{ count, 1 },
-                         count_gt_reduction,
-                         [=](sycl::nd_item<1> idx, auto& count_gt) {
-                             const auto i = idx.get_global_id(0);
-                             if (data[i] > max_value) {
-                                 count_gt.combine(1);
-                             }
-                         });
-    });
-
-    sycl::event::wait_and_throw({ event_count_lt_min, event_count_gt_max });
+    // count the number of elements which are less than min_value and
+    // the number of elements which are greater than max_value using sycl::reduction
+    queue
+        .submit([&](sycl::handler& cgh) {
+            cgh.depends_on(dependencies);
+            auto count_lt_reduction =
+                sycl::reduction(count_lt_buf, cgh, sycl::ext::oneapi::plus());
+            auto count_gt_reduction =
+                sycl::reduction(count_gt_buf, cgh, sycl::ext::oneapi::plus());
+
+            cgh.parallel_for(sycl::range<1>{ dal::detail::integral_cast(count) },
+                             count_lt_reduction,
+                             count_gt_reduction,
+                             [=](sycl::id<1> i, auto& count_lt, auto& count_gt) {
+                                 if (data[i] < min_value) {
+                                     count_lt.combine(1);
+                                 }
+                                 if (data[i] > max_value) {
+                                     count_gt.combine(1);
+                                 }
+                             });
+        })
+        .wait_and_throw();
 
     out_of_bound_type result{ out_of_bound_type::within_bounds };
     if (count_lt_min > 0)
From 3f7fbbcb405502a798d8d3cb5b5a22070ce546fc Mon Sep 17 00:00:00 2001
From: Maria Petrova
Date: Tue, 28 May 2024 14:26:06 +0200
Subject: [PATCH 25/65] Update requests version (#2804)

---
 docs/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index b2584e5a1a9..d149096a0f8 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -19,7 +19,7 @@ Pygments==2.16.1
 pyparsing==3.1.1
 pytz==2024.1
 PyYAML==6.0.1
-requests==2.31.0
+requests==2.32.0
 six==1.16.0
 snowballstemmer==2.2.0
 soupsieve==2.5
From 5d2f6c79f590c00e24ce8e0cc73b10d4c0db1eaf Mon Sep 17 00:00:00 2001
From: Dhanus M Lal <73832063+DhanusML@users.noreply.github.com>
Date: Wed, 29 May 2024
00:33:25 +0530
Subject: [PATCH 26/65] Changed openblas build to ILP64 (#2801)

* Changed openblas build to ILP64

The default OpenBLAS build is LP64 on 64-bit machines, but DAAL_INT is
a 64-bit signed integer. This causes errors during OpenBLAS calls
(mainly LAPACK calls), where arguments are expected to be 32-bit
integers or pointers to 32-bit integers, but 64-bit integers are given.
This causes many examples that use LAPACK routines to fail (when using
OpenBLAS as the BLAS/LAPACK backend).

An option to switch to the ILP64 interface has been added to the
OpenBLAS build script. The examples that now pass have also been
removed from the exclude list.

Signed-off-by: Dhanus M Lal

* added ILP64 to the cache key for gcc+openblas builds

Signed-off-by: Dhanus M Lal

* updated cache key and changed qemu version

Changed the qemu package version and added the ILP64 tag to the
openblas cache key for the LLVM+openblas builds

Signed-off-by: Dhanus M Lal

---------

Signed-off-by: Dhanus M Lal
---
 .ci/env/apt.sh | 2 +-
 .ci/env/openblas.sh | 8 ++++++++
 .ci/pipeline/ci.yml | 8 ++++----
 examples/daal/cpp/CMakeLists.txt | 5 +----
 examples/daal/cpp/target_excludes.cmake | 5 -----
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/.ci/env/apt.sh b/.ci/env/apt.sh
index a9aefe5ecae..e9a5549841f 100755
--- a/.ci/env/apt.sh
+++ b/.ci/env/apt.sh
@@ -63,7 +63,7 @@ function install_qemu_emulation_apt {
 }
 
 function install_qemu_emulation_deb {
-    qemu_deb=qemu-user-static_8.2.3+ds-2_amd64.deb
+    qemu_deb=qemu-user-static_8.2.1+ds-1~bpo12+1_amd64.deb
     set -eo pipefail
     wget http://ftp.debian.org/debian/pool/main/q/qemu/${qemu_deb}
     sudo dpkg -i ${qemu_deb}
diff --git a/.ci/env/openblas.sh b/.ci/env/openblas.sh
index 613885a641e..dc86b1d96d3 100755
--- a/.ci/env/openblas.sh
+++ b/.ci/env/openblas.sh
@@ -37,6 +37,7 @@ show_help() {
--prefix:The path where OpenBLAS will be installed
--version:The version of OpenBLAS to install.
This is a git reference from the OpenBLAS repo, and defaults to ${BLAS_DEFAULT_VERSION} --sysroot:If cross-compiling with LLVM, determines the location of the target architecture sysroot +--ilp64 : whether or not to use the ILP64 build ' } @@ -74,6 +75,9 @@ while [[ $# -gt 0 ]]; do --sysroot) sysroot="$2" shift;; + --ilp64) + ilp64=on + shift;; --help) show_help exit 0 @@ -89,6 +93,7 @@ done target=${target:-ARMV8} host_compiler=${host_compiler:-gcc} compiler=${compiler:-aarch64-linux-gnu-gcc} +openblas_ilp64=${ilp64:-on} target_arch=${target_arch:-$(uname -m)} OPENBLAS_DEFAULT_PREFIX="${ONEDAL_DIR}/__deps/openblas_${target_arch}" @@ -160,6 +165,9 @@ pushd "${blas_src_dir}" USE_THREAD=0 USE_LOCKING=1) fi + if [ "${openblas_ilp64}" == "on" ]; then + make_options+=( 'BINARY=64' 'INTERFACE64=1' ) + fi # Clean echo make "${make_options[@]}" clean make "${make_options[@]}" clean diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index 02fd1c90f87..e5481e4ca68 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -114,7 +114,7 @@ jobs: displayName: 'System info' - task: Cache@2 inputs: - key: '"gcc" | "aarch64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)"' + key: '"gcc" | "aarch64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)" | "ILP64"' path: $(OPENBLAS_CACHE_DIR) cacheHitVar: OPENBLAS_RESTORED - script: | @@ -201,7 +201,7 @@ jobs: displayName: 'System info' - task: Cache@2 inputs: - key: '"clang" | "18" | "aarch64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)"' + key: '"clang" | "18" | "aarch64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)" | "ILP64"' path: $(OPENBLAS_CACHE_DIR) cacheHitVar: OPENBLAS_RESTORED - script: | @@ -293,7 +293,7 @@ jobs: displayName: 'System info' - task: Cache@2 inputs: - key: '"clang" | "riscv64" | "openblas" | "$(OPENBLAS_VERSION)"' + key: '"clang" | "riscv64" | "openblas" | "$(OPENBLAS_VERSION)" | "ILP64"' path: $(OPENBLAS_CACHE_DIR) cacheHitVar: OPENBLAS_RESTORED - script: | @@ -363,7 +363,7 @@ jobs: displayName: 'System info' - task: Cache@2 inputs: - key: '"gcc" | "x86_64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)"' + key: '"gcc" | "x86_64" | "openblas" | "$(OPENBLAS_VERSION)" | "$(VM_IMAGE)" | "ILP64"' path: $(OPENBLAS_CACHE_DIR) cacheHitVar: OPENBLAS_RESTORED - script: | diff --git a/examples/daal/cpp/CMakeLists.txt b/examples/daal/cpp/CMakeLists.txt index fc7fdfd6f06..2ad80cbe34a 100644 --- a/examples/daal/cpp/CMakeLists.txt +++ b/examples/daal/cpp/CMakeLists.txt @@ -44,10 +44,7 @@ if(REF_BACKEND) ${EXCLUDE_LIST} "source/boosting/brownboost_dense_batch.cpp" "source/em/em_gmm_dense_batch.cpp" - "source/linear_regression/lin_reg_qr_dense_distr.cpp" - "source/pca/pca_cor*" - "source/qr/qr_dense*" - "source/svd/svd_dense_batch.cpp" + "source/pca/pca_cor_csr*" ) endif() diff --git a/examples/daal/cpp/target_excludes.cmake b/examples/daal/cpp/target_excludes.cmake index 52947f3cbe9..01f1cc665bc 100644 --- a/examples/daal/cpp/target_excludes.cmake +++ b/examples/daal/cpp/target_excludes.cmake @@ -42,16 +42,11 @@ elseif((CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") AND (CMAKE_C_COMPILER MATCHES "gcc")) set(EXCLUDE_LIST ${EXCLUDE_LIST} - "cholesky_dense_batch" "cor_csr_distr" "cor_csr_online" "cov_csr_distr" "cov_csr_online" "enable_thread_pinning" - "lin_reg_metrics_dense_batch" - "lin_reg_qr_dense_batch" - "lin_reg_qr_dense_online" - "out_detect_mult_dense_batch" ) elseif((CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64") AND (CMAKE_C_COMPILER MATCHES "clang")) From 9041a8a81bba1de65affd08f597ae37ea9933ea1 Mon Sep 17 
00:00:00 2001 From: Emm Walsh <148352712+emmwalsh@users.noreply.github.com> Date: Wed, 29 May 2024 13:55:09 +0100 Subject: [PATCH 27/65] Update Build Applications Page (#2797) * Update data-analytics-pipeline.rst Removed the second level header as it is showing up twice in IDZ. * Update build-application.rst * Update build-application.rst * Update build-application.rst * Revert "Merge branch 'main' of https://github.com/emmwalsh/oneDAL" This reverts commit 86910ff22321957a2cbfd9a11e18b599439ae7d7, reversing changes made to 908955d2f1de26389b0c647cfafdb92da52e22b4. * Update data-analytics-pipeline.rst --- docs/source/data-analytics-pipeline.rst | 3 --- docs/source/onedal/build_app/build-application.rst | 14 +------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/docs/source/data-analytics-pipeline.rst b/docs/source/data-analytics-pipeline.rst index 107c2ff1ff0..4cd9f32845e 100644 --- a/docs/source/data-analytics-pipeline.rst +++ b/docs/source/data-analytics-pipeline.rst @@ -19,9 +19,6 @@ Introduction .. _onedal_data_analytics_pipeline: -Data Analytics Pipeline -*********************** - |full_name| (|short_name|) is a library that provides building blocks covering all stages of data analytics: data acquisition from a data source, preprocessing, transformation, data mining, modeling, diff --git a/docs/source/onedal/build_app/build-application.rst b/docs/source/onedal/build_app/build-application.rst index 7574d8b9cbf..5e7249b29bb 100644 --- a/docs/source/onedal/build_app/build-application.rst +++ b/docs/source/onedal/build_app/build-application.rst @@ -29,7 +29,7 @@ Applications on Linux* OS #. Install |short_name|. -#. Set environment variables by calling ``/setvars.sh``. +#. Set environment variables by calling ``/vars.sh``. #. Build the application using ``icpx`` (Linux* OS) and ``icx-cl`` (Windows* OS) commands: @@ -117,13 +117,11 @@ Applications on Windows* OS * - Static linking - - | onedal_core.lib, | onedal_core.lib, | onedal_thread.lib * - Dynamic linking - | onedal_core_dll.lib - | onedal_core_dll.lib You may also add debug versions of the libraries based on the threading mode and linking method: @@ -137,10 +135,6 @@ Applications on Windows* OS * - Static linking - - | onedal_cored.lib, - | onedald.lib, - | onedal_dpcd.lib, - | onedal_sycld.lib, | onedal_cored.lib, | onedald.lib, | onedal_dpcd.lib, @@ -148,12 +142,6 @@ Applications on Windows* OS | onedal_threadd.lib * - Dynamic linking - - | onedal_cored_dll.lib (onedal_cored_dll.2.lib), - | onedald_dll.lib (onedald_dll.2.lib), - | onedal_dpcd_dll.lib (onedal_dpcd_dll.2.lib), - | onedald.2.dll, - | onedal_cored.2.dll, - | onedal_dpcd.2.dll, | onedal_cored_dll.lib (onedal_cored_dll.2.lib), | onedald_dll.lib (onedald_dll.2.lib), | onedal_dpcd_dll.lib (onedal_dpcd_dll.2.lib), From 00cfccbe1df4dcb985c80b4293485d23f6416782 Mon Sep 17 00:00:00 2001 From: Aleksandr Solovev Date: Wed, 29 May 2024 16:08:35 +0200 Subject: [PATCH 28/65] fix: reimplement custom daal calls for VC (#2803) --- .../backend/cpu/compute_kernel.cpp | 32 +++---- .../cpu/finalize_train_kernel_norm_eq.cpp | 72 ++++++--------- .../backend/cpu/finalize_train_kernel_cov.cpp | 91 ++++++++----------- .../backend/cpu/finalize_train_kernel_svd.cpp | 18 ++-- 4 files changed, 87 insertions(+), 126 deletions(-) diff --git a/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel.cpp b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel.cpp index 4c02a680251..3e88891de82 100644 --- 
a/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel.cpp +++ b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel.cpp @@ -155,25 +155,19 @@ result_t call_daal_kernel_with_weights(const context_cpu& ctx, daal_result.set(daal_lom::ResultId::sumSquaresCentered, daal_partial.get(daal_lom::PartialResultId::partialSumSquaresCentered)); } - - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_lom_online_kernel_t{}.finalizeCompute( - daal_partial.get(daal_lom::PartialResultId::nObservations).get(), - daal_partial.get(daal_lom::PartialResultId::partialSum).get(), - daal_partial.get(daal_lom::PartialResultId::partialSumSquares).get(), - daal_partial.get(daal_lom::PartialResultId::partialSumSquaresCentered).get(), - daal_result.get(daal_lom::ResultId::mean).get(), - daal_result.get(daal_lom::ResultId::secondOrderRawMoment).get(), - daal_result.get(daal_lom::ResultId::variance).get(), - daal_result.get(daal_lom::ResultId::standardDeviation).get(), - daal_result.get(daal_lom::ResultId::variation).get(), - &daal_parameter); - }); - - interop::status_to_exception(status); - } + interop::status_to_exception( + interop::call_daal_kernel_finalize_compute( + ctx, + daal_partial.get(daal_lom::PartialResultId::nObservations).get(), + daal_partial.get(daal_lom::PartialResultId::partialSum).get(), + daal_partial.get(daal_lom::PartialResultId::partialSumSquares).get(), + daal_partial.get(daal_lom::PartialResultId::partialSumSquaresCentered).get(), + daal_result.get(daal_lom::ResultId::mean).get(), + daal_result.get(daal_lom::ResultId::secondOrderRawMoment).get(), + daal_result.get(daal_lom::ResultId::variance).get(), + daal_result.get(daal_lom::ResultId::standardDeviation).get(), + daal_result.get(daal_lom::ResultId::variation).get(), + &daal_parameter)); auto result = get_result(desc, daal_result).set_result_options(desc.get_result_options()); diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp index 88b1c58ccc4..c29f9d8cdfe 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp +++ b/cpp/oneapi/dal/algo/linear_regression/backend/cpu/finalize_train_kernel_norm_eq.cpp @@ -31,11 +31,10 @@ namespace oneapi::dal::linear_regression::backend { -using daal::services::Status; using dal::backend::context_cpu; -namespace be = dal::backend; -namespace pr = be::primitives; +namespace bk = dal::backend; +namespace pr = bk::primitives; namespace interop = dal::backend::interop; namespace daal_lr = daal::algorithms::linear_regression; namespace daal_rr = daal::algorithms::ridge_regression; @@ -65,10 +64,10 @@ static daal_lr_hyperparameters_t convert_parameters(const detail::train_paramete } template -static train_result call_daal_kernel(const context_cpu& ctx, - const detail::descriptor_base& desc, - const detail::train_parameters& params, - const partial_train_result& input) { +static train_result call_daal_kernel_finalize(const context_cpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const partial_train_result& input) { using dal::detail::check_mul_overflow; using model_t = model; @@ -94,38 +93,27 @@ static train_result call_daal_kernel(const context_cpu& ctx, auto ridge_matrix_array = array::full(1, static_cast(alpha)); auto ridge_matrix = 
interop::convert_to_daal_homogen_table(ridge_matrix_array, 1, 1); - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return online_rr_kernel_t().finalizeCompute(*xtx_daal_table, - *xty_daal_table, - *xtx_daal_table, - *xty_daal_table, - *betas_daal_table, - compute_intercept, - *ridge_matrix); - }); - - interop::status_to_exception(status); - } + interop::status_to_exception( + interop::call_daal_kernel_finalize_compute(ctx, + *xtx_daal_table, + *xty_daal_table, + *xtx_daal_table, + *xty_daal_table, + *betas_daal_table, + compute_intercept, + *ridge_matrix)); } else { const daal_lr_hyperparameters_t& hp = convert_parameters(params); - - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return online_lr_kernel_t().finalizeCompute(*xtx_daal_table, - *xty_daal_table, - *xtx_daal_table, - *xty_daal_table, - *betas_daal_table, - compute_intercept, - &hp); - }); - - interop::status_to_exception(status); - } + interop::status_to_exception( + interop::call_daal_kernel_finalize_compute(ctx, + *xtx_daal_table, + *xty_daal_table, + *xtx_daal_table, + *xty_daal_table, + *betas_daal_table, + compute_intercept, + &hp)); } auto betas_table = homogen_table::wrap(betas_arr, response_count, feature_count + 1); @@ -167,11 +155,11 @@ static train_result call_daal_kernel(const context_cpu& ctx, } template -static train_result train(const context_cpu& ctx, - const detail::descriptor_base& desc, - const detail::train_parameters& params, - const partial_train_result& input) { - return call_daal_kernel(ctx, desc, params, input); +static train_result finalize_train(const context_cpu& ctx, + const detail::descriptor_base& desc, + const detail::train_parameters& params, + const partial_train_result& input) { + return call_daal_kernel_finalize(ctx, desc, params, input); } template @@ -180,7 +168,7 @@ struct finalize_train_kernel_cpu { const detail::descriptor_base& desc, const detail::train_parameters& params, const partial_train_result& input) const { - return train(ctx, desc, params, input); + return finalize_train(ctx, desc, params, input); } }; diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp index 176ae2e39c6..df19493418f 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp @@ -125,63 +125,44 @@ static train_result call_daal_kernel_finalize_train(const context_cpu& ctx &daal_parameter, &daal_hyperparameter)); - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_pca_cor_kernel_t().computeCorrelationEigenvalues( - *daal_cor_matrix, - *daal_eigenvectors, - *daal_eigenvalues); - }); - - interop::status_to_exception(status); + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_pca_cor_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + .computeCorrelationEigenvalues(*daal_cor_matrix, *daal_eigenvectors, *daal_eigenvalues); + })); + + if (desc.get_deterministic()) { + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_pca_cor_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + .signFlipEigenvectors(*daal_eigenvectors); + })); } 
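    // [Editorial note] Eigenvectors are only defined up to sign, so without a fixed
    // convention "deterministic" results could still flip between runs or backends.
    // A common convention (a sketch only; the exact rule lives inside
    // signFlipEigenvectors) is to negate a vector whose largest-magnitude component
    // is negative:
    //
    //   for each eigenvector v:
    //       k = argmax_i |v[i]|
    //       if (v[k] < 0) v = -v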
- { - if (desc.get_deterministic()) { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_pca_cor_kernel_t().signFlipEigenvectors( - *daal_eigenvectors); - }); - - interop::status_to_exception(status); - } - } - - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_pca_cor_kernel_t().computeSingularValues( - *daal_eigenvalues, - *daal_singular_values, - row_count); - }); - - interop::status_to_exception(status); - } - - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_pca_cor_kernel_t().computeVariancesFromCov( - *daal_cor_matrix, - *daal_variances); - }); - interop::status_to_exception(status); - } - - { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_pca_cor_kernel_t().computeExplainedVariancesRatio( - *daal_eigenvalues, - *daal_variances, - *daal_explained_variances_ratio); - }); - - interop::status_to_exception(status); - } + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_pca_cor_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + .computeSingularValues(*daal_eigenvalues, *daal_singular_values, row_count); + })); + + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_pca_cor_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + .computeVariancesFromCov(*daal_cor_matrix, *daal_variances); + })); + + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_pca_cor_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + .computeExplainedVariancesRatio(*daal_eigenvalues, + *daal_variances, + *daal_explained_variances_ratio); + })); if (desc.get_result_options().test(result_options::eigenvectors)) { result.set_eigenvectors(homogen_table::wrap(arr_eigvec, component_count, column_count)); diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp index 127d19ed4c4..3ff7d9b40e3 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_svd.cpp @@ -97,16 +97,14 @@ static train_result call_daal_kernel_finalize_train(const context_cpu& ctx result.set_singular_values(homogen_table::wrap(reshaped_eigval, 1, component_count)); if (desc.get_normalization_mode() == normalization::mean_center) { - const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { - constexpr auto cpu_type = interop::to_daal_cpu_type::value; - return daal_svd_kernel_t().computeEigenValues( - *daal_singular_values, - *daal_eigenvalues, - rows_count_global); - }); - - interop::status_to_exception(status); - result.set_eigenvalues(homogen_table::wrap(reshaped_eigval, 1, component_count)); + interop::status_to_exception(dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) { + return daal_svd_kernel_t< + Float, + dal::backend::interop::to_daal_cpu_type::value>() + .computeEigenValues(*daal_singular_values, + *daal_eigenvalues, + rows_count_global); + })); } else { result.set_eigenvalues(homogen_table::wrap(reshaped_eigval, 1, component_count)); From 
7958b941bdbb220b6d1bbb483ae0833020628b17 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Tue, 4 Jun 2024 10:51:21 +0200 Subject: [PATCH 29/65] [bug, CI] fix error handling in windows builds (#2802) * Update build.bat * Update test.bat * Update build.bat * Update build.bat * Update build.bat * Update build.bat * Update build.bat * Update build.bat --- .ci/scripts/build.bat | 17 +++++++++-------- .ci/scripts/test.bat | 9 +++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/.ci/scripts/build.bat b/.ci/scripts/build.bat index d218464707c..76c78c620cd 100644 --- a/.ci/scripts/build.bat +++ b/.ci/scripts/build.bat @@ -19,8 +19,8 @@ rem %1 - Make target rem %2 - Compiler rem %3 - Instruction set -for /f "tokens=*" %%i in ('python -c "from multiprocessing import cpu_count; print(cpu_count())"') do set CPUCOUNT=%%i -echo CPUCOUNT=%CPUCOUNT% +set errorcode=0 +echo CPUCOUNT=%NUMBER_OF_PROCESSORS% echo PATH=C:\msys64\usr\bin;%PATH% set PATH=C:\msys64\usr\bin;%PATH% @@ -29,15 +29,16 @@ echo pacman -S --noconfirm msys/make msys/dos2unix pacman -S --noconfirm msys/make msys/dos2unix echo call .ci\env\tbb.bat -call .ci\env\tbb.bat +if "%TBBROOT%"=="" if not exist .\__deps\tbb\win\tbb call .ci\env\tbb.bat || set errorcode=1 echo call .\dev\download_micromkl.bat -call .\dev\download_micromkl.bat +if "%MKLGPUFPKROOT%"=="" if not exist .\__deps\mklgpufpk\win call .\dev\download_micromkl.bat || set errorcode=1 echo call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 -call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 +if "%VISUALSTUDIOVERSION%"=="" call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 || set errorcode=1 -echo make %1 -j%CPUCOUNT% COMPILER=%2 PLAT=win32e REQCPU=%3 -make %1 -j%CPUCOUNT% COMPILER=%2 PLAT=win32e REQCPU=%3 +echo make %1 -j%NUMBER_OF_PROCESSORS% COMPILER=%2 PLAT=win32e REQCPU=%3 +make %1 -j%NUMBER_OF_PROCESSORS% COMPILER=%2 PLAT=win32e REQCPU=%3 || set errorcode=1 -cmake -DINSTALL_DIR=__release_win_vc\daal\latest\lib\cmake\oneDAL -DARCH_DIR=intel64 -P cmake\scripts\generate_config.cmake +cmake -DINSTALL_DIR=__release_win_vc\daal\latest\lib\cmake\oneDAL -DARCH_DIR=intel64 -P cmake\scripts\generate_config.cmake || set errorcode=1 +EXIT /B %errorcode% diff --git a/.ci/scripts/test.bat b/.ci/scripts/test.bat index 74909f3652b..aaced0689be 100644 --- a/.ci/scripts/test.bat +++ b/.ci/scripts/test.bat @@ -35,10 +35,10 @@ echo PATH=C:\msys64\usr\bin;%PATH% set PATH=C:\msys64\usr\bin;%PATH% echo call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 -call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 +call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64 || set errorcode=1 echo call __release_win_vc\daal\latest\env\vars.bat -call __release_win_vc\daal\latest\env\vars.bat +call __release_win_vc\daal\latest\env\vars.bat || set errorcode=1 echo set LIB=%~dp0..\..\__release_win_vc\tbb\latest\lib\intel64\vc_mt;%LIB% set LIB=%~dp0..\..\__release_win_vc\tbb\latest\lib\intel64\vc_mt;%LIB% @@ -69,9 +69,9 @@ if "%build_system%"=="cmake" ( set results_dir=_cmake_results\intel_intel64_%cmake_link_mode_short%\Release echo cmake -B Build -S . -DONEDAL_LINK=%cmake_link_mode% -DTBB_DIR=%TBB_DIR% - cmake -B Build -S . -DONEDAL_LINK=%cmake_link_mode% -DTBB_DIR=%TBB_DIR% + cmake -B Build -S . 
-DONEDAL_LINK=%cmake_link_mode% -DTBB_DIR=%TBB_DIR% || set errorcode=1 set solution_name=%examples:\=_% - msbuild.exe "Build\!solution_name!_examples.sln" /p:Configuration=Release + msbuild.exe "Build\!solution_name!_examples.sln" /p:Configuration=Release || set errorcode=1 for /f "delims=." %%F in ('dir /B !results_dir!\*.exe 2^> nul') do ( set example=%%F @@ -100,3 +100,4 @@ if "%build_system%"=="cmake" ( if "%examples%"=="daal\cpp" nmake %linking% compiler=%compiler% if "%examples%"=="oneapi\cpp" nmake %linking% compiler=%compiler% ) +EXIT /B %errorcode% From ec0bcd82d373ff14c684acbf1ca3213281bc5796 Mon Sep 17 00:00:00 2001 From: Emm Walsh <148352712+emmwalsh@users.noreply.github.com> Date: Wed, 5 Jun 2024 13:35:55 +0100 Subject: [PATCH 30/65] Update security md (#2798) * Update data-analytics-pipeline.rst * Update build-application.rst * Update SECURITY.md * Revert "Update build-application.rst" This reverts commit 08d7ce450f725f79e55abe06ad97b260e07868bf. * Revert "Update data-analytics-pipeline.rst" This reverts commit 78e7e2e2923c1489aaf86c4bf6aeb36519a702bd. * Update SECURITY.md Update SECURITY.md removed asterix --- SECURITY.md | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 5 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index eb482d90983..49a0208db6e 100755 --- a/SECURITY.md +++ b/SECURITY.md @@ -1,12 +1,91 @@ + + # Security Policy +As an open-source project, we understand the importance of and responsibility +for security. This Security Policy outlines our guidelines and procedures to +ensure the highest level of security and trust for Intel(R) oneAPI Data Analytics Library. + +## Supported Versions + +Security vulnerabilities are fixed in the [latest version][1] +and delivered as a patch release. We don't guarantee security fixes to be +back-ported to older Intel(R) oneAPI Data Analytics Library versions. + ## Report a Vulnerability -Please report security issues or vulnerabilities to the [Intel® Security Center]. +We are very grateful to the security researchers and users that report back +security vulnerabilities. We investigate every report thoroughly. +We strongly encourage you to report security vulnerabilities to us privately, +before disclosing them on public forums or opening a public GitHub* issue. + +Report a vulnerability to us in one of two ways: + +* Open a draft **[GitHub* Security Advisory][2]** +* Send an e-mail to: **security@uxlfoundation.org**. + +Along with the report, provide the following info: + + * A descriptive title. + * Your name and affiliation (if any). + * A description of the technical details of the vulnerabilities. + * A minimal example of the vulnerability so we can reproduce your findings. + * An explanation of who can exploit this vulnerability, and what they gain + doing so. + * Whether this vulnerability is public or known to third parties. If it is, + provide details. + +### When Should I Report a Vulnerability? + +* You think you discovered a potential security vulnerability in Intel(R) oneAPI Data Analytics Library. +* You are unsure how the potential vulnerability affects Intel(R) oneAPI Data Analytics Library. +* You think you discovered a vulnerability in another project or 3rd party +component on which Intel(R) oneAPI Data Analytics Library depends. If the issue is not fixed in the 3rd party +component, try to report directly there first. + +### When Should I NOT Report a Vulnerability? + +* You got an automated scan hit and are unable to provide details. 
+* You need help using Intel(R) oneAPI Data Analytics Library for security. +* You need help applying security-related updates. +* Your issue is not security-related. + +## Security Reports Review Process + +We aim to respond quickly to your inquiry and coordinate a fix and +disclosure with you. All confirmed security vulnerabilities will be addressed +according to severity level and impact on Intel(R) oneAPI Data Analytics Library. Normally, security issues are fixed in the next planned release. + +## Disclosure Policy + +We will publish security advisories using the +[**GitHub Security Advisories feature**][3] +to keep our community well-informed, and will credit you for your findings +unless you prefer to stay anonymous. We request that you refrain from +exploiting the vulnerability or making it public before the official disclosure. + +We will disclose the vulnerabilities and bugs as soon as possible once +mitigation is implemented and available. -For more information on how Intel® works to resolve security issues, see -[Vulnerability Handling Guidelines]. +## Feedback on This Policy -[Intel® Security Center]:https://www.intel.com/security +If you have any suggestions on how this Policy could be improved, submit +an issue or a pull request to this repository. **Do not** report +potential vulnerabilities or security flaws via a pull request. -[Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html +[1]: https://github.com/oneapi-src/oneDAL/releases \ No newline at end of file From 801c154e0be9672116f0016c40272826f49c4008 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 5 Jun 2024 16:24:02 +0200 Subject: [PATCH 31/65] [bug] fix dpc compilation for OS=win and COMPILER=vc (#2810) * Update deps.mk * Create build_oneDAL.yml * Update build_oneDAL.yml * Update deps.mk * remove CI job * Update deps.mk --- dev/make/deps.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/make/deps.mk b/dev/make/deps.mk index 07c5d37971c..eb50d8006b7 100644 --- a/dev/make/deps.mk +++ b/dev/make/deps.mk @@ -79,9 +79,9 @@ $1 = $$(if $$(or $$(.sources-changed),$$(and $$(.mkfiles-changed),$$(call .trigg dep-gen-enhanced-common = $(call $(SELF),$1 $(.copt-gen-deps)) && $(.keep-raw-deps) sed -n $(sed.-i) $(sed.fix-deps) $(sed.rm-abs-paths) -e '/./{ p; $(sed.mk-phony-targets)}' $(.dep-file-tmp) dep-gen-enhanced.icc = $(dep-gen-enhanced-common) dep-gen-enhanced.icl = $(dep-gen-enhanced-common) -dep-gen-enhanced.icx = $(dep-gen-enhanced-common) +dep-gen-enhanced.icx = $(if $(COMPILER_is_vc),,$(dep-gen-enhanced-common)) dep-gen-enhanced.g++ = $(dep-gen-enhanced-common) -dep-gen-enhanced.dpcpp = $(if $(OS_is_win),,$(dep-gen-enhanced-common)) +dep-gen-enhanced.icpx = $(dep-gen-enhanced-common) cmd-enhanced-with-dep-gen = $(or $(dep-gen-enhanced.$(call get-command-name,$($(SELF)))),$($(SELF))) $(call .inject.dep.gen, C.COMPILE, $$(cmd-enhanced-with-dep-gen)) From 799043ed8262ce1f4ee3bc063e756bde27c37e1b Mon Sep 17 00:00:00 2001 From: Dhanus M Lal <73832063+DhanusML@users.noreply.github.com> Date: Thu, 6 Jun 2024 13:12:00 +0530 Subject: [PATCH 32/65] Added `csrmultd` and `csrmv` reference implementation (#2807) * Added csrmultd and csrmv ref implementation The reference implementation of these functions were missing. These are required by algorithms that compute covariance of data in CSR format. * csrmultd xcsrmultd calls either csrmultd or csrmultd_transpose depending on the first argument. 
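As a rough illustration of the kernels added here — a minimal, self-contained
sketch of the row-wise CSR traversal, assuming 1-based indexing (the function
name and the `main` driver below are hypothetical and not part of this patch):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Sketch of csrmv: y <- beta * y + alpha * A * x, with A in 4-array CSR
// layout (values, column indices, row begins, row ends), 1-based indices.
void csrmv_sketch(std::int64_t rows, double alpha, const std::vector<double>& val,
                  const std::vector<std::int64_t>& indx,
                  const std::vector<std::int64_t>& pntrb,
                  const std::vector<std::int64_t>& pntre,
                  const std::vector<double>& x, double beta, std::vector<double>& y) {
    const std::int64_t one = 1; // 1-based indexing offset
    for (std::int64_t row = 0; row < rows; ++row) {
        y[row] *= beta;
        // accumulate the sparse dot product of row `row` with x
        for (std::int64_t i = pntrb[row] - one; i < pntre[row] - one; ++i) {
            y[row] += alpha * x[indx[i] - one] * val[i];
        }
    }
}

int main() {
    // 2x3 matrix [[1, 0, 2], [0, 3, 0]] in 1-based CSR form
    const std::vector<double> val{ 1.0, 2.0, 3.0 };
    const std::vector<std::int64_t> indx{ 1, 3, 2 };
    const std::vector<std::int64_t> pntrb{ 1, 3 };
    const std::vector<std::int64_t> pntre{ 3, 4 };
    const std::vector<double> x{ 1.0, 1.0, 1.0 };
    std::vector<double> y{ 0.0, 0.0 };
    csrmv_sketch(2, 1.0, val, indx, pntrb, pntre, x, 0.0, y);
    std::cout << y[0] << " " << y[1] << "\n"; // prints: 3 3
}
```

The actual kernels below implement the same traversal over raw `DAAL_INT`/`fpType`
arrays and additionally honor the `matdescra` indexing flag.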
**Description**: $C = op(A)B$, where $A$ and $B$ are CSR matrices
(3-array format, 1-based indexing) and $C$ is a dense matrix stored in
column-major format. $op$ is either the identity transformation or the
transpose. The implementation is based on the MKL specification
(https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2024-0/mkl-csrmultd.html)

* csrmv

xcsrmv also calls either csrmv or csrmv_transpose depending on the
first argument. Support for both 1-based and 0-based indexing has been
added, as per the MKL specification
(https://www.intel.com/content/www/us/en/docs/onemkl/developer-reference-c/2024-0/mkl-csrmv.html).

**Description**: $y \leftarrow \beta y + \alpha op(A)x$, where $x$ and $y$
are 1D arrays, $\alpha$ and $\beta$ are scalars, and $op$ is either the
identity or the transpose. $A$ is stored in CSR (4-array format).

This enables the following examples with the openblas backend:
1. pca_cor_csr_batch
2. pca_cor_csr_online
3. pca_cor_csr_distr
4. cov_csr_batch
5. cov_csr_online
6. cov_csr_distr
7. cor_csr_batch
8. cor_csr_online
9. cor_csr_distr

These have been removed from the exclude list.

Signed-off-by: Dhanus M Lal <73832063+DhanusML@users.noreply.github.com>

* clang format fix

Signed-off-by: Dhanus M Lal <73832063+DhanusML@users.noreply.github.com>

---------

Signed-off-by: Dhanus M Lal <73832063+DhanusML@users.noreply.github.com>
---
 cpp/daal/src/externals/service_spblas_ref.h | 119 +++++++++++++++++++-
 examples/daal/cpp/CMakeLists.txt            |   2 +-
 examples/daal/cpp/target_excludes.cmake     |   4 -
 3 files changed, 118 insertions(+), 7 deletions(-)

diff --git a/cpp/daal/src/externals/service_spblas_ref.h b/cpp/daal/src/externals/service_spblas_ref.h
index a42c888aee6..2106e689a0e 100644
--- a/cpp/daal/src/externals/service_spblas_ref.h
+++ b/cpp/daal/src/externals/service_spblas_ref.h
@@ -1,6 +1,7 @@
 /* file: service_spblas_ref.h */
 /*******************************************************************************
 * Copyright 2023 Intel Corporation
+* Copyright contributors to the oneDAL project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -24,6 +25,8 @@
 #ifndef __SERVICE_SPBLAS_REF_H__
 #define __SERVICE_SPBLAS_REF_H__
 
+#include "src/externals/service_memory.h" // required for memset
+
 namespace daal
 {
 namespace internal
@@ -38,13 +41,125 @@ struct RefSpBlas
     static void xcsrmultd(const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, fpType * a, DAAL_INT * ja, DAAL_INT * ia,
                           fpType * b, DAAL_INT * jb, DAAL_INT * ib, fpType * c, DAAL_INT * ldc)
     {
-        services::throwIfPossible(services::Status(services::ErrorMethodNotImplemented));
+        if (*transa == 'n' || *transa == 'N')
+        {
+            csrmultd(m, n, k, a, ja, ia, b, jb, ib, c, ldc);
+        }
+        else
+        {
+            csrmultd_transpose(m, n, k, a, ja, ia, b, jb, ib, c, ldc);
+        }
+    }
+
+    static void csrmultd(const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, fpType * a, DAAL_INT * ja, DAAL_INT * ia, fpType * b,
+                         DAAL_INT * jb, DAAL_INT * ib, fpType * c, DAAL_INT * ldc)
+    {
+        DAAL_INT indexing = 1; // 1-based indexing
+        DAAL_INT row_b, row_c, col_c, val_ptr_a, val_ptr_b;
+        fpType a_elt, b_elt;
+        DAAL_OVERFLOW_CHECK_BY_MULTIPLICATION_THROW_IF_POSSIBLE(DAAL_INT, *ldc, (*k) - 1);
+        for (DAAL_INT col_c = 0; col_c < *k; col_c++) //flush the matrix c
+        {
+            services::internal::service_memset<fpType, cpu>(c + col_c * (*ldc), fpType(0), *m);
+        }
+        for (row_c = 0; row_c < *m; row_c++) // row_a = row_c
+        {
+            for (val_ptr_a = ia[row_c] - indexing; val_ptr_a < ia[row_c + 1] - indexing; val_ptr_a++)
+            {
+                row_b = ja[val_ptr_a] - indexing;
+                a_elt = a[val_ptr_a];
+                for (val_ptr_b = ib[row_b] - indexing; val_ptr_b < ib[row_b + 1] - indexing; val_ptr_b++)
+                {
+                    col_c = jb[val_ptr_b] - indexing;
+                    b_elt = b[val_ptr_b];
+                    c[col_c * (*ldc) + row_c] += a_elt * b_elt;
+                }
+            }
+        }
+    }
+
+    static void csrmultd_transpose(const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, fpType * a, DAAL_INT * ja, DAAL_INT * ia, fpType * b,
+                                   DAAL_INT * jb, DAAL_INT * ib, fpType * c, DAAL_INT * ldc)
+    {
+        DAAL_INT indexing = 1;
+        DAAL_INT row_a, row_b, row_c, col_c, val_ptr_a, val_ptr_b;
+        fpType a_elt, b_elt;
+        DAAL_OVERFLOW_CHECK_BY_MULTIPLICATION_THROW_IF_POSSIBLE(DAAL_INT, *ldc, (*n) - 1);
+        for (DAAL_INT col_c = 0; col_c < *k; col_c++) //flush the matrix c
+        {
+            services::internal::service_memset<fpType, cpu>(c + col_c * (*ldc), fpType(0), *n);
+        }
+        for (row_a = 0; row_a < *m; row_a++)
+        {
+            row_b = row_a;
+            for (val_ptr_b = ib[row_b] - indexing; val_ptr_b < ib[row_b + 1] - indexing; val_ptr_b++)
+            {
+                b_elt = b[val_ptr_b];
+                col_c = jb[val_ptr_b] - indexing; //col_c = col_b
+                for (val_ptr_a = ia[row_a] - indexing; val_ptr_a < ia[row_a + 1] - indexing; val_ptr_a++)
+                {
+                    row_c = ja[val_ptr_a] - indexing; //row_c = col_a
+                    a_elt = a[val_ptr_a];
+                    c[col_c * (*ldc) + row_c] += a_elt * b_elt;
+                }
+            }
+        }
+    }
 
     static void xcsrmv(const char * transa, const DAAL_INT * m, const DAAL_INT * k, const fpType * alpha, const char * matdescra,
                        const fpType * val, const DAAL_INT * indx, const DAAL_INT * pntrb, const DAAL_INT * pntre, const fpType * x,
                        const fpType * beta, fpType * y)
     {
-        services::throwIfPossible(services::Status(services::ErrorMethodNotImplemented));
+        if (*transa == 'n' || *transa == 'N')
+        {
+            csrmv(m, k, alpha, matdescra, val, indx, pntrb, pntre, x, beta, y);
+        }
+        else
+        {
+            csrmv_transpose(m, k, alpha, matdescra, val, indx, pntrb, pntre, x, beta, y);
+        }
+    }
+
+    static void csrmv(const DAAL_INT * m, const DAAL_INT * k, const fpType * alpha, const char * matdescra, const fpType * val, const DAAL_INT * indx,
+                      const DAAL_INT * pntrb, const DAAL_INT * pntre, const fpType * x, const fpType * beta, fpType * y)
+    {
+        
DAAL_INT indexing = 1; + if (matdescra[3] == 'C') indexing = 0; // if fourth entry is 'C' zero based + DAAL_INT curr_row_start, curr_row_end, i, k_ind; + for (DAAL_INT row_num = 0; row_num < *m; row_num++) + { + y[row_num] *= (*beta); + curr_row_start = pntrb[row_num] - indexing; + curr_row_end = pntre[row_num] - indexing; + for (i = curr_row_start; i < curr_row_end; i++) + { + k_ind = indx[i] - indexing; + y[row_num] += (*alpha) * x[k_ind] * val[i]; + } + } + } + + static void csrmv_transpose(const DAAL_INT * m, const DAAL_INT * k, const fpType * alpha, const char * matdescra, const fpType * val, + const DAAL_INT * indx, const DAAL_INT * pntrb, const DAAL_INT * pntre, const fpType * x, const fpType * beta, + fpType * y) + { + DAAL_INT indexing = 1; + if (matdescra[3] == 'C') indexing = 0; // if fourth entry is 'C' zero based + for (DAAL_INT _i = 0; _i < *k; _i++) + { + y[_i] *= *beta; + } + fpType coeff; + DAAL_INT row_num, i, curr_row_start, curr_row_end; + for (row_num = 0; row_num < *m; row_num++) + { + coeff = (*alpha) * x[row_num]; + curr_row_start = pntrb[row_num] - indexing; + curr_row_end = pntre[row_num] - indexing; + for (i = curr_row_start; i < curr_row_end; i++) + { + y[indx[i] - indexing] += coeff * val[i]; + } + } } static void xcsrmm(const char * transa, const DAAL_INT * m, const DAAL_INT * n, const DAAL_INT * k, const fpType * alpha, const char * matdescra, diff --git a/examples/daal/cpp/CMakeLists.txt b/examples/daal/cpp/CMakeLists.txt index 2ad80cbe34a..4deb10fd3f5 100644 --- a/examples/daal/cpp/CMakeLists.txt +++ b/examples/daal/cpp/CMakeLists.txt @@ -1,5 +1,6 @@ #=============================================================================== # Copyright 2021 Intel Corporation +# Copyright contributors to the oneDAL project # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -44,7 +45,6 @@ if(REF_BACKEND) ${EXCLUDE_LIST} "source/boosting/brownboost_dense_batch.cpp" "source/em/em_gmm_dense_batch.cpp" - "source/pca/pca_cor_csr*" ) endif() diff --git a/examples/daal/cpp/target_excludes.cmake b/examples/daal/cpp/target_excludes.cmake index 01f1cc665bc..99d1cb9331b 100644 --- a/examples/daal/cpp/target_excludes.cmake +++ b/examples/daal/cpp/target_excludes.cmake @@ -42,10 +42,6 @@ elseif((CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") AND (CMAKE_C_COMPILER MATCHES "gcc")) set(EXCLUDE_LIST ${EXCLUDE_LIST} - "cor_csr_distr" - "cor_csr_online" - "cov_csr_distr" - "cov_csr_online" "enable_thread_pinning" ) elseif((CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64") AND From bbe13a4bcaca8eabd3751b0ccf58ed9a044254f9 Mon Sep 17 00:00:00 2001 From: Mariia Vtiurina Date: Thu, 6 Jun 2024 09:36:22 -0400 Subject: [PATCH 33/65] update version to 2024.6.0 (#2770) --- deploy/pkg-config/pkg-config.tpl | 2 +- dev/bazel/config/config.bzl | 2 +- docs/doxygen/doxygen_conf_cpp.txt | 2 +- docs/doxygen/doxygen_conf_cpp_examples.txt | 2 +- docs/doxygen/doxygen_conf_cpp_web.txt | 2 +- makefile.ver | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/deploy/pkg-config/pkg-config.tpl b/deploy/pkg-config/pkg-config.tpl index ad6dfefe07c..0145dc1cc19 100755 --- a/deploy/pkg-config/pkg-config.tpl +++ b/deploy/pkg-config/pkg-config.tpl @@ -22,7 +22,7 @@ includedir=${{prefix}}/include #info Name: oneDAL Description: Intel(R) oneAPI Data Analytics Library -Version: 2024.5 +Version: 2024.6 URL: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onedal.html #Link line Libs: {libs} diff --git a/dev/bazel/config/config.bzl b/dev/bazel/config/config.bzl index e096130f837..b1bf70b2c10 100644 --- a/dev/bazel/config/config.bzl +++ b/dev/bazel/config/config.bzl @@ -210,7 +210,7 @@ def _declare_onedal_config_impl(repo_ctx): substitutions = { "%{auto_cpu}": auto_cpu, "%{version_major}": "2024", - "%{version_minor}": "5", + "%{version_minor}": "6", "%{version_update}": "0", "%{version_build}": utils.datestamp(repo_ctx), "%{version_buildrev}": "work", diff --git a/docs/doxygen/doxygen_conf_cpp.txt b/docs/doxygen/doxygen_conf_cpp.txt index e3851064cfa..1ac2861b4bc 100644 --- a/docs/doxygen/doxygen_conf_cpp.txt +++ b/docs/doxygen/doxygen_conf_cpp.txt @@ -38,7 +38,7 @@ PROJECT_NAME = "C++ API Reference for Intel(R) oneAPI Data Analytics L # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "2024.5" +PROJECT_NUMBER = "2024.6" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/docs/doxygen/doxygen_conf_cpp_examples.txt b/docs/doxygen/doxygen_conf_cpp_examples.txt index 124e9c67e91..52ac8ef8627 100644 --- a/docs/doxygen/doxygen_conf_cpp_examples.txt +++ b/docs/doxygen/doxygen_conf_cpp_examples.txt @@ -38,7 +38,7 @@ PROJECT_NAME = "C++ API Reference for Intel(R) oneAPI Data Analytics L # could be handy for archiving the generated documentation or if some version # control system is used. 
-PROJECT_NUMBER = "2024.5" +PROJECT_NUMBER = "2024.6" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/docs/doxygen/doxygen_conf_cpp_web.txt b/docs/doxygen/doxygen_conf_cpp_web.txt index 24f72f030aa..28e11e19666 100644 --- a/docs/doxygen/doxygen_conf_cpp_web.txt +++ b/docs/doxygen/doxygen_conf_cpp_web.txt @@ -38,7 +38,7 @@ PROJECT_NAME = "C++ API Reference for Intel(R) oneAPI Data Analytics L # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = "2024.5" +PROJECT_NUMBER = "2024.6" # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/makefile.ver b/makefile.ver index 38352e3d048..b00ff4ae913 100644 --- a/makefile.ver +++ b/makefile.ver @@ -15,7 +15,7 @@ #=============================================================================== MAJOR = 2024 -MINOR = 5 +MINOR = 6 UPDATE = 0 BUILD = $(shell date +'%Y%m%d') STATUS = P From 24fa3d1f298d1cbd256140fedc69d49806b68e17 Mon Sep 17 00:00:00 2001 From: Emm Walsh <148352712+emmwalsh@users.noreply.github.com> Date: Wed, 12 Jun 2024 12:59:02 +0100 Subject: [PATCH 34/65] Fix broken links (#2813) * Update data-analytics-pipeline.rst * Update build-application.rst * fixed broken links --- .../daal/algorithms/association_rules/association-rules.rst | 2 +- docs/source/daal/algorithms/boosting/adaboost.rst | 2 +- docs/source/daal/algorithms/boosting/brownboost.rst | 2 +- docs/source/daal/algorithms/boosting/logitboost.rst | 2 +- docs/source/daal/algorithms/cholesky/cholesky.rst | 2 +- .../correlation-and-variance-covariance-matrices.rst | 2 +- docs/source/daal/algorithms/dbscan/index.rst | 2 +- .../decision_forest/decision-forest-classification.rst | 6 +++--- .../decision_forest/decision-forest-regression.rst | 6 +++--- .../decision_tree/decision-tree-classification.rst | 4 ++-- .../algorithms/decision_tree/decision-tree-regression.rst | 4 ++-- docs/source/daal/algorithms/distance/correlation.rst | 2 +- docs/source/daal/algorithms/distance/cosine.rst | 2 +- docs/source/daal/algorithms/distributions/bernoulli.rst | 2 +- docs/source/daal/algorithms/distributions/normal.rst | 2 +- docs/source/daal/algorithms/distributions/uniform.rst | 2 +- docs/source/daal/algorithms/em/expectation-maximization.rst | 2 +- .../gradient-boosted-trees-classification.rst | 4 ++-- .../gradient-boosted-trees-regression.rst | 4 ++-- .../implicit_als/implicit-alternating-least-squares.rst | 2 +- .../k-nearest-neighbors-knn-classifier.rst | 4 ++-- docs/source/daal/algorithms/kmeans/k-means-clustering.rst | 2 +- .../linear_ridge_regression/linear-regression.rst | 2 +- .../linear-ridge-regression-computation.rst | 4 ++-- .../algorithms/logistic_regression/logistic-regression.rst | 4 ++-- .../source/daal/algorithms/moments/moments-of-low-order.rst | 2 +- .../daal/algorithms/naive_bayes/naive-bayes-classifier.rst | 2 +- docs/source/daal/algorithms/normalization/min-max.rst | 2 +- docs/source/daal/algorithms/normalization/z-score.rst | 2 +- .../objective-functions/cross-entropy.rst | 2 +- .../solvers/adaptive-subgradient-method.rst | 2 +- .../daal/algorithms/optimization-solvers/solvers/lbfgs.rst | 4 ++-- .../stochastic-average-gradient-accelerated-method.rst | 2 +- .../solvers/stochastic-gradient-descent-algorithm.rst | 4 ++-- .../algorithms/outlier_detection/multivariate-bacon.rst | 2 
+- .../daal/algorithms/outlier_detection/multivariate.rst | 2 +- .../source/daal/algorithms/outlier_detection/univariate.rst | 2 +- .../daal/algorithms/pca/principal-component-analysis.rst | 2 +- docs/source/daal/algorithms/pca/transform.rst | 2 +- docs/source/daal/algorithms/qr/qr-pivoted.rst | 2 +- docs/source/daal/algorithms/qr/qr-without-pivoting.rst | 2 +- docs/source/daal/algorithms/quantiles/index.rst | 2 +- docs/source/daal/algorithms/sorting/index.rst | 2 +- docs/source/daal/algorithms/stump/classification.rst | 2 +- docs/source/daal/algorithms/stump/regression.rst | 2 +- .../daal/algorithms/svd/singular-value-decomposition.rst | 2 +- .../algorithms/svm/support-vector-machine-classifier.rst | 2 +- .../algorithms/svm_multi_class/multi-class-classifier.rst | 2 +- 48 files changed, 61 insertions(+), 61 deletions(-) diff --git a/docs/source/daal/algorithms/association_rules/association-rules.rst b/docs/source/daal/algorithms/association_rules/association-rules.rst index 3e1761aba00..26714d092fc 100644 --- a/docs/source/daal/algorithms/association_rules/association-rules.rst +++ b/docs/source/daal/algorithms/association_rules/association-rules.rst @@ -235,7 +235,7 @@ Examples Batch Processing: - - :daal4py_example:`association_rules_batch.py` + - :daal4py_example:`association_rules.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/boosting/adaboost.rst b/docs/source/daal/algorithms/boosting/adaboost.rst index 9e2212060d0..0a20f221006 100644 --- a/docs/source/daal/algorithms/boosting/adaboost.rst +++ b/docs/source/daal/algorithms/boosting/adaboost.rst @@ -141,4 +141,4 @@ Examples .. tab:: Python* - - :daal4py_example:`adaboost_batch.py` + - :daal4py_example:`adaboost.py` diff --git a/docs/source/daal/algorithms/boosting/brownboost.rst b/docs/source/daal/algorithms/boosting/brownboost.rst index 27f127e3445..75d4b6da0cb 100644 --- a/docs/source/daal/algorithms/boosting/brownboost.rst +++ b/docs/source/daal/algorithms/boosting/brownboost.rst @@ -192,4 +192,4 @@ Examples Batch Processing: - - :daal4py_example:`brownboost_batch.py` + - :daal4py_example:`brownboost.py` diff --git a/docs/source/daal/algorithms/boosting/logitboost.rst b/docs/source/daal/algorithms/boosting/logitboost.rst index 537b7d4389d..06dec03a048 100644 --- a/docs/source/daal/algorithms/boosting/logitboost.rst +++ b/docs/source/daal/algorithms/boosting/logitboost.rst @@ -193,4 +193,4 @@ Examples Batch Processing: - - :daal4py_example:`logitboost_batch.py` + - :daal4py_example:`logitboost.py` diff --git a/docs/source/daal/algorithms/cholesky/cholesky.rst b/docs/source/daal/algorithms/cholesky/cholesky.rst index 3bef41b6f3e..2cff2d92bbc 100644 --- a/docs/source/daal/algorithms/cholesky/cholesky.rst +++ b/docs/source/daal/algorithms/cholesky/cholesky.rst @@ -115,7 +115,7 @@ Examples Batch Processing: - - :daal4py_example:`cholesky_batch.py` + - :daal4py_example:`cholesky.py` Performance Considerations diff --git a/docs/source/daal/algorithms/covariance/correlation-and-variance-covariance-matrices.rst b/docs/source/daal/algorithms/covariance/correlation-and-variance-covariance-matrices.rst index b4b4cc0b6af..759e3e9829e 100644 --- a/docs/source/daal/algorithms/covariance/correlation-and-variance-covariance-matrices.rst +++ b/docs/source/daal/algorithms/covariance/correlation-and-variance-covariance-matrices.rst @@ -88,7 +88,7 @@ Examples Batch Processing: - - :daal4py_example:`covariance_batch.py` + - :daal4py_example:`covariance.py` Online Processing: diff --git 
a/docs/source/daal/algorithms/dbscan/index.rst b/docs/source/daal/algorithms/dbscan/index.rst index 50f69b3ba84..82a3ff22dd5 100644 --- a/docs/source/daal/algorithms/dbscan/index.rst +++ b/docs/source/daal/algorithms/dbscan/index.rst @@ -92,7 +92,7 @@ Examples Batch Processing: - - :daal4py_example:`dbscan_batch.py` + - :daal4py_example:`dbscan.py` Distributed Processing: diff --git a/docs/source/daal/algorithms/decision_forest/decision-forest-classification.rst b/docs/source/daal/algorithms/decision_forest/decision-forest-classification.rst index 28e6a7634b4..42d5fc06177 100644 --- a/docs/source/daal/algorithms/decision_forest/decision-forest-classification.rst +++ b/docs/source/daal/algorithms/decision_forest/decision-forest-classification.rst @@ -269,7 +269,7 @@ Examples Batch Processing: - - :daal4py_example:`decision_forest_classification_default_dense_batch.py` - - :daal4py_example:`decision_forest_classification_hist_batch.py` - - :daal4py_example:`decision_forest_classification_traverse_batch.py` + - :daal4py_example:`decision_forest_classification_default_dense.py` + - :daal4py_example:`decision_forest_classification_hist.py` + - :daal4py_example:`decision_forest_classification_traverse.py` diff --git a/docs/source/daal/algorithms/decision_forest/decision-forest-regression.rst b/docs/source/daal/algorithms/decision_forest/decision-forest-regression.rst index 552829735ed..db3077f023b 100644 --- a/docs/source/daal/algorithms/decision_forest/decision-forest-regression.rst +++ b/docs/source/daal/algorithms/decision_forest/decision-forest-regression.rst @@ -174,6 +174,6 @@ Examples Batch Processing: - - :daal4py_example:`decision_forest_regression_default_dense_batch.py` - - :daal4py_example:`decision_forest_regression_hist_batch.py` - - :daal4py_example:`decision_forest_regression_traverse_batch.py` + - :daal4py_example:`decision_forest_regression_default_dense.py` + - :daal4py_example:`decision_forest_regression_hist.py` + - :daal4py_example:`decision_forest_regression_traverse.py` diff --git a/docs/source/daal/algorithms/decision_tree/decision-tree-classification.rst b/docs/source/daal/algorithms/decision_tree/decision-tree-classification.rst index 6d72c037a59..4087dd407df 100644 --- a/docs/source/daal/algorithms/decision_tree/decision-tree-classification.rst +++ b/docs/source/daal/algorithms/decision_tree/decision-tree-classification.rst @@ -217,5 +217,5 @@ Examples Batch Processing: - - :daal4py_example:`decision_tree_classification_batch.py` - - :daal4py_example:`decision_tree_classification_traverse_batch.py` + - :daal4py_example:`decision_tree_classification.py` + - :daal4py_example:`decision_tree_classification_traverse.py` diff --git a/docs/source/daal/algorithms/decision_tree/decision-tree-regression.rst b/docs/source/daal/algorithms/decision_tree/decision-tree-regression.rst index 03fed253842..bbf1d36d63d 100644 --- a/docs/source/daal/algorithms/decision_tree/decision-tree-regression.rst +++ b/docs/source/daal/algorithms/decision_tree/decision-tree-regression.rst @@ -167,5 +167,5 @@ Examples Batch Processing: - - :daal4py_example:`decision_tree_regression_batch.py` - - :daal4py_example:`decision_tree_regression_traverse_batch.py` + - :daal4py_example:`decision_tree_regression.py` + - :daal4py_example:`decision_tree_regression_traverse.py` diff --git a/docs/source/daal/algorithms/distance/correlation.rst b/docs/source/daal/algorithms/distance/correlation.rst index 7e232bfb060..469e02e7f8b 100644 --- a/docs/source/daal/algorithms/distance/correlation.rst +++ 
b/docs/source/daal/algorithms/distance/correlation.rst @@ -124,7 +124,7 @@ Examples Batch Processing: - - :daal4py_example:`correlation_distance_batch.py` + - :daal4py_example:`correlation_distance.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/distance/cosine.rst b/docs/source/daal/algorithms/distance/cosine.rst index e40525b367c..d3ea497e95b 100644 --- a/docs/source/daal/algorithms/distance/cosine.rst +++ b/docs/source/daal/algorithms/distance/cosine.rst @@ -118,7 +118,7 @@ Examples Batch Processing: - - :daal4py_example:`cosine_distance_batch.py` + - :daal4py_example:`cosine_distance.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/distributions/bernoulli.rst b/docs/source/daal/algorithms/distributions/bernoulli.rst index c7d719f60bd..b21332635ec 100644 --- a/docs/source/daal/algorithms/distributions/bernoulli.rst +++ b/docs/source/daal/algorithms/distributions/bernoulli.rst @@ -83,7 +83,7 @@ Examples Batch Processing: - - :daal4py_example:`distributions_bernoulli_batch.py` + - :daal4py_example:`distributions_bernoulli.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/distributions/normal.rst b/docs/source/daal/algorithms/distributions/normal.rst index 30e66ee7902..e0da45b632d 100644 --- a/docs/source/daal/algorithms/distributions/normal.rst +++ b/docs/source/daal/algorithms/distributions/normal.rst @@ -73,4 +73,4 @@ Examples Batch Processing: - - :daal4py_example:`distributions_normal_batch.py` + - :daal4py_example:`distributions_normal.py` diff --git a/docs/source/daal/algorithms/distributions/uniform.rst b/docs/source/daal/algorithms/distributions/uniform.rst index 15c30a2dc50..c795ee56d68 100644 --- a/docs/source/daal/algorithms/distributions/uniform.rst +++ b/docs/source/daal/algorithms/distributions/uniform.rst @@ -83,4 +83,4 @@ Examples Batch Processing: - - :daal4py_example:`distributions_uniform_batch.py` + - :daal4py_example:`distributions_uniform.py` diff --git a/docs/source/daal/algorithms/em/expectation-maximization.rst b/docs/source/daal/algorithms/em/expectation-maximization.rst index dc27069b430..f20a5a207d2 100644 --- a/docs/source/daal/algorithms/em/expectation-maximization.rst +++ b/docs/source/daal/algorithms/em/expectation-maximization.rst @@ -456,7 +456,7 @@ Examples Batch Processing: - - :daal4py_example:`em_gmm_batch.py` + - :daal4py_example:`em_gmm.py` Performance Considerations ========================== diff --git a/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-classification.rst b/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-classification.rst index 49f67a4d5c2..356e6906511 100644 --- a/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-classification.rst +++ b/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-classification.rst @@ -196,5 +196,5 @@ Examples Batch Processing: - - :daal4py_example:`gradient_boosted_classification_batch.py` - - :daal4py_example:`gradient_boosted_classification_traverse_batch.py` + - :daal4py_example:`gradient_boosted_classification.py` + - :daal4py_example:`gradient_boosted_classification_traverse.py` diff --git a/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-regression.rst b/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-regression.rst index 863e2ae462d..d096cd14f01 100644 --- 
a/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-regression.rst +++ b/docs/source/daal/algorithms/gradient_boosted_trees/gradient-boosted-trees-regression.rst @@ -172,5 +172,5 @@ Examples Batch Processing: - - :daal4py_example:`gradient_boosted_regression_batch.py` - - :daal4py_example:`gradient_boosted_regression_traverse_batch.py` + - :daal4py_example:`gradient_boosted_regression.py` + - :daal4py_example:`gradient_boosted_regression_traverse.py` diff --git a/docs/source/daal/algorithms/implicit_als/implicit-alternating-least-squares.rst b/docs/source/daal/algorithms/implicit_als/implicit-alternating-least-squares.rst index 682e580f556..7d4d109ed25 100644 --- a/docs/source/daal/algorithms/implicit_als/implicit-alternating-least-squares.rst +++ b/docs/source/daal/algorithms/implicit_als/implicit-alternating-least-squares.rst @@ -130,7 +130,7 @@ Examples Batch Processing: - - :daal4py_example:`implicit_als_batch.py` + - :daal4py_example:`implicit_als.py` Performance Considerations diff --git a/docs/source/daal/algorithms/k_nearest_neighbors/k-nearest-neighbors-knn-classifier.rst b/docs/source/daal/algorithms/k_nearest_neighbors/k-nearest-neighbors-knn-classifier.rst index 192b8b8a849..a871d0a6e3d 100644 --- a/docs/source/daal/algorithms/k_nearest_neighbors/k-nearest-neighbors-knn-classifier.rst +++ b/docs/source/daal/algorithms/k_nearest_neighbors/k-nearest-neighbors-knn-classifier.rst @@ -323,5 +323,5 @@ Examples Batch Processing: - - :daal4py_example:`kdtree_knn_classification_batch.py` - - :daal4py_example:`bf_knn_classification_batch.py` + - :daal4py_example:`kdtree_knn_classification.py` + - :daal4py_example:`bf_knn_classification.py` diff --git a/docs/source/daal/algorithms/kmeans/k-means-clustering.rst b/docs/source/daal/algorithms/kmeans/k-means-clustering.rst index 3ce239c9109..ba0df9b4f1e 100644 --- a/docs/source/daal/algorithms/kmeans/k-means-clustering.rst +++ b/docs/source/daal/algorithms/kmeans/k-means-clustering.rst @@ -289,7 +289,7 @@ Examples Batch Processing: - - :daal4py_example:`kmeans_batch.py` + - :daal4py_example:`kmeans.py` Distributed Processing diff --git a/docs/source/daal/algorithms/linear_ridge_regression/linear-regression.rst b/docs/source/daal/algorithms/linear_ridge_regression/linear-regression.rst index f34b149615f..90a93d65beb 100644 --- a/docs/source/daal/algorithms/linear_ridge_regression/linear-regression.rst +++ b/docs/source/daal/algorithms/linear_ridge_regression/linear-regression.rst @@ -108,4 +108,4 @@ Examples .. 
tab:: Python* - - :daal4py_example:`lin_reg_model_builder.py` + - :daal4py_example:`lin_reg_model.py` diff --git a/docs/source/daal/algorithms/linear_ridge_regression/linear-ridge-regression-computation.rst b/docs/source/daal/algorithms/linear_ridge_regression/linear-ridge-regression-computation.rst index db56a69da18..1a9351afc4b 100644 --- a/docs/source/daal/algorithms/linear_ridge_regression/linear-ridge-regression-computation.rst +++ b/docs/source/daal/algorithms/linear_ridge_regression/linear-ridge-regression-computation.rst @@ -455,8 +455,8 @@ Examples Batch Processing: - - :daal4py_example:`linear_regression_batch.py` - - :daal4py_example:`ridge_regression_batch.py` + - :daal4py_example:`linear_regression.py` + - :daal4py_example:`ridge_regression.py` Online Processing: diff --git a/docs/source/daal/algorithms/logistic_regression/logistic-regression.rst b/docs/source/daal/algorithms/logistic_regression/logistic-regression.rst index 3e0a5abb76f..04dc52c14e2 100644 --- a/docs/source/daal/algorithms/logistic_regression/logistic-regression.rst +++ b/docs/source/daal/algorithms/logistic_regression/logistic-regression.rst @@ -274,5 +274,5 @@ Examples Batch Processing: - - :daal4py_example:`log_reg_dense_batch.py` - - :daal4py_example:`log_reg_binary_dense_batch.py` + - :daal4py_example:`log_reg_dense.py` + - :daal4py_example:`log_reg_binary_dense.py` diff --git a/docs/source/daal/algorithms/moments/moments-of-low-order.rst b/docs/source/daal/algorithms/moments/moments-of-low-order.rst index fb341fdd13b..70ac92e0b20 100644 --- a/docs/source/daal/algorithms/moments/moments-of-low-order.rst +++ b/docs/source/daal/algorithms/moments/moments-of-low-order.rst @@ -108,7 +108,7 @@ Examples Batch Processing: - - :daal4py_example:`low_order_moms_dense_batch.py` + - :daal4py_example:`low_order_moms_dense.py` Online Processing: diff --git a/docs/source/daal/algorithms/naive_bayes/naive-bayes-classifier.rst b/docs/source/daal/algorithms/naive_bayes/naive-bayes-classifier.rst index 5740a82957f..3bc7a604307 100644 --- a/docs/source/daal/algorithms/naive_bayes/naive-bayes-classifier.rst +++ b/docs/source/daal/algorithms/naive_bayes/naive-bayes-classifier.rst @@ -104,7 +104,7 @@ Examples Batch Processing: - - :daal4py_example:`naive_bayes_batch.py` + - :daal4py_example:`naive_bayes.py` Online Processing: diff --git a/docs/source/daal/algorithms/normalization/min-max.rst b/docs/source/daal/algorithms/normalization/min-max.rst index 7d86c24b9f0..68058fb48ee 100644 --- a/docs/source/daal/algorithms/normalization/min-max.rst +++ b/docs/source/daal/algorithms/normalization/min-max.rst @@ -137,4 +137,4 @@ Examples Batch Processing: - - :daal4py_example:`normalization_minmax_batch.py` + - :daal4py_example:`normalization_minmax.py` diff --git a/docs/source/daal/algorithms/normalization/z-score.rst b/docs/source/daal/algorithms/normalization/z-score.rst index 61800faf5a7..cfa8324aebf 100644 --- a/docs/source/daal/algorithms/normalization/z-score.rst +++ b/docs/source/daal/algorithms/normalization/z-score.rst @@ -191,4 +191,4 @@ Examples Batch Processing: - - :daal4py_example:`normalization_zscore_batch.py` + - :daal4py_example:`normalization_zscore.py` diff --git a/docs/source/daal/algorithms/optimization-solvers/objective-functions/cross-entropy.rst b/docs/source/daal/algorithms/optimization-solvers/objective-functions/cross-entropy.rst index 58d37049dc6..06e398be40e 100644 --- a/docs/source/daal/algorithms/optimization-solvers/objective-functions/cross-entropy.rst +++ 
b/docs/source/daal/algorithms/optimization-solvers/objective-functions/cross-entropy.rst @@ -251,4 +251,4 @@ Examples .. tab:: Python* - - :daal4py_example:`lbfgs_cr_entr_loss_batch.py` + - :daal4py_example:`lbfgs_cr_entr_loss.py` diff --git a/docs/source/daal/algorithms/optimization-solvers/solvers/adaptive-subgradient-method.rst b/docs/source/daal/algorithms/optimization-solvers/solvers/adaptive-subgradient-method.rst index a78b19dd517..ae7f0946a9e 100644 --- a/docs/source/daal/algorithms/optimization-solvers/solvers/adaptive-subgradient-method.rst +++ b/docs/source/daal/algorithms/optimization-solvers/solvers/adaptive-subgradient-method.rst @@ -150,4 +150,4 @@ Examples .. tab:: Python* - - :daal4py_example:`adagrad_mse_batch.py` + - :daal4py_example:`adagrad_mse.py` diff --git a/docs/source/daal/algorithms/optimization-solvers/solvers/lbfgs.rst b/docs/source/daal/algorithms/optimization-solvers/solvers/lbfgs.rst index 9b7e64a4027..aad70393c8b 100644 --- a/docs/source/daal/algorithms/optimization-solvers/solvers/lbfgs.rst +++ b/docs/source/daal/algorithms/optimization-solvers/solvers/lbfgs.rst @@ -306,5 +306,5 @@ Examples Batch Processing: - - :daal4py_example:`lbfgs_cr_entr_loss_batch.py` - - :daal4py_example:`lbfgs_mse_batch.py` + - :daal4py_example:`lbfgs_cr_entr_loss.py` + - :daal4py_example:`lbfgs_mse.py` diff --git a/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-average-gradient-accelerated-method.rst b/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-average-gradient-accelerated-method.rst index d38f3e2ac7a..8cadad8915c 100644 --- a/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-average-gradient-accelerated-method.rst +++ b/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-average-gradient-accelerated-method.rst @@ -185,4 +185,4 @@ Examples Batch Processing: - - :daal4py_example:`saga_batch.py` + - :daal4py_example:`saga.py` diff --git a/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-gradient-descent-algorithm.rst b/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-gradient-descent-algorithm.rst index 57ca4cf518f..41fcb76b236 100644 --- a/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-gradient-descent-algorithm.rst +++ b/docs/source/daal/algorithms/optimization-solvers/solvers/stochastic-gradient-descent-algorithm.rst @@ -226,8 +226,8 @@ Examples Batch Processing: - - :daal4py_example:`sgd_logistic_loss_batch.py` - - :daal4py_example:`sgd_mse_batch.py` + - :daal4py_example:`sgd_logistic_loss.py` + - :daal4py_example:`sgd_mse.py` .. 
Python*: diff --git a/docs/source/daal/algorithms/outlier_detection/multivariate-bacon.rst b/docs/source/daal/algorithms/outlier_detection/multivariate-bacon.rst index 6a1412e3171..d611040849b 100644 --- a/docs/source/daal/algorithms/outlier_detection/multivariate-bacon.rst +++ b/docs/source/daal/algorithms/outlier_detection/multivariate-bacon.rst @@ -156,4 +156,4 @@ Examples Batch Processing: - - :daal4py_example:`bacon_outlier_batch.py` + - :daal4py_example:`bacon_outlier.py` diff --git a/docs/source/daal/algorithms/outlier_detection/multivariate.rst b/docs/source/daal/algorithms/outlier_detection/multivariate.rst index deef0b23466..73f090e69c7 100644 --- a/docs/source/daal/algorithms/outlier_detection/multivariate.rst +++ b/docs/source/daal/algorithms/outlier_detection/multivariate.rst @@ -151,7 +151,7 @@ Examples Batch Processing: - - :daal4py_example:`multivariate_outlier_batch.py` + - :daal4py_example:`multivariate_outlier.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/outlier_detection/univariate.rst b/docs/source/daal/algorithms/outlier_detection/univariate.rst index df79ff7314e..b0335e24656 100644 --- a/docs/source/daal/algorithms/outlier_detection/univariate.rst +++ b/docs/source/daal/algorithms/outlier_detection/univariate.rst @@ -157,4 +157,4 @@ Examples Batch Processing: - - :daal4py_example:`univariate_outlier_batch.py` + - :daal4py_example:`univariate_outlier.py` diff --git a/docs/source/daal/algorithms/pca/principal-component-analysis.rst b/docs/source/daal/algorithms/pca/principal-component-analysis.rst index 00bd6e8d184..c03ddb97061 100644 --- a/docs/source/daal/algorithms/pca/principal-component-analysis.rst +++ b/docs/source/daal/algorithms/pca/principal-component-analysis.rst @@ -138,7 +138,7 @@ Examples Batch Processing: - - :daal4py_example:`pca_batch.py` + - :daal4py_example:`pca.py` Distributed Processing: diff --git a/docs/source/daal/algorithms/pca/transform.rst b/docs/source/daal/algorithms/pca/transform.rst index 4d141434c99..d680773ec19 100644 --- a/docs/source/daal/algorithms/pca/transform.rst +++ b/docs/source/daal/algorithms/pca/transform.rst @@ -137,4 +137,4 @@ Examples Batch Processing: - - :daal4py_example:`pca_transform_batch.py` + - :daal4py_example:`pca_transform.py` diff --git a/docs/source/daal/algorithms/qr/qr-pivoted.rst b/docs/source/daal/algorithms/qr/qr-pivoted.rst index d729968b52a..35aef56d166 100644 --- a/docs/source/daal/algorithms/qr/qr-pivoted.rst +++ b/docs/source/daal/algorithms/qr/qr-pivoted.rst @@ -148,4 +148,4 @@ Examples Batch Processing: - - :daal4py_example:`pivoted_qr_batch.py` + - :daal4py_example:`pivoted_qr.py` diff --git a/docs/source/daal/algorithms/qr/qr-without-pivoting.rst b/docs/source/daal/algorithms/qr/qr-without-pivoting.rst index f8f2a7bf55e..bec3b9405a3 100644 --- a/docs/source/daal/algorithms/qr/qr-without-pivoting.rst +++ b/docs/source/daal/algorithms/qr/qr-without-pivoting.rst @@ -66,7 +66,7 @@ Examples Batch Processing: - - :daal4py_example:`qr_batch.py` + - :daal4py_example:`qr.py` Online Processing: diff --git a/docs/source/daal/algorithms/quantiles/index.rst b/docs/source/daal/algorithms/quantiles/index.rst index c45636e3b7d..487d017f417 100644 --- a/docs/source/daal/algorithms/quantiles/index.rst +++ b/docs/source/daal/algorithms/quantiles/index.rst @@ -123,4 +123,4 @@ Examples Batch Processing: - - :daal4py_example:`quantiles_batch.py` + - :daal4py_example:`quantiles.py` diff --git a/docs/source/daal/algorithms/sorting/index.rst 
b/docs/source/daal/algorithms/sorting/index.rst index b6806d9339b..d7abb8d781d 100644 --- a/docs/source/daal/algorithms/sorting/index.rst +++ b/docs/source/daal/algorithms/sorting/index.rst @@ -111,4 +111,4 @@ Examples Batch Processing: - - :daal4py_example:`sorting_batch.py` + - :daal4py_example:`sorting.py` diff --git a/docs/source/daal/algorithms/stump/classification.rst b/docs/source/daal/algorithms/stump/classification.rst index b575bf90cf7..610a9a61e27 100644 --- a/docs/source/daal/algorithms/stump/classification.rst +++ b/docs/source/daal/algorithms/stump/classification.rst @@ -120,4 +120,4 @@ Examples Batch Processing: - - :daal4py_example:`stump_classification_batch.py` + - :daal4py_example:`stump_classification.py` diff --git a/docs/source/daal/algorithms/stump/regression.rst b/docs/source/daal/algorithms/stump/regression.rst index cb5e5099582..bb1b6ba691d 100644 --- a/docs/source/daal/algorithms/stump/regression.rst +++ b/docs/source/daal/algorithms/stump/regression.rst @@ -94,4 +94,4 @@ Examples Batch Processing: - - :daal4py_example:`stump_regression_batch.py` + - :daal4py_example:`stump_regression.py` diff --git a/docs/source/daal/algorithms/svd/singular-value-decomposition.rst b/docs/source/daal/algorithms/svd/singular-value-decomposition.rst index 80a77dc3051..ee9e24554c7 100644 --- a/docs/source/daal/algorithms/svd/singular-value-decomposition.rst +++ b/docs/source/daal/algorithms/svd/singular-value-decomposition.rst @@ -77,7 +77,7 @@ Examples Batch Processing: - - :daal4py_example:`svd_batch.py` + - :daal4py_example:`svd.py` Online Processing: diff --git a/docs/source/daal/algorithms/svm/support-vector-machine-classifier.rst b/docs/source/daal/algorithms/svm/support-vector-machine-classifier.rst index b5aaf81f25f..f04871e8fc7 100644 --- a/docs/source/daal/algorithms/svm/support-vector-machine-classifier.rst +++ b/docs/source/daal/algorithms/svm/support-vector-machine-classifier.rst @@ -264,7 +264,7 @@ Examples Batch Processing: - - :daal4py_example:`svm_batch.py` + - :daal4py_example:`svm.py` Performance Considerations ************************** diff --git a/docs/source/daal/algorithms/svm_multi_class/multi-class-classifier.rst b/docs/source/daal/algorithms/svm_multi_class/multi-class-classifier.rst index 666f0fefd4f..6c5405fe720 100644 --- a/docs/source/daal/algorithms/svm_multi_class/multi-class-classifier.rst +++ b/docs/source/daal/algorithms/svm_multi_class/multi-class-classifier.rst @@ -255,4 +255,4 @@ Examples Batch Processing: - - :daal4py_example:`svm_multiclass_batch.py` + - :daal4py_example:`svm_multiclass.py` From b0a5a0e252ecaac033a1bd005b8748959265a1d3 Mon Sep 17 00:00:00 2001 From: Aleksei Khomenko Date: Mon, 17 Jun 2024 16:46:07 +0200 Subject: [PATCH 35/65] docs: update `Installation` section in `README.md` (#2788) --- README.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8cd4e957622..ec2315ccded 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,24 @@ Deprecation Notice: The Java interfaces are removed from the oneDAL library. ## Installation -Check [System Requirements](https://oneapi-src.github.io/oneDAL/system-requirements.html) before installing oneDAL. +Check the [System Requirements](https://oneapi-src.github.io/oneDAL/system-requirements.html) before installing to ensure compatibility with your system. 
+ +There are several options available for installing oneDAL: + +- **Binary Distribution**: You can download pre-built binary packages from the following sources: + - Intel® oneAPI: + - Download as Part of the [Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onedal.html#gs.8xrue2) + - Download as the Stand-Alone [Intel® oneAPI Data Analytics Library](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onedal.html#gs.8xrue2) + - Anaconda: + | Channel | Version | + |:-------:|:-------:| + | intel | [![Anaconda-Server Intel Badge](https://anaconda.org/intel/dal-devel/badges/version.svg)](https://anaconda.org/intel/dal-devel) | + | conda-forge | [![Anaconda-Server Conda-forge Badge](https://anaconda.org/conda-forge/dal-devel/badges/version.svg)](https://anaconda.org/conda-forge/dal-devel) | + + - [NuGet](https://www.nuget.org/packages/inteldal.devel.linux-x64) + +- **Source Distribution**: You can build the library from source. To do this, [download the specific version of oneDAL](https://github.com/oneapi-src/oneDAL/releases) from the official GitHub repository and follow the instructions in the [INSTALL.md](INSTALL.md). -You can [download the specific version of oneDAL](https://github.com/oneapi-src/oneDAL/releases) or [install it from sources](INSTALL.md). ## Examples From 580a776f99542a79e55963eaafc9f0d51274cbbe Mon Sep 17 00:00:00 2001 From: msa <111298646+md-shafiul-alam@users.noreply.github.com> Date: Thu, 20 Jun 2024 10:56:20 -0400 Subject: [PATCH 36/65] fix memory leak in hashmap (#2811) --- cpp/oneapi/dal/detail/hash_map.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/oneapi/dal/detail/hash_map.hpp b/cpp/oneapi/dal/detail/hash_map.hpp index 2e5d50e4922..ab47ba52028 100644 --- a/cpp/oneapi/dal/detail/hash_map.hpp +++ b/cpp/oneapi/dal/detail/hash_map.hpp @@ -71,6 +71,14 @@ class hash_map { } ~hash_map() { + for (std::int64_t i = 0; i < capacity_; i++) { + entry_ptr current = entries_[i]; + while (current) { + entry_ptr next = current->get_next(); + delete current; + current = next; + } + } delete[] entries_; entries_ = nullptr; capacity_ = 0; From c0208f8fb4a4b1d499ba5121a8e81a8884cda410 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 09:32:00 -0700 Subject: [PATCH 37/65] chore(deps): update actions/checkout action to v4.1.7 (#2787) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index d640c74b50f..89f5d7b860e 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -26,7 +26,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5 + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 with: persist-credentials: false From 7b3982225aa3a28817f391a430d9cc89776adaf0 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 09:32:41 -0700 Subject: [PATCH 38/65] chore(deps): update dependency bazel_skylib to v1.7.1 (#2806) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- MODULE.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MODULE.bazel b/MODULE.bazel index 2eb13463dbc..14f7c53ec9b 
100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -1,3 +1,3 @@ module(name = "onedal") -bazel_dep(name = "bazel_skylib", version = "1.6.1") +bazel_dep(name = "bazel_skylib", version = "1.7.1") From ca8c588efddb9e585dfeecb5ddb39135596b0d54 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 09:33:53 -0700 Subject: [PATCH 39/65] chore(deps): update actions/checkout digest to 692973e (#2792) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/docker-validation-ci.yml | 2 +- .github/workflows/docker-validation-nightly.yml | 2 +- .github/workflows/renovate-validation.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-validation-ci.yml b/.github/workflows/docker-validation-ci.yml index ab4455e51d0..380172f82a5 100644 --- a/.github/workflows/docker-validation-ci.yml +++ b/.github/workflows/docker-validation-ci.yml @@ -17,6 +17,6 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 - name: Build docker image run: docker build . --file dev/docker/onedal-dev.Dockerfile --tag onedal-dev:latest diff --git a/.github/workflows/docker-validation-nightly.yml b/.github/workflows/docker-validation-nightly.yml index 8794f9963b9..ea6ec499025 100644 --- a/.github/workflows/docker-validation-nightly.yml +++ b/.github/workflows/docker-validation-nightly.yml @@ -30,7 +30,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 - name: Build docker image run: docker build . 
--file dev/docker/onedal-dev.Dockerfile --tag onedal-dev:latest - name: Building oneDAL diff --git a/.github/workflows/renovate-validation.yml b/.github/workflows/renovate-validation.yml index 41b5666c90d..83825305388 100644 --- a/.github/workflows/renovate-validation.yml +++ b/.github/workflows/renovate-validation.yml @@ -23,7 +23,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4 - name: Validate uses: suzuki-shunsuke/github-action-renovate-config-validator@v1.0.1 with: From 51017967a9ded8aaa7bc865ee7f5934a4a729feb Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 20 Jun 2024 11:49:31 -0700 Subject: [PATCH 40/65] chore(deps): update dependency urllib3 to v2.2.2 [security] (#2821) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index d149096a0f8..cc4847b879c 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -36,5 +36,5 @@ sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.6 sphinxcontrib-serializinghtml==1.1.9 typing-extensions==4.8.0 -urllib3==2.1.0 +urllib3==2.2.2 zipp==3.17.0 From c3d61d37ebcefe958fac70a54d79653c9127586a Mon Sep 17 00:00:00 2001 From: Nikolay Petrov Date: Thu, 20 Jun 2024 17:34:11 -0700 Subject: [PATCH 41/65] How to contact maintainers + some clarifications (#2822) * adding slack link * contacting maintainers * Updating details on bazel tests * Apply suggestions from code review Co-authored-by: ethanglaser <42726565+ethanglaser@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: ethanglaser <42726565+ethanglaser@users.noreply.github.com> * Update CONTRIBUTING.md * Update CONTRIBUTING.md --------- Co-authored-by: ethanglaser <42726565+ethanglaser@users.noreply.github.com> --- CONTRIBUTING.md | 11 ++++++++++- README.md | 6 +++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9e3d04f3eed..9fc5122a5c7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -23,6 +23,14 @@ We welcome community contributions to Intel(R) oneAPI Data Analytics Library. Yo Refer to our guidelines on [pull requests](#pull-requests) and [issues](#issues) before you proceed. +## Contacting maintainers +You may reach out to Intel project maintainers privately at onedal.maintainers@intel.com. +[Codeowners](https://github.com/oneapi-src/oneDAL/blob/main/.github/CODEOWNERS) configuration defines specific maintainers for corresponding code sections, however it's currently limited to Intel members. With further migration to UXL we will be changing this, but here are non-Intel contacts: + +For ARM specifics you may contact: [@rakshithgb-fujitsu](https://github.com/rakshithgb-fujitsu/) + +For RISC-V specifics you may contact: [@keeranroth](https://github.com/keeranroth/) + ## Issues Use [GitHub issues](https://github.com/oneapi-src/oneDAL/issues) to: @@ -35,8 +43,9 @@ Use [GitHub issues](https://github.com/oneapi-src/oneDAL/issues) to: To contribute your changes directly to the repository, do the following: - Make sure you can build the product and run all the examples with your patch. +- Product uses bazel for validation and your changes should pass tests. Please add new tests as necessary. 
[Bazel Guide for oneDAL](https://github.com/oneapi-src/oneDAL/tree/main/dev/bazel) - Make sure your code is in line with our [coding style](#code-style) as `clang-format` is one of the checks in our public CI. -- For a larger feature, provide a relevant example. +- For a larger feature, provide a relevant example, and tests. - [Document](#documentation-guidelines) your code. - [Submit](https://github.com/oneapi-src/oneDAL/pulls) a pull request into the `main` branch. diff --git a/README.md b/README.md index ec2315ccded..ba5bb8ec858 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,11 @@ To report a vulnerability, refer to [Intel vulnerability reporting policy](https ### Contribute -We welcome community contributions. Check our [contributing guidelines](CONTRIBUTING.md) to learn more. +We welcome community contributions. Check our [contributing guidelines](CONTRIBUTING.md) to learn more. You can also contact the oneDAL team via [UXL Foundation Slack] using +[#onedal] channel. + +[UXL Foundation Slack]: https://slack-invite.uxlfoundation.org/ +[#onedal]: https://uxlfoundation.slack.com/channels/onedal ## License From 89a56676a0d92ba3936f3f900dbb82565ac99650 Mon Sep 17 00:00:00 2001 From: Victoriya Fedotova Date: Mon, 24 Jun 2024 10:25:22 +0200 Subject: [PATCH 42/65] Fix "loop not vectorized" warning when running with newer icpx compiler (#2818) Co-authored-by: Anatoly Volkov <117643568+avolkov-intel@users.noreply.github.com> --- cpp/oneapi/dal/table/backend/csr_kernels.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cpp/oneapi/dal/table/backend/csr_kernels.cpp b/cpp/oneapi/dal/table/backend/csr_kernels.cpp index 8e5aef236b2..7bf510318bd 100644 --- a/cpp/oneapi/dal/table/backend/csr_kernels.cpp +++ b/cpp/oneapi/dal/table/backend/csr_kernels.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
 *******************************************************************************/
 
+#include "oneapi/dal/backend/common.hpp"
 #include "oneapi/dal/table/backend/csr_kernels.hpp"
 #include "oneapi/dal/table/backend/convert.hpp"
 
@@ -411,6 +412,12 @@ bool is_sorted(sycl::queue& queue,
 
     sycl::buffer<std::int64_t, 1> count_buf(&count_descending_pairs, sycl::range<1>(1));
 
+    const auto count_m1 = count - 1LL;
+    const auto wg_size = dal::backend::device_max_wg_size(queue);
+    const size_t count_m1_unsigned = static_cast<size_t>(count_m1);
+
+    const size_t wg_count = (count_m1 + wg_size - 1) / wg_size;
+
     // count the number of pairs of the subsequent elements in the data array that are sorted
     // in descending order using sycl::reduction
     queue
@@ -419,10 +426,11 @@ bool is_sorted(sycl::queue& queue,
             auto count_descending_reduction =
                 sycl::reduction(count_buf, cgh, sycl::ext::oneapi::plus<std::int64_t>());
 
-            cgh.parallel_for(sycl::range<1>{ dal::detail::integral_cast<std::size_t>(count - 1) },
+            cgh.parallel_for(sycl::nd_range<1>{ wg_count * wg_size, wg_size },
                              count_descending_reduction,
-                             [=](sycl::id<1> i, auto& count_descending) {
-                                 if (data[i] > data[i + 1])
+                             [=](sycl::nd_item<1> idx, auto& count_descending) {
+                                 const auto i = idx.get_global_id(0);
+                                 if (i < count_m1_unsigned && data[i + 1] < data[i])
                                      count_descending.combine(1);
                              });
         })

From d87fa3f8892ba6cf78e4132b659d8e1bf17f4f76 Mon Sep 17 00:00:00 2001
From: Aleksandr Solovev
Date: Wed, 26 Jun 2024 15:33:37 +0200
Subject: [PATCH 43/65] Adding TBB scheduler handle (#2829)

Co-authored-by: Victoriya Fedotova
---
 cpp/daal/include/services/env_detect.h        |  4 +++
 .../src/externals/core_threading_win_dll.cpp  | 32 ++++++++++++++++---
 cpp/daal/src/services/env_detect.cpp          | 15 +++++++--
 .../src/threading/service_thread_pinner.cpp   |  3 ++
 cpp/daal/src/threading/threading.cpp          | 27 ++++++++++++++++
 cpp/daal/src/threading/threading.h            |  7 ++++
 6 files changed, 81 insertions(+), 7 deletions(-)

diff --git a/cpp/daal/include/services/env_detect.h b/cpp/daal/include/services/env_detect.h
index f561de5ae2c..d132c55794d 100644
--- a/cpp/daal/include/services/env_detect.h
+++ b/cpp/daal/include/services/env_detect.h
@@ -198,6 +198,10 @@ class DAAL_EXPORT Environment : public Base
     void initNumberOfThreads();
 
     env _env;
+    // Pointer to the oneapi::tbb::task_scheduler_handle class object, global for oneDAL.
+    // The oneapi::tbb::task_scheduler_handle and the oneapi::tbb::finalize function
+    // allow the user to wait for completion of worker threads.
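+    // Illustration only (plain oneTBB, names from the oneTBB API): the pattern these
+    // members enable is
+    //     oneapi::tbb::task_scheduler_handle handle{ oneapi::tbb::attach{} };
+    //     /* ... run parallel work ... */
+    //     oneapi::tbb::finalize(handle); // blocks until the TBB worker threads complete
+    // The handle is kept here as an opaque void* so that this public header does not
+    // need to include the TBB headers.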
+ void * _schedulerHandle; void * _globalControl; SharedPtr _executionContext; }; diff --git a/cpp/daal/src/externals/core_threading_win_dll.cpp b/cpp/daal/src/externals/core_threading_win_dll.cpp index bfd7ac01a32..37c4f7d0e2b 100644 --- a/cpp/daal/src/externals/core_threading_win_dll.cpp +++ b/cpp/daal/src/externals/core_threading_win_dll.cpp @@ -143,7 +143,9 @@ typedef void (*_daal_wait_task_group_t)(void * taskGroupPtr); typedef bool (*_daal_is_in_parallel_t)(); typedef void (*_daal_tbb_task_scheduler_free_t)(void *& globalControl); +typedef void (*_daal_tbb_task_scheduler_handle_free_t)(void *& schedulerHandle); typedef size_t (*_setNumberOfThreads_t)(const size_t, void **); +typedef size_t (*_setSchedulerHandle_t)(void **); typedef void * (*_daal_threader_env_t)(); typedef void (*_daal_parallel_sort_int32_t)(int *, int *); @@ -205,10 +207,12 @@ static _daal_del_task_group_t _daal_del_task_group_ptr = NULL; static _daal_run_task_group_t _daal_run_task_group_ptr = NULL; static _daal_wait_task_group_t _daal_wait_task_group_ptr = NULL; -static _daal_is_in_parallel_t _daal_is_in_parallel_ptr = NULL; -static _daal_tbb_task_scheduler_free_t _daal_tbb_task_scheduler_free_ptr = NULL; -static _setNumberOfThreads_t _setNumberOfThreads_ptr = NULL; -static _daal_threader_env_t _daal_threader_env_ptr = NULL; +static _daal_is_in_parallel_t _daal_is_in_parallel_ptr = NULL; +static _daal_tbb_task_scheduler_free_t _daal_tbb_task_scheduler_free_ptr = NULL; +static _daal_tbb_task_scheduler_handle_free_t _daal_tbb_task_scheduler_handle_free_ptr = NULL; +static _setNumberOfThreads_t _setNumberOfThreads_ptr = NULL; +static _setSchedulerHandle_t _setSchedulerHandle_ptr = NULL; +static _daal_threader_env_t _daal_threader_env_ptr = NULL; static _daal_parallel_sort_int32_t _daal_parallel_sort_int32_ptr = NULL; static _daal_parallel_sort_uint64_t _daal_parallel_sort_uint64_ptr = NULL; @@ -657,6 +661,16 @@ DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& init) return _daal_tbb_task_scheduler_free_ptr(init); } +DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& init) +{ + load_daal_thr_dll(); + if (_daal_tbb_task_scheduler_handle_free_ptr == NULL) + { + _daal_tbb_task_scheduler_handle_free_ptr = (_daal_tbb_task_scheduler_handle_free_t)load_daal_thr_func("_daal_tbb_task_scheduler_handle_free"); + } + return _daal_tbb_task_scheduler_handle_free_ptr(init); +} + DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** init) { load_daal_thr_dll(); @@ -667,6 +681,16 @@ DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** init) return _setNumberOfThreads_ptr(numThreads, init); } +DAAL_EXPORT size_t _setSchedulerHandle(void ** init) +{ + load_daal_thr_dll(); + if (_setSchedulerHandle_ptr == NULL) + { + _setSchedulerHandle_ptr = (_setSchedulerHandle_t)load_daal_thr_func("_setSchedulerHandle"); + } + return _setSchedulerHandle_ptr(init); +} + DAAL_EXPORT void * _daal_threader_env() { load_daal_thr_dll(); diff --git a/cpp/daal/src/services/env_detect.cpp b/cpp/daal/src/services/env_detect.cpp index 6698ede0d3a..286416ed571 100644 --- a/cpp/daal/src/services/env_detect.cpp +++ b/cpp/daal/src/services/env_detect.cpp @@ -125,7 +125,7 @@ DAAL_EXPORT void daal::services::Environment::setDynamicLibraryThreadingTypeOnWi initNumberOfThreads(); } -DAAL_EXPORT daal::services::Environment::Environment() : _globalControl {} +DAAL_EXPORT daal::services::Environment::Environment() : _schedulerHandle {}, _globalControl {} { _env.cpuid_init_flag = false; _env.cpuid = -1; @@ -137,7 +137,14 
@@ DAAL_EXPORT daal::services::Environment::Environment(const Environment & e) : da DAAL_EXPORT void daal::services::Environment::initNumberOfThreads() { if (isInit) return; - + // Initializes global oneapi::tbb::task_scheduler_handle object in oneDAL to prevent the unexpected + // destruction of the calling thread. + // When the oneapi::tbb::finalize function is called with an oneapi::tbb::task_scheduler_handle + // instance, it blocks the calling thread until the completion of all worker + // threads that were implicitly created by the library. +#if defined(TARGET_X86_64) + daal::setSchedulerHandle(&_schedulerHandle); +#endif /* if HT enabled - set _numThreads to physical cores num */ if (daal::internal::ServiceInst::serv_get_ht()) { @@ -156,7 +163,6 @@ DAAL_EXPORT void daal::services::Environment::initNumberOfThreads() DAAL_EXPORT daal::services::Environment::~Environment() { daal::services::daal_free_buffers(); - _daal_tbb_task_scheduler_free(_globalControl); } void daal::services::Environment::_cpu_detect(int enable) @@ -171,6 +177,9 @@ void daal::services::Environment::_cpu_detect(int enable) DAAL_EXPORT void daal::services::Environment::setNumberOfThreads(const size_t numThreads) { isInit = true; +#if defined(TARGET_X86_64) + daal::setSchedulerHandle(&_schedulerHandle); +#endif daal::setNumberOfThreads(numThreads, &_globalControl); } diff --git a/cpp/daal/src/threading/service_thread_pinner.cpp b/cpp/daal/src/threading/service_thread_pinner.cpp index d6e878f5b20..786a589946a 100644 --- a/cpp/daal/src/threading/service_thread_pinner.cpp +++ b/cpp/daal/src/threading/service_thread_pinner.cpp @@ -234,6 +234,9 @@ class thread_pinner_impl_t : public tbb::task_scheduler_observer thread_pinner_impl_t::thread_pinner_impl_t(void (*read_topo)(int &, int &, int &, int **), void (*deleter)(void *)) : pinner_arena(nthreads = daal::threader_get_threads_number()), tbb::task_scheduler_observer(pinner_arena), topo_deleter(deleter) { + #if defined(TARGET_X86_64) + pinner_arena.initialize(); + #endif do_pinning = (nthreads > 0) ? true : false; is_pinning.set(0); diff --git a/cpp/daal/src/threading/threading.cpp b/cpp/daal/src/threading/threading.cpp index 8af63a075be..15c39368238 100644 --- a/cpp/daal/src/threading/threading.cpp +++ b/cpp/daal/src/threading/threading.cpp @@ -61,6 +61,33 @@ DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& globalControl) } } +DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& schedulerHandle) +{ + // Note: TBB 13 deletes task_scheduler_handle itself during the destruction of thread context + + // #if defined(TARGET_X86_64) + // if (schedulerHandle) + // { + // delete reinterpret_cast(schedulerHandle); + // schedulerHandle = nullptr; + // } + // #endif +} + +DAAL_EXPORT size_t _setSchedulerHandle(void ** schedulerHandle) +{ +#if defined(TARGET_X86_64) + #if (TBB_INTERFACE_VERSION < 12120) + schedulerHandle = nullptr; + #else + *schedulerHandle = reinterpret_cast(new tbb::task_scheduler_handle(tbb::attach {})); + #endif + // It is necessary for initializing tbb in cases where DAAL does not use it. 
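+    // Constructing a temporary task_arena and initializing it below forces the TBB
+    // runtime to start, so the handle created above has worker threads to observe.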
+    tbb::task_arena {}.initialize();
+#endif
+    return 0;
+}
+
 DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalControl)
 {
     static tbb::spin_mutex mt;
diff --git a/cpp/daal/src/threading/threading.h b/cpp/daal/src/threading/threading.h
index 4d00c789494..0b4a9881b97 100644
--- a/cpp/daal/src/threading/threading.h
+++ b/cpp/daal/src/threading/threading.h
@@ -102,7 +102,9 @@ extern "C"
     DAAL_EXPORT void _daal_wait_task_group(void * taskGroupPtr);
 
     DAAL_EXPORT void _daal_tbb_task_scheduler_free(void *& globalControl);
+    DAAL_EXPORT void _daal_tbb_task_scheduler_handle_free(void *& schedulerHandle);
     DAAL_EXPORT size_t _setNumberOfThreads(const size_t numThreads, void ** globalControl);
+    DAAL_EXPORT size_t _setSchedulerHandle(void ** schedulerHandle);
 
     DAAL_EXPORT void * _daal_threader_env();
 
@@ -183,6 +185,11 @@ inline size_t threader_get_threads_number()
     return threader_env()->getNumberOfThreads();
 }
 
+inline size_t setSchedulerHandle(void ** schedulerHandle)
+{
+    return _setSchedulerHandle(schedulerHandle);
+}
+
 inline size_t setNumberOfThreads(const size_t numThreads, void ** globalControl)
 {
     return _setNumberOfThreads(numThreads, globalControl);

From c4635ce843c23c81a9a0a3637d866fff803b52f8 Mon Sep 17 00:00:00 2001
From: Emm Walsh <148352712+emmwalsh@users.noreply.github.com>
Date: Wed, 3 Jul 2024 23:14:32 +0100
Subject: [PATCH 44/65] Update security md (#2837)

* Update data-analytics-pipeline.rst
* Update build-application.rst
* Update SECURITY.md
* Revert "Update build-application.rst"

This reverts commit 08d7ce450f725f79e55abe06ad97b260e07868bf.

* Revert "Update data-analytics-pipeline.rst"

This reverts commit 78e7e2e2923c1489aaf86c4bf6aeb36519a702bd.

* Update SECURITY.md

Update SECURITY.md removed asterisk

* Update SECURITY.md

fixed errors
---
 SECURITY.md | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/SECURITY.md b/SECURITY.md
index 49a0208db6e..3fa4dfa8799 100755
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -19,13 +19,13 @@
 
 As an open-source project, we understand the importance of and responsibility
 for security. This Security Policy outlines our guidelines and procedures to
-ensure the highest level of security and trust for Intel(R) oneAPI Data Analytics Library.
+ensure the highest level of security and trust for oneDAL.
 
 ## Supported Versions
 
 Security vulnerabilities are fixed in the [latest version][1]
 and delivered as a patch release. We don't guarantee security fixes to be
-back-ported to older Intel(R) oneAPI Data Analytics Library versions.
+back-ported to older oneDAL versions.
 
 ## Report a Vulnerability
 
@@ -52,16 +52,16 @@ Along with the report, provide the following info:
 
 ### When Should I Report a Vulnerability?
 
-* You think you discovered a potential security vulnerability in Intel(R) oneAPI Data Analytics Library.
-* You are unsure how the potential vulnerability affects Intel(R) oneAPI Data Analytics Library.
+* You think you discovered a potential security vulnerability in oneDAL.
+* You are unsure how the potential vulnerability affects oneDAL.
 * You think you discovered a vulnerability in another project or 3rd party
-component on which Intel(R) oneAPI Data Analytics Library depends. If the issue is not fixed in the 3rd party
+component on which oneDAL depends. If the issue is not fixed in the 3rd party
 component, try to report directly there first.
 
 ### When Should I NOT Report a Vulnerability?
 
 * You got an automated scan hit and are unable to provide details.
-* You need help using Intel(R) oneAPI Data Analytics Library for security.
+* You need help using oneDAL for security.
 * You need help applying security-related updates.
 * Your issue is not security-related.
 
@@ -69,7 +69,7 @@ component, try to report directly there first.
 
 We aim to respond quickly to your inquiry and coordinate a fix and
 disclosure with you. All confirmed security vulnerabilities will be addressed
-according to severity level and impact on Intel(R) oneAPI Data Analytics Library. Normally, security issues are fixed in the next planned release.
+according to severity level and impact on oneDAL. Normally, security issues are fixed in the next planned release.
 
 ## Disclosure Policy
 
@@ -88,4 +88,6 @@ If you have any suggestions on how this Policy could be improved, submit
 an issue or a pull request to this repository. **Do not** report potential
 vulnerabilities or security flaws via a pull request.
 
-[1]: https://github.com/oneapi-src/oneDAL/releases
\ No newline at end of file
+[1]: https://github.com/oneapi-src/oneDAL/releases
+[2]: https://github.com/oneapi-src/oneDAL/security/advisories/new
+[3]: https://github.com/oneapi-src/oneDAL/security/advisories

From 24062c28a5d753049916cfa8510f93a65c794bf3 Mon Sep 17 00:00:00 2001
From: Aleksei Khomenko
Date: Thu, 4 Jul 2024 00:16:07 +0200
Subject: [PATCH 45/65] docs: add `Scorecard` badge (#2838)

---
 README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ba5bb8ec858..2c19f382da3 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,11 @@
 [Installation](#installation)   |   [Documentation](#documentation)   |   [Support](#support)   |   [Examples](#examples)   |   [How to Contribute](CONTRIBUTING.md)    
 
-[![Build Status](https://dev.azure.com/daal/DAAL/_apis/build/status/oneapi-src.oneDAL?branchName=main)](https://dev.azure.com/daal/DAAL/_build/latest?definitionId=5&branchName=main) [![License](https://img.shields.io/github/license/oneapi-src/oneDAL.svg)](https://github.com/oneapi-src/oneDAL/blob/main/LICENSE) [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8859/badge)](https://www.bestpractices.dev/projects/8859) [![Join the community on GitHub Discussions](https://badgen.net/badge/join%20the%20discussion/on%20github/black?icon=github)](https://github.com/oneapi-src/oneDAL/discussions)
+[![Build Status](https://dev.azure.com/daal/DAAL/_apis/build/status/oneapi-src.oneDAL?branchName=main)](https://dev.azure.com/daal/DAAL/_build/latest?definitionId=5&branchName=main)
+[![License](https://img.shields.io/github/license/oneapi-src/oneDAL.svg)](https://github.com/oneapi-src/oneDAL/blob/main/LICENSE)
+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8859/badge)](https://www.bestpractices.dev/projects/8859)
+[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/oneapi-src/oneDAL/badge)](https://securityscorecards.dev/viewer/?uri=github.com/oneapi-src/oneDAL)
+[![Join the community on GitHub Discussions](https://badgen.net/badge/join%20the%20discussion/on%20github/black?icon=github)](https://github.com/oneapi-src/oneDAL/discussions)
 
 oneAPI Data Analytics Library (oneDAL) is a powerful machine learning library that helps you accelerate big data analysis at all stages: **preprocessing**, **transformation**, **analysis**, **modeling**, **validation**, and **decision making**.
From 1f667744c0f8d42cb7698061e9faff0aae9ea6da Mon Sep 17 00:00:00 2001
From: ethanglaser <42726565+ethanglaser@users.noreply.github.com>
Date: Wed, 3 Jul 2024 21:14:44 -0700
Subject: [PATCH 46/65] CI: conda fixes for public CI (#2842)

---
 .ci/env/environment.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.ci/env/environment.yml b/.ci/env/environment.yml
index 0a704f66f14..61655fe9e76 100644
--- a/.ci/env/environment.yml
+++ b/.ci/env/environment.yml
@@ -1,7 +1,6 @@
 name: ci-env
 channels:
   - conda-forge
-  - intel
   - defaults
 dependencies:
-  - impi-devel=2021.10.0
+  - impi-devel=2021.12.0

From 90e6b1a7eca8c6088dabc0fd903ff2af14ba8c84 Mon Sep 17 00:00:00 2001
From: Dhanus M Lal <73832063+DhanusML@users.noreply.github.com>
Date: Fri, 5 Jul 2024 12:49:50 +0530
Subject: [PATCH 47/65] Added 2c_mom reference implementation (#2834)

This routine computes the population variance of data stored in
column-major format (each row represents a sample from the
distribution). Separate functions for single and double precision are
implemented.

This implementation enables the em_gmm_dense_batch example, which has
therefore been removed from the exclude list.

Signed-off-by: Dhanus M Lal
---
 cpp/daal/src/externals/service_stat_ref.h | 68 +++++++++++++++++++++--
 examples/daal/cpp/CMakeLists.txt          |  1 -
 2 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/cpp/daal/src/externals/service_stat_ref.h b/cpp/daal/src/externals/service_stat_ref.h
index 81a44ce1434..d01eef06d55 100644
--- a/cpp/daal/src/externals/service_stat_ref.h
+++ b/cpp/daal/src/externals/service_stat_ref.h
@@ -1,6 +1,7 @@
 /* file: service_stat_ref.h */
 /*******************************************************************************
 * Copyright 2023 Intel Corporation
+* Copyright contributors to the oneDAL project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
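For intuition about the one-pass scheme added in the hunks below: for each feature the
routine accumulates S = sum(x_i) and Q = sum(x_i^2) in a single sweep over the samples
and then evaluates the unbiased estimate (Q - S*S/n) / (n - 1). A small worked case,
assuming one feature with the illustrative samples {1, 2, 3}:

    S = 1 + 2 + 3 = 6,    Q = 1 + 4 + 9 = 14
    variance = (14 - 6*6/3) / (3 - 1) = (14 - 12) / 2 = 1

which agrees with computing E(x - mu)^2 directly around the mean mu = 2 with the same
(n - 1) denominator.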
@@ -24,8 +25,9 @@ #ifndef __SERVICE_STAT_REF_H__ #define __SERVICE_STAT_REF_H__ -typedef void (*func_type)(DAAL_INT, DAAL_INT, DAAL_INT, void *); +#include "src/externals/service_memory.h" +typedef void (*func_type)(DAAL_INT, DAAL_INT, DAAL_INT, void *); extern "C" { #define __DAAL_VSL_SS_MATRIX_STORAGE_COLS 0x00020000 @@ -194,8 +196,37 @@ struct RefStatistics static int x2c_mom(const double * data, const __int64 nFeatures, const __int64 nVectors, double * variance, const __int64 method) { - int errcode = 0; - + // E(x-\mu)^2 = E(x^2) - \mu^2 + int errcode = 0; + double * sum = (double *)daal::services::internal::service_calloc(nFeatures, sizeof(double)); + if (!sum) return -4; + daal::services::internal::service_memset(variance, double(0), nFeatures); + DAAL_INT feature_ptr, vec_ptr; + double wtInv = (double)1 / nVectors; + double wtInvMinus = (double)1 / (nVectors - 1); + double pt = 0; + for (vec_ptr = 0; vec_ptr < nVectors; ++vec_ptr) + { +#pragma omp simd + for (feature_ptr = 0; feature_ptr < nFeatures; ++feature_ptr) + { + pt = data[vec_ptr * nFeatures + feature_ptr]; + sum[feature_ptr] += pt; + variance[feature_ptr] += (pt * pt); // 2RSum + } + } + double sumSqDivN; // S^2/n = n*\mu^2 +#pragma omp simd + for (feature_ptr = 0; feature_ptr < nFeatures; ++feature_ptr) + { + sumSqDivN = sum[feature_ptr]; + sumSqDivN *= sumSqDivN; + sumSqDivN *= wtInv; + variance[feature_ptr] -= sumSqDivN; // (2RSum-S^2/n) + variance[feature_ptr] *= wtInvMinus; + } + daal::services::internal::service_free(sum); + sum = NULL; return errcode; } @@ -276,8 +307,37 @@ struct RefStatistics static int x2c_mom(const float * data, const __int64 nFeatures, const __int64 nVectors, float * variance, const __int64 method) { + // E(x-\mu)^2 = E(x^2) - \mu^2 int errcode = 0; - + float * sum = (float *)daal::services::internal::service_calloc(nFeatures, sizeof(float)); + if (!sum) return -4; + daal::services::internal::service_memset(variance, float(0), nFeatures); + DAAL_INT feature_ptr, vec_ptr; + float wtInv = (float)1 / nVectors; + float wtInvMinus = (float)1 / (nVectors - 1); + float pt = 0; + for (vec_ptr = 0; vec_ptr < nVectors; ++vec_ptr) + { +#pragma omp simd + for (feature_ptr = 0; feature_ptr < nFeatures; ++feature_ptr) + { + pt = data[vec_ptr * nFeatures + feature_ptr]; + sum[feature_ptr] += pt; + variance[feature_ptr] += (pt * pt); // 2RSum + } + } + float sumSqDivN; // S^2/n = n*\mu^2 +#pragma omp simd + for (feature_ptr = 0; feature_ptr < nFeatures; ++feature_ptr) + { + sumSqDivN = sum[feature_ptr]; + sumSqDivN *= sumSqDivN; + sumSqDivN *= wtInv; + variance[feature_ptr] -= sumSqDivN; // (2RSum-S^2/n) + variance[feature_ptr] *= wtInvMinus; + } + daal::services::internal::service_free(sum); + sum = NULL; return errcode; } diff --git a/examples/daal/cpp/CMakeLists.txt b/examples/daal/cpp/CMakeLists.txt index 4deb10fd3f5..ed41306e810 100644 --- a/examples/daal/cpp/CMakeLists.txt +++ b/examples/daal/cpp/CMakeLists.txt @@ -44,7 +44,6 @@ if(REF_BACKEND) set(EXCLUDE_LIST ${EXCLUDE_LIST} "source/boosting/brownboost_dense_batch.cpp" - "source/em/em_gmm_dense_batch.cpp" ) endif() From 08a6c32dc499e117652e47d07d308a5cb66e9584 Mon Sep 17 00:00:00 2001 From: Maria Petrova Date: Mon, 8 Jul 2024 11:22:49 +0200 Subject: [PATCH 48/65] Adjust modulefile according to the latest requirements (#2823) --- deploy/local/dal | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/deploy/local/dal b/deploy/local/dal index 6ddff7e537b..dd64884cf86 100644 --- a/deploy/local/dal +++ b/deploy/local/dal @@ 
-1,3 +1,4 @@ + #%Module1.0################################################################### #=============================================================================== # Copyright 2020 Intel Corporation @@ -33,9 +34,9 @@ if { $tcl_version < $min_tcl_ver } { set scriptpath "${ModulesCurrentModulefile}" set scriptpath "[file dirname [file normalize "$scriptpath/___"]]" -# define componentroot, modulefileroot, modulefilename and modulefilever +# define componentroot, modulefilepath, modulefilename and modulefilever set modulefilename "[file tail [file dirname "${scriptpath}"]]" -set modulefilever "[file tail "$scriptpath"]" +set modulefilever "[file tail "${scriptpath}"]" set modulefilepath "${scriptpath}" set componentroot "[file dirname [file dirname [file dirname [file dirname "${scriptpath}"]]]]" @@ -51,8 +52,7 @@ set moduleinfoname [file dirname [module-info name]] proc ModulesHelp { } { global moduleinfoname - global modulefilever - module whatis "${modulefilename}/${modulefilever}" + puts "module whatis ${moduleinfoname}" } ############################################################################## @@ -61,14 +61,8 @@ proc ModulesHelp { } { # Set intermediate variables set dalroot "$componentroot" -set daalroot "$componentroot/$modulefilever" -if {[string equal [info machine] "aarch64"]} { - set daal_target_arch "arm" -} else { - set daal_target_arch "intel64" -} -module-whatis "oneAPI Data Analytics Library for $daal_target_arch." +set daal_target_arch "intel64" # Setup environment variables setenv DAL_MAJOR_BINARY 1 From 4a44e745f3e17b08a7e44c5b112864a836ec653e Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 8 Jul 2024 14:09:15 -0700 Subject: [PATCH 49/65] chore(deps): update dependency fmt to v11 (#2835) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- WORKSPACE | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 7020d6b5bae..c20ee8fc7e3 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -115,8 +115,8 @@ http_archive( http_archive( name = "fmt", - url = "https://github.com/fmtlib/fmt/archive/10.2.1.tar.gz", - sha256 = "1250e4cc58bf06ee631567523f48848dc4596133e163f02615c97f78bab6c811", - strip_prefix = "fmt-10.2.1", + url = "https://github.com/fmtlib/fmt/archive/11.0.1.tar.gz", + sha256 = "7d009f7f89ac84c0a83f79ed602463d092fbf66763766a907c97fd02b100f5e9", + strip_prefix = "fmt-11.0.1", build_file = "@onedal//dev/bazel/deps:fmt.tpl.BUILD", ) From cbed3c6df643446ca851c08a674cad1ad771fa84 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 8 Jul 2024 14:11:29 -0700 Subject: [PATCH 50/65] chore(deps): update dependency certifi to v2024.7.4 [security] (#2843) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index cc4847b879c..4e5cde1286b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,7 +1,7 @@ alabaster==0.7.13 Babel==2.13.1 beautifulsoup4==4.12.2 -certifi==2024.2.2 +certifi==2024.7.4 chardet==5.2.0 click==8.1.7 colorama==0.4.6 From 2954b9ed8ca631806e1395d6287af07d426418ba Mon Sep 17 00:00:00 2001 From: ethanglaser <42726565+ethanglaser@users.noreply.github.com> Date: Wed, 10 Jul 2024 21:32:29 -0700 Subject: [PATCH 51/65] CI: remove intel channel from public CI (#2847) * CI: remove intel channel from 
public CI * temporary for quick check * revert temporary --- .ci/pipeline/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index e5481e4ca68..e8393098b3f 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -583,7 +583,7 @@ jobs: displayName: 'System info' - script: | conda update -y -q conda - conda create -q -y -n CB -c intel python=$(python.version) tbb mpich + conda create -q -y -n CB -c conda-forge python=$(python.version) tbb mpich displayName: 'Conda create' - script: | git clone https://github.com/intel/scikit-learn-intelex.git daal4py From 4bf6aafb47074e4204cddb72008c7ae943f4b275 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 11 Jul 2024 10:42:20 -0700 Subject: [PATCH 52/65] chore(deps): update dependency zipp to v3.19.1 [security] (#2845) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 4e5cde1286b..b57976de83f 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -37,4 +37,4 @@ sphinxcontrib-qthelp==1.0.6 sphinxcontrib-serializinghtml==1.1.9 typing-extensions==4.8.0 urllib3==2.2.2 -zipp==3.17.0 +zipp==3.19.1 From 6ae4f4e8a3883448b275c304e28c5c51f08e12a8 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 11 Jul 2024 22:52:44 -0700 Subject: [PATCH 53/65] chore(deps): update dependency importlib-metadata to v8 (#2831) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index b57976de83f..6f23e800c49 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -8,7 +8,7 @@ colorama==0.4.6 docutils~=0.18.0 idna==3.7 imagesize==1.4.1 -importlib-metadata==7.0.0 +importlib-metadata==8.0.0 importlib-resources==6.1.1 Jinja2==3.1.4 lxml==5.1.0 From e68ca9b03a4eae76661890391d576976a024ee82 Mon Sep 17 00:00:00 2001 From: Ian Faust Date: Wed, 17 Jul 2024 17:04:13 +0200 Subject: [PATCH 54/65] [enhancement] Add finiteness_checker algo to `cpp/oneapi/dal/algo` (#2781) * inital commit * follow on updates * improvements * formatting * remove variable * update copyrights * readd descriptor * fix variations * attempt to add into build flow * formatting * change bazel dependency * change header * Update common.cpp * Update common.hpp * Update common.hpp * Update compute_types.hpp * Update compute_kernel.hpp * Update compute_kernel_dense.cpp * Update compute_kernel_dense.cpp * Update compute_kernel_dense.cpp * Update compute_kernel_dense.cpp * Update compute_kernel_dense.cpp * Update compute_kernel_dense.cpp * Update BUILD * clang-format * Update compute_kernel_dense_dpc.cpp * Update compute_kernel_dense_dpc.cpp * Update compute_kernel_dense_dpc.cpp * Update compute_kernel_dense_dpc.cpp * formatting * x->data * formatting * code quality improvement * formatting * clang-formatting * Update compute_kernel_dense_dpc.cpp * Update compute_kernel_dense_dpc.cpp * formatting * updates based off of review * Update compute_types.hpp * Update compute_types.cpp * Update compute_types.hpp * Update compute_kernel.hpp * initial test implementation: * Update batch.cpp * Update batch.cpp * Update batch.cpp * formatting * clang-formatting * fixed tests * Update 
compute_kernel_dense_dpc.cpp --- cpp/oneapi/dal/algo/BUILD | 1 + cpp/oneapi/dal/algo/finiteness_checker/BUILD | 61 ++++++++ .../backend/cpu/compute_kernel.hpp | 39 +++++ .../backend/cpu/compute_kernel_dense.cpp | 73 +++++++++ .../backend/gpu/compute_kernel.hpp | 39 +++++ .../backend/gpu/compute_kernel_dense_dpc.cpp | 81 ++++++++++ .../dal/algo/finiteness_checker/common.cpp | 45 ++++++ .../dal/algo/finiteness_checker/common.hpp | 140 ++++++++++++++++++ .../dal/algo/finiteness_checker/compute.hpp | 31 ++++ .../algo/finiteness_checker/compute_types.cpp | 71 +++++++++ .../algo/finiteness_checker/compute_types.hpp | 98 ++++++++++++ .../finiteness_checker/detail/compute_ops.cpp | 44 ++++++ .../finiteness_checker/detail/compute_ops.hpp | 77 ++++++++++ .../detail/compute_ops_dpc.cpp | 58 ++++++++ .../algo/finiteness_checker/test/batch.cpp | 104 +++++++++++++ makefile.lst | 1 + 16 files changed, 963 insertions(+) create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/BUILD create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel_dense.cpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel.hpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel_dense_dpc.cpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/common.cpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/common.hpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/compute.hpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/compute_types.cpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/compute_types.hpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.cpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops_dpc.cpp create mode 100644 cpp/oneapi/dal/algo/finiteness_checker/test/batch.cpp diff --git a/cpp/oneapi/dal/algo/BUILD b/cpp/oneapi/dal/algo/BUILD index ab93feb58c0..e93804d2e7e 100644 --- a/cpp/oneapi/dal/algo/BUILD +++ b/cpp/oneapi/dal/algo/BUILD @@ -18,6 +18,7 @@ ALGOS = [ "cosine_distance", "dbscan", "decision_tree", + "finiteness_checker", "jaccard", "kmeans", "kmeans_init", diff --git a/cpp/oneapi/dal/algo/finiteness_checker/BUILD b/cpp/oneapi/dal/algo/finiteness_checker/BUILD new file mode 100644 index 00000000000..de390465f9d --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/BUILD @@ -0,0 +1,61 @@ +package(default_visibility = ["//visibility:public"]) +load("@onedal//dev/bazel:dal.bzl", + "dal_module", + "dal_test_suite", +) + +dal_module( + name = "finiteness_checker", + auto = True, + dal_deps = [ + "@onedal//cpp/oneapi/dal:core", + "@onedal//cpp/oneapi/dal/backend/primitives:reduction", + ], + extra_deps = [ + "@onedal//cpp/daal:data_management", + ] +) + +dal_test_suite( + name = "cpu_tests", + private = True, + compile_as = [ "c++" ], + srcs = glob([ + "backend/cpu/test/*.cpp", + ]), + dal_deps = [ + ":finiteness_checker", + ], +) + +dal_test_suite( + name = "gpu_tests", + private = True, + compile_as = [ "dpc++" ], + srcs = glob([ + "backend/gpu/test/*.cpp", + ]), + dal_deps = [ + ":finiteness_checker", + ], +) + +dal_test_suite( + name = "interface_tests", + framework = "catch2", + srcs = glob([ + "test/*.cpp", + ]), + dal_deps = [ + ":finiteness_checker", + ], +) + +dal_test_suite( + name = "tests", + tests = [ + ":cpu_tests", + ":gpu_tests", + 
":interface_tests", + ], +) diff --git a/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp b/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp new file mode 100644 index 00000000000..c1a1b8fc3c4 --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp @@ -0,0 +1,39 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/algo/finiteness_checker/compute_types.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/table/homogen.hpp" + +namespace oneapi::dal::finiteness_checker::backend { + +template +struct compute_kernel_cpu { + compute_result operator()(const dal::backend::context_cpu& ctx, + const detail::descriptor_base& params, + const compute_input& input) const; + +#ifdef ONEDAL_DATA_PARALLEL + void operator()(const dal::backend::context_cpu& ctx, + const detail::descriptor_base& params, + const table& data, + bool& res) const; +#endif +}; + +} // namespace oneapi::dal::finiteness_checker::backend diff --git a/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel_dense.cpp b/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel_dense.cpp new file mode 100644 index 00000000000..60d965046ec --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel_dense.cpp @@ -0,0 +1,73 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include + +#include "oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp" +#include "oneapi/dal/backend/interop/common.hpp" +#include "oneapi/dal/backend/interop/error_converter.hpp" +#include "oneapi/dal/backend/interop/table_conversion.hpp" +#include "oneapi/dal/exceptions.hpp" + +#include "oneapi/dal/table/row_accessor.hpp" + +namespace oneapi::dal::finiteness_checker::backend { + +using dal::backend::context_cpu; +using input_t = compute_input; +using result_t = compute_result; +using descriptor_t = detail::descriptor_base; + +namespace interop = dal::backend::interop; + +template +static result_t call_daal_kernel(const context_cpu& ctx, + const descriptor_t& desc, + const table& data) { + const auto daal_data = interop::convert_to_daal_table(data); + + return result_t().set_finite( + daal::data_management::internal::allValuesAreFinite(*daal_data.get(), + desc.get_allow_NaN())); +} + +template +static result_t compute(const context_cpu& ctx, const descriptor_t& desc, const input_t& input) { + return call_daal_kernel(ctx, desc, input.get_data()); +} + +template +struct compute_kernel_cpu { + result_t operator()(const context_cpu& ctx, + const descriptor_t& desc, + const input_t& input) const { + return compute(ctx, desc, input); + } + +#ifdef ONEDAL_DATA_PARALLEL + void operator()(const context_cpu& ctx, + const descriptor_t& desc, + const table& data, + bool& res) const { + throw unimplemented(dal::detail::error_messages::method_not_implemented()); + } +#endif +}; + +template struct compute_kernel_cpu; +template struct compute_kernel_cpu; + +} // namespace oneapi::dal::finiteness_checker::backend diff --git a/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel.hpp b/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel.hpp new file mode 100644 index 00000000000..51fc6d4a35e --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel.hpp @@ -0,0 +1,39 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/algo/finiteness_checker/compute_types.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" +#include "oneapi/dal/table/homogen.hpp" + +namespace oneapi::dal::finiteness_checker::backend { + +template +struct compute_kernel_gpu { + compute_result operator()(const dal::backend::context_gpu& ctx, + const detail::descriptor_base& params, + const compute_input& input) const; + +#ifdef ONEDAL_DATA_PARALLEL + void operator()(const dal::backend::context_gpu& ctx, + const detail::descriptor_base& params, + const table& data, + bool& res); +#endif +}; + +} // namespace oneapi::dal::finiteness_checker::backend diff --git a/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel_dense_dpc.cpp b/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel_dense_dpc.cpp new file mode 100644 index 00000000000..09389a2c122 --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel_dense_dpc.cpp @@ -0,0 +1,81 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel.hpp" +#include "oneapi/dal/backend/primitives/reduction.hpp" +#include "oneapi/dal/backend/primitives/utils.hpp" +#include "oneapi/dal/detail/profiler.hpp" + +namespace oneapi::dal::finiteness_checker::backend { + +using dal::backend::context_gpu; +using input_t = compute_input; +using result_t = compute_result; +using descriptor_t = detail::descriptor_base; + +namespace pr = dal::backend::primitives; + +template +bool compute_finiteness(sycl::queue& queue, + const pr::ndview& data_1d, + bool allowNaN, + const dal::backend::event_vector& deps = {}) { + Float out; + + if (allowNaN) { + ONEDAL_PROFILER_TASK(finiteness_checker.reduce, queue); + out = pr::reduce_1d(queue, data_1d, pr::logical_or{}, pr::isinf{}, deps); + } + else { + ONEDAL_PROFILER_TASK(finiteness_checker.reduce, queue); + out = pr::reduce_1d(queue, data_1d, pr::logical_or{}, pr::isinfornan{}, deps); + } + // invert out to match daal implementation (assert result is finite) + return !static_cast(out); +} + +template +static result_t compute(const context_gpu& ctx, const descriptor_t& desc, const input_t& input) { + auto& queue = ctx.get_queue(); + const auto data = input.get_data(); + const auto data_1d = pr::table2ndarray_1d(queue, data, sycl::usm::alloc::device); + return result_t{}.set_finite(compute_finiteness(queue, data_1d, desc.get_allow_NaN())); +} + +template +struct compute_kernel_gpu { + result_t operator()(const context_gpu& ctx, + const descriptor_t& desc, + const input_t& input) const { + return compute(ctx, desc, input); + } + +#ifdef ONEDAL_DATA_PARALLEL + void operator()(const context_gpu& ctx, + const descriptor_t& desc, + const 
table& data, + bool& res) { + auto& queue = ctx.get_queue(); + const auto data_1d = pr::table2ndarray_1d(queue, data, sycl::usm::alloc::device); + res = compute_finiteness(queue, data_1d, desc.get_allow_NaN()); + } +#endif +}; + +template struct compute_kernel_gpu; +template struct compute_kernel_gpu; + +} // namespace oneapi::dal::finiteness_checker::backend diff --git a/cpp/oneapi/dal/algo/finiteness_checker/common.cpp b/cpp/oneapi/dal/algo/finiteness_checker/common.cpp new file mode 100644 index 00000000000..29b8ea6aa21 --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/common.cpp @@ -0,0 +1,45 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/finiteness_checker/common.hpp" +#include "oneapi/dal/exceptions.hpp" + +namespace oneapi::dal::finiteness_checker::detail { +namespace v1 { + +template +class descriptor_impl : public base { +public: + bool allowNaN = false; +}; + +template +descriptor_base::descriptor_base() : impl_(new descriptor_impl{}) {} + +template +bool descriptor_base::get_allow_NaN() const { + return impl_->allowNaN; +} + +template +void descriptor_base::set_allow_NaN(bool value) { + impl_->allowNaN = value; +} + +template class ONEDAL_EXPORT descriptor_base; + +} // namespace v1 +} // namespace oneapi::dal::finiteness_checker::detail diff --git a/cpp/oneapi/dal/algo/finiteness_checker/common.hpp b/cpp/oneapi/dal/algo/finiteness_checker/common.hpp new file mode 100644 index 00000000000..e9e5b36930c --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/common.hpp @@ -0,0 +1,140 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/detail/common.hpp" +#include "oneapi/dal/table/common.hpp" + +namespace oneapi::dal::finiteness_checker { + +namespace task { +namespace v1 { + +/// Tag-type that parameterizes entities that are used to compute statistics, distance, and so on. +struct compute {}; +/// Alias tag-type for the dense method. 
+using by_default = compute; +} // namespace v1 + +using v1::compute; +using v1::by_default; + +} // namespace task + +namespace method { +namespace v1 { +struct dense {}; +using by_default = dense; +} // namespace v1 + +using v1::dense; +using v1::by_default; + +} // namespace method + +namespace detail { +namespace v1 { +struct descriptor_tag {}; + +template +class descriptor_impl; + +template +constexpr bool is_valid_float_v = dal::detail::is_one_of_v; + +template +constexpr bool is_valid_method_v = dal::detail::is_one_of_v; + +template +constexpr bool is_valid_task_v = dal::detail::is_one_of_v; + +template +class descriptor_base : public base { + static_assert(is_valid_task_v); + +public: + using tag_t = descriptor_tag; + using float_t = float; + using method_t = method::by_default; + using task_t = Task; + + descriptor_base(); + + bool get_allow_NaN() const; + +protected: + void set_allow_NaN(bool); + +private: + dal::detail::pimpl> impl_; +}; + +} // namespace v1 + +using v1::descriptor_tag; +using v1::descriptor_impl; +using v1::descriptor_base; + +using v1::is_valid_float_v; +using v1::is_valid_method_v; +using v1::is_valid_task_v; + +} // namespace detail + +namespace v1 { + +/// @tparam Float The floating-point type that the algorithm uses for +/// intermediate computations. Can be :expr:`float` or +/// :expr:`double`. +/// @tparam Method Tag-type that specifies an implementation of algorithm. Can +/// be :expr:`method::dense`. +/// @tparam Task Tag-type that specifies the type of the problem to solve. Can +/// be :expr:`task::compute`. +template +class descriptor : public detail::descriptor_base { + static_assert(detail::is_valid_float_v); + static_assert(detail::is_valid_method_v); + static_assert(detail::is_valid_task_v); + + using base_t = detail::descriptor_base; + +public: + using float_t = Float; + using method_t = Method; + using task_t = Task; + + /// Creates a new instance of the class with the default property values. + descriptor() = default; + + /// @remark default = False + bool get_allow_NaN() const { + return base_t::get_allow_NaN(); + } + + auto& set_allow_NaN(bool value) { + base_t::set_allow_NaN(value); + return *this; + } +}; + +} // namespace v1 + +using v1::descriptor; + +} // namespace oneapi::dal::finiteness_checker diff --git a/cpp/oneapi/dal/algo/finiteness_checker/compute.hpp b/cpp/oneapi/dal/algo/finiteness_checker/compute.hpp new file mode 100644 index 00000000000..92252303610 --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/compute.hpp @@ -0,0 +1,31 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/algo/finiteness_checker/compute_types.hpp" +#include "oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp" +#include "oneapi/dal/compute.hpp" + +namespace oneapi::dal::detail { +namespace v1 { + +template +struct compute_ops + : dal::finiteness_checker::detail::compute_ops {}; + +} // namespace v1 +} // namespace oneapi::dal::detail diff --git a/cpp/oneapi/dal/algo/finiteness_checker/compute_types.cpp b/cpp/oneapi/dal/algo/finiteness_checker/compute_types.cpp new file mode 100644 index 00000000000..65c29d6630a --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/compute_types.cpp @@ -0,0 +1,71 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/finiteness_checker/compute_types.hpp" +#include "oneapi/dal/detail/common.hpp" + +namespace oneapi::dal::finiteness_checker { + +template +class detail::v1::compute_input_impl : public base { +public: + compute_input_impl(const table& data) : data(data) {} + table data; +}; + +template +class detail::v1::compute_result_impl : public base { +public: + bool finite; +}; + +using detail::v1::compute_input_impl; +using detail::v1::compute_result_impl; + +namespace v1 { + +template +compute_input::compute_input(const table& data) : impl_(new compute_input_impl(data)) {} + +template +const table& compute_input::get_data() const { + return impl_->data; +} + +template +void compute_input::set_data_impl(const table& value) { + impl_->data = value; +} + +template class ONEDAL_EXPORT compute_input; + +template +compute_result::compute_result() : impl_(new compute_result_impl{}) {} + +template +bool compute_result::get_finite() const { + return impl_->finite; +} + +template +void compute_result::set_finite_impl(const bool& value) { + impl_->finite = value; +} + +template class ONEDAL_EXPORT compute_result; + +} // namespace v1 +} // namespace oneapi::dal::finiteness_checker diff --git a/cpp/oneapi/dal/algo/finiteness_checker/compute_types.hpp b/cpp/oneapi/dal/algo/finiteness_checker/compute_types.hpp new file mode 100644 index 00000000000..30091328498 --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/compute_types.hpp @@ -0,0 +1,98 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/algo/finiteness_checker/common.hpp" + +namespace oneapi::dal::finiteness_checker { + +namespace detail { +namespace v1 { +template +class compute_input_impl; + +template +class compute_result_impl; +} // namespace v1 + +using v1::compute_input_impl; +using v1::compute_result_impl; + +} // namespace detail + +namespace v1 { + +/// @tparam Task Tag-type that specifies the type of the problem to solve. Can +/// be :expr:`task::compute`. +template +class compute_input : public base { + static_assert(detail::is_valid_task_v); + +public: + using task_t = Task; + + /// Creates a new instance of the class with the given :literal:`data`. + compute_input(const table& data); + + /// @remark default = table{} + const table& get_data() const; + + auto& set_data(const table& data) { + set_data_impl(data); + return *this; + } + +protected: + void set_data_impl(const table& data); + +private: + dal::detail::pimpl> impl_; +}; + +/// @tparam Task Tag-type that specifies the type of the problem to solve. Can +/// be :expr:`task::compute`. +template +class compute_result : public base { + static_assert(detail::is_valid_task_v); + +public: + using task_t = Task; + + /// Creates a new instance of the class with the default property values. + compute_result(); + + /// A boolean with the result finiteness. + bool get_finite() const; + + auto& set_finite(const bool& value) { + set_finite_impl(value); + return *this; + } + +protected: + void set_finite_impl(const bool&); + +private: + dal::detail::pimpl> impl_; +}; + +} // namespace v1 + +using v1::compute_input; +using v1::compute_result; + +} // namespace oneapi::dal::finiteness_checker diff --git a/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.cpp b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.cpp new file mode 100644 index 00000000000..03822b8ca9e --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.cpp @@ -0,0 +1,44 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include "oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp" +#include "oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp" +#include "oneapi/dal/backend/dispatcher.hpp" + +namespace oneapi::dal::finiteness_checker::detail { +namespace v1 { + +using dal::detail::host_policy; + +template +struct compute_ops_dispatcher { + compute_result operator()(const host_policy& ctx, + const descriptor_base& desc, + const compute_input& input) const { + using kernel_dispatcher_t = dal::backend::kernel_dispatcher< // + KERNEL_SINGLE_NODE_CPU(backend::compute_kernel_cpu)>; + return kernel_dispatcher_t()(ctx, desc, input); + } +}; + +#define INSTANTIATE(F, M, T) \ + template struct ONEDAL_EXPORT compute_ops_dispatcher; + +INSTANTIATE(float, method::dense, task::compute) +INSTANTIATE(double, method::dense, task::compute) + +} // namespace v1 +} // namespace oneapi::dal::finiteness_checker::detail diff --git a/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp new file mode 100644 index 00000000000..a974d9dc57f --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp @@ -0,0 +1,77 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#pragma once + +#include "oneapi/dal/algo/finiteness_checker/compute_types.hpp" +#include "oneapi/dal/detail/error_messages.hpp" +#include "oneapi/dal/table/homogen.hpp" + +namespace oneapi::dal::finiteness_checker::detail { +namespace v1 { + +template +struct compute_ops_dispatcher { + compute_result operator()(const Context&, + const descriptor_base& desc, + const compute_input&) const; + +#ifdef ONEDAL_DATA_PARALLEL + void operator()(const Context&, + const descriptor_base& desc, + const table& data, + const bool&); +#endif +}; + +template +struct compute_ops { + using float_t = typename Descriptor::float_t; + using method_t = typename Descriptor::method_t; + using task_t = typename Descriptor::task_t; + using input_t = compute_input; + using result_t = compute_result; + using descriptor_base_t = descriptor_base; + + void check_preconditions(const Descriptor& params, const input_t& input) const { + using msg = dal::detail::error_messages; + + if (!input.get_data().has_data()) { + throw domain_error(msg::input_data_is_empty()); + } + } + + template + auto operator()(const Context& ctx, const Descriptor& desc, const input_t& input) const { + check_preconditions(desc, input); + const auto result = + compute_ops_dispatcher()(ctx, desc, input); + return result; + } + +#ifdef ONEDAL_DATA_PARALLEL + template + void operator()(const Context& ctx, const Descriptor& desc, const table& data, bool& res) { + compute_ops_dispatcher()(ctx, desc, data, res); + } +#endif +}; + +} // namespace v1 + +using v1::compute_ops; + +} // namespace oneapi::dal::finiteness_checker::detail diff --git a/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops_dpc.cpp b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops_dpc.cpp new file mode 100644 index 00000000000..6e2b210043d --- /dev/null +++ b/cpp/oneapi/dal/algo/finiteness_checker/detail/compute_ops_dpc.cpp @@ -0,0 +1,58 @@ +/******************************************************************************* +* Copyright contributors to the oneDAL project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/
+
+#include "oneapi/dal/algo/finiteness_checker/backend/cpu/compute_kernel.hpp"
+#include "oneapi/dal/algo/finiteness_checker/backend/gpu/compute_kernel.hpp"
+#include "oneapi/dal/algo/finiteness_checker/detail/compute_ops.hpp"
+#include "oneapi/dal/backend/dispatcher.hpp"
+
+namespace oneapi::dal::finiteness_checker::detail {
+namespace v1 {
+
+using dal::detail::data_parallel_policy;
+
+template <typename Float, typename Method, typename Task>
+struct compute_ops_dispatcher<data_parallel_policy, Float, Method, Task> {
+    compute_result<Task> operator()(const data_parallel_policy& ctx,
+                                    const descriptor_base<Task>& params,
+                                    const compute_input<Task>& input) const {
+        using kernel_dispatcher_t = dal::backend::kernel_dispatcher<
+            KERNEL_SINGLE_NODE_CPU(backend::compute_kernel_cpu<Float, Method, Task>),
+            KERNEL_SINGLE_NODE_GPU(backend::compute_kernel_gpu<Float, Method, Task>)>;
+        return kernel_dispatcher_t{}(ctx, params, input);
+    }
+
+#ifdef ONEDAL_DATA_PARALLEL
+    void operator()(const data_parallel_policy& ctx,
+                    const descriptor_base<Task>& params,
+                    const table& data,
+                    bool& res) {
+        using kernel_dispatcher_t = dal::backend::kernel_dispatcher<
+            KERNEL_SINGLE_NODE_CPU(backend::compute_kernel_cpu<Float, Method, Task>),
+            KERNEL_SINGLE_NODE_GPU(backend::compute_kernel_gpu<Float, Method, Task>)>;
+        kernel_dispatcher_t{}(ctx, params, data, res);
+    }
+#endif
+};
+
+#define INSTANTIATE(F, M, T) \
+    template struct ONEDAL_EXPORT compute_ops_dispatcher<data_parallel_policy, F, M, T>;
+
+INSTANTIATE(float, method::dense, task::compute)
+INSTANTIATE(double, method::dense, task::compute)
+
+} // namespace v1
+} // namespace oneapi::dal::finiteness_checker::detail
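For orientation, here is a minimal sketch of how the new primitive is driven through the public oneAPI API. The free-function spelling and the `allow_NaN` semantics are inferred from the batch test below, not quoted from the sources, and `all_finite` is a hypothetical helper name:

    #include "oneapi/dal/algo/finiteness_checker/compute.hpp"

    namespace dal = oneapi::dal;

    // Returns true iff every element of `data` is finite.
    // With allow_NaN == false, get_finite() also reports false for NaN;
    // with allow_NaN == true, only +/-inf makes it false.
    bool all_finite(const dal::table& data) {
        const auto desc =
            dal::finiteness_checker::descriptor<float, dal::finiteness_checker::method::dense>{}
                .set_allow_NaN(false);
        return dal::compute(desc, data).get_finite();
    }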
diff --git a/cpp/oneapi/dal/algo/finiteness_checker/test/batch.cpp b/cpp/oneapi/dal/algo/finiteness_checker/test/batch.cpp
new file mode 100644
index 00000000000..e99b65cda38
--- /dev/null
+++ b/cpp/oneapi/dal/algo/finiteness_checker/test/batch.cpp
@@ -0,0 +1,104 @@
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <cmath>
+#include <limits>
+
+#include "oneapi/dal/algo/finiteness_checker/compute.hpp"
+
+#include "oneapi/dal/test/engine/fixtures.hpp"
+#include "oneapi/dal/test/engine/math.hpp"
+
+namespace oneapi::dal::finiteness_checker::test {
+
+namespace te = dal::test::engine;
+
+template <typename TestType>
+class finite_checker_batch_test : public te::float_algo_fixture<std::tuple_element_t<0, TestType>> {
+public:
+    using Float = std::tuple_element_t<0, TestType>;
+    using Method = std::tuple_element_t<1, TestType>;
+
+    void check_finiteness(const te::dataframe& x_data,
+                          bool allowNaN,
+                          double value,
+                          const te::table_id& x_data_table_id) {
+        const table x = x_data.get_table(this->get_policy(), x_data_table_id);
+
+        INFO("create descriptor");
+        const auto finiteness_desc =
+            finiteness_checker::descriptor<Float, Method>{}.set_allow_NaN(allowNaN);
+
+        INFO("run compute");
+        const bool compute_result = this->compute(finiteness_desc, x).get_finite();
+        if (compute_result == (std::isinf(value) || (std::isnan(value) && !allowNaN))) {
+            CAPTURE(compute_result, value, allowNaN);
+            FAIL();
+        }
+        SUCCEED();
+    }
+};
+
+using finiteness_types = COMBINE_TYPES((float, double), (finiteness_checker::method::dense));
+
+TEMPLATE_LIST_TEST_M(finite_checker_batch_test,
+                     "finiteness checker typical",
+                     "[finiteness_checker][integration][batch]",
+                     finiteness_types) {
+    SKIP_IF(this->not_float64_friendly());
+
+    // Initialize values
+    const te::dataframe x_data =
+        GENERATE_DATAFRAME(te::dataframe_builder{ 50, 50 }.fill_normal(0, 1, 7777),
+                           te::dataframe_builder{ 100, 50 }.fill_normal(0, 1, 7777),
+                           te::dataframe_builder{ 250, 50 }.fill_normal(0, 1, 7777),
+                           te::dataframe_builder{ 1100, 50 }.fill_normal(0, 1, 7777));
+    auto x_data_mutable = x_data.get_array().get_mutable_data();
+    const double value = GENERATE(0.0,
+                                  -std::numeric_limits<double>::infinity(),
+                                  std::numeric_limits<double>::infinity(),
+                                  std::numeric_limits<double>::quiet_NaN());
+    const bool allowNaN = GENERATE(0, 1);
+    x_data_mutable[45] = value;
+
+    // Homogen floating point type is the same as algorithm's floating point type
+    const auto x_data_table_id = this->get_homogen_table_id();
+
+    this->check_finiteness(x_data, allowNaN, value, x_data_table_id);
+}
+
+TEMPLATE_LIST_TEST_M(finite_checker_batch_test,
+                     "finiteness_checker compute one element matrix",
+                     "[finiteness_checker][integration][batch]",
+                     finiteness_types) {
+    SKIP_IF(this->not_float64_friendly());
+
+    // Initialize values to doubles
+    const double value = GENERATE(0.0,
+                                  -std::numeric_limits<double>::infinity(),
+                                  std::numeric_limits<double>::infinity(),
+                                  std::numeric_limits<double>::quiet_NaN());
+    const bool allowNaN = GENERATE(0, 1);
+
+    const te::dataframe x_data = GENERATE_DATAFRAME(te::dataframe_builder{ 1, 1 }.fill(value));
+
+    // Homogen floating point type is the same as algorithm's floating point type
+    const auto x_data_table_id = this->get_homogen_table_id();
+
+    this->check_finiteness(x_data, allowNaN, value, x_data_table_id);
+}
+
+} // namespace oneapi::dal::finiteness_checker::test
diff --git a/makefile.lst b/makefile.lst
index de7afb1090c..92dc52ff521 100755
--- a/makefile.lst
+++ b/makefile.lst
@@ -228,6 +228,7 @@ ONEAPI.ALGOS := \
     dbscan \
     decision_forest \
     decision_tree \
+    finiteness_checker \
     kmeans \
     kmeans_init \
     knn \

From 50584aae7d4fc775e990ea70c34ac77b32441cf9 Mon Sep 17 00:00:00 2001
From: Aleksei Khomenko
Date: Wed, 17 Jul 2024 21:55:15 +0300
Subject: [PATCH 55/65] chore(deps): update GPU FPK (#2680)

* chore(deps): update GPU FPK

* Update apt.sh

---------

Co-authored-by: Ian Faust
--- .ci/env/apt.sh | 2 +- WORKSPACE | 4 ++-- dev/download_micromkl.bat | 2 +- dev/download_micromkl.sh | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.ci/env/apt.sh b/.ci/env/apt.sh index e9a5549841f..0ad52830315 100755 --- a/.ci/env/apt.sh +++ b/.ci/env/apt.sh @@ -32,7 +32,7 @@ function add_repo { } function install_dpcpp { - sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-2024.1 + sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-2024.2 sudo bash -c 'echo libintelocl.so > /etc/OpenCL/vendors/intel-cpu.icd' } diff --git a/WORKSPACE b/WORKSPACE index c20ee8fc7e3..da391633d7d 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -33,8 +33,8 @@ micromkl_repo( micromkl_dpc_repo( name = "micromkl_dpc", root_env_var = "MKLGPUFPKROOT", - url = "https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/mklgpufpk_lnx_2024-02-20.tgz", - sha256 = "1c60914461aafa5e5512181c7d5c1fdbdeff83746dbd980fe97074a3b65fc1ed", + url = "https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/mklgpufpk_lnx_20240605.tgz", + sha256 = "0787a92e9580ed6b9fb97d054a0ed77994dbc18b4b3fb099451cb1e6ebdf4f16", ) load("@onedal//dev/bazel/deps:openblas.bzl", "openblas_repo") diff --git a/dev/download_micromkl.bat b/dev/download_micromkl.bat index 6a30a2a44e3..a38515735a5 100755 --- a/dev/download_micromkl.bat +++ b/dev/download_micromkl.bat @@ -20,7 +20,7 @@ powershell.exe -command "if ($PSVersionTable.PSVersion.Major -ge 3) {exit 1} els set MKLURLROOT=https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/ set MKLVERSION=20230413 -set MKLGPUVERSION="2024-02-20" +set MKLGPUVERSION=20240605 set MKLPACKAGE=mklfpk_win_%MKLVERSION% set MKLGPUPACKAGE=mklgpufpk_win_%MKLGPUVERSION% diff --git a/dev/download_micromkl.sh b/dev/download_micromkl.sh index 0aaef938e9a..6eb52ddca76 100755 --- a/dev/download_micromkl.sh +++ b/dev/download_micromkl.sh @@ -18,7 +18,7 @@ MKLFPK_URL_ROOT="https://github.com/oneapi-src/oneDAL/releases/download/Dependencies/" MKLFPK_VERSION="20230413" MKLFPK_VERSION_MAC="20210426" -MKLGPUFPK_VERSION="2024-02-20" +MKLGPUFPK_VERSION="20240605" WITH_GPU=true while true ; do From 7b7f61e9f03ebd875f68bf9db12633b28757b6af Mon Sep 17 00:00:00 2001 From: Khalil Date: Thu, 18 Jul 2024 15:13:54 +0200 Subject: [PATCH 56/65] feature: Linear Regression online spmd support (#2846) --- .../gpu/finalize_train_kernel_norm_eq_dpc.cpp | 121 ++--------------- .../finalize_train_kernel_norm_eq_impl.hpp | 51 +++++++ ...finalize_train_kernel_norm_eq_impl_dpc.cpp | 127 ++++++++++++++++++ .../linear_regression/backend/gpu/misc.hpp | 4 +- .../backend/gpu/train_kernel_norm_eq_dpc.cpp | 17 ++- .../detail/finalize_train_ops_dpc.cpp | 12 +- .../algo/linear_regression/test/fixture.hpp | 3 + .../linear_regression/test/online_spmd.cpp | 126 +++++++++++++++++ 8 files changed, 336 insertions(+), 125 deletions(-) create mode 100644 cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl.hpp create mode 100644 cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl_dpc.cpp create mode 100644 cpp/oneapi/dal/algo/linear_regression/test/online_spmd.cpp diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp index d3431663249..a74723e1b00 100644 --- a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp +++ 
b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_dpc.cpp
@@ -14,129 +14,32 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include "oneapi/dal/detail/common.hpp"
-#include "oneapi/dal/backend/dispatcher.hpp"
-#include "oneapi/dal/backend/primitives/ndarray.hpp"
-#include "oneapi/dal/backend/primitives/lapack.hpp"
-#include "oneapi/dal/backend/primitives/utils.hpp"
-
-#include "oneapi/dal/table/row_accessor.hpp"
-
-#include "oneapi/dal/algo/linear_regression/common.hpp"
-#include "oneapi/dal/algo/linear_regression/train_types.hpp"
-#include "oneapi/dal/algo/linear_regression/backend/model_impl.hpp"
 #include "oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel.hpp"
-#include "oneapi/dal/algo/linear_regression/backend/gpu/update_kernel.hpp"
-#include "oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp"
-
-namespace oneapi::dal::linear_regression::backend {
-
-using dal::backend::context_gpu;
-
-namespace be = dal::backend;
-namespace pr = be::primitives;
-
-template <typename Float, typename Task>
-static train_result<Task> call_dal_kernel(const context_gpu& ctx,
-                                          const detail::descriptor_base<Task>& desc,
-                                          const detail::train_parameters<Float>& params,
-                                          const partial_train_result<Task>& input) {
-    using dal::detail::check_mul_overflow;
-
-    using model_t = model<Task>;
-    using model_impl_t = detail::model_impl<Task>;
-
-    auto& queue = ctx.get_queue();
-
-    const bool compute_intercept = desc.get_compute_intercept();
-
-    constexpr auto uplo = pr::mkl::uplo::upper;
-    constexpr auto alloc = sycl::usm::alloc::device;
-
-    const auto response_count = input.get_partial_xty().get_row_count();
-    const auto ext_feature_count = input.get_partial_xty().get_column_count();
-    const auto feature_count = ext_feature_count - compute_intercept;
-
-    const pr::ndshape<2> xtx_shape{ ext_feature_count, ext_feature_count };
-
-    const auto xtx_nd =
-        pr::table2ndarray<Float>(queue, input.get_partial_xtx(), sycl::usm::alloc::device);
-    const auto xty_nd = pr::table2ndarray<Float, pr::ndorder::f>(queue,
-                                                                 input.get_partial_xty(),
-                                                                 sycl::usm::alloc::device);
-
-    const pr::ndshape<2> betas_shape{ response_count, feature_count + 1 };
+#include "oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl.hpp"
 
-    const auto betas_size = check_mul_overflow(response_count, feature_count + 1);
-    auto betas_arr = array<Float>::zeros(queue, betas_size, alloc);
-
-    double alpha = desc.get_alpha();
-    sycl::event ridge_event;
-    if (alpha != 0.0) {
-        ridge_event = add_ridge_penalty<Float>(queue, xtx_nd, compute_intercept, alpha);
-    }
-
-    auto nxtx = pr::ndarray<Float, 2>::empty(queue, xtx_shape, alloc);
-    auto nxty = pr::ndview<Float, 2>::wrap_mutable(betas_arr, betas_shape);
-    auto solve_event = pr::solve_system<uplo>(queue,
-                                              compute_intercept,
-                                              xtx_nd,
-                                              xty_nd,
-                                              nxtx,
-                                              nxty,
-                                              { ridge_event });
-    sycl::event::wait_and_throw({ solve_event });
-
-    auto betas = homogen_table::wrap(betas_arr, response_count, feature_count + 1);
-
-    const auto model_impl = std::make_shared<model_impl_t>(betas);
-    const auto model = dal::detail::make_private<model_t>(model_impl);
-
-    const auto options = desc.get_result_options();
-    auto result = train_result<Task>().set_model(model).set_result_options(options);
-
-    if (options.test(result_options::intercept)) {
-        auto arr = array<Float>::zeros(queue, response_count, alloc);
-        auto dst = pr::ndview<Float, 2>::wrap_mutable(arr, { 1l, response_count });
-        const auto src = nxty.get_col_slice(0l, 1l).t();
-
-        pr::copy(queue, dst, src).wait_and_throw();
-
-        auto intercept = homogen_table::wrap(arr, 1l, response_count);
-        result.set_intercept(intercept);
-    }
-
-    if (options.test(result_options::coefficients)) {
-        const auto size = check_mul_overflow(response_count, feature_count);
-
-        auto arr = array<Float>::zeros(queue, size, alloc);
-        const auto src = nxty.get_col_slice(1l, feature_count + 1);
-        auto dst = pr::ndview<Float, 2>::wrap_mutable(arr, { response_count, feature_count });
+#include "oneapi/dal/detail/common.hpp"
 
-        pr::copy(queue, dst, src).wait_and_throw();
+#include "oneapi/dal/backend/dispatcher.hpp"
 
-        auto coefficients = homogen_table::wrap(arr, response_count, feature_count);
-        result.set_coefficients(coefficients);
-    }
+namespace oneapi::dal::linear_regression::backend {
 
-    return result;
-}
+namespace bk = dal::backend;
 
 template <typename Float, typename Task>
-static train_result<Task> train(const context_gpu& ctx,
-                                const detail::descriptor_base<Task>& desc,
-                                const detail::train_parameters<Float>& params,
-                                const partial_train_result<Task>& input) {
-    return call_dal_kernel(ctx, desc, params, input);
+static train_result<Task> finalize_train(const bk::context_gpu& ctx,
+                                         const detail::descriptor_base<Task>& desc,
+                                         const detail::train_parameters<Float>& params,
+                                         const partial_train_result<Task>& input) {
+    return finalize_train_kernel_norm_eq_impl<Float, Task>(ctx)(desc, params, input);
 }
 
 template <typename Float, typename Task>
 struct finalize_train_kernel_gpu<Float, method::norm_eq, Task> {
-    train_result<Task> operator()(const context_gpu& ctx,
+    train_result<Task> operator()(const bk::context_gpu& ctx,
                                   const detail::descriptor_base<Task>& desc,
                                   const detail::train_parameters<Float>& params,
                                   const partial_train_result<Task>& input) const {
-        return train(ctx, desc, params, input);
+        return finalize_train(ctx, desc, params, input);
     }
 };
diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl.hpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl.hpp
new file mode 100644
index 00000000000..6eeaf17c0da
--- /dev/null
+++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl.hpp
@@ -0,0 +1,51 @@
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#pragma once
+
+#include "oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel.hpp"
+#include "oneapi/dal/backend/primitives/utils.hpp"
+
+#ifdef ONEDAL_DATA_PARALLEL
+
+namespace oneapi::dal::linear_regression::backend {
+
+namespace bk = dal::backend;
+
+template <typename Float, typename Task>
+class finalize_train_kernel_norm_eq_impl {
+    using comm_t = bk::communicator<spmd::device_memory_access::usm>;
+    using input_t = partial_train_result<Task>;
+    using result_t = train_result<Task>;
+    using descriptor_t = detail::descriptor_base<Task>;
+    using train_parameters_t = detail::train_parameters<Float>;
+
+public:
+    finalize_train_kernel_norm_eq_impl(const bk::context_gpu& ctx)
+            : q(ctx.get_queue()),
+              comm_(ctx.get_communicator()) {}
+    result_t operator()(const descriptor_t& desc,
+                        const train_parameters_t& params,
+                        const input_t& input);
+
+private:
+    sycl::queue q;
+    comm_t comm_;
+};
+
+} // namespace oneapi::dal::linear_regression::backend
+
+#endif // ONEDAL_DATA_PARALLEL
diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl_dpc.cpp
new file mode 100644
index 00000000000..c470f45403e
--- /dev/null
+++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl_dpc.cpp
@@ -0,0 +1,127 @@
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "oneapi/dal/algo/linear_regression/backend/gpu/finalize_train_kernel_norm_eq_impl.hpp"
+#include "oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp"
+#include "oneapi/dal/algo/linear_regression/backend/model_impl.hpp"
+
+#include "oneapi/dal/backend/primitives/lapack.hpp"
+
+namespace oneapi::dal::linear_regression::backend {
+
+namespace be = dal::backend;
+namespace pr = be::primitives;
+
+using be::context_gpu;
+
+template <typename Float, typename Task>
+train_result<Task> finalize_train_kernel_norm_eq_impl<Float, Task>::operator()(
+    const detail::descriptor_base<Task>& desc,
+    const detail::train_parameters<Float>& params,
+    const partial_train_result<Task>& input) {
+    using dal::detail::check_mul_overflow;
+
+    using model_t = model<Task>;
+    using model_impl_t = detail::model_impl<Task>;
+
+    const bool compute_intercept = desc.get_compute_intercept();
+
+    constexpr auto uplo = pr::mkl::uplo::upper;
+    constexpr auto alloc = sycl::usm::alloc::device;
+
+    const auto response_count = input.get_partial_xty().get_row_count();
+    const auto ext_feature_count = input.get_partial_xty().get_column_count();
+    const auto feature_count = ext_feature_count - compute_intercept;
+
+    const pr::ndshape<2> xtx_shape{ ext_feature_count, ext_feature_count };
+
+    const auto xtx_nd =
+        pr::table2ndarray<Float>(q, input.get_partial_xtx(), sycl::usm::alloc::device);
+    const auto xty_nd = pr::table2ndarray<Float, pr::ndorder::f>(q,
+                                                                 input.get_partial_xty(),
+                                                                 sycl::usm::alloc::device);
+
+    const pr::ndshape<2> betas_shape{ response_count, feature_count + 1 };
+
+    const auto betas_size = check_mul_overflow(response_count, feature_count + 1);
+    auto betas_arr = array<Float>::zeros(q, betas_size, alloc);
+
+    if (comm_.get_rank_count() > 1) {
+        {
+            ONEDAL_PROFILER_TASK(xtx_allreduce);
+            auto xtx_arr =
+                dal::array<Float>::wrap(q, xtx_nd.get_mutable_data(), xtx_nd.get_count());
+            comm_.allreduce(xtx_arr).wait();
+        }
+        {
+            ONEDAL_PROFILER_TASK(xty_allreduce);
+            auto xty_arr =
+                dal::array<Float>::wrap(q, xty_nd.get_mutable_data(), xty_nd.get_count());
+            comm_.allreduce(xty_arr).wait();
+        }
+    }
+
+    double alpha = desc.get_alpha();
+    sycl::event ridge_event;
+    if (alpha != 0.0) {
+        ridge_event = add_ridge_penalty<Float>(q, xtx_nd, compute_intercept, alpha);
+    }
+
+    auto nxtx = pr::ndarray<Float, 2>::empty(q, xtx_shape, alloc);
+    auto nxty = pr::ndview<Float, 2>::wrap_mutable(betas_arr, betas_shape);
+    auto solve_event =
+        pr::solve_system<uplo>(q, compute_intercept, xtx_nd, xty_nd, nxtx, nxty, { ridge_event });
+    sycl::event::wait_and_throw({ solve_event });
+
+    auto betas = homogen_table::wrap(betas_arr, response_count, feature_count + 1);
+
+    const auto model_impl = std::make_shared<model_impl_t>(betas);
+    const auto model = dal::detail::make_private<model_t>(model_impl);
+
+    const auto options = desc.get_result_options();
+    auto result = train_result<Task>().set_model(model).set_result_options(options);
+
+    if (options.test(result_options::intercept)) {
+        auto arr = array<Float>::zeros(q, response_count, alloc);
+        auto dst = pr::ndview<Float, 2>::wrap_mutable(arr, { 1l, response_count });
+        const auto src = nxty.get_col_slice(0l, 1l).t();
+
+        pr::copy(q, dst, src).wait_and_throw();
+
+        auto intercept = homogen_table::wrap(arr, 1l, response_count);
+        result.set_intercept(intercept);
+    }
+
+    if (options.test(result_options::coefficients)) {
+        const auto size = check_mul_overflow(response_count, feature_count);
+
+        auto arr = array<Float>::zeros(q, size, alloc);
+        const auto src = nxty.get_col_slice(1l, feature_count + 1);
+        auto dst = pr::ndview<Float, 2>::wrap_mutable(arr, { response_count, feature_count });
+
+        pr::copy(q, dst, src).wait_and_throw();
+
+        auto coefficients = homogen_table::wrap(arr, response_count, feature_count);
+        result.set_coefficients(coefficients);
+    }
+
+    return result;
+}
+
+template class finalize_train_kernel_norm_eq_impl<float, task::regression>;
+template class finalize_train_kernel_norm_eq_impl<double, task::regression>;
+
+} // namespace oneapi::dal::linear_regression::backend
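For reference, the finalize step above is the distributed form of the ridge-regularized normal equations: each rank holds partial cross-products, the two allreduce calls sum them, and every rank then solves the same system. The notation below is shorthand introduced here, not taken from the sources:

    \[
        X^\top X = \sum_r X_r^\top X_r, \qquad
        X^\top y = \sum_r X_r^\top y_r, \qquad
        \bigl(X^\top X + \alpha I\bigr)\,\hat{\beta} = X^\top y,
    \]

where X_r, y_r are the rows seen by rank r and alpha comes from desc.get_alpha(); note that add_ridge_penalty leaves the intercept diagonal entry unpenalized.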
diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp
index 5ad5ba647ec..723fde68fb9 100644
--- a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp
+++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/misc.hpp
@@ -44,7 +44,7 @@ sycl::event add_ridge_penalty(sycl::queue& q,
                               Float alpha,
                               const bk::event_vector& deps = {}) {
     ONEDAL_ASSERT(xtx.has_mutable_data());
-    ONEDAL_ASSERT(be::is_known_usm(q, xtx.get_mutable_data()));
+    ONEDAL_ASSERT(bk::is_known_usm(q, xtx.get_mutable_data()));
     ONEDAL_ASSERT(xtx.get_dimension(0) == xtx.get_dimension(1));
 
     Float* xtx_ptr = xtx.get_mutable_data();
@@ -52,7 +52,7 @@ sycl::event add_ridge_penalty(sycl::queue& q,
     std::int64_t original_feature_count = feature_count - compute_intercept;
 
     return q.submit([&](sycl::handler& cgh) {
-        const auto range = be::make_range_1d(original_feature_count);
+        const auto range = bk::make_range_1d(original_feature_count);
         cgh.depends_on(deps);
         std::int64_t step = feature_count + 1;
         cgh.parallel_for(range, [=](sycl::id<1> idx) {
diff --git a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp
index 25b08aa7710..04d76fe86b7 100644
--- a/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp
+++ b/cpp/oneapi/dal/algo/linear_regression/backend/gpu/train_kernel_norm_eq_dpc.cpp
@@ -104,17 +104,9 @@ static train_result call_dal_kernel(const context_gpu& ctx,
                                                 old_x_arr = std::move(x_arr), old_y_arr = std::move(y_arr);
     }
 
-    const be::event_vector solve_deps{ last_xty_event, last_xtx_event };
-
-    double alpha = desc.get_alpha();
-    if (alpha != 0.0) {
-        last_xtx_event =
-            add_ridge_penalty<Float>(queue, xtx, compute_intercept, alpha, { last_xtx_event });
-    }
-
     auto& comm = ctx.get_communicator();
     if (comm.get_rank_count() > 1) {
-        sycl::event::wait_and_throw(solve_deps);
+        sycl::event::wait_and_throw({ last_xty_event, last_xtx_event });
         {
             ONEDAL_PROFILER_TASK(xtx_allreduce);
             auto xtx_arr = dal::array<Float>::wrap(queue, xtx.get_mutable_data(), xtx.get_count());
@@ -127,6 +119,13 @@
         }
     }
 
+    double alpha = desc.get_alpha();
+    if (alpha != 0.0) {
+        last_xtx_event =
+            add_ridge_penalty<Float>(queue, xtx, compute_intercept, alpha, { last_xtx_event });
+    }
+    const be::event_vector solve_deps{ last_xty_event, last_xtx_event };
+
     auto nxtx = pr::ndarray<Float, 2>::empty(queue, xtx_shape, alloc);
     auto nxty = pr::ndview<Float, 2>::wrap_mutable(betas_arr, betas_shape);
     auto solve_event =
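The reordering in the hunk above matters as soon as more than one rank participates: the ridge term has to be added once to the reduced cross-product matrix, not on each rank before the reduction, since for R ranks

    \[
        \sum_{r=1}^{R} \bigl(X_r^\top X_r + \alpha I\bigr)
        = X^\top X + R\,\alpha I
        \;\neq\; X^\top X + \alpha I,
    \]

which is why add_ridge_penalty is now invoked after the allreduce block and solve_deps is assembled afterwards.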
diff --git a/cpp/oneapi/dal/algo/linear_regression/detail/finalize_train_ops_dpc.cpp b/cpp/oneapi/dal/algo/linear_regression/detail/finalize_train_ops_dpc.cpp
index 3592aeefccb..21a5ce8108d 100644
--- a/cpp/oneapi/dal/algo/linear_regression/detail/finalize_train_ops_dpc.cpp
+++ b/cpp/oneapi/dal/algo/linear_regression/detail/finalize_train_ops_dpc.cpp
@@ -38,7 +38,7 @@ struct finalize_train_ops_dispatcher {
                          const partial_train_result<Task>& input) const {
         using kernel_dispatcher_t = dal::backend::kernel_dispatcher<
             KERNEL_SINGLE_NODE_CPU(parameters::train_parameters_cpu<Float, Method, Task>),
-            KERNEL_SINGLE_NODE_GPU(parameters::train_parameters_gpu<Float, Method, Task>)>;
+            KERNEL_UNIVERSAL_SPMD_GPU(parameters::train_parameters_gpu<Float, Method, Task>)>;
         return kernel_dispatcher_t{}(ctx, desc, input);
     }
 
@@ -56,14 +56,16 @@ struct finalize_train_ops_dispatcher {
                          const partial_train_result<Task>& input) const {
         using kernel_dispatcher_t = dal::backend::kernel_dispatcher<
             KERNEL_SINGLE_NODE_CPU(backend::finalize_train_kernel_cpu<Float, Method, Task>),
-            KERNEL_SINGLE_NODE_GPU(backend::finalize_train_kernel_gpu<Float, Method, Task>)>;
+            KERNEL_UNIVERSAL_SPMD_GPU(backend::finalize_train_kernel_gpu<Float, Method, Task>)>;
         return kernel_dispatcher_t{}(ctx, desc, params, input);
     }
 };
 
-#define INSTANTIATE(F, M, T)                                                           \
-    template struct ONEDAL_EXPORT                                                      \
-        finalize_train_ops_dispatcher<dal::detail::data_parallel_policy, F, M, T>;
+#define INSTANTIATE(F, M, T)                                                           \
+    template struct ONEDAL_EXPORT                                                      \
+        finalize_train_ops_dispatcher<dal::detail::data_parallel_policy, F, M, T>;     \
+    template struct ONEDAL_EXPORT                                                      \
+        finalize_train_ops_dispatcher<dal::detail::spmd_data_parallel_policy, F, M, T>;
 
 INSTANTIATE(float, method::norm_eq, task::regression)
 INSTANTIATE(double, method::norm_eq, task::regression)
diff --git a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp
index aedf0165454..fb935174cfe 100644
--- a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp
+++ b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp
@@ -54,6 +54,9 @@ class lr_test : public te::crtp_algo_fixture {
     using test_input_t = infer_input<task::regression>;
     using test_result_t = infer_result<task::regression>;
 
+    using partial_input_t = partial_train_input<>;
+    using partial_result_t = partial_train_result<>;
+
     te::table_id get_homogen_table_id() const {
         return te::table_id::homogen();
     }
diff --git a/cpp/oneapi/dal/algo/linear_regression/test/online_spmd.cpp b/cpp/oneapi/dal/algo/linear_regression/test/online_spmd.cpp
new file mode 100644
index 00000000000..c0f7968adfc
--- /dev/null
+++ b/cpp/oneapi/dal/algo/linear_regression/test/online_spmd.cpp
@@ -0,0 +1,126 @@
+/*******************************************************************************
+* Copyright contributors to the oneDAL project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "oneapi/dal/algo/linear_regression/test/fixture.hpp"
+#include "oneapi/dal/test/engine/tables.hpp"
+#include "oneapi/dal/test/engine/io.hpp"
+
+namespace oneapi::dal::linear_regression::test {
+
+namespace te = dal::test::engine;
+namespace la = te::linalg;
+namespace linear_regression = oneapi::dal::linear_regression;
+
+template <typename TestType>
+class lr_online_spmd_test : public lr_test<TestType, lr_online_spmd_test<TestType>> {
+public:
+    using base_t = lr_test<TestType, lr_online_spmd_test<TestType>>;
+    using float_t = typename base_t::float_t;
+    using input_t = typename base_t::train_input_t;
+    using partial_input_t = typename base_t::partial_input_t;
+    using partial_result_t = typename base_t::partial_result_t;
+    using result_t = typename base_t::train_result_t;
+
+    void set_rank_count(std::int64_t rank_count) {
+        n_rank = rank_count;
+    }
+
+    std::int64_t get_rank_count() {
+        return n_rank;
+    }
+
+    void generate_dimensions() {
+        this->t_count_ = GENERATE(307, 12999);
+        this->s_count_ = GENERATE(10000);
+        this->f_count_ = GENERATE(2, 17);
+        this->r_count_ = GENERATE(2, 15);
+        this->intercept_ = GENERATE(0, 1);
+    }
+
+    template <typename... Args>
+    result_t finalize_train_override(Args&&... args) {
+        return this->finalize_train_via_spmd_threads_and_merge(n_rank, std::forward<Args>(args)...);
+    }
+
+    result_t merge_finalize_train_result_override(const std::vector<result_t>& results) {
+        return results[0];
+    }
+
+    template <typename... Args>
+    std::vector<partial_result_t> split_finalize_train_input_override(std::int64_t split_count,
+                                                                      Args&&... args) {
+        ONEDAL_ASSERT(split_count == n_rank);
+        const std::vector<partial_result_t> input{ std::forward<Args>(args)... };
+
+        return input;
+    }
+
+    void run_and_check_linear_online_spmd(std::int64_t n_rank,
+                                          std::int64_t n_blocks,
+                                          std::int64_t seed = 888,
+                                          double tol = 1e-2) {
+        table x_train, y_train, x_test, y_test;
+        std::tie(x_train, y_train, x_test, y_test) = this->prepare_inputs(seed, tol);
+
+        const auto desc = this->get_descriptor();
+        std::vector<partial_result_t> partial_results;
+        auto input_table_x = base_t::template split_table_by_rows<double>(x_train, n_rank);
+        auto input_table_y = base_t::template split_table_by_rows<double>(y_train, n_rank);
+        for (int64_t i = 0; i < n_rank; i++) {
+            partial_result_t partial_result;
+            auto input_table_x_blocks =
+                base_t::template split_table_by_rows<double>(input_table_x[i], n_blocks);
+            auto input_table_y_blocks =
+                base_t::template split_table_by_rows<double>(input_table_y[i], n_blocks);
+            for (int64_t j = 0; j < n_blocks; j++) {
+                partial_result = this->partial_train(desc,
+                                                     partial_result,
+                                                     input_table_x_blocks[j],
+                                                     input_table_y_blocks[j]);
+            }
+            partial_results.push_back(partial_result);
+        }
+
+        const auto train_result = this->finalize_train_override(desc, partial_results);
+
+        SECTION("Checking intercept values") {
+            if (desc.get_result_options().test(result_options::intercept))
+                base_t::check_if_close(train_result.get_intercept(), base_t::bias_, tol);
+        }
+
+        SECTION("Checking coefficient values") {
+            if (desc.get_result_options().test(result_options::coefficients))
+                base_t::check_if_close(train_result.get_coefficients(), base_t::beta_, tol);
+        }
+    }
+
+private:
+    std::int64_t n_rank;
+};
+
+TEMPLATE_LIST_TEST_M(lr_online_spmd_test, "lr common flow", "[lr][integration][spmd]", lr_types) {
+    SKIP_IF(this->get_policy().is_cpu());
+    SKIP_IF(this->not_float64_friendly());
+
+    this->generate(777);
+
+    this->set_rank_count(GENERATE(1, 2, 4));
+    std::int64_t n_blocks = GENERATE(1, 3, 10);
+
+    this->run_and_check_linear_online_spmd(this->get_rank_count(), n_blocks);
+}
+
+} // namespace oneapi::dal::linear_regression::test
From 
133dc6ad24583ca04925610a23facfe175860f13 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:08:20 -0700 Subject: [PATCH 57/65] chore(deps): update dependency fmt to v11.0.2 (#2851) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- WORKSPACE | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index da391633d7d..3cba5bcd224 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -115,8 +115,8 @@ http_archive( http_archive( name = "fmt", - url = "https://github.com/fmtlib/fmt/archive/11.0.1.tar.gz", - sha256 = "7d009f7f89ac84c0a83f79ed602463d092fbf66763766a907c97fd02b100f5e9", - strip_prefix = "fmt-11.0.1", + url = "https://github.com/fmtlib/fmt/archive/11.0.2.tar.gz", + sha256 = "6cb1e6d37bdcb756dbbe59be438790db409cdb4868c66e888d5df9f13f7c027f", + strip_prefix = "fmt-11.0.2", build_file = "@onedal//dev/bazel/deps:fmt.tpl.BUILD", ) From 3e7fba195dafdcc12909181a0c3e94be3d562d5b Mon Sep 17 00:00:00 2001 From: msa <111298646+md-shafiul-alam@users.noreply.github.com> Date: Tue, 23 Jul 2024 16:18:08 -0400 Subject: [PATCH 58/65] update qemu emulation version (#2852) --- .ci/env/apt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/env/apt.sh b/.ci/env/apt.sh index 0ad52830315..2665ed9a65d 100755 --- a/.ci/env/apt.sh +++ b/.ci/env/apt.sh @@ -63,7 +63,7 @@ function install_qemu_emulation_apt { } function install_qemu_emulation_deb { - qemu_deb=qemu-user-static_8.2.1+ds-1~bpo12+1_amd64.deb + qemu_deb=qemu-user-static_9.0.1+ds-1~bpo12+1_amd64.deb set -eo pipefail wget http://ftp.debian.org/debian/pool/main/q/qemu/${qemu_deb} sudo dpkg -i ${qemu_deb} From 8375ba68b5f555c0446d7a731928ce9c94c922b1 Mon Sep 17 00:00:00 2001 From: Alexandra Date: Wed, 24 Jul 2024 11:08:15 +0200 Subject: [PATCH 59/65] Update CODEOWNERS (#2840) * Update CODEOWNERS * Remove user without write access --------- Co-authored-by: Alexander Andreev --- .github/CODEOWNERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5871fb84661..d6a47717935 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,6 +1,6 @@ # Documentation owners and reviewers -/docs/ @Vika-F @maria-Petrova @Alexsandruss @aepanchi -*.md @Vika-F @maria-Petrova @Alexsandruss @aepanchi +/docs/ @Vika-F @maria-Petrova @Alexsandruss @bdmoore1 +*.md @Vika-F @maria-Petrova @Alexsandruss @bdmoore1 # TTP files third-party* @maria-Petrova @@ -22,7 +22,7 @@ deploy/ @Alexsandruss @napetrov @homksei @ahuber21 @ethanglaser dev/ @Alexsandruss @napetrov @homksei @ahuber21 @ethanglaser # C++ code -cpp/ @Alexsandruss @samir-nasibli @KulikovNikita @Alexandr-Solovev +cpp/ @Alexsandruss @samir-nasibli @Alexandr-Solovev # Tree based methods dtrees @razdoburdin @ahuber21 @avolkov-intel @icfaust From bb9e92edc58f2ad63e0000dc06a22d7c0d103596 Mon Sep 17 00:00:00 2001 From: Aleksei Khomenko Date: Wed, 24 Jul 2024 16:27:50 +0300 Subject: [PATCH 60/65] fix(samples): correct multiline command handling in `setup_samples.cmake` (#2832) --- samples/cmake/setup_samples.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/cmake/setup_samples.cmake b/samples/cmake/setup_samples.cmake index afaf9042417..dd7d3cc758b 100644 --- a/samples/cmake/setup_samples.cmake +++ b/samples/cmake/setup_samples.cmake @@ -111,7 +111,7 @@ function(add_samples samples_paths) set_target_properties(${sample} PROPERTIES RUNTIME_OUTPUT_DIRECTORY 
"${PROJECT_SOURCE_DIR}/_cmake_results/${CPU_ARCHITECTURE}_${LINK_TYPE}") add_custom_target(run_${sample} - COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} \\ + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} -ppn ${MPIEXEC_NUMPROCS_PER_NODE} $ DEPENDS ${sample} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} From 9266bff3615ef2301697c716ff29d5fa6b06a127 Mon Sep 17 00:00:00 2001 From: Alexander Andreev Date: Mon, 29 Jul 2024 12:38:17 +0100 Subject: [PATCH 61/65] Correct PCA nComponents description (#2853) --- cpp/daal/include/algorithms/pca/pca_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/daal/include/algorithms/pca/pca_types.h b/cpp/daal/include/algorithms/pca/pca_types.h index 8c8472b140b..9a9daa4f987 100644 --- a/cpp/daal/include/algorithms/pca/pca_types.h +++ b/cpp/daal/include/algorithms/pca/pca_types.h @@ -670,7 +670,7 @@ class DAAL_EXPORT BaseBatchParameter : public daal::algorithms::Parameter BaseBatchParameter(); DAAL_UINT64 resultsToCompute; /*!< 64 bit integer flag that indicates the results to compute */ - size_t nComponents; /*!< number of components for reduced implementation */ + size_t nComponents; /*!< number of components for reduced implementation (applicable for batch mode only) */ bool isDeterministic; /*!< sign flip if required */ bool doScale; /*!< scaling if required */ bool isCorrelation; /*!< correlation is provided */ From b81e5adb92179a787f87c6de7b460137ff3cf6a2 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 22:00:52 -0700 Subject: [PATCH 62/65] chore(deps): update ossf/scorecard-action action to v2.4.0 (#2854) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .github/workflows/openssf-scorecard.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/openssf-scorecard.yml b/.github/workflows/openssf-scorecard.yml index 89f5d7b860e..42deb5ad780 100644 --- a/.github/workflows/openssf-scorecard.yml +++ b/.github/workflows/openssf-scorecard.yml @@ -31,7 +31,7 @@ jobs: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@dc50aa9510b46c811795eb24b2f1ba02a914e534 # v2.3.3 + uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0 with: results_file: results.sarif results_format: sarif From d73195c4a808add37e8d00069a337dea7cd097a2 Mon Sep 17 00:00:00 2001 From: Alexander Andreev Date: Wed, 31 Jul 2024 12:37:24 +0100 Subject: [PATCH 63/65] Update sklearnex building and testing CI job (#2861) --- .ci/pipeline/ci.yml | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/.ci/pipeline/ci.yml b/.ci/pipeline/ci.yml index e8393098b3f..788839d68b6 100755 --- a/.ci/pipeline/ci.yml +++ b/.ci/pipeline/ci.yml @@ -558,7 +558,7 @@ jobs: fi displayName: 'bazel-cache-limit' -- job: LinuxDaal4py +- job: LinuxSklearnex dependsOn: LinuxMakeGNU_MKL timeoutInMinutes: 0 variables: @@ -586,54 +586,47 @@ jobs: conda create -q -y -n CB -c conda-forge python=$(python.version) tbb mpich displayName: 'Conda create' - script: | - git clone https://github.com/intel/scikit-learn-intelex.git daal4py - displayName: Clone daal4py + git clone https://github.com/intel/scikit-learn-intelex.git sklearnex + displayName: Clone sklearnex - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB - pip install -r daal4py/dependencies-dev - pip install -r daal4py/requirements-test.txt + pip 
install -r sklearnex/dependencies-dev + pip install -r sklearnex/requirements-test.txt displayName: Create python environment - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB export DALROOT=$(Pipeline.Workspace)/daal/latest source ${DALROOT}/env/vars.sh - cd daal4py + cd sklearnex export PYTHON=python ./conda-recipe/build.sh - displayName: daal4py build + displayName: sklearnex build - task: PublishPipelineArtifact@1 inputs: - artifactName: '$(platform.type) daal4py build' - targetPath: '$(Build.Repository.LocalPath)/daal4py' - displayName: 'Upload daal4py build artifacts' + artifactName: '$(platform.type) sklearnex build' + targetPath: '$(Build.Repository.LocalPath)/sklearnex' + displayName: 'Upload sklearnex build artifacts' continueOnError: true - - script: | - . /usr/share/miniconda/etc/profile.d/conda.sh - conda activate CB - export DALROOT=$(Pipeline.Workspace)/daal/latest - cd daal4py - python setup_sklearnex.py install --single-version-externally-managed --record=record.txt - displayName: sklearnex build - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB source $(Pipeline.Workspace)/daal/latest/env/vars.sh - ./daal4py/conda-recipe/run_test.sh + ./sklearnex/conda-recipe/run_test.sh timeoutInMinutes: 15 - displayName: daal4py test + displayName: sklearnex test - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB source $(Pipeline.Workspace)/daal/latest/env/vars.sh ret_code=0 - python -m daal4py daal4py/tests/run_examples.py + python -m sklearnex sklearnex/tests/run_examples.py ret_code=$(($ret_code + $?)) - python -m daal4py daal4py/tests/daal4py/sycl/sklearn_sycl.py + python -m sklearnex sklearnex/tests/daal4py/sycl/sklearn_sycl.py ret_code=$(($ret_code + $?)) exit $ret_code - displayName: daal4py examples + displayName: sklearnex examples - script: | source /usr/share/miniconda/etc/profile.d/conda.sh conda activate CB From 0b26452c3e89022e91652f1937bd02581f146d05 Mon Sep 17 00:00:00 2001 From: msa <111298646+md-shafiul-alam@users.noreply.github.com> Date: Wed, 31 Jul 2024 16:27:34 -0400 Subject: [PATCH 64/65] qemu version change (#2862) * qemu version change * allow multiple versions * codefactor * typo * oops --- .ci/env/apt.sh | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/.ci/env/apt.sh b/.ci/env/apt.sh index 2665ed9a65d..591c87b4297 100755 --- a/.ci/env/apt.sh +++ b/.ci/env/apt.sh @@ -63,10 +63,31 @@ function install_qemu_emulation_apt { } function install_qemu_emulation_deb { - qemu_deb=qemu-user-static_9.0.1+ds-1~bpo12+1_amd64.deb + set +e + + versions=(9.0.2 9.0.1 8.2.4) + found_version="" + for version in ${versions[@]}; do + qemu_deb="qemu-user-static_${version}+ds-1_amd64.deb" + echo "Checking for http://ftp.debian.org/debian/pool/main/q/qemu/${qemu_deb}" + if wget -q --method=HEAD http://ftp.debian.org/debian/pool/main/q/qemu/${qemu_deb} &> /dev/null; + then + echo "Found qemu version ${version}" + found_version=${qemu_deb} + break + fi + done + set -eo pipefail - wget http://ftp.debian.org/debian/pool/main/q/qemu/${qemu_deb} - sudo dpkg -i ${qemu_deb} + if [[ -z "${found_version}" ]] ; then + # If nothing is found, error out and fail + echo "None of the requested qemu versions ${versions[*]} are available." 
+ false + fi + + wget http://ftp.debian.org/debian/pool/main/q/qemu/${found_version} + sudo dpkg -i ${found_version} + sudo systemctl restart systemd-binfmt.service set +eo pipefail } From d993a44a598de200e503b05de5daae0aca962806 Mon Sep 17 00:00:00 2001 From: Anatoly Volkov <117643568+avolkov-intel@users.noreply.github.com> Date: Fri, 2 Aug 2024 05:25:20 -0700 Subject: [PATCH 65/65] Add missing ONEDAL_EXPORT for sparse Logistic Regression (#2864) * Add ONEDAL_EXPORT for sparse method * Minor --- cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops.cpp | 3 +++ cpp/oneapi/dal/algo/logistic_regression/detail/train_ops.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops.cpp b/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops.cpp index 935bd6ab9af..2d462694bbe 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/detail/infer_ops.cpp @@ -40,5 +40,8 @@ struct infer_ops_dispatcher { INSTANTIATE(float, method::dense_batch, task::classification) INSTANTIATE(double, method::dense_batch, task::classification) +INSTANTIATE(float, method::sparse, task::classification) +INSTANTIATE(double, method::sparse, task::classification) + } // namespace v1 } // namespace oneapi::dal::logistic_regression::detail diff --git a/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops.cpp b/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops.cpp index ab8b385154e..28663fffa3d 100644 --- a/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops.cpp +++ b/cpp/oneapi/dal/algo/logistic_regression/detail/train_ops.cpp @@ -65,5 +65,8 @@ struct train_ops_dispatcher { INSTANTIATE(float, method::dense_batch, task::classification) INSTANTIATE(double, method::dense_batch, task::classification) +INSTANTIATE(float, method::sparse, task::classification) +INSTANTIATE(double, method::sparse, task::classification) + } // namespace v1 } // namespace oneapi::dal::logistic_regression::detail
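For context, the specializations exported above are what the public API resolves to once the sparse method is selected; below is a minimal sketch of that selection. The CSR feature table `x_csr`, the label table `y`, and the SYCL `queue` are assumed to be set up elsewhere, and the two-argument descriptor constructor mirrors the dense_batch examples rather than being quoted from this patch:

    #include "oneapi/dal/algo/logistic_regression.hpp"

    namespace dal = oneapi::dal;
    namespace lr = dal::logistic_regression;

    // double precision, sparse (CSR) method, classification task
    using desc_t = lr::descriptor<double, lr::method::sparse, lr::task::classification>;
    const auto desc = desc_t{ /*compute_intercept=*/true, /*C=*/1.0 };

    const auto train_result = dal::train(queue, desc, x_csr, y);
    const auto infer_result = dal::infer(queue, desc, train_result.get_model(), x_csr);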