Implement LogLossFunction class (#2453)
* Initial commit

* Add tests

* Add cg_solver primitive to solve the equation Ax = b (see the CG sketch after this list)

* Move the newton_cg primitive to the optimizers primitives

* Define newton_cg optimization function

* Add backtracking algorithm for finding an optimal alpha, implement newton_cg solver (see the line-search sketch after this list)

* Fix errors, add tests for newton-cg

* Remove redundant wait_and_throw, add links to sources

* Ensure code stability and fix minor issues

- Add control over the number of iterations in while loops
- Use l2-norm for convergence checks in cg-solver
- Move QuadraticFunction to primitives section

* Use sycl::fill and sycl::fabs, and add specifiers for virtual functions

* Remove redundant package dependency, update default values for Float parameters

* Change update_x return type to event_vector, rename a test function, and make a minor fix

* Initial commit

* Split logloss and derivative functions, decrease the number of parameters

* Delete redundant compute functions, deselect tests

* Add LogLossFunction class and cover it with tests

* Fix bugs, rename kernels, remove redundant ones, and update performance tests

* Add wait and throw after gemv events

* Minor

* Fix error and add batch test

* Add const qualifier for table with data

* Minor
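
For context on the cg_solver bullet above: conjugate gradient solves a symmetric positive-definite system Ax = b using only matrix-vector products. Below is a minimal, CPU-only C++ sketch of the idea; the names and the dense matvec are illustrative and do not mirror the oneDAL primitive, which runs on SYCL devices and, per the commits above, caps its iteration count and checks convergence with the l2-norm of the residual.

#include <cmath>
#include <cstddef>
#include <vector>

// Minimal conjugate-gradient sketch for a symmetric positive-definite
// system Ax = b. Here A is a dense row-major matrix for brevity; the
// real primitive works with a matrix-free operator on device.
std::vector<double> cg_solve(const std::vector<double>& A, // n x n, row-major
                             const std::vector<double>& b,
                             std::size_t n,
                             double tol = 1e-8,
                             std::size_t max_iter = 100) {
    auto matvec = [&](const std::vector<double>& v) {
        std::vector<double> out(n, 0.0);
        for (std::size_t i = 0; i < n; ++i)
            for (std::size_t j = 0; j < n; ++j)
                out[i] += A[i * n + j] * v[j];
        return out;
    };
    auto dot = [&](const std::vector<double>& u, const std::vector<double>& v) {
        double s = 0.0;
        for (std::size_t i = 0; i < n; ++i)
            s += u[i] * v[i];
        return s;
    };

    std::vector<double> x(n, 0.0); // initial guess x0 = 0
    std::vector<double> r = b;     // residual r0 = b - A x0 = b
    std::vector<double> p = r;     // initial search direction
    double rr = dot(r, r);

    // The iteration count is capped so the loop always terminates, and
    // convergence is checked with the l2-norm of the residual.
    for (std::size_t it = 0; it < max_iter && std::sqrt(rr) > tol; ++it) {
        const std::vector<double> Ap = matvec(p);
        const double alpha = rr / dot(p, Ap);
        for (std::size_t i = 0; i < n; ++i) {
            x[i] += alpha * p[i];
            r[i] -= alpha * Ap[i];
        }
        const double rr_new = dot(r, r);
        const double beta = rr_new / rr;
        for (std::size_t i = 0; i < n; ++i)
            p[i] = r[i] + beta * p[i];
        rr = rr_new;
    }
    return x;
}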
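And for the backtracking bullet: at each outer iteration Newton-CG solves H d = -g approximately with a CG routine like the one above, then steps x <- x + alpha * d with alpha chosen by a backtracking (Armijo) search. A small illustrative sketch, again not the oneDAL implementation; the constants c1 and rho are conventional defaults assumed here.

#include <cstddef>
#include <functional>
#include <vector>

// Backtracking (Armijo) line search: shrink alpha until the step gives a
// sufficient decrease f(x + alpha * d) <= f(x) + c1 * alpha * <grad, d>.
double backtracking_alpha(const std::function<double(const std::vector<double>&)>& f,
                          const std::vector<double>& x,
                          const std::vector<double>& grad,
                          const std::vector<double>& dir,
                          double alpha = 1.0,
                          double c1 = 1e-4,
                          double rho = 0.5,
                          std::size_t max_iter = 50) {
    const std::size_t n = x.size();
    double slope = 0.0; // directional derivative <grad, dir>, expected negative
    for (std::size_t i = 0; i < n; ++i)
        slope += grad[i] * dir[i];
    const double fx = f(x);

    std::vector<double> trial(n);
    // Cap the number of shrink steps so the loop always terminates.
    for (std::size_t it = 0; it < max_iter; ++it) {
        for (std::size_t i = 0; i < n; ++i)
            trial[i] = x[i] + alpha * dir[i];
        if (f(trial) <= fx + c1 * alpha * slope)
            break; // sufficient decrease reached
        alpha *= rho;
    }
    return alpha;
}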
avolkov-intel authored Oct 4, 2023
1 parent d42294c commit 64bb0fb
Showing 10 changed files with 601 additions and 491 deletions.
@@ -118,28 +118,25 @@ void add_regularization(sycl::queue& q_,
 template <typename Float>
 sycl::event value_and_gradient_iter(sycl::queue& q_,
                                     std::int64_t p,
-                                    const pr::ndarray<Float, 1>& params_nd,
-                                    const pr::ndarray<Float, 2>& data_nd,
-                                    const pr::ndarray<std::int32_t, 1>& responses_nd,
-                                    const pr::ndarray<Float, 1>& probabilities,
-                                    pr::ndarray<Float, 1>& out,
-                                    pr::ndarray<Float, 1>& ans,
+                                    const pr::ndview<Float, 2>& data_nd,
+                                    const pr::ndview<std::int32_t, 1>& responses_nd,
+                                    const pr::ndview<Float, 1>& probabilities,
+                                    pr::ndview<Float, 1>& out,
+                                    pr::ndview<Float, 1>& ans,
                                     bool fit_intercept,
                                     sycl::event& prev_iter) {
     auto fill_event = fill(q_, out, Float(0), {});
 
-    auto out_loss = out.slice(0, 1);
-    auto out_gradient = out.slice(1, p + 1);
+    auto out_loss = out.get_slice(0, 1);
+    auto out_gradient = out.get_slice(1, p + 2);
+    auto out_gradient_suf = fit_intercept ? out_gradient : out_gradient.get_slice(1, p + 1);
 
     auto loss_event = compute_logloss_with_der(q_,
-                                               params_nd,
                                                data_nd,
                                                responses_nd,
                                                probabilities,
                                                out_loss,
-                                               out_gradient,
-                                               Float(0),
-                                               Float(0),
+                                               out_gradient_suf,
                                                fit_intercept,
                                                { fill_event });
 
@@ -156,26 +153,15 @@ sycl::event value_and_gradient_iter(sycl::queue& q_,
 
 template <typename Float>
 sycl::event value_iter(sycl::queue& q_,
-                       std::int64_t p,
-                       const pr::ndarray<Float, 1>& params_nd,
-                       const pr::ndarray<Float, 2>& data_nd,
-                       const pr::ndarray<std::int32_t, 1>& responses_nd,
-                       const pr::ndarray<Float, 1>& probabilities,
-                       pr::ndarray<Float, 1>& out_loss,
-                       pr::ndarray<Float, 1>& ans_loss,
+                       const pr::ndview<std::int32_t, 1>& responses_nd,
+                       const pr::ndview<Float, 1>& probabilities,
+                       pr::ndview<Float, 1>& out_loss,
+                       pr::ndview<Float, 1>& ans_loss,
                        bool fit_intercept,
                        sycl::event& prev_iter) {
     auto fill_event = fill(q_, out_loss, Float(0), {});
-    auto loss_event = compute_logloss(q_,
-                                      params_nd,
-                                      data_nd,
-                                      responses_nd,
-                                      probabilities,
-                                      out_loss,
-                                      Float(0),
-                                      Float(0),
-                                      fit_intercept,
-                                      { fill_event });
+    auto loss_event =
+        compute_logloss(q_, responses_nd, probabilities, out_loss, fit_intercept, { fill_event });
     const auto* const out_ptr = out_loss.get_data();
     auto* const ans_loss_ptr = ans_loss.get_mutable_data();
     return q_.submit([&](sycl::handler& cgh) {
@@ -189,7 +175,6 @@ sycl::event value_iter(sycl::queue& q_,
 template <typename Float>
 sycl::event gradient_iter(sycl::queue& q_,
                           std::int64_t p,
-                          const pr::ndarray<Float, 1>& params_nd,
                           const pr::ndarray<Float, 2>& data_nd,
                           const pr::ndarray<std::int32_t, 1>& responses_nd,
                           const pr::ndarray<Float, 1>& probabilities,
@@ -198,14 +183,12 @@ sycl::event gradient_iter(sycl::queue& q_,
                           bool fit_intercept,
                           sycl::event& prev_iter) {
     auto fill_event = fill(q_, out_gradient, Float(0), {});
+    auto out_grad_suf = fit_intercept ? out_gradient : out_gradient.get_slice(1, p + 1);
     auto grad_event = compute_derivative(q_,
-                                         params_nd,
                                          data_nd,
                                          responses_nd,
                                          probabilities,
-                                         out_gradient,
-                                         Float(0),
-                                         Float(0),
+                                         out_grad_suf,
                                          fit_intercept,
                                          { fill_event });
     grad_event.wait_and_throw();
@@ -225,7 +208,6 @@ sycl::event gradient_iter(sycl::queue& q_,
 template <typename Float>
 sycl::event hessian_iter(sycl::queue& q_,
                          std::int64_t p,
-                         const pr::ndarray<Float, 1>& params_nd,
                          const pr::ndarray<Float, 2>& data_nd,
                          const pr::ndarray<std::int32_t, 1>& responses_nd,
                          const pr::ndarray<Float, 1>& probabilities,
@@ -235,7 +217,6 @@ sycl::event hessian_iter(sycl::queue& q_,
                          sycl::event& prev_iter) {
     auto fill_event = fill(q_, out_hessian, Float(0), {});
     auto hess_event = compute_hessian(q_,
-                                      params_nd,
                                       data_nd,
                                       responses_nd,
                                       probabilities,
@@ -282,6 +263,7 @@ result_t compute_kernel_dense_batch_impl<Float>::operator()(
     const bk::uniform_blocking blocking(n, bsz);
 
     const auto params_nd = pr::table2ndarray_1d<Float>(q_, params, alloc::device);
+    const auto params_nd_suf = fit_intercept ? params_nd : params_nd.slice(1, p);
     const auto* const params_ptr = params_nd.get_data();
 
     const auto responses_nd_big = pr::table2ndarray_1d<std::int32_t>(q_, responses, alloc::device);
@@ -326,14 +308,13 @@ result_t compute_kernel_dense_batch_impl<Float>::operator()(
         const auto responses_nd = responses_nd_big.slice(first, cursize);
 
         sycl::event prob_e =
-            compute_probabilities(q_, params_nd, data_nd, probabilities, fit_intercept, {});
+            compute_probabilities(q_, params_nd_suf, data_nd, probabilities, fit_intercept, {});
         prob_e.wait_and_throw();
 
         if (desc.get_result_options().test(result_options::value) &&
             desc.get_result_options().test(result_options::gradient)) {
             prev_logloss_e = value_and_gradient_iter(q_,
                                                      p,
-                                                     params_nd,
                                                      data_nd,
                                                      responses_nd,
                                                      probabilities,
@@ -345,9 +326,6 @@ result_t compute_kernel_dense_batch_impl<Float>::operator()(
         else {
             if (desc.get_result_options().test(result_options::value)) {
                 prev_logloss_e = value_iter(q_,
-                                            p,
-                                            params_nd,
-                                            data_nd,
                                             responses_nd,
                                             probabilities,
                                             out_loss,
@@ -358,7 +336,6 @@ result_t compute_kernel_dense_batch_impl<Float>::operator()(
             if (desc.get_result_options().test(result_options::gradient)) {
                 prev_grad_e = gradient_iter(q_,
                                             p,
-                                            params_nd,
                                             data_nd,
                                             responses_nd,
                                             probabilities,
@@ -371,7 +348,6 @@ result_t compute_kernel_dense_batch_impl<Float>::operator()(
             if (desc.get_result_options().test(result_options::hessian)) {
                 prev_hess_e = hessian_iter(q_,
                                            p,
-                                           params_nd,
                                            data_nd,
                                            responses_nd,
                                            probabilities,
1 change: 1 addition & 0 deletions cpp/oneapi/dal/backend/primitives/objective_function/BUILD
@@ -10,6 +10,7 @@ dal_module(
     dal_deps = [
         "@onedal//cpp/oneapi/dal/backend/primitives:common",
         "@onedal//cpp/oneapi/dal/backend/primitives:blas",
+        "@onedal//cpp/oneapi/dal/backend/primitives/optimizers",
     ],
 )
 
114 changes: 78 additions & 36 deletions cpp/oneapi/dal/backend/primitives/objective_function/logloss.hpp
@@ -16,69 +16,78 @@
 
 #pragma once
 
+#include "oneapi/dal/backend/primitives/utils.hpp"
 #include "oneapi/dal/backend/primitives/ndarray.hpp"
+#include "oneapi/dal/backend/primitives/optimizers/common.hpp"
+#include "oneapi/dal/table/common.hpp"
 
 namespace oneapi::dal::backend::primitives {
 
 template <typename Float>
 sycl::event compute_probabilities(sycl::queue& q,
                                   const ndview<Float, 1>& parameters,
                                   const ndview<Float, 2>& data,
-                                  ndview<Float, 1>& predictions,
+                                  ndview<Float, 1>& probabilities,
                                   bool fit_intercept = true,
                                   const event_vector& deps = {});
 
 template <typename Float>
 sycl::event compute_logloss(sycl::queue& q,
-                            const ndview<Float, 1>& parameters,
-                            const ndview<Float, 2>& data,
                             const ndview<std::int32_t, 1>& labels,
-                            ndview<Float, 1>& out,
-                            Float L1 = Float(0),
-                            Float L2 = Float(0),
-                            bool fit_intercept = true,
-                            const event_vector& deps = {});
-
-template <typename Float>
-sycl::event compute_logloss(sycl::queue& q,
-                            const ndview<Float, 1>& parameters,
-                            const ndview<Float, 2>& data,
-                            const ndview<std::int32_t, 1>& labels,
                             const ndview<Float, 1>& probabilities,
                             ndview<Float, 1>& out,
-                            Float L1 = Float(0),
-                            Float L2 = Float(0),
                             bool fit_intercept = true,
                             const event_vector& deps = {});
 
 template <typename Float>
 sycl::event compute_logloss_with_der(sycl::queue& q,
-                                     const ndview<Float, 1>& parameters,
                                      const ndview<Float, 2>& data,
                                      const ndview<std::int32_t, 1>& labels,
                                      const ndview<Float, 1>& probabilities,
                                      ndview<Float, 1>& out,
                                      ndview<Float, 1>& out_derivative,
-                                     Float L1 = Float(0),
-                                     Float L2 = Float(0),
                                      bool fit_intercept = true,
                                      const event_vector& deps = {});
 
 template <typename Float>
 sycl::event compute_derivative(sycl::queue& q,
-                               const ndview<Float, 1>& parameters,
                                const ndview<Float, 2>& data,
                                const ndview<std::int32_t, 1>& labels,
                                const ndview<Float, 1>& probabilities,
                                ndview<Float, 1>& out_derivative,
-                               Float L1 = Float(0),
-                               Float L2 = Float(0),
                                bool fit_intercept = true,
                                const event_vector& deps = {});
 
+template <typename Float>
+sycl::event add_regularization_loss(sycl::queue& q,
+                                    const ndview<Float, 1>& parameters,
+                                    ndview<Float, 1>& out,
+                                    Float L1 = Float(0),
+                                    Float L2 = Float(0),
+                                    bool fit_intercept = true,
+                                    const event_vector& deps = {});
+
+template <typename Float>
+sycl::event add_regularization_gradient_loss(sycl::queue& q,
+                                             const ndview<Float, 1>& parameters,
+                                             ndview<Float, 1>& out,
+                                             ndview<Float, 1>& out_derivative,
+                                             Float L1 = Float(0),
+                                             Float L2 = Float(0),
+                                             bool fit_intercept = true,
+                                             const event_vector& deps = {});
+
+template <typename Float>
+sycl::event add_regularization_gradient(sycl::queue& q,
+                                        const ndview<Float, 1>& parameters,
+                                        ndview<Float, 1>& out_derivative,
+                                        Float L1 = Float(0),
+                                        Float L2 = Float(0),
+                                        bool fit_intercept = true,
+                                        const event_vector& deps = {});
+
 template <typename Float>
 sycl::event compute_hessian(sycl::queue& q,
-                            const ndview<Float, 1>& parameters,
                             const ndview<Float, 2>& data,
                             const ndview<std::int32_t, 1>& labels,
                             const ndview<Float, 1>& probabilities,
@@ -95,19 +104,17 @@ sycl::event compute_raw_hessian(sycl::queue& q,
                                 const event_vector& deps = {});
 
 template <typename Float>
-class logloss_hessian_product {
+class LogLossHessianProduct : public BaseMatrixOperator<Float> {
 public:
-    logloss_hessian_product(sycl::queue& q,
-                            const ndview<Float, 2>& data,
-                            const Float L2 = Float(0),
-                            const bool fit_intercept = true);
-
-    sycl::event set_raw_hessian(const ndview<Float, 1>& raw_hessian, const event_vector& deps = {});
-
-    ndview<Float, 1>& get_raw_hessian();
+    LogLossHessianProduct(sycl::queue& q,
+                          const table& data,
+                          Float L2 = Float(0),
+                          bool fit_intercept = true,
+                          std::int64_t bsz = -1);
     sycl::event operator()(const ndview<Float, 1>& vec,
                            ndview<Float, 1>& out,
-                           const event_vector& deps = {});
+                           const event_vector& deps) final;
+    ndview<Float, 1>& get_raw_hessian();
 
 private:
     sycl::event compute_with_fit_intercept(const ndview<Float, 1>& vec,
@@ -118,13 +125,48 @@ class logloss_hessian_product {
                                            const event_vector& deps);
 
     sycl::queue q_;
+    const table data_;
+    Float L2_;
+    bool fit_intercept_;
     ndarray<Float, 1> raw_hessian_;
-    const ndview<Float, 2> data_;
-    const Float L2_;
-    const bool fit_intercept_;
+    ndarray<Float, 1> buffer_;
     const std::int64_t n_;
     const std::int64_t p_;
+    const std::int64_t bsz_;
 };
 
+template <typename Float>
+class LogLossFunction : public BaseFunction<Float> {
+public:
+    LogLossFunction(sycl::queue queue,
+                    const table& data,
+                    ndview<std::int32_t, 1>& labels,
+                    Float L2 = 0.0,
+                    bool fit_intercept = true,
+                    std::int64_t bsz = -1);
+    Float get_value() final;
+    ndview<Float, 1>& get_gradient() final;
+    BaseMatrixOperator<Float>& get_hessian_product() final;
+
+    event_vector update_x(const ndview<Float, 1>& x,
+                          bool need_hessp = false,
+                          const event_vector& deps = {}) final;
+
+private:
+    sycl::queue q_;
+    const table data_;
+    ndview<std::int32_t, 1> labels_;
+    const std::int64_t n_;
+    const std::int64_t p_;
+    Float L2_;
+    bool fit_intercept_;
+    const std::int64_t bsz_;
+    ndarray<Float, 1> probabilities_;
+    ndarray<Float, 1> gradient_;
+    ndarray<Float, 1> buffer_;
+    LogLossHessianProduct<Float> hessp_;
+    const std::int64_t dimension_;
+    Float value_;
+};

} // namespace oneapi::dal::backend::primitives
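
For reference, the quantities that LogLossFunction and LogLossHessianProduct declare above can be written out in a few lines of plain C++. The sketch below covers only the fit_intercept = false case and skips the batching over bsz rows and all SYCL kernels; the regularization convention assumed here (penalty L2 * ||w||^2, so its gradient is 2 * L2 * w) is an illustration choice, not taken from the diff.

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Plain C++ reference for the math behind LogLossFunction, with
// fit_intercept = false. X is n x p row-major, y holds 0/1 labels,
// w has p entries. Not the SYCL implementation, just the quantities.
struct LogLossRef {
    std::size_t n, p;
    std::vector<double> X;
    std::vector<std::int32_t> y;
    double L2;

    // probabilities_i = sigmoid(<x_i, w>)
    std::vector<double> probabilities(const std::vector<double>& w) const {
        std::vector<double> prob(n);
        for (std::size_t i = 0; i < n; ++i) {
            double z = 0.0;
            for (std::size_t j = 0; j < p; ++j)
                z += X[i * p + j] * w[j];
            prob[i] = 1.0 / (1.0 + std::exp(-z));
        }
        return prob;
    }

    // value = -sum_i [y_i log p_i + (1 - y_i) log(1 - p_i)] + L2 * ||w||^2
    double value(const std::vector<double>& w) const {
        const auto prob = probabilities(w);
        double loss = 0.0;
        for (std::size_t i = 0; i < n; ++i)
            loss -= y[i] ? std::log(prob[i]) : std::log(1.0 - prob[i]);
        double reg = 0.0;
        for (std::size_t j = 0; j < p; ++j)
            reg += w[j] * w[j];
        return loss + L2 * reg;
    }

    // gradient = X^T (p - y) + 2 * L2 * w
    std::vector<double> gradient(const std::vector<double>& w) const {
        const auto prob = probabilities(w);
        std::vector<double> g(p, 0.0);
        for (std::size_t i = 0; i < n; ++i)
            for (std::size_t j = 0; j < p; ++j)
                g[j] += (prob[i] - y[i]) * X[i * p + j];
        for (std::size_t j = 0; j < p; ++j)
            g[j] += 2.0 * L2 * w[j];
        return g;
    }

    // Hessian-vector product H v = X^T diag(p (1 - p)) X v + 2 * L2 * v.
    // The diagonal p_i (1 - p_i) is the "raw hessian" the class caches,
    // so H v never requires materializing the p x p Hessian; this is what
    // makes the matrix-free Newton-CG solver practical.
    std::vector<double> hessp(const std::vector<double>& w,
                              const std::vector<double>& v) const {
        const auto prob = probabilities(w);
        std::vector<double> hv(p, 0.0);
        for (std::size_t i = 0; i < n; ++i) {
            double xv = 0.0;
            for (std::size_t j = 0; j < p; ++j)
                xv += X[i * p + j] * v[j];
            const double d = prob[i] * (1.0 - prob[i]) * xv;
            for (std::size_t j = 0; j < p; ++j)
                hv[j] += d * X[i * p + j];
        }
        for (std::size_t j = 0; j < p; ++j)
            hv[j] += 2.0 * L2 * v[j];
        return hv;
    }
};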
