Skip to content

Commit

Permalink
Introduce dispatching in oneAPI covariance algorithm (#2527)
Browse files Browse the repository at this point in the history
Changes proposed in this pull request:

- Added compute_parameters class in the covariance algorithm. This class encapsulates the dispatching functionality on CPU and GPU.
- Added test for dispatching functionality in covariance algorithm.

Only batch algorithm was modified for now. Online part of the covariance algorithm will be modified further in a separate PR.
  • Loading branch information
Vika-F authored Oct 17, 2023
1 parent f92e8f1 commit 85ad140
Show file tree
Hide file tree
Showing 28 changed files with 610 additions and 49 deletions.
2 changes: 1 addition & 1 deletion cpp/daal/include/algorithms/algorithm_base_mode.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class Algorithm<batch> : public AlgorithmIfaceImpl
{
public:
/** Default constructor */
Algorithm() : _ac(0), _par(0), _in(0), _res(0), _hpar(0) {}
Algorithm() : _ac(0), _hpar(0), _par(0), _in(0), _res(0) {}

virtual ~Algorithm()
{
Expand Down
2 changes: 1 addition & 1 deletion cpp/oneapi/dal/algo/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ load("@onedal//dev/bazel:dal.bzl",
)

PARAMETRIZED_ALGOS = [
"covariance",
"linear_regression",
]

Expand All @@ -14,7 +15,6 @@ ALGOS = [
"chebyshev_distance",
"connected_components",
"cosine_distance",
"covariance",
"dbscan",
"decision_forest",
"decision_tree",
Expand Down
24 changes: 17 additions & 7 deletions cpp/oneapi/dal/algo/covariance/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,27 @@ load("@onedal//dev/bazel:dal.bzl",
)

dal_module(
name = "covariance",
name = "core",
auto = True,
dal_deps = [
"@onedal//cpp/oneapi/dal:core",
"@onedal//cpp/oneapi/dal/backend/primitives:common",
"@onedal//cpp/oneapi/dal/backend/primitives:lapack",
"@onedal//cpp/oneapi/dal/backend/primitives:stat",
"@onedal//cpp/oneapi/dal/backend/primitives:reduction",
],
extra_deps = [
"@onedal//cpp/daal/src/algorithms/covariance:kernel",
)

# ":parameters" target: its only dependency is the covariance/parameters
# package, so depending on this label pulls that package in.
dal_module(
name = "parameters",
dal_deps = [
"@onedal//cpp/oneapi/dal/algo/covariance/parameters",
],
)

# Umbrella ":covariance" target: has no sources of its own listed here and
# simply aggregates the local ":core" and ":parameters" targets together with
# the covariance/detail and covariance/backend packages.
dal_module(
name = "covariance",
dal_deps = [
":core",
":parameters",
"@onedal//cpp/oneapi/dal/algo/covariance/detail",
"@onedal//cpp/oneapi/dal/algo/covariance/backend",
]
)

Expand Down
13 changes: 13 additions & 0 deletions cpp/oneapi/dal/algo/covariance/backend/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package(default_visibility = ["//visibility:public"])
load("@onedal//dev/bazel:dal.bzl",
"dal_module",
"dal_test_suite",
)

# ":backend" target: aggregates the CPU and GPU backend packages of the
# covariance algorithm behind a single label.
dal_module(
name = "backend",
dal_deps = [
"@onedal//cpp/oneapi/dal/algo/covariance/backend/cpu",
"@onedal//cpp/oneapi/dal/algo/covariance/backend/gpu",
],
)
18 changes: 18 additions & 0 deletions cpp/oneapi/dal/algo/covariance/backend/cpu/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package(default_visibility = ["//visibility:public"])
load("@onedal//dev/bazel:dal.bzl",
"dal_module",
"dal_test_suite",
)

# CPU backend of the covariance algorithm.
# dal_deps: the covariance ":core" target and the common backend primitives.
# extra_deps: DAAL core and the DAAL covariance kernel.
# NOTE(review): `auto = True` presumably lets dal_module collect the sources
# of this directory automatically - confirm against dev/bazel/dal.bzl.
dal_module(
name = "cpu",
auto = True,
dal_deps = [
"@onedal//cpp/oneapi/dal/algo/covariance:core",
"@onedal//cpp/oneapi/dal/backend/primitives:common",
],
extra_deps = [
"@onedal//cpp/daal:core",
"@onedal//cpp/daal/src/algorithms/covariance:kernel",
],
)
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ namespace oneapi::dal::covariance::backend {
template <typename Float, typename Method, typename Task>
struct compute_kernel_cpu {
compute_result<Task> operator()(const dal::backend::context_cpu& ctx,
const detail::descriptor_base<Task>& params,
const detail::descriptor_base<Task>& desc,
const detail::compute_parameters<Task>& params,
const compute_input<Task>& input) const;
};

Expand Down
46 changes: 27 additions & 19 deletions cpp/oneapi/dal/algo/covariance/backend/cpu/compute_kernel_dense.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,34 @@ namespace oneapi::dal::covariance::backend {

using dal::backend::context_cpu;
using descriptor_t = detail::descriptor_base<task::compute>;
using parameters_t = detail::compute_parameters<task::compute>;

namespace daal_covariance = daal::algorithms::covariance;
namespace interop = dal::backend::interop;

using daal_hyperparameters_t = daal_covariance::internal::Hyperparameter;

template <typename Float, daal::CpuType Cpu>
using daal_covariance_kernel_t = daal_covariance::internal::
CovarianceDenseBatchKernel<Float, daal_covariance::Method::defaultDense, Cpu>;

/// Builds the DAAL hyperparameter object for the covariance CPU kernel from
/// the oneDAL compute parameters.
///
/// The CPU macro block value read from @p params is stored under the
/// `denseUpdateStepBlockSize` hyperparameter id. The status returned by the
/// DAAL setter is handed to `interop::status_to_exception`.
///
/// @tparam Float Floating-point type of the computation (not used in the body).
/// @tparam Task  The variant of the computations.
/// @param params oneDAL compute parameters to convert.
/// @return The populated DAAL hyperparameter object.
template <typename Float, typename Task>
static daal_hyperparameters_t convert_parameters(const detail::compute_parameters<Task>& params) {
    const std::int64_t macro_block_rows = params.get_cpu_macro_block();

    daal_hyperparameters_t converted;
    interop::status_to_exception(
        converted.set(daal_covariance::internal::HyperparameterId::denseUpdateStepBlockSize,
                      macro_block_rows));

    return converted;
}

template <typename Float, typename Task>
static compute_result<Task> call_daal_kernel(const context_cpu& ctx,
const descriptor_t& desc,
const detail::descriptor_base<Task>& desc,
const detail::compute_parameters<Task>& params,
const table& data) {
bool is_mean_computed = false;

Expand All @@ -46,18 +63,7 @@ static compute_result<Task> call_daal_kernel(const context_cpu& ctx,
daal_covariance::Parameter daal_parameter;
daal_parameter.outputMatrixType = daal_covariance::covarianceMatrix;

daal_covariance::internal::Hyperparameter daal_hyperparameter;
/// the logic of block size calculation is copied from DAAL,
/// to be changed to passing the values from the performance model
std::int64_t blockSize = 140;
if (ctx.get_enabled_cpu_extensions() == dal::detail::cpu_extension::avx512) {
const std::int64_t row_count = data.get_row_count();
if (5000 < row_count && row_count <= 50000) {
blockSize = 1024;
}
}
interop::status_to_exception(
daal_hyperparameter.set(daal_covariance::internal::denseUpdateStepBlockSize, blockSize));
const daal_hyperparameters_t& hp = convert_parameters<Float, Task>(params);

dal::detail::check_mul_overflow(component_count, component_count);

Expand All @@ -79,7 +85,7 @@ static compute_result<Task> call_daal_kernel(const context_cpu& ctx,
daal_cov_matrix.get(),
daal_means.get(),
&daal_parameter,
&daal_hyperparameter));
&hp));
is_mean_computed = true;
result.set_cov_matrix(
homogen_table::wrap(arr_cov_matrix, component_count, component_count));
Expand All @@ -98,7 +104,7 @@ static compute_result<Task> call_daal_kernel(const context_cpu& ctx,
daal_cor_matrix.get(),
daal_means.get(),
&daal_parameter,
&daal_hyperparameter));
&hp));
is_mean_computed = true;
result.set_cor_matrix(
homogen_table::wrap(arr_cor_matrix, component_count, component_count));
Expand All @@ -115,7 +121,7 @@ static compute_result<Task> call_daal_kernel(const context_cpu& ctx,
daal_cov_matrix.get(),
daal_means.get(),
&daal_parameter,
&daal_hyperparameter));
&hp));
}
result.set_means(homogen_table::wrap(arr_means, 1, component_count));
}
Expand All @@ -124,17 +130,19 @@ static compute_result<Task> call_daal_kernel(const context_cpu& ctx,

template <typename Float, typename Task>
static compute_result<Task> compute(const context_cpu& ctx,
const descriptor_t& desc,
const detail::descriptor_base<Task>& desc,
const detail::compute_parameters<Task>& params,
const compute_input<Task>& input) {
return call_daal_kernel<Float, Task>(ctx, desc, input.get_data());
return call_daal_kernel<Float, Task>(ctx, desc, params, input.get_data());
}

template <typename Float>
struct compute_kernel_cpu<Float, method::by_default, task::compute> {
compute_result<task::compute> operator()(const context_cpu& ctx,
const descriptor_t& desc,
const parameters_t& params,
const compute_input<task::compute>& input) const {
return compute<Float, task::compute>(ctx, desc, input);
return compute<Float, task::compute>(ctx, desc, params, input);
}
};

Expand Down
20 changes: 20 additions & 0 deletions cpp/oneapi/dal/algo/covariance/backend/gpu/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package(default_visibility = ["//visibility:public"])
load("@onedal//dev/bazel:dal.bzl",
"dal_module",
"dal_test_suite",
)

# GPU backend of the covariance algorithm.
# dal_deps: backend primitives (common, lapack, stat, reduction) and the
# covariance ":core" target.
# extra_deps: the DAAL covariance kernel.
# NOTE(review): `auto = True` presumably lets dal_module collect the sources
# of this directory automatically - confirm against dev/bazel/dal.bzl.
dal_module(
name = "gpu",
auto = True,
dal_deps = [
"@onedal//cpp/oneapi/dal/backend/primitives:common",
"@onedal//cpp/oneapi/dal/backend/primitives:lapack",
"@onedal//cpp/oneapi/dal/backend/primitives:stat",
"@onedal//cpp/oneapi/dal/backend/primitives:reduction",
"@onedal//cpp/oneapi/dal/algo/covariance:core",
],
extra_deps = [
"@onedal//cpp/daal/src/algorithms/covariance:kernel",
]
)
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ namespace oneapi::dal::covariance::backend {
template <typename Float, typename Method, typename Task>
struct compute_kernel_gpu {
compute_result<Task> operator()(const dal::backend::context_gpu& ctx,
const detail::descriptor_base<Task>& params,
const detail::descriptor_base<Task>& desc,
const detail::compute_parameters<Task>& params,
const compute_input<Task>& input) const;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,23 @@ using task_t = task::compute;
using input_t = compute_input<task_t>;
using result_t = compute_result<task_t>;
using descriptor_t = detail::descriptor_base<task_t>;
using parameters_t = detail::compute_parameters<task_t>;

template <typename Float>
static result_t compute(const bk::context_gpu& ctx,
const descriptor_t& desc,
const parameters_t& params,
const input_t& input) {
return compute_kernel_dense_impl<Float>(ctx)(desc, input);
return compute_kernel_dense_impl<Float>(ctx)(desc, params, input);
}

template <typename Float>
struct compute_kernel_gpu<Float, method_t, task_t> {
result_t operator()(const bk::context_gpu& ctx,
const descriptor_t& desc,
const parameters_t& params,
const input_t& input) const {
return compute<Float>(ctx, desc, input);
return compute<Float>(ctx, desc, params, input);
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,13 @@ class compute_kernel_dense_impl {
using input_t = compute_input<task_t>;
using result_t = compute_result<task_t>;
using descriptor_t = detail::descriptor_base<task_t>;
using parameters_t = detail::compute_parameters<task_t>;

public:
compute_kernel_dense_impl(const bk::context_gpu& ctx)
: q_(ctx.get_queue()),
comm_(ctx.get_communicator()) {}
result_t operator()(const descriptor_t& desc, const input_t& input);
result_t operator()(const descriptor_t& desc, const parameters_t& params, const input_t& input);

private:
sycl::queue q_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ using task_t = task::compute;
using input_t = compute_input<task_t>;
using result_t = compute_result<task_t>;
using descriptor_t = detail::descriptor_base<task_t>;
using parameters_t = detail::compute_parameters<task_t>;

template <typename Float>
auto compute_sums(sycl::queue& q,
Expand Down Expand Up @@ -118,6 +119,7 @@ auto compute_correlation(sycl::queue& q,

template <typename Float>
result_t compute_kernel_dense_impl<Float>::operator()(const descriptor_t& desc,
const parameters_t& params,
const input_t& input) {
ONEDAL_ASSERT(input.get_data().has_data());

Expand Down
41 changes: 38 additions & 3 deletions cpp/oneapi/dal/algo/covariance/compute_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,18 @@

namespace oneapi::dal::covariance {

namespace detail::v1 {

template <typename Task>
class detail::v1::compute_input_impl : public base {
class compute_input_impl : public base {
public:
compute_input_impl() : data(table()){};
compute_input_impl(const table& data) : data(data){};
table data;
};

template <typename Task>
class detail::v1::compute_result_impl : public base {
class compute_result_impl : public base {
public:
table cov_matrix;
table cor_matrix;
Expand All @@ -38,16 +40,49 @@ class detail::v1::compute_result_impl : public base {
};

template <typename Task>
class detail::v1::partial_compute_result_impl : public base {
class partial_compute_result_impl : public base {
public:
table nobs;
table crossproduct;
table sums;
};

/// Storage for every hyperparameter of the covariance algorithm.
///
/// @tparam Task The variant of the computations.
///              Covariance algorithm supports only :expr:`compute`.
template <typename Task>
struct compute_parameters_impl : public base {
    /// Number of rows in one block on CPU. The input data set is split into
    /// blocks of rows to compute the variance-covariance matrix; 140 is the
    /// default block height.
    std::int64_t cpu_macro_block{ 140 };
};

/// Default constructor: allocates a fresh `compute_parameters_impl<Task>` and
/// stores it in `impl_`, so every hyperparameter starts at the default value
/// declared in that structure.
template <typename Task>
compute_parameters<Task>::compute_parameters() : impl_(new compute_parameters_impl<Task>{}) {}

/// Returns the number of rows in the data block used in variance-covariance
/// matrix computations on CPU, as currently stored in the implementation object.
///
/// @tparam Task The variant of the computations.
///              Covariance algorithm supports only :expr:`compute`.
template <typename Task>
std::int64_t compute_parameters<Task>::get_cpu_macro_block() const {
return impl_->cpu_macro_block;
}

/// Stores @p val as the CPU macro block size (number of rows per data block)
/// in the implementation object. The value is written as-is; this function
/// performs no validation of @p val.
///
/// @tparam Task The variant of the computations.
/// @param val   New number of rows in the CPU data block.
template <typename Task>
void compute_parameters<Task>::set_cpu_macro_block_impl(std::int64_t val) {
impl_->cpu_macro_block = val;
}

template class ONEDAL_EXPORT compute_parameters<task::compute>;

} // namespace detail::v1

using detail::v1::compute_input_impl;
using detail::v1::compute_result_impl;
using detail::v1::partial_compute_result_impl;
using detail::v1::compute_parameters;

namespace v1 {

Expand Down
Loading

0 comments on commit 85ad140

Please sign in to comment.