Skip to content

Commit

Permalink
feature: online pca algorithm (#2550)
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexandr-Solovev authored Oct 31, 2023
1 parent ce14434 commit 7a304ec
Show file tree
Hide file tree
Showing 42 changed files with 2,388 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ template class OnlineContainer<DAAL_FPTYPE, correlationDense, DAAL_CPU>;
}
namespace internal
{
template class PCACorrelationKernel<online, DAAL_FPTYPE, DAAL_CPU>;
template class DAAL_EXPORT PCACorrelationKernel<online, DAAL_FPTYPE, DAAL_CPU>;
}
} // namespace pca
} // namespace algorithms
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ class PCACorrelationKernel<online, algorithmFPType, cpu> : public PCACorrelation
public:
explicit PCACorrelationKernel() {};

using PCACorrelationBase<algorithmFPType, cpu>::computeCorrelationEigenvalues;

services::Status compute(const data_management::NumericTablePtr & pData, PartialResult<correlationDense> * partialResult,
const OnlineParameter<algorithmFPType, correlationDense> * parameter);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ template class OnlineContainer<DAAL_FPTYPE, svdDense, DAAL_CPU>;
}
namespace internal
{
template class PCASVDOnlineKernel<DAAL_FPTYPE, DAAL_CPU>;
template class DAAL_EXPORT PCASVDOnlineKernel<DAAL_FPTYPE, DAAL_CPU>;
}
} // namespace pca
} // namespace algorithms
Expand Down
2 changes: 2 additions & 0 deletions cpp/oneapi/dal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
#include "oneapi/dal/train.hpp"
#include "oneapi/dal/partial_compute.hpp"
#include "oneapi/dal/finalize_compute.hpp"
#include "oneapi/dal/partial_train.hpp"
#include "oneapi/dal/finalize_train.hpp"

/* Tables */
#include "oneapi/dal/table/common.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,20 @@ static compute_result<Task> call_daal_kernel_finalize(const context_cpu& ctx,
const auto daal_nobs_matrix = interop::convert_to_daal_table<Float>(input.get_partial_n_rows());

auto result = compute_result<Task>{}.set_result_options(desc.get_result_options());

auto rows_count_global =
row_accessor<const Float>(input.get_partial_n_rows()).pull({ 0, -1 })[0];
daal_covariance::internal::Hyperparameter daal_hyperparameter;
/// the logic of block size calculation is copied from DAAL,
/// to be changed to passing the values from the performance model
std::int64_t blockSize = 140;
if (ctx.get_enabled_cpu_extensions() == dal::detail::cpu_extension::avx512) {
const std::int64_t row_count = rows_count_global;
if (5000 < row_count && row_count <= 50000) {
blockSize = 1024;
}
}
interop::status_to_exception(
daal_hyperparameter.set(daal_covariance::internal::denseUpdateStepBlockSize, blockSize));
if (desc.get_result_options().test(result_options::cov_matrix)) {
daal_parameter.outputMatrixType = daal_covariance::covarianceMatrix;
auto arr_cov_matrix = array<Float>::empty(component_count * component_count);
Expand All @@ -72,7 +85,8 @@ static compute_result<Task> call_daal_kernel_finalize(const context_cpu& ctx,
daal_sums.get(),
daal_cov_matrix.get(),
daal_means.get(),
&daal_parameter));
&daal_parameter,
&daal_hyperparameter));

result.set_cov_matrix(
homogen_table::wrap(arr_cov_matrix, component_count, component_count));
Expand All @@ -93,7 +107,8 @@ static compute_result<Task> call_daal_kernel_finalize(const context_cpu& ctx,
daal_sums.get(),
daal_cor_matrix.get(),
daal_means.get(),
&daal_parameter));
&daal_parameter,
&daal_hyperparameter));
is_mean_computed = true;
result.set_cor_matrix(
homogen_table::wrap(arr_cor_matrix, component_count, component_count));
Expand All @@ -112,7 +127,8 @@ static compute_result<Task> call_daal_kernel_finalize(const context_cpu& ctx,
daal_sums.get(),
daal_cov_matrix.get(),
daal_means.get(),
&daal_parameter));
&daal_parameter,
&daal_hyperparameter));
}
result.set_means(homogen_table::wrap(arr_means, 1, component_count));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,18 @@ static partial_compute_result<Task> call_daal_kernel_partial_compute(

auto data = input.get_data();
const auto daal_data = interop::convert_to_daal_table<Float>(data);

daal_covariance::internal::Hyperparameter daal_hyperparameter;
/// the logic of block size calculation is copied from DAAL,
/// to be changed to passing the values from the performance model
std::int64_t blockSize = 140;
if (ctx.get_enabled_cpu_extensions() == dal::detail::cpu_extension::avx512) {
const std::int64_t row_count = data.get_row_count();
if (5000 < row_count && row_count <= 50000) {
blockSize = 1024;
}
}
interop::status_to_exception(
daal_hyperparameter.set(daal_covariance::internal::denseUpdateStepBlockSize, blockSize));
auto result = partial_compute_result();

const bool has_nobs_data = input_.get_partial_n_rows().has_data();
Expand All @@ -66,7 +77,8 @@ static partial_compute_result<Task> call_daal_kernel_partial_compute(
daal_nobs_matrix.get(),
daal_crossproduct.get(),
daal_sums.get(),
&daal_parameter));
&daal_parameter,
&daal_hyperparameter));
result.set_partial_sum(interop::convert_from_daal_homogen_table<Float>(daal_sums));
result.set_partial_n_rows(
interop::convert_from_daal_homogen_table<Float>(daal_nobs_matrix));
Expand All @@ -90,7 +102,8 @@ static partial_compute_result<Task> call_daal_kernel_partial_compute(
daal_nobs_matrix.get(),
daal_crossproduct.get(),
daal_sums.get(),
&daal_parameter));
&daal_parameter,
&daal_hyperparameter));

result.set_partial_sum(interop::convert_from_daal_homogen_table<Float>(daal_sums));
result.set_partial_n_rows(
Expand Down
5 changes: 3 additions & 2 deletions cpp/oneapi/dal/algo/covariance/test/fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,10 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
check_compute_result(data, compute_result);
}

void online_general_checks(const te::dataframe& input, const te::table_id& input_table_id) {
void online_general_checks(const te::dataframe& input,
const te::table_id& input_table_id,
const std::int64_t nBlocks) {
const table data = input.get_table(this->get_policy(), input_table_id);
const std::int64_t nBlocks = 10;
INFO("create descriptor cov cor means")
auto cov_desc =
covariance::descriptor<Float, Method, covariance::task::compute>().set_result_options(
Expand Down
12 changes: 6 additions & 6 deletions cpp/oneapi/dal/algo/covariance/test/online.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ TEMPLATE_LIST_TEST_M(covariance_online_test,
"[covariance][integration][online]",
covariance_types) {
SKIP_IF(this->not_float64_friendly());

const int64_t nBlocks = GENERATE(1, 3, 10);
const te::dataframe input =
GENERATE_DATAFRAME(te::dataframe_builder{ 100, 100 }.fill_normal(0, 1, 7777),
te::dataframe_builder{ 100, 100 }.fill_normal(0, 1, 7777),
Expand All @@ -40,15 +40,15 @@ TEMPLATE_LIST_TEST_M(covariance_online_test,

// Homogen floating point type is the same as algorithm's floating point type
const auto input_data_table_id = this->get_homogen_table_id();
this->online_general_checks(input, input_data_table_id);
this->online_general_checks(input, input_data_table_id, nBlocks);
}

TEMPLATE_LIST_TEST_M(covariance_online_test,
"covariance fill_uniform common flow",
"[covariance][integration][online]",
covariance_types) {
SKIP_IF(this->not_float64_friendly());

const int64_t nBlocks = GENERATE(1, 3, 10);
const te::dataframe input =
GENERATE_DATAFRAME(te::dataframe_builder{ 1000, 20 }.fill_uniform(-30, 30, 7777),
te::dataframe_builder{ 100, 10 }.fill_uniform(0, 1, 7777),
Expand All @@ -58,23 +58,23 @@ TEMPLATE_LIST_TEST_M(covariance_online_test,

// Homogen floating point type is the same as algorithm's floating point type
const auto input_data_table_id = this->get_homogen_table_id();
this->online_general_checks(input, input_data_table_id);
this->online_general_checks(input, input_data_table_id, nBlocks);
}

TEMPLATE_LIST_TEST_M(covariance_online_test,
"covariance fill_uniform nightly common flow",
"[covariance][integration][online][nightly]",
covariance_types) {
SKIP_IF(this->not_float64_friendly());

const int64_t nBlocks = GENERATE(1, 3, 10);
const te::dataframe input =
GENERATE_DATAFRAME(te::dataframe_builder{ 5000, 20 }.fill_uniform(-30, 30, 7777),
te::dataframe_builder{ 10000, 200 }.fill_uniform(-30, 30, 7777),
te::dataframe_builder{ 1000000, 20 }.fill_uniform(-0.5, 0.5, 7777));

// Homogen floating point type is the same as algorithm's floating point type
const auto input_data_table_id = this->get_homogen_table_id();
this->online_general_checks(input, input_data_table_id);
this->online_general_checks(input, input_data_table_id, nBlocks);
}

} // namespace oneapi::dal::covariance::test
2 changes: 2 additions & 0 deletions cpp/oneapi/dal/algo/pca.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@

#include "oneapi/dal/algo/pca/train.hpp"
#include "oneapi/dal/algo/pca/infer.hpp"
#include "oneapi/dal/algo/pca/partial_train.hpp"
#include "oneapi/dal/algo/pca/finalize_train.hpp"
31 changes: 31 additions & 0 deletions cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*******************************************************************************
* Copyright 2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#pragma once

#include "oneapi/dal/algo/pca/train_types.hpp"
#include "oneapi/dal/backend/dispatcher.hpp"

namespace oneapi::dal::pca::backend {

template <typename Float, typename Method, typename Task>
struct finalize_train_kernel_cpu {
train_result<Task> operator()(const dal::backend::context_cpu& ctx,
const detail::descriptor_base<Task>& params,
const partial_train_result<Task>& input) const;
};

} // namespace oneapi::dal::pca::backend
150 changes: 150 additions & 0 deletions cpp/oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel_cov.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*******************************************************************************
* Copyright 2023 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/

#include <daal/src/algorithms/pca/pca_dense_correlation_online_kernel.h>
#include <daal/src/algorithms/covariance/covariance_hyperparameter_impl.h>
#include "daal/src/algorithms/covariance/covariance_kernel.h"

#include "oneapi/dal/algo/pca/backend/common.hpp"
#include "oneapi/dal/algo/pca/backend/cpu/finalize_train_kernel.hpp"
#include "oneapi/dal/backend/interop/common.hpp"

#include "oneapi/dal/backend/interop/error_converter.hpp"
#include "oneapi/dal/backend/interop/table_conversion.hpp"
#include "oneapi/dal/table/row_accessor.hpp"

namespace oneapi::dal::pca::backend {

using dal::backend::context_cpu;
using descriptor_t = detail::descriptor_base<task::dim_reduction>;
using model_t = model<task::dim_reduction>;

namespace interop = dal::backend::interop;

namespace daal_pca = daal::algorithms::pca;
namespace daal_cov = daal::algorithms::covariance;
namespace interop = dal::backend::interop;

template <typename Float, daal::CpuType Cpu>
using daal_pca_cor_kernel_t = daal_pca::internal::PCACorrelationKernel<daal::online, Float, Cpu>;

template <typename Float, daal::CpuType Cpu>
using daal_cov_kernel_t =
daal_cov::internal::CovarianceDenseOnlineKernel<Float, daal_cov::Method::defaultDense, Cpu>;

template <typename Float, typename Task>
static train_result<Task> call_daal_kernel_finalize_train(const context_cpu& ctx,
const descriptor_t& desc,
const partial_train_result<Task>& input) {
const std::int64_t component_count =
get_component_count(desc, input.get_partial_crossproduct());
const std::int64_t column_count = input.get_partial_crossproduct().get_column_count();

auto result = train_result<task::dim_reduction>{}.set_result_options(desc.get_result_options());

auto arr_eigvec = array<Float>::empty(column_count * component_count);
auto arr_eigval = array<Float>::empty(1 * component_count);

const auto daal_eigenvectors =
interop::convert_to_daal_homogen_table(arr_eigvec, component_count, column_count);
const auto daal_eigenvalues =
interop::convert_to_daal_homogen_table(arr_eigval, 1, component_count);

auto rows_count_global =
row_accessor<const Float>(input.get_partial_n_rows()).pull({ 0, -1 })[0];
auto arr_means = array<Float>::empty(column_count);
const auto daal_means = interop::convert_to_daal_homogen_table(arr_means, 1, column_count);
daal_cov::internal::Hyperparameter daal_hyperparameter;
/// the logic of block size calculation is copied from DAAL,
/// to be changed to passing the values from the performance model
std::int64_t blockSize = 140;
if (ctx.get_enabled_cpu_extensions() == dal::detail::cpu_extension::avx512) {
const std::int64_t row_count = rows_count_global;
if (5000 < row_count && row_count <= 50000) {
blockSize = 1024;
}
}
interop::status_to_exception(
daal_hyperparameter.set(daal_cov::internal::denseUpdateStepBlockSize, blockSize));
auto daal_crossproduct =
interop::convert_to_daal_table<Float>(input.get_partial_crossproduct());
auto daal_sums = interop::convert_to_daal_table<Float>(input.get_partial_sum());
const auto daal_nobs = interop::convert_to_daal_table<Float>(input.get_partial_n_rows());

auto arr_cor_matrix = array<Float>::empty(column_count * column_count);
const auto daal_cor_matrix =
interop::convert_to_daal_homogen_table(arr_cor_matrix, column_count, column_count);
daal_cov::Parameter daal_parameter;
daal_parameter.outputMatrixType = daal_cov::correlationMatrix;

interop::status_to_exception(
interop::call_daal_kernel_finalize_compute<Float, daal_cov_kernel_t>(
ctx,
daal_nobs.get(),
daal_crossproduct.get(),
daal_sums.get(),
daal_cor_matrix.get(),
daal_means.get(),
&daal_parameter,
&daal_hyperparameter));

const auto data_to_compute = daal_cor_matrix;
{
const auto status = dal::backend::dispatch_by_cpu(ctx, [&](auto cpu) {
constexpr auto cpu_type = interop::to_daal_cpu_type<decltype(cpu)>::value;
return daal_pca_cor_kernel_t<Float, cpu_type>().computeCorrelationEigenvalues(
*data_to_compute,
*daal_eigenvectors,
*daal_eigenvalues);
});

interop::status_to_exception(status);
}

if (desc.get_result_options().test(result_options::eigenvectors)) {
const auto mdl = model_t{}.set_eigenvectors(
homogen_table::wrap(arr_eigvec, component_count, column_count));
result.set_model(mdl);
}

if (desc.get_result_options().test(result_options::eigenvalues)) {
result.set_eigenvalues(homogen_table::wrap(arr_eigval, 1, component_count));
}

return result;
}

template <typename Float, typename Task>
static train_result<Task> finalize_train(const context_cpu& ctx,
const descriptor_t& desc,
const partial_train_result<Task>& input) {
return call_daal_kernel_finalize_train<Float>(ctx, desc, input);
}

template <typename Float>
struct finalize_train_kernel_cpu<Float, method::cov, task::dim_reduction> {
train_result<task::dim_reduction> operator()(
const context_cpu& ctx,
const descriptor_t& desc,
const partial_train_result<task::dim_reduction>& input) const {
return finalize_train<Float, task::dim_reduction>(ctx, desc, input);
}
};

template struct finalize_train_kernel_cpu<float, method::cov, task::dim_reduction>;
template struct finalize_train_kernel_cpu<double, method::cov, task::dim_reduction>;

} // namespace oneapi::dal::pca::backend
Loading

0 comments on commit 7a304ec

Please sign in to comment.