diff --git a/cpp/oneapi/dal/algo/pca/backend/cpu/infer_kernel.cpp b/cpp/oneapi/dal/algo/pca/backend/cpu/infer_kernel.cpp index 7c495ef41c2..42a9727b1a7 100644 --- a/cpp/oneapi/dal/algo/pca/backend/cpu/infer_kernel.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/cpu/infer_kernel.cpp @@ -51,6 +51,9 @@ static result_t call_daal_kernel(const context_cpu& ctx, const auto daal_data = interop::convert_to_daal_table(data); const auto daal_eigenvectors = interop::convert_to_daal_table(model.get_eigenvectors()); + const auto daal_means = interop::convert_to_daal_table(model.get_means()); + const auto daal_eigenvalues = interop::convert_to_daal_table(model.get_eigenvalues()); + const auto daal_result = interop::convert_to_daal_homogen_table(arr_result, row_count, component_count); @@ -58,11 +61,10 @@ static result_t call_daal_kernel(const context_cpu& ctx, interop::call_daal_kernel(ctx, *daal_data, *daal_eigenvectors, + daal_means.get(), nullptr, - nullptr, - nullptr, + daal_eigenvalues.get(), *daal_result)); - return result_t{}.set_transformed_data( dal::detail::homogen_table_builder{}.reset(arr_result, row_count, component_count).build()); } diff --git a/cpp/oneapi/dal/algo/pca/backend/gpu/infer_kernel_dpc.cpp b/cpp/oneapi/dal/algo/pca/backend/gpu/infer_kernel_dpc.cpp index 2047db7779e..fbc1227a514 100644 --- a/cpp/oneapi/dal/algo/pca/backend/gpu/infer_kernel_dpc.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/gpu/infer_kernel_dpc.cpp @@ -39,25 +39,75 @@ static result_t infer(const context_gpu& ctx, const descriptor_t& desc, const in const auto data = input.get_data(); auto model = input.get_model(); auto eigenvectors = model.get_eigenvectors(); + auto means = model.get_means(); + auto eigenvalues = model.get_eigenvalues(); + const std::int64_t row_count = data.get_row_count(); + ONEDAL_ASSERT(row_count > 0); + const std::int64_t col_count = data.get_column_count(); + ONEDAL_ASSERT(col_count > 0); const std::int64_t component_count = get_component_count(desc, data); + ONEDAL_ASSERT(component_count > 0); dal::detail::check_mul_overflow(row_count, component_count); const auto data_nd = pr::table2ndarray(queue, data, sycl::usm::alloc::device); + auto means_nd = pr::table2ndarray_1d(queue, means, sycl::usm::alloc::device); + auto mean_centered_data_nd = + pr::ndarray::empty(queue, { row_count, col_count }, sycl::usm::alloc::device); const auto eigenvectors_nd = pr::table2ndarray(queue, eigenvectors, sycl::usm::alloc::device); + sycl::event mean_center_event; + { + ONEDAL_PROFILER_TASK(elementwise_difference, queue); + mean_center_event = + pr::elementwise_difference(queue, row_count, data_nd, means_nd, mean_centered_data_nd); + } + auto res_nd = pr::ndarray::empty(queue, { row_count, component_count }, sycl::usm::alloc::device); sycl::event gemm_event; { ONEDAL_PROFILER_TASK(gemm, queue); - gemm_event = pr::gemm(queue, data_nd, eigenvectors_nd.t(), res_nd, Float(1.0), Float(0.0)); + gemm_event = pr::gemm(queue, + mean_centered_data_nd, + eigenvectors_nd.t(), + res_nd, + Float(1.0), + Float(0.0), + { mean_center_event }); + gemm_event.wait_and_throw(); } - const auto res_array = res_nd.flatten(queue, { gemm_event }); - auto res_table = homogen_table::wrap(res_array, row_count, component_count); + if (eigenvalues.has_data()) { + auto eigenvalues_nd = + pr::table2ndarray_1d(queue, eigenvalues, sycl::usm::alloc::device); + auto sqrt_eigenvalues_nd = + pr::ndarray::empty(queue, { component_count }, sycl::usm::alloc::device); + pr::elementwise_sqrt(queue, eigenvalues_nd, sqrt_eigenvalues_nd); + + auto result_whitened_nd = pr::ndarray::empty(queue, + { row_count, component_count }, + sycl::usm::alloc::device); + sycl::event whiten_event; + { + ONEDAL_PROFILER_TASK(elementwise_division, queue); + whiten_event = pr::elementwise_division(queue, + row_count, + res_nd, + sqrt_eigenvalues_nd, + result_whitened_nd); + } + + const auto res_array_whitened = result_whitened_nd.flatten(queue, { whiten_event }); + const auto res_table = homogen_table::wrap(res_array_whitened, row_count, component_count); + + return result_t{}.set_transformed_data(res_table); + } + + const auto res_array = res_nd.flatten(queue, { gemm_event }); + const auto res_table = homogen_table::wrap(res_array, row_count, component_count); return result_t{}.set_transformed_data(res_table); } diff --git a/cpp/oneapi/dal/algo/pca/common.cpp b/cpp/oneapi/dal/algo/pca/common.cpp index f07003463ca..590f63d907a 100644 --- a/cpp/oneapi/dal/algo/pca/common.cpp +++ b/cpp/oneapi/dal/algo/pca/common.cpp @@ -61,13 +61,22 @@ template class model_impl : public ONEDAL_SERIALIZABLE(pca_dim_reduction_model_impl_id) { public: table eigenvectors; + table pMeans; + table pVariances; + table eigenvalues; void serialize(dal::detail::output_archive& ar) const override { ar(eigenvectors); + ar(pMeans); + ar(pVariances); + ar(eigenvalues); } void deserialize(dal::detail::input_archive& ar) override { ar(eigenvectors); + ar(pMeans); + ar(pVariances); + ar(eigenvalues); } }; @@ -133,6 +142,36 @@ void model::set_eigenvectors_impl(const table& value) { impl_->eigenvectors = value; } +template +const table& model::get_means() const { + return impl_->pMeans; +} + +template +void model::set_means_impl(const table& value) { + impl_->pMeans = value; +} + +template +const table& model::get_variances() const { + return impl_->pVariances; +} + +template +void model::set_variances_impl(const table& value) { + impl_->pVariances = value; +} + +template +const table& model::get_eigenvalues() const { + return impl_->eigenvalues; +} + +template +void model::set_eigenvalues_impl(const table& value) { + impl_->eigenvalues = value; +} + template void model::serialize(dal::detail::output_archive& ar) const { dal::detail::serialize_polymorphic_shared(impl_, ar); diff --git a/cpp/oneapi/dal/algo/pca/common.hpp b/cpp/oneapi/dal/algo/pca/common.hpp index 26502b58265..aeb235a3060 100644 --- a/cpp/oneapi/dal/algo/pca/common.hpp +++ b/cpp/oneapi/dal/algo/pca/common.hpp @@ -243,8 +243,32 @@ class model : public base { return *this; } + const table& get_means() const; + + auto& set_means(const table& value) { + set_means_impl(value); + return *this; + } + + const table& get_variances() const; + + auto& set_variances(const table& value) { + set_variances_impl(value); + return *this; + } + + const table& get_eigenvalues() const; + + auto& set_eigenvalues(const table& value) { + set_eigenvalues_impl(value); + return *this; + } + protected: void set_eigenvectors_impl(const table&); + void set_means_impl(const table&); + void set_variances_impl(const table&); + void set_eigenvalues_impl(const table&); private: void serialize(dal::detail::output_archive& ar) const; diff --git a/cpp/oneapi/dal/backend/primitives/stat/cov.hpp b/cpp/oneapi/dal/backend/primitives/stat/cov.hpp index 49c1f91a2dd..077adedab5b 100644 --- a/cpp/oneapi/dal/backend/primitives/stat/cov.hpp +++ b/cpp/oneapi/dal/backend/primitives/stat/cov.hpp @@ -37,6 +37,53 @@ sycl::event means(sycl::queue& queue, ndview& means, const event_vector& deps = {}); +/// Subtract 1-d array from 2-d array +/// +/// @tparam Float Floating-point type used to perform computations +/// +/// @param[in] queue The queue +/// @param[in] row_count The number of rows +/// @param[in] minuend A 2-d array, from which another array will be subtracted +/// @param[in] subtrahend A 1-d array, which is to be subtracted from minuend +/// @param[out] difference The difference between minuend and +template +sycl::event elementwise_difference(sycl::queue& queue, + std::int64_t row_count, + const ndview& minuend, + const ndview& subtrahend, + ndview& difference, + const event_vector& deps = {}); + +/// SQRT of 1-d array +/// +/// @tparam Float Floating-point type used to perform computations +/// +/// @param[in] queue The queue +/// @param[in] src source array +/// @param[out] dst destination array with elementwise square-root of source array +template +sycl::event elementwise_sqrt(sycl::queue& queue, + const ndview& src, + ndview& dst, + const event_vector& deps = {}); + +/// Divide 2-d array by 1-d array +/// +/// @tparam Float Floating-point type used to perform computations +/// +/// @param[in] queue The queue +/// @param[in] row_count The number of rows +/// @param[in] numerator A 2-d array +/// @param[in] denominator A 1-d array +/// @param[out] quotient The result of the division +template +sycl::event elementwise_division(sycl::queue& queue, + std::int64_t row_count, + const ndview& numerator, + const ndview& denominator, + ndview& quotient, + const event_vector& deps = {}); + /// Computes covariance matrix /// /// @tparam Float Floating-point type used to perform computations diff --git a/cpp/oneapi/dal/backend/primitives/stat/cov_dpc.cpp b/cpp/oneapi/dal/backend/primitives/stat/cov_dpc.cpp index a63f389a19b..c688ff9d056 100644 --- a/cpp/oneapi/dal/backend/primitives/stat/cov_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/stat/cov_dpc.cpp @@ -51,6 +51,98 @@ sycl::event means(sycl::queue& q, }); } +// Subtract 1d array from 2d array elementwise +template +sycl::event elementwise_difference(sycl::queue& q, + std::int64_t row_count, + const ndview& minuend, + const ndview& subtrahend, + ndview& difference, + const event_vector& deps) { + ONEDAL_ASSERT(minuend.has_data()); + ONEDAL_ASSERT(subtrahend.has_data()); + ONEDAL_ASSERT(difference.has_mutable_data()); + ONEDAL_ASSERT(is_known_usm(q, minuend.get_data())); + ONEDAL_ASSERT(is_known_usm(q, subtrahend.get_data())); + ONEDAL_ASSERT(is_known_usm(q, difference.get_mutable_data())); + ONEDAL_ASSERT(minuend.get_dimension(0) == difference.get_dimension(0)); + ONEDAL_ASSERT(minuend.get_dimension(1) == subtrahend.get_dimension(0)); + + const auto column_count = minuend.get_dimension(1); + + const Float* minuend_ptr = minuend.get_data(); + const Float* subtrahend_ptr = subtrahend.get_data(); + Float* difference_ptr = difference.get_mutable_data(); + + return q.submit([&](sycl::handler& cgh) { + const auto range = make_range_2d(row_count, column_count); + cgh.depends_on(deps); + cgh.parallel_for(range, [=](sycl::id<2> id) { + const std::int64_t i = id[0]; + const std::int64_t j = id[1]; + difference_ptr[i * column_count + j] = + minuend_ptr[i * column_count + j] - subtrahend_ptr[j]; + }); + }); +} + +// SQRT of 1d array elementwise +template +sycl::event elementwise_sqrt(sycl::queue& q, + const ndview& src, + ndview& dst, + const event_vector& deps) { + ONEDAL_ASSERT(src.has_data()); + ONEDAL_ASSERT(dst.has_mutable_data()); + ONEDAL_ASSERT(dst.get_shape() == src.get_shape()); + Float* dst_ptr = dst.get_mutable_data(); + const Float* src_ptr = src.get_data(); + const auto num_elems = src.get_dimension(0); + return q.submit([&](sycl::handler& cgh) { + const auto range = make_range_1d(num_elems); + cgh.depends_on(deps); + cgh.parallel_for(range, [=](sycl::id<1> id) { + dst_ptr[id[0]] = sycl::sqrt(src_ptr[id[0]]); + }); + }); +} + +// Divide 2d array by 1d array elementwise +template +sycl::event elementwise_division(sycl::queue& q, + std::int64_t row_count, + const ndview& numerator, + const ndview& denominator, + ndview& quotient, + const event_vector& deps) { + ONEDAL_ASSERT(numerator.has_data()); + ONEDAL_ASSERT(denominator.has_data()); + ONEDAL_ASSERT(quotient.has_mutable_data()); + ONEDAL_ASSERT(is_known_usm(q, numerator.get_data())); + ONEDAL_ASSERT(is_known_usm(q, denominator.get_data())); + ONEDAL_ASSERT(is_known_usm(q, quotient.get_mutable_data())); + ONEDAL_ASSERT(numerator.get_dimension(0) == quotient.get_dimension(0)); + ONEDAL_ASSERT(numerator.get_dimension(1) == denominator.get_dimension(0)); + + const auto column_count = numerator.get_dimension(1); + + const Float* numerator_ptr = numerator.get_data(); + const Float* denominator_ptr = denominator.get_data(); + Float* quotient_ptr = quotient.get_mutable_data(); + + return q.submit([&](sycl::handler& cgh) { + const auto range = make_range_2d(row_count, column_count); + cgh.depends_on(deps); + cgh.parallel_for(range, [=](sycl::id<2> id) { + const std::int64_t i = id[0]; + const std::int64_t j = id[1]; + ONEDAL_ASSERT(Float(denominator_ptr[j]) != Float(0.0)); + quotient_ptr[i * column_count + j] = + numerator_ptr[i * column_count + j] / denominator_ptr[j]; + }); + }); +} + template inline sycl::event compute_covariance(sycl::queue& q, std::int64_t row_count, @@ -128,6 +220,7 @@ sycl::event variances(sycl::queue& q, }); }); } + template inline sycl::event prepare_correlation(sycl::queue& q, std::int64_t row_count, @@ -342,6 +435,37 @@ sycl::event correlation_from_covariance(sycl::queue& q, INSTANTIATE_MEANS(float) INSTANTIATE_MEANS(double) +#define INSTANTIATE_ELEMENWISE_DIFFERENCE(F) \ + template ONEDAL_EXPORT sycl::event elementwise_difference(sycl::queue&, \ + std::int64_t, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + const event_vector&); + +INSTANTIATE_ELEMENWISE_DIFFERENCE(float) +INSTANTIATE_ELEMENWISE_DIFFERENCE(double) + +#define INSTANTIATE_ELEMENWISE_DIVISION(F) \ + template ONEDAL_EXPORT sycl::event elementwise_division(sycl::queue&, \ + std::int64_t, \ + const ndview&, \ + const ndview&, \ + ndview&, \ + const event_vector&); + +INSTANTIATE_ELEMENWISE_DIVISION(float) +INSTANTIATE_ELEMENWISE_DIVISION(double) + +#define INSTANTIATE_ELEMENWISE_SQRT(F) \ + template ONEDAL_EXPORT sycl::event elementwise_sqrt(sycl::queue&, \ + const ndview&, \ + ndview&, \ + const event_vector&); + +INSTANTIATE_ELEMENWISE_SQRT(float) +INSTANTIATE_ELEMENWISE_SQRT(double) + #define INSTANTIATE_COV(F) \ template ONEDAL_EXPORT sycl::event covariance(sycl::queue&, \ std::int64_t, \