From 4750d682dce3f8d6f30fde8018e5d441cd48b285 Mon Sep 17 00:00:00 2001 From: Alina Elizarova Date: Tue, 5 Sep 2023 21:35:58 +0100 Subject: [PATCH] [oneMKL][Stats][Spec] Simplify and clean up Summary Statistics domain (#494) --- .../domains/stats/onemkl_stats_dataset.rst | 22 ++--- .../stats/onemkl_stats_usage_model.rst | 82 +++---------------- 2 files changed, 19 insertions(+), 85 deletions(-) diff --git a/source/elements/oneMKL/source/domains/stats/onemkl_stats_dataset.rst b/source/elements/oneMKL/source/domains/stats/onemkl_stats_dataset.rst index e595031c83..56b2353935 100755 --- a/source/elements/oneMKL/source/domains/stats/onemkl_stats_dataset.rst +++ b/source/elements/oneMKL/source/domains/stats/onemkl_stats_dataset.rst @@ -30,10 +30,7 @@ structure dataset (Buffer version) explicit dataset(std::int64_t n_dims_, std::int64_t n_observations_, sycl::buffer observations_, sycl::buffer weights_ = {0}, - sycl::buffer indices_ = {0}) : - n_dims(n_dims_), n_observations(n_observations_), - observations(observations_), - weights(weights_), indices(indices_) {}; + sycl::buffer indices_ = {0}); std::int64_t n_dims; std::int64_t n_observations; @@ -91,7 +88,7 @@ structure dataset (Buffer version) explicit dataset::dataset(std::int64_t n_dims_, std::int64_t n_observations_, sycl::buffer observations_, sycl::buffer weights_ = {0}, - sycl::buffer indices_ = {0}) + sycl::buffer indices_ = {0}); .. container:: section @@ -102,8 +99,8 @@ structure dataset (Buffer version) * `n_dims_` is the number of dimensions * `n_observations_` is the number of observations * `observations_` is the matrix of observations - * `weights_` is an optional parameter, represents array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal 1. - * `indices_` is an optional parameter, represents array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed. 
+ * `weights_` is an optional parameter, represents an array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal to 1. + * `indices_` is an optional parameter, represents an array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed. .. container:: section @@ -125,10 +122,7 @@ structure dataset (USM version) template struct dataset { explicit dataset(std::int64_t n_dims_, std::int64_t n_observations_, Type* observations_, - Type* weights_ = nullptr, std::int64_t* indices_ = nullptr) : - n_dims(n_dims_), n_observations(n_observations_), - observations(observations_), - weights(weights_), indices(indices_) {}; + Type* weights_ = nullptr, std::int64_t* indices_ = nullptr); std::int64_t n_dims; std::int64_t n_observations; @@ -186,7 +180,7 @@ structure dataset (USM version) explicit dataset::dataset(std::int64_t n_dims_, std::int64_t n_observations_, Type* observations_, Type* weights_ = nullptr, - std::int64_t* indices_ = nullptr) + std::int64_t* indices_ = nullptr); .. container:: section @@ -197,8 +191,8 @@ structure dataset (USM version) * `n_dims_` is the number of dimensions * `n_observations_` is the number of observations * `observations_` is the matrix of observations - * `weights_` is an optional parameter, represents array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal 1. - * `indices_` is an optional parameter, represents array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed. + * `weights_` is an optional parameter, represents an array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal to 1. 
+ * `indices_` is an optional parameter, represents an array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed. .. container:: section diff --git a/source/elements/oneMKL/source/domains/stats/onemkl_stats_usage_model.rst b/source/elements/oneMKL/source/domains/stats/onemkl_stats_usage_model.rst index eee2dc16de..bb6d634ab3 100755 --- a/source/elements/oneMKL/source/domains/stats/onemkl_stats_usage_model.rst +++ b/source/elements/oneMKL/source/domains/stats/onemkl_stats_usage_model.rst @@ -20,92 +20,32 @@ A typical algorithm for summary statistics is as follows: The following example demonstrates how to calculate mean values for a 3-dimensional dataset filled with random numbers. For dataset creation, the :ref:`onemkl_stats_make_dataset` helper function is used. -Buffer-based example --------------------- - -.. code-block:: cpp - - #include - #include - - #include "CL/sycl.hpp" - #include "oneapi/mkl/stats.hpp" - - int main() { - sycl::queue queue; - - const size_t n_observations = 1000; - const size_t n_dims = 3; - std::vector x(n_observations * n_dims); - // fill x storage with random numbers - for(int i = 0; i < n_dims, i++) { - for(int j = 0; j < n_observations; j++) { - x[j + i * n_observations] = float(std::rand()) / float(RAND_MAX); - } - } - //create buffer for dataset - sycl::buffer x_buf(x.data(), x.size()); - // create buffer for mean values - sycl::buffer mean_buf(n_dims); - // create oneapi::mkl::stats::dataset - auto dataset = oneapi::mkl::stats::make_dataset(n_dims, n_observations, x_buf); - - - oneapi::mkl::stats::mean(queue, dataset, mean_buf); - - - // create host accessor for mean_buf to print results - auto acc = mean_buf.template get_access(); - - - for(int i = 0; i < n_dims; i++) { - std::cout << "Mean value for dimension " << i << ": " << acc[i] << std::endl; - } - return 0; - } - - USM-based example ----------------- .. 
code-block:: cpp - #include - #include - - #include "CL/sycl.hpp" #include "oneapi/mkl/stats.hpp" int main() { sycl::queue queue; - const size_t n_observations = 1000; - const size_t n_dims = 3; + constexpr std::size_t n_observations = 1000; + constexpr std::size_t n_dims = 3; - sycl::usm_allocator allocator(queue); + // allocate Unified Shared Memory for the dataset of the size n_observations * n_dims and fill it with any data + // allocate Unified Shared Memory for the mean output of the size n_dims - std::vector x(n_observations * n_dims, allocator); - // fill x storage with random numbers - for(int i = 0; i < n_dims, i++) { - for(int j = 0; j < n_observations; j++) { - x[j + i * n_observations] = float(std::rand()) / float(RAND_MAX); - } - } - std::vector mean_buf(n_dims, allocator); // create oneapi::mkl::stats::dataset - auto dataset = oneapi::mkl::stats::make_dataset(n_dims, n_observations, x); - - sycl::event event = oneapi::mkl::stats::mean(queue, dataset, mean); - event.wait(); - for(int i = 0; i < n_dims; i++) { - std::cout << "Mean value for dimension " << i << ": " << mean[i] << std::endl; - } - return 0; - } + auto dataset = oneapi::mkl::stats::make_dataset(n_dims, n_observations, dataset_ptr); + // call statistics computation routine + auto event = oneapi::mkl::stats::mean(queue, dataset, mean_ptr); -.. rubric:: USM usage + // wait until computations are completed + event.wait(); -You can also use USM with raw pointers by using the sycl::malloc_shared/malloc_device functions. + // ... + } **Parent topic:** :ref:`onemkl_stats`