[oneMKL][Stats][Spec] Simplify and clean up Summary Statistics domain (#494)
uxlfoundation · Sep 5, 2023 · b1a323e · b1a323e
1 parent e494df3
commit b1a323e
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 85 deletions.
diff --git a/source/elements/oneMKL/source/domains/stats/onemkl_stats_dataset.rst b/source/elements/oneMKL/source/domains/stats/onemkl_stats_dataset.rst
@@ -30,10 +30,7 @@ structure dataset (Buffer version)
 
  explicit dataset(std::int64_t n_dims_, std::int64_t n_observations_,
  sycl::buffer<Type, 1> observations_, sycl::buffer<Type, 1> weights_ = {0},
- sycl::buffer<std::int64_t, 1> indices_ = {0}) :
- n_dims(n_dims_), n_observations(n_observations_),
- observations(observations_),
- weights(weights_), indices(indices_) {};
+ sycl::buffer<std::int64_t, 1> indices_ = {0});
 
  std::int64_t n_dims;
  std::int64_t n_observations;
@@ -91,7 +88,7 @@ structure dataset (Buffer version)
  explicit dataset::dataset(std::int64_t n_dims_, std::int64_t n_observations_,
  sycl::buffer<Type, 1> observations_,
  sycl::buffer<Type, 1> weights_ = {0},
- sycl::buffer<std::int64_t, 1> indices_ = {0})
+ sycl::buffer<std::int64_t, 1> indices_ = {0});
 
  .. container:: section
 
@@ -102,8 +99,8 @@ structure dataset (Buffer version)
  * `n_dims_` is the number of dimensions
  * `n_observations_` is the number of observations
  * `observations_` is the matrix of observations
- * `weights_` is an optional parameter, represents array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal 1.
- * `indices_` is an optional parameter, represents array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed.
+ * `weights_` is an optional parameter that represents an array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal to 1.
+ * `indices_` is an optional parameter that represents an array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed.
 
  .. container:: section
 
@@ -125,10 +122,7 @@ structure dataset (USM version)
  template<layout ObservationsLayout, typename Type>
  struct dataset<Type*, ObservationsLayout> {
  explicit dataset(std::int64_t n_dims_, std::int64_t n_observations_, Type* observations_,
- Type* weights_ = nullptr, std::int64_t* indices_ = nullptr) :
- n_dims(n_dims_), n_observations(n_observations_),
- observations(observations_),
- weights(weights_), indices(indices_) {};
+ Type* weights_ = nullptr, std::int64_t* indices_ = nullptr);
 
  std::int64_t n_dims;
  std::int64_t n_observations;
@@ -186,7 +180,7 @@ structure dataset (USM version)
  explicit dataset::dataset(std::int64_t n_dims_, std::int64_t n_observations_,
  Type* observations_,
  Type* weights_ = nullptr,
- std::int64_t* indices_ = nullptr)
+ std::int64_t* indices_ = nullptr);
 
  .. container:: section
 
@@ -197,8 +191,8 @@ structure dataset (USM version)
  * `n_dims_` is the number of dimensions
  * `n_observations_` is the number of observations
  * `observations_` is the matrix of observations
- * `weights_` is an optional parameter, represents array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal 1.
- * `indices_` is an optional parameter, represents array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed.
+ * `weights_` is an optional parameter that represents an array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal to 1.
+ * `indices_` is an optional parameter that represents an array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed.
 
  .. container:: section
 

diff --git a/source/elements/oneMKL/source/domains/stats/onemkl_stats_usage_model.rst b/source/elements/oneMKL/source/domains/stats/onemkl_stats_usage_model.rst
@@ -20,92 +20,32 @@ A typical algorithm for summary statistics is as follows:
 The following example demonstrates how to calculate mean values for a 3-dimensional dataset filled with random numbers. For dataset creation, the :ref:`onemkl_stats_make_dataset` helper function is used.
 
 
-Buffer-based example
---------------------
-
-.. code-block:: cpp
-
- #include <iostream>
- #include <vector>
-
- #include "CL/sycl.hpp"
- #include "oneapi/mkl/stats.hpp"
-
- int main() {
- sycl::queue queue;
-
- const size_t n_observations = 1000;
- const size_t n_dims = 3;
- std::vector<float> x(n_observations * n_dims);
- // fill x storage with random numbers
- for(int i = 0; i < n_dims, i++) {
- for(int j = 0; j < n_observations; j++) {
- x[j + i * n_observations] = float(std::rand()) / float(RAND_MAX);
- }
- }
- //create buffer for dataset
- sycl::buffer<float, 1> x_buf(x.data(), x.size());
- // create buffer for mean values
- sycl::buffer<float, 1> mean_buf(n_dims);
- // create oneapi::mkl::stats::dataset
- auto dataset = oneapi::mkl::stats::make_dataset<oneapi::mkl::stats::layout::row_major>(n_dims, n_observations, x_buf);
-
-
- oneapi::mkl::stats::mean(queue, dataset, mean_buf);
-
-
- // create host accessor for mean_buf to print results
- auto acc = mean_buf.template get_access<sycl::access::mode::read>();
-
-
- for(int i = 0; i < n_dims; i++) {
- std::cout << "Mean value for dimension " << i << ": " << acc[i] << std::endl;
- }
- return 0;
- }
-
-
 USM-based example
 -----------------
 
 .. code-block:: cpp
 
- #include <iostream>
- #include <vector>
-
- #include "CL/sycl.hpp"
  #include "oneapi/mkl/stats.hpp"
 
  int main() {
  sycl::queue queue;
 
- const size_t n_observations = 1000;
- const size_t n_dims = 3;
+ constexpr std::size_t n_observations = 1000;
+ constexpr std::size_t n_dims = 3;
 
- sycl::usm_allocator<float, sycl::usm::alloc::shared> allocator(queue);
+ // allocate Unified Shared Memory (dataset_ptr) for the dataset of size n_observations * n_dims and fill it with data
+ // allocate Unified Shared Memory (mean_ptr) for the mean output of size n_dims
 
- std::vector<float, decltype(allocator)> x(n_observations * n_dims, allocator);
- // fill x storage with random numbers
- for(int i = 0; i < n_dims, i++) {
- for(int j = 0; j < n_observations; j++) {
- x[j + i * n_observations] = float(std::rand()) / float(RAND_MAX);
- }
- }
- std::vector<float, decltype(allocator)> mean_buf(n_dims, allocator);
  // create oneapi::mkl::stats::dataset
- auto dataset = oneapi::mkl::stats::make_dataset<oneapi::mkl::stats::layout::row_major>(n_dims, n_observations, x);
-
- sycl::event event = oneapi::mkl::stats::mean(queue, dataset, mean);
- event.wait();
- for(int i = 0; i < n_dims; i++) {
- std::cout << "Mean value for dimension " << i << ": " << mean[i] << std::endl;
- }
- return 0;
- }
+ auto dataset = oneapi::mkl::stats::make_dataset<oneapi::mkl::stats::layout::row_major>(n_dims, n_observations, dataset_ptr);
 
+ // call statistics computation routine
+ auto event = oneapi::mkl::stats::mean(queue, dataset, mean_ptr);
 
-.. rubric:: USM usage
+ // wait until computations are completed
+ event.wait();
 
-You can also use USM with raw pointers by using the sycl::malloc_shared/malloc_device functions.
+ // ...
+ }
 
 **Parent topic:** :ref:`onemkl_stats`