Skip to content

Commit

Permalink
[oneMKL][Stats][Spec] Simplify and clean up Summary Statistics domain (
Browse files Browse the repository at this point in the history
  • Loading branch information
aelizaro committed Sep 5, 2023
1 parent e494df3 commit b1a323e
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 85 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,7 @@ structure dataset (Buffer version)
explicit dataset(std::int64_t n_dims_, std::int64_t n_observations_,
sycl::buffer<Type, 1> observations_, sycl::buffer<Type, 1> weights_ = {0},
sycl::buffer<std::int64_t, 1> indices_ = {0}) :
n_dims(n_dims_), n_observations(n_observations_),
observations(observations_),
weights(weights_), indices(indices_) {};
sycl::buffer<std::int64_t, 1> indices_ = {0});
std::int64_t n_dims;
std::int64_t n_observations;
Expand Down Expand Up @@ -91,7 +88,7 @@ structure dataset (Buffer version)
explicit dataset::dataset(std::int64_t n_dims_, std::int64_t n_observations_,
sycl::buffer<Type, 1> observations_,
sycl::buffer<Type, 1> weights_ = {0},
sycl::buffer<std::int64_t, 1> indices_ = {0})
sycl::buffer<std::int64_t, 1> indices_ = {0});
.. container:: section

Expand All @@ -102,8 +99,8 @@ structure dataset (Buffer version)
* `n_dims_` is the number of dimensions
* `n_observations_` is the number of observations
* `observations_` is the matrix of observations
* `weights_` is an optional parameter, represents array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal 1.
* `indices_` is an optional parameter, represents array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed.
* `weights_` is an optional parameter that represents an array of weights for the observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal to 1.
* `indices_` is an optional parameter that represents an array of the dimensions to be processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed.

.. container:: section

Expand All @@ -125,10 +122,7 @@ structure dataset (USM version)
template<layout ObservationsLayout, typename Type>
struct dataset<Type*, ObservationsLayout> {
explicit dataset(std::int64_t n_dims_, std::int64_t n_observations_, Type* observations_,
Type* weights_ = nullptr, std::int64_t* indices_ = nullptr) :
n_dims(n_dims_), n_observations(n_observations_),
observations(observations_),
weights(weights_), indices(indices_) {};
Type* weights_ = nullptr, std::int64_t* indices_ = nullptr);
std::int64_t n_dims;
std::int64_t n_observations;
Expand Down Expand Up @@ -186,7 +180,7 @@ structure dataset (USM version)
explicit dataset::dataset(std::int64_t n_dims_, std::int64_t n_observations_,
Type* observations_,
Type* weights_ = nullptr,
std::int64_t* indices_ = nullptr)
std::int64_t* indices_ = nullptr);
.. container:: section

Expand All @@ -197,8 +191,8 @@ structure dataset (USM version)
* `n_dims_` is the number of dimensions
* `n_observations_` is the number of observations
* `observations_` is the matrix of observations
* `weights_` is an optional parameter, represents array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal 1.
* `indices_` is an optional parameter, represents array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed.
* `weights_` is an optional parameter that represents an array of weights for the observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal to 1.
* `indices_` is an optional parameter that represents an array of the dimensions to be processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed.

.. container:: section

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,92 +20,32 @@ A typical algorithm for summary statistics is as follows:
The following example demonstrates how to calculate mean values for a 3-dimensional dataset filled with random numbers. For dataset creation, the :ref:`onemkl_stats_make_dataset` helper function is used.


Buffer-based example
--------------------

.. code-block:: cpp
#include <iostream>
#include <vector>
#include "CL/sycl.hpp"
#include "oneapi/mkl/stats.hpp"
// Buffer-based example: fills a 3-dimensional dataset with random numbers,
// computes the mean value of every dimension and prints the results.
int main() {
    sycl::queue queue;

    const size_t n_observations = 1000;
    const size_t n_dims = 3;
    std::vector<float> x(n_observations * n_dims);

    // fill x storage with random numbers
    // (loop indices are size_t so they match the type of the loop bounds)
    for (size_t i = 0; i < n_dims; i++) {
        for (size_t j = 0; j < n_observations; j++) {
            x[j + i * n_observations] = float(std::rand()) / float(RAND_MAX);
        }
    }

    // create buffer for dataset
    sycl::buffer<float, 1> x_buf(x.data(), x.size());
    // create buffer for mean values
    sycl::buffer<float, 1> mean_buf(n_dims);

    // create oneapi::mkl::stats::dataset
    auto dataset = oneapi::mkl::stats::make_dataset<oneapi::mkl::stats::layout::row_major>(n_dims, n_observations, x_buf);

    // compute the mean values; results are written to mean_buf
    oneapi::mkl::stats::mean(queue, dataset, mean_buf);

    // create host accessor for mean_buf to print results
    auto acc = mean_buf.template get_access<sycl::access::mode::read>();
    for (size_t i = 0; i < n_dims; i++) {
        std::cout << "Mean value for dimension " << i << ": " << acc[i] << std::endl;
    }
    return 0;
}
USM-based example
-----------------

.. code-block:: cpp
#include <iostream>
#include <vector>
#include "CL/sycl.hpp"
#include "oneapi/mkl/stats.hpp"
int main() {
sycl::queue queue;
const size_t n_observations = 1000;
const size_t n_dims = 3;
constexpr std::size_t n_observations = 1000;
constexpr std::size_t n_dims = 3;
sycl::usm_allocator<float, sycl::usm::alloc::shared> allocator(queue);
// allocate Unified Shared Memory for the dataset of the size n_observations * n_dims and fill it with any data
// allocate Unified Shared Memory for the mean output of the size n_dims
std::vector<float, decltype(allocator)> x(n_observations * n_dims, allocator);
// fill x storage with random numbers
for(int i = 0; i < n_dims, i++) {
for(int j = 0; j < n_observations; j++) {
x[j + i * n_observations] = float(std::rand()) / float(RAND_MAX);
}
}
std::vector<float, decltype(allocator)> mean_buf(n_dims, allocator);
// create oneapi::mkl::stats::dataset
auto dataset = oneapi::mkl::stats::make_dataset<oneapi::mkl::stats::layout::row_major>(n_dims, n_observations, x);
sycl::event event = oneapi::mkl::stats::mean(queue, dataset, mean);
event.wait();
for(int i = 0; i < n_dims; i++) {
std::cout << "Mean value for dimension " << i << ": " << mean[i] << std::endl;
}
return 0;
}
auto dataset = oneapi::mkl::stats::make_dataset<oneapi::mkl::stats::layout::row_major>(n_dims, n_observations, dataset_ptr);
// call statistics computation routine
auto event = oneapi::mkl::stats::mean(queue, dataset, mean_ptr);
.. rubric:: USM usage
// wait until computations are completed
event.wait();
You can also use USM with raw pointers allocated by the ``sycl::malloc_shared`` or ``sycl::malloc_device`` functions.
// ...
}
**Parent topic:** :ref:`onemkl_stats`

0 comments on commit b1a323e

Please sign in to comment.