From e728c8c60d00a856973366037155161516f7cfc7 Mon Sep 17 00:00:00 2001 From: Victoriya Fedotova Date: Thu, 19 Sep 2024 03:03:27 -0700 Subject: [PATCH] Minor fixes --- cpp/daal/src/threading/threading.h | 30 +++++++++++--------------- docs/source/contribution/threading.rst | 2 +- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/cpp/daal/src/threading/threading.h b/cpp/daal/src/threading/threading.h index 78edceb627d..b83564f25c1 100644 --- a/cpp/daal/src/threading/threading.h +++ b/cpp/daal/src/threading/threading.h @@ -224,10 +224,9 @@ inline void threader_func_break(int i, bool & needBreak, const void * a) } /// Pass a function to be executed in a for loop to the threading layer. -/// The maximal number of iterations in the loop is 2^31 - 1. +/// The maximal number of iterations in the loop is `2^31 - 1 (INT32_MAX)`. /// The default scheduling of the threading layer is used to assign /// the iterations of the loop to threads. -/// The iterations of the loop should be logically independent. /// Data dependencies between the iterations are allowed, but may requre the use /// of synchronization primitives. /// @@ -246,7 +245,7 @@ inline void threader_for(int n, int reserved, const F & lambda) } /// Pass a function to be executed in a for loop to the threading layer. -/// The maximal number of iterations in the loop is 2^63 - 1. +/// The maximal number of iterations in the loop is `2^63 - 1 (INT64_MAX)`. /// The default scheduling of the threading layer is used to assign /// the iterations of the loop to threads. /// The iterations of the loop should be logically independent. @@ -269,15 +268,14 @@ inline void threader_for_int64(int64_t n, const F & lambda) /// Pass a function to be executed in a for loop to the threading layer. /// The maximal number of iterations in the loop is 2^31 - 1. /// -/// The specifics of this loop comparing to `threader_for` is that the iteration spase -/// of the loop is always chunked to the chunks of size 1. 
-/// This means the threading layer tries to assign the consecutive iterations to -/// a different threads if possible. +/// The specifics of this loop compared to `threader_for` is that the iteration space +/// of the loop is always chunked with chunk size 1. +/// This means the threading layer tries to assign consecutive iterations to +/// different threads, if possible. /// In case of oneTBB threading backend this means that `simple_partitioner` /// (https://oneapi-src.github.io/oneTBB/main/tbb_userguide/Partitioner_Summary.html) -/// with chunk size 1 is used to produce iterations to threads mapping. +/// with chunk size 1 is used to produce iteration-to-thread mappings. /// -/// The iterations of the loop should be logically independent. /// Data dependencies between the iterations are allowed, but may requre the use /// of synchronization primitives. /// @@ -313,17 +311,15 @@ inline void threader_for_int32ptr(const int * begin, const int * end, const F & /// It is recommended to use this parallel loop if each iteration of the loop /// performs equal amount of work. /// -/// Let `t` be the number of threads available to oneDAL. -/// -/// Then the number of iterations processed by each threads (except maybe the last one) -/// is computed as: +/// Let `t` be the number of threads available to oneDAL. The number of iterations +/// processed by each thread (except maybe the last one) is computed as: /// `nI = (n + t - 1) / t` /// /// Here is how the work is split across the threads: /// The 1st thread executes iterations `0, ..., nI - 1`; /// the 2nd thread executes iterations `nI, ..., 2 * nI - 1`; /// ... -/// the t-th thread executes iterations `(t - 1) * nI, ..., n - 1`. +/// the `t`-th thread executes iterations `(t - 1) * nI, ..., n - 1`. 
/// /// @tparam F Lambda function of type `[/* captures */](size_t i, size_t tid) -> void`, /// where @@ -341,13 +337,13 @@ inline void static_threader_for(size_t n, const F & lambda) } /// Pass a function to be executed in a for loop to the threading layer. -/// The maximal number of iterations in the loop is 2^31 - 1. +/// The maximal number of iterations in the loop is `2^31 - 1 (INT32_MAX)`. /// The default scheduling of the threading layer is used to assign /// the iterations of the loop to threads. /// /// @tparam F Lambda function of type `[/* captures */](int beginRange, int endRange) -> void` /// where -/// `beginRange` is the starting index of the loop's iterations block to be +/// `beginRange` is the starting index of the loop iterations block to be /// processed by a thread, `0 <= beginRange < n`; /// `endRange` is the index after the end of the loop's iterations block to be /// processed by a thread, `beginRange < endRange <= n`; @@ -417,7 +413,7 @@ class tls_deleter_ : public tls_deleter /// Thread-local storage (TLS). /// Can change its local variable after a nested parallel constructs. /// @note Thread-local storage in nested parallel regions is, in general, not thread local. -/// The use of nested parallelism should be avioded if possible, otherwise extra care +/// The use of nested parallelism should be avoided if possible, otherwise extra care /// must be taken with thread-local values. /// /// @tparam F Type of the data located in the storage diff --git a/docs/source/contribution/threading.rst b/docs/source/contribution/threading.rst index 91ce3e2a2ad..cd1acd84e95 100644 --- a/docs/source/contribution/threading.rst +++ b/docs/source/contribution/threading.rst @@ -69,7 +69,7 @@ This code shows how a typical parallel loop in oneDAL looks like: Thread-local Storage (TLS) ************************** -Lets consider you need to compute a dot product of two arrays. +Suppose you need to compute a dot product of two arrays. 
Here is a variant of sequential implementation: .. include:: ../includes/threading/dot-sequential.rst