From ac0c63ef10dc02c246c93b8da5e4adccf2b5d2b4 Mon Sep 17 00:00:00 2001 From: Hugh Bird Date: Thu, 13 Jun 2024 17:03:51 +0100 Subject: [PATCH 1/3] [DFT][MKLGPU] Use MKLGPU's FWD/BWD_STRIDES API to avoid deprecation message * 2024.1's MKL prints a deprecation message when the deprecated INPUT/OUTPUT API is used. * This checks for 2024.1 and uses the new FWD/BWD stride API instead. * Also minor bugfix for CuFFT workspace size. --- src/dft/backends/cufft/commit.cpp | 2 +- src/dft/backends/mklgpu/backward.cpp | 5 +- src/dft/backends/mklgpu/commit.cpp | 78 +++++++++++++++++++++++----- src/dft/backends/mklgpu/forward.cpp | 5 +- 4 files changed, 75 insertions(+), 15 deletions(-) diff --git a/src/dft/backends/cufft/commit.cpp b/src/dft/backends/cufft/commit.cpp index 19507d722..135587e32 100644 --- a/src/dft/backends/cufft/commit.cpp +++ b/src/dft/backends/cufft/commit.cpp @@ -394,7 +394,7 @@ class cufft_commit final : public dft::detail::commit_impl { std::int64_t get_plan_workspace_size_bytes(cufftHandle handle) { std::size_t size = 0; - cufftGetSize(*plans[0], &size); + cufftGetSize(handle, &size); std::int64_t padded_size = static_cast(size); return padded_size; } diff --git a/src/dft/backends/mklgpu/backward.cpp b/src/dft/backends/mklgpu/backward.cpp index 0be8e701a..6c4896c66 100644 --- a/src/dft/backends/mklgpu/backward.cpp +++ b/src/dft/backends/mklgpu/backward.cpp @@ -41,12 +41,15 @@ namespace detail { template inline auto compute_backward(dft::detail::descriptor &desc, ArgTs &&... args) { using mklgpu_desc_t = dft::descriptor; + using desc_shptr_t = std::shared_ptr; + using handle_t = std::pair; auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklgpu) { throw mkl::invalid_argument("DFT", "compute_backward", "DFT descriptor has not been commited for MKLGPU"); } - auto mklgpu_desc = reinterpret_cast(commit_handle->get_handle()); + auto handle = reinterpret_cast(commit_handle->get_handle()); + auto mklgpu_desc = handle->second; // Second because backward DFT. int commit_status{ DFTI_UNCOMMITTED }; mklgpu_desc->get_value(dft::config_param::COMMIT_STATUS, &commit_status); if (commit_status != DFTI_COMMITTED) { diff --git a/src/dft/backends/mklgpu/commit.cpp b/src/dft/backends/mklgpu/commit.cpp index 73d017d86..7decf4bfb 100644 --- a/src/dft/backends/mklgpu/commit.cpp +++ b/src/dft/backends/mklgpu/commit.cpp @@ -37,6 +37,12 @@ // MKLGPU header #include "oneapi/mkl/dfti.hpp" +// MKL 2024.1 deprecates input/output strides. +#include "mkl_version.h" +namespace oneapi::mkl::dft::mklgpu::detail { +constexpr bool mklgpu_use_forward_backward_strides_api = INTEL_MKL_VERSION >= 20240001; +} + /** Note that in this file, the Intel oneMKL closed-source library's interface mirrors the interface of this OneMKL open-source library. Consequently, the types under dft::TYPE are closed-source oneMKL types, @@ -53,14 +59,22 @@ class mklgpu_commit final : public dft::detail::commit_impl { // Equivalent MKLGPU precision and domain from OneMKL's precision / domain. static constexpr dft::precision mklgpu_prec = to_mklgpu(prec); static constexpr dft::domain mklgpu_dom = to_mklgpu(dom); + + // A pair of descriptors are needed because of the [[deprecated]]IN/OUTPUT_STRIDES vs F/BWD_STRIDES API. + // Of the pair [0] is fwd DFT, [1] is backward DFT. If possible, the pointers refer to the same desciptor. + // Both pointers must be valid. using mklgpu_descriptor_t = dft::descriptor; + using descriptor_shptr_t = std::shared_ptr; + using handle_t = std::pair; + using scalar_type = typename dft::detail::commit_impl::scalar_type; public: mklgpu_commit(sycl::queue queue, const dft::detail::dft_values& config_values) : oneapi::mkl::dft::detail::commit_impl(queue, backend::mklgpu, config_values), - handle(config_values.dimensions) { + handle(std::make_shared(config_values.dimensions), nullptr) { + handle.second = handle.first; // Make sure the bwd pointer is valid. // MKLGPU does not throw an informative exception for the following: if constexpr (prec == dft::detail::precision::DOUBLE) { if (!queue.get_device().has(sycl::aspect::fp64)) { @@ -75,14 +89,32 @@ class mklgpu_commit final : public dft::detail::commit_impl { oneapi::mkl::dft::detail::external_workspace_helper( config_values.workspace_placement == oneapi::mkl::dft::detail::config_value::WORKSPACE_EXTERNAL); - set_value(handle, config_values); + // Generate forward DFT descriptor. + set_value(*handle.first, config_values, true); try { - handle.commit(this->get_queue()); + handle.first->commit(this->get_queue()); } catch (const std::exception& mkl_exception) { // Catching the real Intel oneMKL exception causes headaches with naming. throw mkl::exception("dft/backends/mklgpu", "commit", mkl_exception.what()); } + + // Generate backward DFT descriptor only if required. + if (config_values.input_strides == config_values.output_strides) { + // Required if second != first before a recommit. + handle.second = handle.first; + } + else { + handle.second = std::make_shared(config_values.dimensions); + set_value(*handle.second, config_values, false); + try { + handle.second->commit(this->get_queue()); + } + catch (const std::exception& mkl_exception) { + // Catching the real Intel oneMKL exception causes headaches with naming. + throw mkl::exception("dft/backends/mklgpu", "commit", mkl_exception.what()); + } + } } void* get_handle() noexcept override { @@ -93,12 +125,18 @@ class mklgpu_commit final : public dft::detail::commit_impl { virtual void set_workspace(scalar_type* usm_workspace) override { this->external_workspace_helper_.set_workspace_throw(*this, usm_workspace); - handle.set_workspace(usm_workspace); + handle.first->set_workspace(usm_workspace); + if (handle.first != handle.second) { + handle.second->set_workspace(usm_workspace); + } } virtual void set_workspace(sycl::buffer& buffer_workspace) override { this->external_workspace_helper_.set_workspace_throw(*this, buffer_workspace); - handle.set_workspace(buffer_workspace); + handle.first->set_workspace(buffer_workspace); + if (handle.first != handle.second) { + handle.second->set_workspace(buffer_workspace); + } } #define BACKEND mklgpu @@ -107,9 +145,10 @@ class mklgpu_commit final : public dft::detail::commit_impl { private: // The native MKLGPU class. - mklgpu_descriptor_t handle; + handle_t handle; - void set_value(mklgpu_descriptor_t& desc, const dft::detail::dft_values& config) { + void set_value(mklgpu_descriptor_t& desc, const dft::detail::dft_values& config, + bool assume_fwd_dft) { using onemkl_param = dft::detail::config_param; using backend_param = dft::config_param; @@ -134,8 +173,22 @@ class mklgpu_commit final : public dft::detail::commit_impl { throw mkl::unimplemented("dft/backends/mklgpu", "commit", "MKLGPU does not support nonzero offsets."); } - desc.set_value(backend_param::INPUT_STRIDES, config.input_strides.data()); - desc.set_value(backend_param::OUTPUT_STRIDES, config.output_strides.data()); + if constexpr (mklgpu_use_forward_backward_strides_api) { + // Support for Intel oneMKL 2024.1 or newer using FWD/BWD stride API. + if (assume_fwd_dft) { + desc.set_value(backend_param::FWD_STRIDES, config.input_strides.data()); + desc.set_value(backend_param::BWD_STRIDES, config.output_strides.data()); + } + else { + desc.set_value(backend_param::FWD_STRIDES, config.output_strides.data()); + desc.set_value(backend_param::BWD_STRIDES, config.input_strides.data()); + } + } + else { + // Support for Intel oneMKL older than 2024.1 + desc.set_value(backend_param::INPUT_STRIDES, config.input_strides.data()); + desc.set_value(backend_param::OUTPUT_STRIDES, config.output_strides.data()); + } desc.set_value(backend_param::FWD_DISTANCE, config.fwd_dist); desc.set_value(backend_param::BWD_DISTANCE, config.bwd_dist); if (config.workspace_placement == dft::detail::config_value::WORKSPACE_EXTERNAL) { @@ -158,9 +211,10 @@ class mklgpu_commit final : public dft::detail::commit_impl { // This is called by the workspace_helper, and is not part of the user API. virtual std::int64_t get_workspace_external_bytes_impl() override { - std::size_t workspaceSize = 0; - handle.get_value(dft::config_param::WORKSPACE_BYTES, &workspaceSize); - return static_cast(workspaceSize); + std::size_t workspaceSizeFwd = 0, workspaceSizeBwd; + handle.first->get_value(dft::config_param::WORKSPACE_BYTES, &workspaceSizeFwd); + handle.second->get_value(dft::config_param::WORKSPACE_BYTES, &workspaceSizeBwd); + return static_cast(std::max(workspaceSizeFwd, workspaceSizeFwd)); } }; } // namespace detail diff --git a/src/dft/backends/mklgpu/forward.cpp b/src/dft/backends/mklgpu/forward.cpp index 2e3456848..39da42e45 100644 --- a/src/dft/backends/mklgpu/forward.cpp +++ b/src/dft/backends/mklgpu/forward.cpp @@ -48,12 +48,15 @@ namespace detail { template inline auto compute_forward(dft::detail::descriptor &desc, ArgTs &&... args) { using mklgpu_desc_t = dft::descriptor; + using desc_shptr_t = std::shared_ptr; + using handle_t = std::pair; auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklgpu) { throw mkl::invalid_argument("DFT", "compute_forward", "DFT descriptor has not been commited for MKLGPU"); } - auto mklgpu_desc = reinterpret_cast(commit_handle->get_handle()); + auto handle = reinterpret_cast(commit_handle->get_handle()); + auto mklgpu_desc = handle->first; // First because forward DFT. int commit_status{ DFTI_UNCOMMITTED }; mklgpu_desc->get_value(dft::config_param::COMMIT_STATUS, &commit_status); if (commit_status != DFTI_COMMITTED) { From 9ae3a61876b05e631ce796ce7b453e73935d4663 Mon Sep 17 00:00:00 2001 From: Hugh Bird Date: Mon, 17 Jun 2024 15:27:35 +0100 Subject: [PATCH 2/3] Initialize workspaceSizeBwd --- src/dft/backends/mklgpu/commit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dft/backends/mklgpu/commit.cpp b/src/dft/backends/mklgpu/commit.cpp index 7decf4bfb..942a0debd 100644 --- a/src/dft/backends/mklgpu/commit.cpp +++ b/src/dft/backends/mklgpu/commit.cpp @@ -211,7 +211,7 @@ class mklgpu_commit final : public dft::detail::commit_impl { // This is called by the workspace_helper, and is not part of the user API. virtual std::int64_t get_workspace_external_bytes_impl() override { - std::size_t workspaceSizeFwd = 0, workspaceSizeBwd; + std::size_t workspaceSizeFwd = 0, workspaceSizeBwd = 0; handle.first->get_value(dft::config_param::WORKSPACE_BYTES, &workspaceSizeFwd); handle.second->get_value(dft::config_param::WORKSPACE_BYTES, &workspaceSizeBwd); return static_cast(std::max(workspaceSizeFwd, workspaceSizeFwd)); From 13e283854101e7159c0fa9c9c0652a4d4f509dc2 Mon Sep 17 00:00:00 2001 From: Hugh Bird Date: Tue, 18 Jun 2024 20:05:33 +0100 Subject: [PATCH 3/3] Support 2024.1 and later only; Fix exception handling --- src/dft/backends/mklgpu/commit.cpp | 52 ++++++++++++++++-------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/src/dft/backends/mklgpu/commit.cpp b/src/dft/backends/mklgpu/commit.cpp index 942a0debd..10379a98a 100644 --- a/src/dft/backends/mklgpu/commit.cpp +++ b/src/dft/backends/mklgpu/commit.cpp @@ -39,9 +39,9 @@ // MKL 2024.1 deprecates input/output strides. #include "mkl_version.h" -namespace oneapi::mkl::dft::mklgpu::detail { -constexpr bool mklgpu_use_forward_backward_strides_api = INTEL_MKL_VERSION >= 20240001; -} +#if INTEL_MKL_VERSION < 20240001 +#error MKLGPU requires oneMKL 2024.1 or later +#endif /** Note that in this file, the Intel oneMKL closed-source library's interface mirrors the interface @@ -89,22 +89,30 @@ class mklgpu_commit final : public dft::detail::commit_impl { oneapi::mkl::dft::detail::external_workspace_helper( config_values.workspace_placement == oneapi::mkl::dft::detail::config_value::WORKSPACE_EXTERNAL); + + // A separate descriptor for each direction may not be required. + bool one_descriptor = config_values.input_strides == config_values.output_strides; + bool forward_good = true; + // Make sure that second is always pointing to something new if this is a recommit. + handle.second = handle.first; + // Generate forward DFT descriptor. set_value(*handle.first, config_values, true); try { handle.first->commit(this->get_queue()); } catch (const std::exception& mkl_exception) { - // Catching the real Intel oneMKL exception causes headaches with naming. - throw mkl::exception("dft/backends/mklgpu", "commit", mkl_exception.what()); + // Catching the real Intel oneMKL exception causes headaches with naming + forward_good = false; + if (one_descriptor) { + throw mkl::exception("dft/backends/mklgpu" + "commit", + mkl_exception.what()); + } } // Generate backward DFT descriptor only if required. - if (config_values.input_strides == config_values.output_strides) { - // Required if second != first before a recommit. - handle.second = handle.first; - } - else { + if (!one_descriptor) { handle.second = std::make_shared(config_values.dimensions); set_value(*handle.second, config_values, false); try { @@ -112,7 +120,11 @@ class mklgpu_commit final : public dft::detail::commit_impl { } catch (const std::exception& mkl_exception) { // Catching the real Intel oneMKL exception causes headaches with naming. - throw mkl::exception("dft/backends/mklgpu", "commit", mkl_exception.what()); + if (!forward_good) { + throw mkl::exception("dft/backends/mklgpu" + "commit", + mkl_exception.what()); + } } } } @@ -173,21 +185,13 @@ class mklgpu_commit final : public dft::detail::commit_impl { throw mkl::unimplemented("dft/backends/mklgpu", "commit", "MKLGPU does not support nonzero offsets."); } - if constexpr (mklgpu_use_forward_backward_strides_api) { - // Support for Intel oneMKL 2024.1 or newer using FWD/BWD stride API. - if (assume_fwd_dft) { - desc.set_value(backend_param::FWD_STRIDES, config.input_strides.data()); - desc.set_value(backend_param::BWD_STRIDES, config.output_strides.data()); - } - else { - desc.set_value(backend_param::FWD_STRIDES, config.output_strides.data()); - desc.set_value(backend_param::BWD_STRIDES, config.input_strides.data()); - } + if (assume_fwd_dft) { + desc.set_value(backend_param::FWD_STRIDES, config.input_strides.data()); + desc.set_value(backend_param::BWD_STRIDES, config.output_strides.data()); } else { - // Support for Intel oneMKL older than 2024.1 - desc.set_value(backend_param::INPUT_STRIDES, config.input_strides.data()); - desc.set_value(backend_param::OUTPUT_STRIDES, config.output_strides.data()); + desc.set_value(backend_param::FWD_STRIDES, config.output_strides.data()); + desc.set_value(backend_param::BWD_STRIDES, config.input_strides.data()); } desc.set_value(backend_param::FWD_DISTANCE, config.fwd_dist); desc.set_value(backend_param::BWD_DISTANCE, config.bwd_dist);