From 9d6e5d340de2c1729820102b93c98db0d926bc70 Mon Sep 17 00:00:00 2001 From: nscipione Date: Tue, 18 Jun 2024 10:08:31 +0100 Subject: [PATCH 1/9] Update dft examples Make CMakeLists configuration for compile-time dft example similar to other domains. Add mklcpu+cufft example. --- .../compile_time_dispatching/CMakeLists.txt | 35 ++-- .../complex_fwd_usm_mklcpu_cufft.cpp | 176 ++++++++++++++++++ .../dft/run_time_dispatching/CMakeLists.txt | 5 +- 3 files changed, 198 insertions(+), 18 deletions(-) create mode 100644 examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp diff --git a/examples/dft/compile_time_dispatching/CMakeLists.txt b/examples/dft/compile_time_dispatching/CMakeLists.txt index 0cddd8f5f..9b90ab388 100644 --- a/examples/dft/compile_time_dispatching/CMakeLists.txt +++ b/examples/dft/compile_time_dispatching/CMakeLists.txt @@ -18,31 +18,32 @@ #=============================================================================== #Build object from all sources -set(DFTI_CT_BACKENDS "") - -if(ENABLE_MKLGPU_BACKEND) - list(APPEND DFTI_CT_BACKENDS "mklgpu") -endif() - -if(ENABLE_MKLCPU_BACKEND) - list(APPEND DFTI_CT_BACKENDS "mklcpu") +set(DFTI_CT_SOURCES "") +if (ENABLE_MKLCPU_BACKEND AND ENABLE_CUFFT_BACKEND) + list(APPEND DFTI_CT_SOURCES "complex_fwd_usm_mklcpu_cufft") endif() -include(WarningsUtils) - -foreach(dfti_backend ${DFTI_CT_BACKENDS}) - set(EXAMPLE_NAME example_dft_complex_fwd_buffer_${dfti_backend}) - add_executable(${EXAMPLE_NAME} complex_fwd_buffer_${dfti_backend}.cpp) +foreach(dfti_ct_source ${DFTI_CT_SOURCES}) + set(EXAMPLE_NAME example_${domain}_${dfti_ct_source}) + add_executable(${EXAMPLE_NAME} ${dfti_ct_source}.cpp) target_include_directories(${EXAMPLE_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/examples/include PUBLIC ${PROJECT_SOURCE_DIR}/include PUBLIC ${CMAKE_BINARY_DIR}/bin ) - add_dependencies(${EXAMPLE_NAME} onemkl_dft_${dfti_backend}) - target_link_libraries(${EXAMPLE_NAME} PRIVATE ONEMKL::SYCL::SYCL onemkl_dft_${dfti_backend} onemkl_warnings) +if(domain STREQUAL "dft" AND ENABLE_MKLCPU_BACKEND AND ENABLE_CUFFT_BACKEND) + add_dependencies(${EXAMPLE_NAME} onemkl_${domain}_mklcpu onemkl_${domain}_cufft) + list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_mklcpu onemkl_${domain}_cufft) +endif() + +target_link_libraries(${EXAMPLE_NAME} PUBLIC + ${ONEMKL_LIBRARIES_${domain}} + ONEMKL::SYCL::SYCL + ) # Register example as ctest - add_test(NAME dft/EXAMPLE/CT/complex_fwd_buffer_${dfti_backend} COMMAND ${EXAMPLE_NAME}) -endforeach(dfti_backend) + add_test(NAME dft/EXAMPLE/CT/${dfti_ct_source} COMMAND ${EXAMPLE_NAME}) + +endforeach(dfti_ct_source) diff --git a/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp new file mode 100644 index 000000000..7c7b281fe --- /dev/null +++ b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp @@ -0,0 +1,176 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +// STL includes +#include + +// oneMKL/SYCL includes +#if __has_include() +#include +#else +#include +#endif +#include "oneapi/mkl.hpp" +#include + +void run_example(const sycl::device& cpu_device, const sycl::device& gpu_device) { + constexpr std::size_t N = 10; + + // Catch asynchronous exceptions for cpu + auto cpu_error_handler = [&](sycl::exception_list exceptions) { + for (auto const& e : exceptions) { + try { + std::rethrow_exception(e); + } + catch (sycl::exception const& e) { + // Handle not dft related exceptions that happened during asynchronous call + std::cerr << "Caught asynchronous SYCL exception on CPU device during execution:" + << std::endl; + std::cerr << "\t" << e.what() << std::endl; + } + } + std::exit(2); + }; + // Catch asynchronous exceptions for gpu + auto gpu_error_handler = [&](sycl::exception_list exceptions) { + for (auto const& e : exceptions) { + try { + std::rethrow_exception(e); + } + catch (sycl::exception const& e) { + // Handle not dft related exceptions that happened during asynchronous call + std::cerr << "Caught asynchronous SYCL exception on GPU device during execution:" + << std::endl; + std::cerr << "\t" << e.what() << std::endl; + } + } + std::exit(2); + }; + + // Preparation CPU device and GPU device + sycl::queue cpu_queue(cpu_device, cpu_error_handler); + sycl::queue gpu_queue(gpu_device, gpu_error_handler); + + // allocate on CPU device and GPU device + auto cpu_input_data = sycl::malloc_shared>(N, cpu_queue); + auto cpu_output_data = sycl::malloc_shared>(N, cpu_queue); + + auto gpu_input_data = sycl::malloc_shared>(N, gpu_queue); + auto gpu_output_data = sycl::malloc_shared>(N, gpu_queue); + + // Initialize input data + for (std::size_t i = 0; i < N; ++i) { + cpu_input_data[i] = { static_cast(i), static_cast(-i) }; + gpu_input_data[i] = { static_cast(i), static_cast(-i) }; + } + + // enabling + // 1. create descriptors + oneapi::mkl::dft::descriptor + desc(static_cast(N)); + + // 2. variadic set_value + desc.set_value(oneapi::mkl::dft::config_param::PLACEMENT, + oneapi::mkl::dft::config_value::NOT_INPLACE); + desc.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, + static_cast(1)); + + // 3a. commit_descriptor (compile_time MKLCPU) + desc.commit(oneapi::mkl::backend_selector{ cpu_queue }); + + // 4a. compute_forward / compute_backward (MKLCPU) + oneapi::mkl::dft::compute_forward, std::complex>( + desc, cpu_input_data, cpu_output_data); + + // 3b. commit_descriptor (compile_time cuFFT) + desc.commit(oneapi::mkl::backend_selector{ gpu_queue }); + + // 4b. compute_forward / compute_backward (cuFFT) + oneapi::mkl::dft::compute_forward, std::complex>( + desc, gpu_input_data, gpu_output_data); + + cpu_queue.wait_and_throw(); + gpu_queue.wait_and_throw(); + + sycl::free(cpu_input_data, cpu_queue); + sycl::free(gpu_input_data, gpu_queue); + sycl::free(cpu_output_data, cpu_queue); + sycl::free(gpu_output_data, gpu_queue); +} + +// +// Description of example setup, apis used and supported floating point type precisions +// +void print_example_banner() { + std::cout << "\n" + "########################################################################\n" + "# Complex out-of-place forward transform for Buffer API's example:\n" + "#\n" + "# Using APIs:\n" + "# Compile-time dispatch API\n" + "# USM forward complex out-of-place\n" + "#\n" + "# Using single precision (float) data type\n" + "#\n" + "# Running on both Intel CPU and NVIDIA GPU devices.\n" + "#\n" + "########################################################################\n" + << std::endl; +} + +// +// Main entry point for example. +// +int main(int /*argc*/, char** /*argv*/) { + print_example_banner(); + + try { + sycl::device cpu_device((sycl::cpu_selector_v)); + sycl::device gpu_device((sycl::gpu_selector_v)); + std::cout << "Running DFT Complex forward out-of-place buffer example" << std::endl; + std::cout << "Using compile-time dispatch API with MKLGPU." << std::endl; + std::cout << "Running with single precision real data type on:" << std::endl; + std::cout << "\tGPU device :" << gpu_device.get_info() + << std::endl; + + unsigned int vendor_id = gpu_device.get_info(); + if (vendor_id != NVIDIA_ID) { + std::cerr << "FAILED: NVIDIA GPU device not found" << std::endl; + return 1; + } + run_example(cpu_device, gpu_device); + std::cout << "DFT Complex USM example ran OK on MKLCPU and CUFFT" << std::endl; + } + catch (sycl::exception const& e) { + // Handle not dft related exceptions that happened during synchronous call + std::cerr << "Caught synchronous SYCL exception:" << std::endl; + std::cerr << "\t" << e.what() << std::endl; + std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; + return 1; + } + catch (std::exception const& e) { + // Handle not SYCL related exceptions that happened during synchronous call + std::cerr << "Caught synchronous std::exception:" << std::endl; + std::cerr << "\t" << e.what() << std::endl; + return 1; + } + + return 0; +} diff --git a/examples/dft/run_time_dispatching/CMakeLists.txt b/examples/dft/run_time_dispatching/CMakeLists.txt index 6d9a8dd24..e221c7950 100644 --- a/examples/dft/run_time_dispatching/CMakeLists.txt +++ b/examples/dft/run_time_dispatching/CMakeLists.txt @@ -27,13 +27,16 @@ set(DFT_RT_SOURCES "") # If users build more than one backend (i.e. mklcpu and mklgpu, or mklcpu and CUDA), they may need to # overwrite ONEAPI_DEVICE_SELECTOR in their environment to run on the desired backend set(DEVICE_FILTERS "") -if(ENABLE_MKLGPU_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_ROCFFT_BACKEND OR ENABLE_PORTFFT_BACKEND) +if(ENABLE_MKLGPU_BACKEND OR ENABLE_MKLCPU_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_ROCFFT_BACKEND OR ENABLE_PORTFFT_BACKEND) list(APPEND DFT_RT_SOURCES "real_fwd_usm") endif() if(ENABLE_MKLGPU_BACKEND) list(APPEND DEVICE_FILTERS "level_zero:gpu") endif() +if(ENABLE_MKLCPU_BACKEND) + list(APPEND DEVICE_FILTERS "opencl:cpu") +endif() if(ENABLE_PORTFFT_BACKEND) list(APPEND DEVICE_FILTERS "*:gpu") endif() From b2c6196f003b3d33bd7219a049d33cfa4dff6bc4 Mon Sep 17 00:00:00 2001 From: nscipione Date: Tue, 18 Jun 2024 10:12:12 +0100 Subject: [PATCH 2/9] Remove old examples --- .../complex_fwd_buffer_mklcpu.cpp | 132 ------------------ .../complex_fwd_buffer_mklgpu.cpp | 132 ------------------ 2 files changed, 264 deletions(-) delete mode 100644 examples/dft/compile_time_dispatching/complex_fwd_buffer_mklcpu.cpp delete mode 100644 examples/dft/compile_time_dispatching/complex_fwd_buffer_mklgpu.cpp diff --git a/examples/dft/compile_time_dispatching/complex_fwd_buffer_mklcpu.cpp b/examples/dft/compile_time_dispatching/complex_fwd_buffer_mklcpu.cpp deleted file mode 100644 index cb6e85ffa..000000000 --- a/examples/dft/compile_time_dispatching/complex_fwd_buffer_mklcpu.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -// STL includes -#include - -// oneMKL/SYCL includes -#if __has_include() -#include -#else -#include -#endif -#include "oneapi/mkl.hpp" - -void run_example(const sycl::device& cpu_device) { - constexpr int N = 10; - - // Catch asynchronous exceptions for cpu - auto cpu_error_handler = [&](sycl::exception_list exceptions) { - for (auto const& e : exceptions) { - try { - std::rethrow_exception(e); - } - catch (sycl::exception const& e) { - // Handle not dft related exceptions that happened during asynchronous call - std::cerr << "Caught asynchronous SYCL exception:" << std::endl; - std::cerr << "\t" << e.what() << std::endl; - } - } - std::exit(2); - }; - - sycl::queue cpu_queue(cpu_device, cpu_error_handler); - - std::vector> input_data(N); - std::vector> output_data(N); - - // enabling - // 1. create descriptors - oneapi::mkl::dft::descriptor - desc(N); - - // 2. variadic set_value - desc.set_value(oneapi::mkl::dft::config_param::PLACEMENT, - oneapi::mkl::dft::config_value::NOT_INPLACE); - desc.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, - static_cast(1)); - - // 3. commit_descriptor (compile_time MKLCPU) - desc.commit(oneapi::mkl::backend_selector{ cpu_queue }); - - // 4. compute_forward / compute_backward (MKLCPU) - { - sycl::buffer> input_buffer(input_data.data(), sycl::range<1>(N)); - sycl::buffer> output_buffer(output_data.data(), sycl::range<1>(N)); - oneapi::mkl::dft::compute_forward, - std::complex>(desc, input_buffer, output_buffer); - } -} - -// -// Description of example setup, apis used and supported floating point type precisions -// -void print_example_banner() { - std::cout << "\n" - "########################################################################\n" - "# Complex out-of-place forward transform for Buffer API's example:\n" - "#\n" - "# Using APIs:\n" - "# Compile-time dispatch API\n" - "# Buffer forward complex out-of-place\n" - "#\n" - "# Using double precision (double) data type\n" - "#\n" - "# For Intel CPU with Intel MKLCPU backend.\n" - "#\n" - "# The environment variable ONEAPI_DEVICE_SELECTOR can be used to specify\n" - "# available devices\n" - "########################################################################\n" - << std::endl; -} - -// -// Main entry point for example. -// -int main() { - print_example_banner(); - - try { - sycl::device cpu_device((sycl::cpu_selector_v)); - std::cout << "Running DFT Complex forward out-of-place buffer example" << std::endl; - std::cout << "Using compile-time dispatch API with MKLCPU." << std::endl; - std::cout << "Running with double precision real data type on:" << std::endl; - std::cout << "\tCPU device :" << cpu_device.get_info() - << std::endl; - - run_example(cpu_device); - std::cout << "DFT Complex USM example ran OK on MKLCPU" << std::endl; - } - catch (sycl::exception const& e) { - // Handle not dft related exceptions that happened during synchronous call - std::cerr << "Caught synchronous SYCL exception:" << std::endl; - std::cerr << "\t" << e.what() << std::endl; - std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; - return 1; - } - catch (std::exception const& e) { - // Handle not SYCL related exceptions that happened during synchronous call - std::cerr << "Caught synchronous std::exception:" << std::endl; - std::cerr << "\t" << e.what() << std::endl; - return 1; - } - - return 0; -} diff --git a/examples/dft/compile_time_dispatching/complex_fwd_buffer_mklgpu.cpp b/examples/dft/compile_time_dispatching/complex_fwd_buffer_mklgpu.cpp deleted file mode 100644 index 4c243569b..000000000 --- a/examples/dft/compile_time_dispatching/complex_fwd_buffer_mklgpu.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -// STL includes -#include - -// oneMKL/SYCL includes -#if __has_include() -#include -#else -#include -#endif -#include "oneapi/mkl.hpp" - -void run_example(const sycl::device& gpu_device) { - constexpr std::size_t N = 10; - - // Catch asynchronous exceptions for gpu - auto gpu_error_handler = [&](sycl::exception_list exceptions) { - for (auto const& e : exceptions) { - try { - std::rethrow_exception(e); - } - catch (sycl::exception const& e) { - // Handle not dft related exceptions that happened during asynchronous call - std::cerr << "Caught asynchronous SYCL exception:" << std::endl; - std::cerr << "\t" << e.what() << std::endl; - } - } - std::exit(2); - }; - - sycl::queue gpu_queue(gpu_device, gpu_error_handler); - - std::vector> input_data(N); - std::vector> output_data(N); - - // enabling - // 1. create descriptors - oneapi::mkl::dft::descriptor - desc(static_cast(N)); - - // 2. variadic set_value - desc.set_value(oneapi::mkl::dft::config_param::PLACEMENT, - oneapi::mkl::dft::config_value::NOT_INPLACE); - desc.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, - static_cast(1)); - - // 3. commit_descriptor (compile_time MKLGPU) - desc.commit(oneapi::mkl::backend_selector{ gpu_queue }); - - // 4. compute_forward / compute_backward (MKLGPU) - { - sycl::buffer> input_buffer(input_data.data(), sycl::range<1>(N)); - sycl::buffer> output_buffer(output_data.data(), sycl::range<1>(N)); - oneapi::mkl::dft::compute_forward, std::complex>( - desc, input_buffer, output_buffer); - } -} - -// -// Description of example setup, apis used and supported floating point type precisions -// -void print_example_banner() { - std::cout << "\n" - "########################################################################\n" - "# Complex out-of-place forward transform for Buffer API's example:\n" - "#\n" - "# Using APIs:\n" - "# Compile-time dispatch API\n" - "# Buffer forward complex out-of-place\n" - "#\n" - "# Using single precision (float) data type\n" - "#\n" - "# For Intel GPU with Intel MKLGPU backend.\n" - "#\n" - "# The environment variable ONEAPI_DEVICE_SELECTOR can be used to specify\n" - "# available devices\n" - "########################################################################\n" - << std::endl; -} - -// -// Main entry point for example. -// -int main(int /*argc*/, char** /*argv*/) { - print_example_banner(); - - try { - sycl::device gpu_device((sycl::gpu_selector_v)); - std::cout << "Running DFT Complex forward out-of-place buffer example" << std::endl; - std::cout << "Using compile-time dispatch API with MKLGPU." << std::endl; - std::cout << "Running with single precision real data type on:" << std::endl; - std::cout << "\tGPU device :" << gpu_device.get_info() - << std::endl; - - run_example(gpu_device); - std::cout << "DFT Complex USM example ran OK on MKLGPU" << std::endl; - } - catch (sycl::exception const& e) { - // Handle not dft related exceptions that happened during synchronous call - std::cerr << "Caught synchronous SYCL exception:" << std::endl; - std::cerr << "\t" << e.what() << std::endl; - std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; - return 1; - } - catch (std::exception const& e) { - // Handle not SYCL related exceptions that happened during synchronous call - std::cerr << "Caught synchronous std::exception:" << std::endl; - std::cerr << "\t" << e.what() << std::endl; - return 1; - } - - return 0; -} From 5a049c7572284cd8262f176b59f6514432e448ef Mon Sep 17 00:00:00 2001 From: nscipione Date: Tue, 18 Jun 2024 10:13:23 +0100 Subject: [PATCH 3/9] Update Readme and copyright for new example Update example README and fixing copyright year of new dft example mklcpu+cufft. --- examples/README.md | 3 +-- .../compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/README.md b/examples/README.md index 9904a78f2..0dad8772d 100644 --- a/examples/README.md +++ b/examples/README.md @@ -3,7 +3,7 @@ oneAPI Math Kernel Library (oneMKL) Interfaces offers examples with the followin - blas: level3/gemm_usm - rng: uniform_usm - lapack: getrs_usm -- dft: complex_fwd_buffer, real_fwd_usm +- dft: complex_fwd_usm, real_fwd_usm - sparse_blas: sparse_gemv_usm Each routine has one run-time dispatching example and one compile-time dispatching example (which uses both mklcpu and cuda backends), located in `example/<$domain>/run_time_dispatching` and `example/<$domain>/compile_time_dispatching` subfolders, respectively. @@ -11,7 +11,6 @@ Each routine has one run-time dispatching example and one compile-time dispatchi To build examples, use cmake build option `-DBUILD_EXAMPLES=true`. Compile_time_dispatching will be built if `-DBUILD_EXAMPLES=true` and cuda backend is enabled, because the compile-time dispatching example runs on both mklcpu and cuda backends. Run_time_dispatching will be built if `-DBUILD_EXAMPLES=true` and `-DBUILD_SHARED_LIBS=true`. -All DFT examples require the mklgpu backend to be enabled. The example executable naming convention follows `example_<$domain>_<$routine>_<$backend>` for compile-time dispatching examples or `example_<$domain>_<$routine>` for run-time dispatching examples. diff --git a/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp index 7c7b281fe..431034b61 100644 --- a/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp +++ b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright 2024 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From dd00e601bece1b8b2b78cd7498799af5c795da4d Mon Sep 17 00:00:00 2001 From: nscipione Date: Tue, 18 Jun 2024 10:26:22 +0100 Subject: [PATCH 4/9] Update example log message --- .../complex_fwd_usm_mklcpu_cufft.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp index 431034b61..467f5a608 100644 --- a/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp +++ b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp @@ -121,7 +121,7 @@ void run_example(const sycl::device& cpu_device, const sycl::device& gpu_device) void print_example_banner() { std::cout << "\n" "########################################################################\n" - "# Complex out-of-place forward transform for Buffer API's example:\n" + "# Complex out-of-place forward transform for USM API's example:\n" "#\n" "# Using APIs:\n" "# Compile-time dispatch API\n" @@ -144,17 +144,20 @@ int main(int /*argc*/, char** /*argv*/) { try { sycl::device cpu_device((sycl::cpu_selector_v)); sycl::device gpu_device((sycl::gpu_selector_v)); - std::cout << "Running DFT Complex forward out-of-place buffer example" << std::endl; - std::cout << "Using compile-time dispatch API with MKLGPU." << std::endl; - std::cout << "Running with single precision real data type on:" << std::endl; - std::cout << "\tGPU device :" << gpu_device.get_info() - << std::endl; unsigned int vendor_id = gpu_device.get_info(); if (vendor_id != NVIDIA_ID) { std::cerr << "FAILED: NVIDIA GPU device not found" << std::endl; return 1; } + + std::cout << "Running DFT Complex forward out-of-place usm example" << std::endl; + std::cout << "Using compile-time dispatch API with MKLCPU and cuFFT." << std::endl; + std::cout << "Running with single precision real data type on:" << std::endl; + std::cout << "\tCPU device: " << cpu_device.get_info() << std::endl; + std::cout << "\tGPU device :" << gpu_device.get_info() + << std::endl; + run_example(cpu_device, gpu_device); std::cout << "DFT Complex USM example ran OK on MKLCPU and CUFFT" << std::endl; } From 57c6d5fbfbb3ff1eab2d8620330329fa12d325fa Mon Sep 17 00:00:00 2001 From: nscipione Date: Tue, 18 Jun 2024 17:17:58 +0100 Subject: [PATCH 5/9] Formatting --- .../compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp index 467f5a608..59c810f3f 100644 --- a/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp +++ b/examples/dft/compile_time_dispatching/complex_fwd_usm_mklcpu_cufft.cpp @@ -154,7 +154,8 @@ int main(int /*argc*/, char** /*argv*/) { std::cout << "Running DFT Complex forward out-of-place usm example" << std::endl; std::cout << "Using compile-time dispatch API with MKLCPU and cuFFT." << std::endl; std::cout << "Running with single precision real data type on:" << std::endl; - std::cout << "\tCPU device: " << cpu_device.get_info() << std::endl; + std::cout << "\tCPU device: " << cpu_device.get_info() + << std::endl; std::cout << "\tGPU device :" << gpu_device.get_info() << std::endl; From a29940027f79e91e26ae62c6478c96c91925bdfc Mon Sep 17 00:00:00 2001 From: nscipione Date: Tue, 18 Jun 2024 18:37:34 +0100 Subject: [PATCH 6/9] Rename variable --- .../dft/compile_time_dispatching/CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/dft/compile_time_dispatching/CMakeLists.txt b/examples/dft/compile_time_dispatching/CMakeLists.txt index 9b90ab388..664373ae0 100644 --- a/examples/dft/compile_time_dispatching/CMakeLists.txt +++ b/examples/dft/compile_time_dispatching/CMakeLists.txt @@ -18,14 +18,14 @@ #=============================================================================== #Build object from all sources -set(DFTI_CT_SOURCES "") +set(DFT_CT_SOURCES "") if (ENABLE_MKLCPU_BACKEND AND ENABLE_CUFFT_BACKEND) - list(APPEND DFTI_CT_SOURCES "complex_fwd_usm_mklcpu_cufft") + list(APPEND DFT_CT_SOURCES "complex_fwd_usm_mklcpu_cufft") endif() -foreach(dfti_ct_source ${DFTI_CT_SOURCES}) - set(EXAMPLE_NAME example_${domain}_${dfti_ct_source}) - add_executable(${EXAMPLE_NAME} ${dfti_ct_source}.cpp) +foreach(dft_ct_source ${DFT_CT_SOURCES}) + set(EXAMPLE_NAME example_${domain}_${dft_ct_source}) + add_executable(${EXAMPLE_NAME} ${dft_ct_source}.cpp) target_include_directories(${EXAMPLE_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/examples/include PUBLIC ${PROJECT_SOURCE_DIR}/include @@ -43,7 +43,7 @@ target_link_libraries(${EXAMPLE_NAME} PUBLIC ) # Register example as ctest - add_test(NAME dft/EXAMPLE/CT/${dfti_ct_source} COMMAND ${EXAMPLE_NAME}) + add_test(NAME dft/EXAMPLE/CT/${dft_ct_source} COMMAND ${EXAMPLE_NAME}) -endforeach(dfti_ct_source) +endforeach(dft_ct_source) From 29fb08c6aae3b7420202c0a644547274779d0a47 Mon Sep 17 00:00:00 2001 From: nscipione Date: Wed, 19 Jun 2024 08:50:19 +0100 Subject: [PATCH 7/9] Update README Update example output for compile-time and run-time example --- examples/README.md | 51 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/examples/README.md b/examples/README.md index 0dad8772d..7e0596e3b 100644 --- a/examples/README.md +++ b/examples/README.md @@ -353,34 +353,33 @@ Random number generator example with uniform distribution ran OK on MKLCPU and C ## dft -Compile-time dispatching example with MKLGPU backend +Compile-time dispatching example with both mklcpu and cufft backend ```none -$ ONEAPI_DEVICE_SELECTOR="level_zero:gpu" ./bin/example_dft_complex_fwd_buffer_mklgpu +$./bin/example_dft_complex_fwd_usm_mklcpu_cufft ######################################################################## -# Complex out-of-place forward transform for Buffer API's example: +# Complex out-of-place forward transform for USM API's example: # # Using APIs: # Compile-time dispatch API -# Buffer forward complex out-of-place +# USM forward complex out-of-place # # Using single precision (float) data type # -# For Intel GPU with Intel MKLGPU backend. +# Running on both Intel CPU and NVIDIA GPU devices. # -# The environment variable ONEAPI_DEVICE_SELECTOR can be used to specify -# available devices ######################################################################## -Running DFT Complex forward out-of-place buffer example -Using compile-time dispatch API with MKLGPU. +Running DFT Complex forward out-of-place usm example +Using compile-time dispatch API with MKLCPU and cuFFT. Running with single precision real data type on: - GPU device :Intel(R) UHD Graphics 750 [0x4c8a] -DFT Complex USM example ran OK on MKLGPU + CPU device: 12th Gen Intel(R) Core(TM) i9-12900K + GPU device :NVIDIA RTX A4000 +DFT Complex USM example ran OK on MKLCPU and CUFFT ``` -Runtime dispatching example with MKLGPU, cuFFT, rocFFT and portFFT backends: +Runtime dispatching example with MKLGPU, MKLCPU, cuFFT, rocFFT and portFFT backends: ```none $ ONEAPI_DEVICE_SELECTOR="level_zero:gpu" ./bin/example_dft_real_fwd_usm @@ -408,7 +407,33 @@ DFT example ran OK ``` ```none -$ ONEAPI_DEVICE_SELECTOR="level_zero:gpu" ./bin/example_dft_real_fwd_usm +$ ONEAPI_DEVICE_SELECTOR="opencl:cpu" ./bin/example_dft_real_fwd_usm + +######################################################################## +# DFT complex in-place forward transform with USM API example: +# +# Using APIs: +# USM forward complex in-place +# Run-time dispatch +# +# Using single precision (float) data type +# +# Device will be selected during runtime. +# The environment variable ONEAPI_DEVICE_SELECTOR can be used to specify +# available devices +# +######################################################################## + +Running DFT complex forward example on CPU device +Device name is: 12th Gen Intel(R) Core(TM) i9-12900K +Running with single precision real data type: +DFT example run_time dispatch +DFT example ran OK + +``` + +```none +$ ONEAPI_DEVICE_SELECTOR="cuda:gpu" ./bin/example_dft_real_fwd_usm ######################################################################## # DFT complex in-place forward transform with USM API example: From cb9bec15d00d820ddff3ffc9b009123e7014c83e Mon Sep 17 00:00:00 2001 From: nscipione Date: Fri, 21 Jun 2024 07:48:34 +0100 Subject: [PATCH 8/9] Revert "Update README" This reverts commit 5015d2bed4d18147fa6e6e1c55ea7c53966d1c0e. --- examples/README.md | 51 ++++++++++++---------------------------------- 1 file changed, 13 insertions(+), 38 deletions(-) diff --git a/examples/README.md b/examples/README.md index 7e0596e3b..0dad8772d 100644 --- a/examples/README.md +++ b/examples/README.md @@ -353,33 +353,34 @@ Random number generator example with uniform distribution ran OK on MKLCPU and C ## dft -Compile-time dispatching example with both mklcpu and cufft backend +Compile-time dispatching example with MKLGPU backend ```none -$./bin/example_dft_complex_fwd_usm_mklcpu_cufft +$ ONEAPI_DEVICE_SELECTOR="level_zero:gpu" ./bin/example_dft_complex_fwd_buffer_mklgpu ######################################################################## -# Complex out-of-place forward transform for USM API's example: +# Complex out-of-place forward transform for Buffer API's example: # # Using APIs: # Compile-time dispatch API -# USM forward complex out-of-place +# Buffer forward complex out-of-place # # Using single precision (float) data type # -# Running on both Intel CPU and NVIDIA GPU devices. +# For Intel GPU with Intel MKLGPU backend. # +# The environment variable ONEAPI_DEVICE_SELECTOR can be used to specify +# available devices ######################################################################## -Running DFT Complex forward out-of-place usm example -Using compile-time dispatch API with MKLCPU and cuFFT. +Running DFT Complex forward out-of-place buffer example +Using compile-time dispatch API with MKLGPU. Running with single precision real data type on: - CPU device: 12th Gen Intel(R) Core(TM) i9-12900K - GPU device :NVIDIA RTX A4000 -DFT Complex USM example ran OK on MKLCPU and CUFFT + GPU device :Intel(R) UHD Graphics 750 [0x4c8a] +DFT Complex USM example ran OK on MKLGPU ``` -Runtime dispatching example with MKLGPU, MKLCPU, cuFFT, rocFFT and portFFT backends: +Runtime dispatching example with MKLGPU, cuFFT, rocFFT and portFFT backends: ```none $ ONEAPI_DEVICE_SELECTOR="level_zero:gpu" ./bin/example_dft_real_fwd_usm @@ -407,33 +408,7 @@ DFT example ran OK ``` ```none -$ ONEAPI_DEVICE_SELECTOR="opencl:cpu" ./bin/example_dft_real_fwd_usm - -######################################################################## -# DFT complex in-place forward transform with USM API example: -# -# Using APIs: -# USM forward complex in-place -# Run-time dispatch -# -# Using single precision (float) data type -# -# Device will be selected during runtime. -# The environment variable ONEAPI_DEVICE_SELECTOR can be used to specify -# available devices -# -######################################################################## - -Running DFT complex forward example on CPU device -Device name is: 12th Gen Intel(R) Core(TM) i9-12900K -Running with single precision real data type: -DFT example run_time dispatch -DFT example ran OK - -``` - -```none -$ ONEAPI_DEVICE_SELECTOR="cuda:gpu" ./bin/example_dft_real_fwd_usm +$ ONEAPI_DEVICE_SELECTOR="level_zero:gpu" ./bin/example_dft_real_fwd_usm ######################################################################## # DFT complex in-place forward transform with USM API example: From afb8a79468d0e526b5b439447a98bc37b719dc80 Mon Sep 17 00:00:00 2001 From: nscipione Date: Fri, 21 Jun 2024 07:52:13 +0100 Subject: [PATCH 9/9] Adding back mkl warnings in compile-time example CMakeLists --- examples/dft/compile_time_dispatching/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/dft/compile_time_dispatching/CMakeLists.txt b/examples/dft/compile_time_dispatching/CMakeLists.txt index 664373ae0..704964af7 100644 --- a/examples/dft/compile_time_dispatching/CMakeLists.txt +++ b/examples/dft/compile_time_dispatching/CMakeLists.txt @@ -23,6 +23,8 @@ if (ENABLE_MKLCPU_BACKEND AND ENABLE_CUFFT_BACKEND) list(APPEND DFT_CT_SOURCES "complex_fwd_usm_mklcpu_cufft") endif() +include(WarningsUtils) + foreach(dft_ct_source ${DFT_CT_SOURCES}) set(EXAMPLE_NAME example_${domain}_${dft_ct_source}) add_executable(${EXAMPLE_NAME} ${dft_ct_source}.cpp) @@ -40,6 +42,7 @@ endif() target_link_libraries(${EXAMPLE_NAME} PUBLIC ${ONEMKL_LIBRARIES_${domain}} ONEMKL::SYCL::SYCL + onemkl_warnings ) # Register example as ctest