Skip to content

Commit

Permalink
[DFT][Examples] Make the oneMKL DFT example consistent with the other…
Browse files Browse the repository at this point in the history
… domains (#518)
  • Loading branch information
s-Nick committed Jun 28, 2024
1 parent a319ba0 commit f322f72
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 173 deletions.
3 changes: 1 addition & 2 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@ oneAPI Math Kernel Library (oneMKL) Interfaces offers examples with the followin
- blas: level3/gemm_usm
- rng: uniform_usm
- lapack: getrs_usm
- dft: complex_fwd_buffer, real_fwd_usm
- dft: complex_fwd_usm, real_fwd_usm
- sparse_blas: sparse_gemv_usm

Each routine has one run-time dispatching example and one compile-time dispatching example (which uses both mklcpu and cuda backends), located in the `examples/<$domain>/run_time_dispatching` and `examples/<$domain>/compile_time_dispatching` subfolders, respectively.

To build examples, use cmake build option `-DBUILD_EXAMPLES=true`.
Compile_time_dispatching will be built if `-DBUILD_EXAMPLES=true` and cuda backend is enabled, because the compile-time dispatching example runs on both mklcpu and cuda backends.
Run_time_dispatching will be built if `-DBUILD_EXAMPLES=true` and `-DBUILD_SHARED_LIBS=true`.
All DFT examples require the mklgpu backend to be enabled.

The example executable naming convention follows `example_<$domain>_<$routine>_<$backend>` for compile-time dispatching examples
or `example_<$domain>_<$routine>` for run-time dispatching examples.
Expand Down
34 changes: 19 additions & 15 deletions examples/dft/compile_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,31 +18,35 @@
#===============================================================================

#Build object from all sources
set(DFTI_CT_BACKENDS "")

if(ENABLE_MKLGPU_BACKEND)
list(APPEND DFTI_CT_BACKENDS "mklgpu")
endif()

if(ENABLE_MKLCPU_BACKEND)
list(APPEND DFTI_CT_BACKENDS "mklcpu")
set(DFT_CT_SOURCES "")
if (ENABLE_MKLCPU_BACKEND AND ENABLE_CUFFT_BACKEND)
list(APPEND DFT_CT_SOURCES "complex_fwd_usm_mklcpu_cufft")
endif()

include(WarningsUtils)

foreach(dfti_backend ${DFTI_CT_BACKENDS})
set(EXAMPLE_NAME example_dft_complex_fwd_buffer_${dfti_backend})
add_executable(${EXAMPLE_NAME} complex_fwd_buffer_${dfti_backend}.cpp)
foreach(dft_ct_source ${DFT_CT_SOURCES})
set(EXAMPLE_NAME example_${domain}_${dft_ct_source})
add_executable(${EXAMPLE_NAME} ${dft_ct_source}.cpp)
target_include_directories(${EXAMPLE_NAME}
PUBLIC ${PROJECT_SOURCE_DIR}/examples/include
PUBLIC ${PROJECT_SOURCE_DIR}/include
PUBLIC ${CMAKE_BINARY_DIR}/bin
)

add_dependencies(${EXAMPLE_NAME} onemkl_dft_${dfti_backend})
target_link_libraries(${EXAMPLE_NAME} PRIVATE ONEMKL::SYCL::SYCL onemkl_dft_${dfti_backend} onemkl_warnings)
if(domain STREQUAL "dft" AND ENABLE_MKLCPU_BACKEND AND ENABLE_CUFFT_BACKEND)
add_dependencies(${EXAMPLE_NAME} onemkl_${domain}_mklcpu onemkl_${domain}_cufft)
list(APPEND ONEMKL_LIBRARIES_${domain} onemkl_${domain}_mklcpu onemkl_${domain}_cufft)
endif()

target_link_libraries(${EXAMPLE_NAME} PUBLIC
${ONEMKL_LIBRARIES_${domain}}
ONEMKL::SYCL::SYCL
onemkl_warnings
)

# Register example as ctest
add_test(NAME dft/EXAMPLE/CT/complex_fwd_buffer_${dfti_backend} COMMAND ${EXAMPLE_NAME})
endforeach(dfti_backend)
add_test(NAME dft/EXAMPLE/CT/${dft_ct_source} COMMAND ${EXAMPLE_NAME})

endforeach(dft_ct_source)

132 changes: 0 additions & 132 deletions examples/dft/compile_time_dispatching/complex_fwd_buffer_mklcpu.cpp

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2023 Intel Corporation
* Copyright 2024 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,10 +27,26 @@
#include <CL/sycl.hpp>
#endif
#include "oneapi/mkl.hpp"
#include <complex>

void run_example(const sycl::device& gpu_device) {
void run_example(const sycl::device& cpu_device, const sycl::device& gpu_device) {
constexpr std::size_t N = 10;

// Catch asynchronous exceptions for cpu
auto cpu_error_handler = [&](sycl::exception_list exceptions) {
for (auto const& e : exceptions) {
try {
std::rethrow_exception(e);
}
catch (sycl::exception const& e) {
// Handle not dft related exceptions that happened during asynchronous call
std::cerr << "Caught asynchronous SYCL exception on CPU device during execution:"
<< std::endl;
std::cerr << "\t" << e.what() << std::endl;
}
}
std::exit(2);
};
// Catch asynchronous exceptions for gpu
auto gpu_error_handler = [&](sycl::exception_list exceptions) {
for (auto const& e : exceptions) {
Expand All @@ -39,17 +55,30 @@ void run_example(const sycl::device& gpu_device) {
}
catch (sycl::exception const& e) {
// Handle not dft related exceptions that happened during asynchronous call
std::cerr << "Caught asynchronous SYCL exception:" << std::endl;
std::cerr << "Caught asynchronous SYCL exception on GPU device during execution:"
<< std::endl;
std::cerr << "\t" << e.what() << std::endl;
}
}
std::exit(2);
};

// Preparation CPU device and GPU device
sycl::queue cpu_queue(cpu_device, cpu_error_handler);
sycl::queue gpu_queue(gpu_device, gpu_error_handler);

std::vector<std::complex<float>> input_data(N);
std::vector<std::complex<float>> output_data(N);
// allocate on CPU device and GPU device
auto cpu_input_data = sycl::malloc_shared<std::complex<float>>(N, cpu_queue);
auto cpu_output_data = sycl::malloc_shared<std::complex<float>>(N, cpu_queue);

auto gpu_input_data = sycl::malloc_shared<std::complex<float>>(N, gpu_queue);
auto gpu_output_data = sycl::malloc_shared<std::complex<float>>(N, gpu_queue);

// Initialize input data
for (std::size_t i = 0; i < N; ++i) {
cpu_input_data[i] = { static_cast<float>(i), static_cast<float>(-i) };
gpu_input_data[i] = { static_cast<float>(i), static_cast<float>(-i) };
}

// enabling
// 1. create descriptors
Expand All @@ -63,16 +92,27 @@ void run_example(const sycl::device& gpu_device) {
desc.set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS,
static_cast<std::int64_t>(1));

// 3. commit_descriptor (compile_time MKLGPU)
desc.commit(oneapi::mkl::backend_selector<oneapi::mkl::backend::mklgpu>{ gpu_queue });
// 3a. commit_descriptor (compile_time MKLCPU)
desc.commit(oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu>{ cpu_queue });

// 4. compute_forward / compute_backward (MKLGPU)
{
sycl::buffer<std::complex<float>> input_buffer(input_data.data(), sycl::range<1>(N));
sycl::buffer<std::complex<float>> output_buffer(output_data.data(), sycl::range<1>(N));
oneapi::mkl::dft::compute_forward<decltype(desc), std::complex<float>, std::complex<float>>(
desc, input_buffer, output_buffer);
}
// 4a. compute_forward / compute_backward (MKLCPU)
oneapi::mkl::dft::compute_forward<decltype(desc), std::complex<float>, std::complex<float>>(
desc, cpu_input_data, cpu_output_data);

// 3b. commit_descriptor (compile_time cuFFT)
desc.commit(oneapi::mkl::backend_selector<oneapi::mkl::backend::cufft>{ gpu_queue });

// 4b. compute_forward / compute_backward (cuFFT)
oneapi::mkl::dft::compute_forward<decltype(desc), std::complex<float>, std::complex<float>>(
desc, gpu_input_data, gpu_output_data);

cpu_queue.wait_and_throw();
gpu_queue.wait_and_throw();

sycl::free(cpu_input_data, cpu_queue);
sycl::free(gpu_input_data, gpu_queue);
sycl::free(cpu_output_data, cpu_queue);
sycl::free(gpu_output_data, gpu_queue);
}

//
Expand All @@ -81,18 +121,16 @@ void run_example(const sycl::device& gpu_device) {
// Prints a fixed, human-readable banner describing this example to std::cout:
// the transform kind, the dispatch API in use, the data precision and the
// targeted devices. Takes no arguments and returns nothing.
//
// NOTE(review): this listing contains both "Buffer"/MKLGPU wording and
// "USM"/CPU+NVIDIA wording for the same banner rows — it looks like pre- and
// post-change lines are shown together; confirm which set is intended.
void print_example_banner() {
// Adjacent string literals are concatenated into a single message.
std::cout << "\n"
"########################################################################\n"
"# Complex out-of-place forward transform for Buffer API's example:\n"
"# Complex out-of-place forward transform for USM API's example:\n"
"#\n"
"# Using APIs:\n"
"# Compile-time dispatch API\n"
"# Buffer forward complex out-of-place\n"
"# USM forward complex out-of-place\n"
"#\n"
"# Using single precision (float) data type\n"
"#\n"
"# For Intel GPU with Intel MKLGPU backend.\n"
"# Running on both Intel CPU and NVIDIA GPU devices.\n"
"#\n"
"# The environment variable ONEAPI_DEVICE_SELECTOR can be used to specify\n"
"# available devices\n"
"########################################################################\n"
// std::endl appends a final newline and flushes the stream.
<< std::endl;
}
Expand All @@ -104,15 +142,25 @@ int main(int /*argc*/, char** /*argv*/) {
print_example_banner();

try {
sycl::device cpu_device((sycl::cpu_selector_v));
sycl::device gpu_device((sycl::gpu_selector_v));
std::cout << "Running DFT Complex forward out-of-place buffer example" << std::endl;
std::cout << "Using compile-time dispatch API with MKLGPU." << std::endl;

unsigned int vendor_id = gpu_device.get_info<sycl::info::device::vendor_id>();
if (vendor_id != NVIDIA_ID) {
std::cerr << "FAILED: NVIDIA GPU device not found" << std::endl;
return 1;
}

std::cout << "Running DFT Complex forward out-of-place usm example" << std::endl;
std::cout << "Using compile-time dispatch API with MKLCPU and cuFFT." << std::endl;
std::cout << "Running with single precision real data type on:" << std::endl;
std::cout << "\tCPU device: " << cpu_device.get_info<sycl::info::device::name>()
<< std::endl;
std::cout << "\tGPU device :" << gpu_device.get_info<sycl::info::device::name>()
<< std::endl;

run_example(gpu_device);
std::cout << "DFT Complex USM example ran OK on MKLGPU" << std::endl;
run_example(cpu_device, gpu_device);
std::cout << "DFT Complex USM example ran OK on MKLCPU and CUFFT" << std::endl;
}
catch (sycl::exception const& e) {
// Handle not dft related exceptions that happened during synchronous call
Expand Down
5 changes: 4 additions & 1 deletion examples/dft/run_time_dispatching/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,16 @@ set(DFT_RT_SOURCES "")
# If users build more than one backend (e.g. mklcpu and mklgpu, or mklcpu and CUDA), they may need to
# override ONEAPI_DEVICE_SELECTOR in their environment to run on the desired backend
set(DEVICE_FILTERS "")
if(ENABLE_MKLGPU_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_ROCFFT_BACKEND OR ENABLE_PORTFFT_BACKEND)
if(ENABLE_MKLGPU_BACKEND OR ENABLE_MKLCPU_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_ROCFFT_BACKEND OR ENABLE_PORTFFT_BACKEND)
list(APPEND DFT_RT_SOURCES "real_fwd_usm")
endif()

if(ENABLE_MKLGPU_BACKEND)
list(APPEND DEVICE_FILTERS "level_zero:gpu")
endif()
if(ENABLE_MKLCPU_BACKEND)
list(APPEND DEVICE_FILTERS "opencl:cpu")
endif()
if(ENABLE_PORTFFT_BACKEND)
list(APPEND DEVICE_FILTERS "*:gpu")
endif()
Expand Down

0 comments on commit f322f72

Please sign in to comment.