From 1f53e3e0c775b28a78c8140dc3baadd7ab33affa Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 7 Jun 2024 08:24:27 +0100 Subject: [PATCH 01/39] [SPARSE] Update oneMKL backends to match new sparse API --- docs/domains/sparse_linear_algebra.rst | 31 ++ docs/index.rst | 1 + .../compile_time_dispatching/CMakeLists.txt | 6 +- ...pu.cpp => sparse_blas_spmv_usm_mklcpu.cpp} | 83 ++-- .../run_time_dispatching/CMakeLists.txt | 2 +- ..._gemv_usm.cpp => sparse_blas_spmv_usm.cpp} | 84 ++-- .../oneapi/mkl/sparse_blas/detail/handles.hpp | 38 ++ .../mkl/sparse_blas/detail/helper_types.hpp | 16 +- .../mklcpu/onemkl_sparse_blas_mklcpu.hpp | 1 + .../detail/mklcpu/sparse_blas_ct.hpp | 1 - .../mklgpu/onemkl_sparse_blas_mklgpu.hpp | 1 + .../detail/mklgpu/sparse_blas_ct.hpp | 1 - .../detail/onemkl_sparse_blas_backends.hxx | 240 +++++++--- .../sparse_blas/detail/operation_types.hpp | 38 ++ .../mkl/sparse_blas/detail/sparse_blas_ct.hxx | 375 ++++++++++++---- .../mkl/sparse_blas/detail/sparse_blas_rt.hpp | 223 +++++++--- .../oneapi/mkl/sparse_blas/matrix_view.hpp | 51 +++ include/oneapi/mkl/sparse_blas/types.hpp | 48 +- src/sparse_blas/CMakeLists.txt | 2 +- src/sparse_blas/backends/CMakeLists.txt | 2 +- src/sparse_blas/backends/backend_wrappers.cxx | 150 ++++--- .../backends/mkl_common/mkl_basic.cxx | 62 --- .../backends/mkl_common/mkl_handles.cxx | 386 ++++++++++++++++ .../backends/mkl_common/mkl_handles.hpp | 76 ++++ .../backends/mkl_common/mkl_helper.hpp | 123 ++++-- .../backends/mkl_common/mkl_operations.cxx | 170 ------- .../backends/mkl_common/mkl_spmm.cxx | 168 +++++++ .../backends/mkl_common/mkl_spmv.cxx | 197 +++++++++ .../backends/mkl_common/mkl_spsv.cxx | 150 +++++++ .../backends/mklcpu/CMakeLists.txt | 8 +- .../backends/mklcpu/mklcpu_basic.cpp | 28 -- .../backends/mklcpu/mklcpu_handles.cpp | 29 ++ .../backends/mklcpu/mklcpu_operations.cpp | 28 -- .../backends/mklcpu/mklcpu_spmm.cpp | 30 ++ .../backends/mklcpu/mklcpu_spmv.cpp | 30 ++ .../backends/mklcpu/mklcpu_spsv.cpp | 
30 ++ .../backends/mklcpu/mklcpu_wrappers.cpp | 28 +- .../backends/mklgpu/CMakeLists.txt | 8 +- .../backends/mklgpu/mklgpu_basic.cpp | 28 -- .../backends/mklgpu/mklgpu_handles.cpp | 29 ++ .../backends/mklgpu/mklgpu_operations.cpp | 28 -- .../backends/mklgpu/mklgpu_spmm.cpp | 30 ++ .../backends/mklgpu/mklgpu_spmv.cpp | 30 ++ .../backends/mklgpu/mklgpu_spsv.cpp | 30 ++ .../backends/mklgpu/mklgpu_wrappers.cpp | 28 +- src/sparse_blas/enum_data_types.hpp | 69 +++ src/sparse_blas/function_table.hpp | 334 ++++++++++---- src/sparse_blas/generic_container.hpp | 330 ++++++++++++++ src/sparse_blas/macros.hpp | 6 + src/sparse_blas/sparse_blas_loader.cpp | 417 +++++++++++++----- tests/unit_tests/sparse_blas/CMakeLists.txt | 2 +- .../include/common_sparse_reference.hpp | 230 ++++++++++ .../sparse_blas/include/sparse_reference.hpp | 297 ------------- .../sparse_blas/include/test_common.hpp | 324 +++++++++++--- .../sparse_blas/include/test_spmm.hpp | 276 ++++++++++++ .../sparse_blas/include/test_spmv.hpp | 236 ++++++++++ .../sparse_blas/include/test_spsv.hpp | 191 ++++++++ .../sparse_blas/source/CMakeLists.txt | 14 +- .../sparse_blas/source/sparse_gemm_buffer.cpp | 302 ------------- .../sparse_blas/source/sparse_gemm_usm.cpp | 330 -------------- .../sparse_blas/source/sparse_gemv_buffer.cpp | 230 ---------- .../sparse_blas/source/sparse_gemv_usm.cpp | 256 ----------- .../sparse_blas/source/sparse_spmm_buffer.cpp | 247 +++++++++++ .../sparse_blas/source/sparse_spmm_usm.cpp | 285 ++++++++++++ .../sparse_blas/source/sparse_spmv_buffer.cpp | 247 +++++++++++ .../sparse_blas/source/sparse_spmv_usm.cpp | 285 ++++++++++++ .../sparse_blas/source/sparse_spsv_buffer.cpp | 242 ++++++++++ .../sparse_blas/source/sparse_spsv_usm.cpp | 280 ++++++++++++ .../sparse_blas/source/sparse_trsv_buffer.cpp | 240 ---------- .../sparse_blas/source/sparse_trsv_usm.cpp | 261 ----------- 70 files changed, 6140 insertions(+), 2939 deletions(-) create mode 100644 docs/domains/sparse_linear_algebra.rst rename 
examples/sparse_blas/compile_time_dispatching/{sparse_blas_gemv_usm_mklcpu.cpp => sparse_blas_spmv_usm_mklcpu.cpp} (73%) rename examples/sparse_blas/run_time_dispatching/{sparse_blas_gemv_usm.cpp => sparse_blas_spmv_usm.cpp} (73%) create mode 100644 include/oneapi/mkl/sparse_blas/detail/handles.hpp create mode 100644 include/oneapi/mkl/sparse_blas/detail/operation_types.hpp create mode 100644 include/oneapi/mkl/sparse_blas/matrix_view.hpp delete mode 100644 src/sparse_blas/backends/mkl_common/mkl_basic.cxx create mode 100644 src/sparse_blas/backends/mkl_common/mkl_handles.cxx create mode 100644 src/sparse_blas/backends/mkl_common/mkl_handles.hpp delete mode 100644 src/sparse_blas/backends/mkl_common/mkl_operations.cxx create mode 100644 src/sparse_blas/backends/mkl_common/mkl_spmm.cxx create mode 100644 src/sparse_blas/backends/mkl_common/mkl_spmv.cxx create mode 100644 src/sparse_blas/backends/mkl_common/mkl_spsv.cxx delete mode 100644 src/sparse_blas/backends/mklcpu/mklcpu_basic.cpp create mode 100644 src/sparse_blas/backends/mklcpu/mklcpu_handles.cpp delete mode 100644 src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp create mode 100644 src/sparse_blas/backends/mklcpu/mklcpu_spmm.cpp create mode 100644 src/sparse_blas/backends/mklcpu/mklcpu_spmv.cpp create mode 100644 src/sparse_blas/backends/mklcpu/mklcpu_spsv.cpp delete mode 100644 src/sparse_blas/backends/mklgpu/mklgpu_basic.cpp create mode 100644 src/sparse_blas/backends/mklgpu/mklgpu_handles.cpp delete mode 100644 src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp create mode 100644 src/sparse_blas/backends/mklgpu/mklgpu_spmm.cpp create mode 100644 src/sparse_blas/backends/mklgpu/mklgpu_spmv.cpp create mode 100644 src/sparse_blas/backends/mklgpu/mklgpu_spsv.cpp create mode 100644 src/sparse_blas/enum_data_types.hpp create mode 100644 src/sparse_blas/generic_container.hpp create mode 100644 tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp delete mode 100644 
tests/unit_tests/sparse_blas/include/sparse_reference.hpp create mode 100644 tests/unit_tests/sparse_blas/include/test_spmm.hpp create mode 100644 tests/unit_tests/sparse_blas/include/test_spmv.hpp create mode 100644 tests/unit_tests/sparse_blas/include/test_spsv.hpp delete mode 100644 tests/unit_tests/sparse_blas/source/sparse_gemm_buffer.cpp delete mode 100644 tests/unit_tests/sparse_blas/source/sparse_gemm_usm.cpp delete mode 100644 tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp delete mode 100644 tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp create mode 100644 tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp create mode 100644 tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp create mode 100644 tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp create mode 100644 tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp create mode 100644 tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp create mode 100644 tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp delete mode 100644 tests/unit_tests/sparse_blas/source/sparse_trsv_buffer.cpp delete mode 100644 tests/unit_tests/sparse_blas/source/sparse_trsv_usm.cpp diff --git a/docs/domains/sparse_linear_algebra.rst b/docs/domains/sparse_linear_algebra.rst new file mode 100644 index 000000000..eb39bd3f1 --- /dev/null +++ b/docs/domains/sparse_linear_algebra.rst @@ -0,0 +1,31 @@ +.. _onemkl_sparse_linear_algebra: + +Sparse Linear Algebra +===================== + +See the latest specification for the sparse domain `here +`_. + +This page documents implementation specific or backend specific details of the +sparse domain. + +OneMKL Intel CPU and GPU backends +--------------------------------- + +Known limitations as of Intel oneMKL product release 2024.1: + +- All operations' algorithms except ``no_optimize_alg`` map to the default + algorithm. +- The required external workspace size is always 0 bytes. 
+- ``oneapi::mkl::sparse::set_csr_data`` and + ``oneapi::mkl::sparse::set_coo_data`` functions cannot be used on a handle + that has already been used for an operation or its optimize function. Doing so + will throw an ``oneapi::mkl::unimplemented`` exception. +- Using ``spsv`` with the ``oneapi::mkl::sparse::spsv_alg::no_optimize_alg`` and + a sparse matrix that does not have the + ``oneapi::mkl::sparse::matrix_property::sorted`` property will throw an + ``oneapi::mkl::unimplemented`` exception. +- Using ``spmm`` on Intel GPU with a sparse matrix that is + ``oneapi::mkl::transpose::conjtrans`` and has the + ``oneapi::mkl::sparse::matrix_property::symmetric`` property will throw an + ``oneapi::mkl::unimplemented`` exception. diff --git a/docs/index.rst b/docs/index.rst index e1a051524..6f5dd1c40 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,4 +30,5 @@ Contents onemkl-datatypes.rst domains/dense_linear_algebra.rst + domains/sparse_linear_algebra.rst create_new_backend.rst diff --git a/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt b/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt index cb95333b4..5dbbba8a4 100644 --- a/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt +++ b/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt @@ -27,8 +27,8 @@ endif() include(WarningsUtils) foreach(backend ${SPARSE_BLAS_BACKENDS}) - set(EXAMPLE_NAME example_sparse_blas_gemv_usm_${backend}) - add_executable(${EXAMPLE_NAME} sparse_blas_gemv_usm_${backend}.cpp) + set(EXAMPLE_NAME example_sparse_blas_spmv_usm_${backend}) + add_executable(${EXAMPLE_NAME} sparse_blas_spmv_usm_${backend}.cpp) target_include_directories(${EXAMPLE_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/examples/include PUBLIC ${PROJECT_SOURCE_DIR}/include @@ -39,6 +39,6 @@ foreach(backend ${SPARSE_BLAS_BACKENDS}) target_link_libraries(${EXAMPLE_NAME} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_${backend}) # Register example as ctest - add_test(NAME 
sparse_blas/EXAMPLE/CT/sparse_blas_gemv_usm_${backend} COMMAND ${EXAMPLE_NAME}) + add_test(NAME sparse_blas/EXAMPLE/CT/sparse_blas_spmv_usm_${backend} COMMAND ${EXAMPLE_NAME}) endforeach(backend) diff --git a/examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp b/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp similarity index 73% rename from examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp rename to examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp index edb6d7e1f..4ab078601 100644 --- a/examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp +++ b/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp @@ -20,7 +20,7 @@ /* * * Content: -* This example demonstrates use of DPCPP API oneapi::mkl::sparse::gemv +* This example demonstrates use of DPCPP API oneapi::mkl::sparse::spmv * using unified shared memory to perform general sparse matrix-vector * multiplication on a INTEL CPU SYCL device. 
* @@ -32,7 +32,7 @@ * * * This example demonstrates only single precision (float) data type for -* gemv matrix data +* spmv matrix data * * *******************************************************************************/ @@ -77,7 +77,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { } catch (sycl::exception const &e) { std::cout << "Caught asynchronous SYCL " - "exception during sparse::gemv:\n" + "exception during sparse::spmv:\n" << e.what() << std::endl; } } @@ -128,7 +128,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { // oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans; - std::cout << "\n\t\tsparse::gemv parameters:\n"; + oneapi::mkl::sparse::spmv_alg alg = oneapi::mkl::sparse::spmv_alg::default_alg; + oneapi::mkl::sparse::matrix_view A_view; + + std::cout << "\n\t\tsparse::spmv parameters:\n"; std::cout << "\t\t\ttransA = " << (transA == oneapi::mkl::transpose::nontrans ? "nontrans" @@ -137,23 +140,49 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { std::cout << "\t\t\tnrows = " << nrows << std::endl; std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl; - // create and initialize handle for a Sparse Matrix in CSR format - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - - oneapi::mkl::sparse::init_matrix_handle(cpu_selector, &handle); - - auto ev_set = oneapi::mkl::sparse::set_csr_data(cpu_selector, handle, nrows, nrows, nnz, - oneapi::mkl::index_base::zero, ia, ja, a); - - auto ev_opt = oneapi::mkl::sparse::optimize_gemv(cpu_selector, transA, handle, { ev_set }); - - auto ev_gemv = - oneapi::mkl::sparse::gemv(cpu_selector, transA, alpha, handle, x, beta, y, { ev_opt }); - - auto ev_release = - oneapi::mkl::sparse::release_matrix_handle(cpu_selector, &handle, { ev_gemv }); - - ev_release.wait_and_throw(); + // Create and initialize handle for a Sparse Matrix in CSR format + oneapi::mkl::sparse::matrix_handle_t 
A_handle = nullptr; + oneapi::mkl::sparse::init_csr_matrix(cpu_selector, &A_handle, nrows, nrows, nnz, + oneapi::mkl::index_base::zero, ia, ja, a); + + // Create and initialize dense vector handles + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::init_dense_vector(cpu_selector, &x_handle, sizevec, x); + oneapi::mkl::sparse::init_dense_vector(cpu_selector, &y_handle, sizevec, y); + + // Create operation descriptor + oneapi::mkl::sparse::spmv_descr_t descr = nullptr; + oneapi::mkl::sparse::init_spmv_descr(cpu_selector, &descr); + + // Allocate external workspace + std::size_t workspace_size = 0; + oneapi::mkl::sparse::spmv_buffer_size(cpu_selector, transA, &alpha, A_view, A_handle, x_handle, + &beta, y_handle, alg, descr, workspace_size); + void *workspace = sycl::malloc_device(workspace_size, cpu_queue); + + // Optimize spmv + auto ev_opt = + oneapi::mkl::sparse::spmv_optimize(cpu_selector, transA, &alpha, A_view, A_handle, x_handle, + &beta, y_handle, alg, descr, workspace); + + // Run spmv + auto ev_spmv = oneapi::mkl::sparse::spmv(cpu_selector, transA, &alpha, A_view, A_handle, + x_handle, &beta, y_handle, alg, descr, { ev_opt }); + + // Release handles and descriptor + std::vector release_events; + release_events.push_back( + oneapi::mkl::sparse::release_dense_vector(cpu_selector, x_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_dense_vector(cpu_selector, y_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_sparse_matrix(cpu_selector, A_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_spmv_descr(cpu_selector, descr, { ev_spmv })); + for (auto event : release_events) { + event.wait_and_throw(); + } // // Post Processing @@ -181,7 +210,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { good &= check_result(res[row], z[row], nrows, 
row); } - std::cout << "\n\t\t sparse::gemv example " << (good ? "passed" : "failed") << "\n\tFinished" + std::cout << "\n\t\t sparse::spmv example " << (good ? "passed" : "failed") << "\n\tFinished" << std::endl; free_vec(fp_ptr_vec, cpu_queue); @@ -211,7 +240,7 @@ void print_example_banner() { std::cout << "# and alpha, beta are floating point type precision scalars." << std::endl; std::cout << "# " << std::endl; std::cout << "# Using apis:" << std::endl; - std::cout << "# sparse::gemv" << std::endl; + std::cout << "# sparse::spmv" << std::endl; std::cout << "# " << std::endl; std::cout << "# Using single precision (float) data type" << std::endl; std::cout << "# " << std::endl; @@ -232,22 +261,22 @@ int main(int /*argc*/, char ** /*argv*/) { // TODO: Add cuSPARSE compile-time dispatcher in this example once it is supported. sycl::device cpu_dev(sycl::cpu_selector_v); - std::cout << "Running Sparse BLAS GEMV USM example on CPU device." << std::endl; + std::cout << "Running Sparse BLAS SPMV USM example on CPU device." << std::endl; std::cout << "Device name is: " << cpu_dev.get_info() << std::endl; std::cout << "Running with single precision real data type:" << std::endl; run_sparse_matrix_vector_multiply_example(cpu_dev); - std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl; + std::cout << "Sparse BLAS SPMV USM example ran OK." 
<< std::endl; } catch (sycl::exception const &e) { - std::cerr << "Caught synchronous SYCL exception during Sparse GEMV:" << std::endl; + std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; return 1; } catch (std::exception const &e) { - std::cerr << "Caught std::exception during Sparse GEMV:" << std::endl; + std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; return 1; } diff --git a/examples/sparse_blas/run_time_dispatching/CMakeLists.txt b/examples/sparse_blas/run_time_dispatching/CMakeLists.txt index 6f144c898..398f3e0f2 100644 --- a/examples/sparse_blas/run_time_dispatching/CMakeLists.txt +++ b/examples/sparse_blas/run_time_dispatching/CMakeLists.txt @@ -22,7 +22,7 @@ include(WarningsUtils) # Build object from all example sources -set(SPARSE_BLAS_RT_SOURCES "sparse_blas_gemv_usm") +set(SPARSE_BLAS_RT_SOURCES "sparse_blas_spmv_usm") # Set up for the right backend for run-time dispatching examples # If users build more than one backend (i.e. 
mklcpu and mklgpu, or mklcpu and CUDA), they may need to # overwrite ONEAPI_DEVICE_SELECTOR in their environment to run on the desired backend diff --git a/examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp similarity index 73% rename from examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp rename to examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp index b5812fabf..6b14881fe 100644 --- a/examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp +++ b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp @@ -20,7 +20,7 @@ /* * * Content: -* This example demonstrates use of DPCPP API oneapi::mkl::sparse::gemv +* This example demonstrates use of DPCPP API oneapi::mkl::sparse::spmv * using unified shared memory to perform general sparse matrix-vector * multiplication on a SYCL device (HOST, CPU, GPU) that is selected * during runtime. @@ -33,7 +33,7 @@ * * * This example demonstrates only single precision (float) data type for -* gemv matrix data +* spmv matrix data * * *******************************************************************************/ @@ -78,7 +78,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { } catch (sycl::exception const &e) { std::cout << "Caught asynchronous SYCL " - "exception during sparse::gemv:\n" + "exception during sparse::spmv:\n" << e.what() << std::endl; } } @@ -128,7 +128,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans; - std::cout << "\n\t\tsparse::gemv parameters:\n"; + oneapi::mkl::sparse::spmv_alg alg = oneapi::mkl::sparse::spmv_alg::default_alg; + oneapi::mkl::sparse::matrix_view A_view; + + std::cout << "\n\t\tsparse::spmv parameters:\n"; std::cout << "\t\t\ttransA = " << (transA == oneapi::mkl::transpose::nontrans ? 
"nontrans" @@ -137,22 +140,49 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { std::cout << "\t\t\tnrows = " << nrows << std::endl; std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl; - // create and initialize handle for a Sparse Matrix in CSR format - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - - oneapi::mkl::sparse::init_matrix_handle(main_queue, &handle); - - auto ev_set = oneapi::mkl::sparse::set_csr_data(main_queue, handle, nrows, nrows, nnz, - oneapi::mkl::index_base::zero, ia, ja, a); - - auto ev_opt = oneapi::mkl::sparse::optimize_gemv(main_queue, transA, handle, { ev_set }); - - auto ev_gemv = - oneapi::mkl::sparse::gemv(main_queue, transA, alpha, handle, x, beta, y, { ev_opt }); - - auto ev_release = oneapi::mkl::sparse::release_matrix_handle(main_queue, &handle, { ev_gemv }); - - ev_release.wait_and_throw(); + // Create and initialize handle for a Sparse Matrix in CSR format + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::init_csr_matrix(main_queue, &A_handle, nrows, nrows, nnz, + oneapi::mkl::index_base::zero, ia, ja, a); + + // Create and initialize dense vector handles + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::init_dense_vector(main_queue, &x_handle, static_cast(sizevec), x); + oneapi::mkl::sparse::init_dense_vector(main_queue, &y_handle, static_cast(sizevec), y); + + // Create operation descriptor + oneapi::mkl::sparse::spmv_descr_t descr = nullptr; + oneapi::mkl::sparse::init_spmv_descr(main_queue, &descr); + + // Allocate external workspace + std::size_t workspace_size = 0; + oneapi::mkl::sparse::spmv_buffer_size(main_queue, transA, &alpha, A_view, A_handle, x_handle, + &beta, y_handle, alg, descr, workspace_size); + void *workspace = sycl::malloc_device(workspace_size, main_queue); + + // Optimize spmv + auto ev_opt = + 
oneapi::mkl::sparse::spmv_optimize(main_queue, transA, &alpha, A_view, A_handle, x_handle, + &beta, y_handle, alg, descr, workspace); + + // Run spmv + auto ev_spmv = oneapi::mkl::sparse::spmv(main_queue, transA, &alpha, A_view, A_handle, x_handle, + &beta, y_handle, alg, descr, { ev_opt }); + + // Release handles and descriptor + std::vector release_events; + release_events.push_back( + oneapi::mkl::sparse::release_dense_vector(main_queue, x_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_dense_vector(main_queue, y_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_sparse_matrix(main_queue, A_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_spmv_descr(main_queue, descr, { ev_spmv })); + for (auto event : release_events) { + event.wait_and_throw(); + } // // Post Processing @@ -180,7 +210,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { good &= check_result(res[row], z[row], nrows, row); } - std::cout << "\n\t\t sparse::gemv example " << (good ? "passed" : "failed") << "\n\tFinished" + std::cout << "\n\t\t sparse::spmv example " << (good ? "passed" : "failed") << "\n\tFinished" << std::endl; free_vec(fp_ptr_vec, main_queue); @@ -210,7 +240,7 @@ void print_example_banner() { std::cout << "# and alpha, beta are floating point type precision scalars." << std::endl; std::cout << "# " << std::endl; std::cout << "# Using apis:" << std::endl; - std::cout << "# sparse::gemv" << std::endl; + std::cout << "# sparse::spmv" << std::endl; std::cout << "# " << std::endl; std::cout << "# Using single precision (float) data type" << std::endl; std::cout << "# " << std::endl; @@ -234,28 +264,28 @@ int main(int /*argc*/, char ** /*argv*/) { sycl::device dev = sycl::device(); if (dev.is_gpu()) { - std::cout << "Running Sparse BLAS GEMV USM example on GPU device." << std::endl; + std::cout << "Running Sparse BLAS SPMV USM example on GPU device." 
<< std::endl; std::cout << "Device name is: " << dev.get_info() << std::endl; } else { - std::cout << "Running Sparse BLAS GEMV USM example on CPU device." << std::endl; + std::cout << "Running Sparse BLAS SPMV USM example on CPU device." << std::endl; std::cout << "Device name is: " << dev.get_info() << std::endl; } std::cout << "Running with single precision real data type:" << std::endl; run_sparse_matrix_vector_multiply_example(dev); - std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl; + std::cout << "Sparse BLAS SPMV USM example ran OK." << std::endl; } catch (sycl::exception const &e) { - std::cerr << "Caught synchronous SYCL exception during Sparse GEMV:" << std::endl; + std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; return 1; } catch (std::exception const &e) { - std::cerr << "Caught std::exception during Sparse GEMV:" << std::endl; + std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; return 1; } diff --git a/include/oneapi/mkl/sparse_blas/detail/handles.hpp b/include/oneapi/mkl/sparse_blas/detail/handles.hpp new file mode 100644 index 000000000..0566f93b4 --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/detail/handles.hpp @@ -0,0 +1,38 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. 
+* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_HANDLES_HPP_ +#define _ONEMKL_SPARSE_BLAS_DETAIL_HANDLES_HPP_ + +namespace oneapi::mkl::sparse { + +// Each backend can create its own handle type or re-use the native handle types that will be reinterpret_cast'ed to the types below + +struct dense_matrix_handle; +using dense_matrix_handle_t = dense_matrix_handle*; + +struct dense_vector_handle; +using dense_vector_handle_t = dense_vector_handle*; + +struct matrix_handle; +using matrix_handle_t = matrix_handle*; + +} // namespace oneapi::mkl::sparse + +#endif // _ONEMKL_SPARSE_BLAS_DETAIL_HANDLES_HPP_ diff --git a/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp b/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp index 4964b1eff..75ee22211 100644 --- a/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp @@ -29,20 +29,18 @@ namespace mkl { namespace sparse { namespace detail { -struct matrix_handle; - -template +template inline constexpr bool is_fp_supported_v = - std::is_same_v || std::is_same_v || - std::is_same_v> || std::is_same_v>; + std::is_same_v || std::is_same_v || + std::is_same_v> || std::is_same_v>; -template +template inline constexpr bool is_int_supported_v = - std::is_same_v || std::is_same_v; + std::is_same_v || std::is_same_v; -template +template inline constexpr bool are_fp_int_supported_v = - is_fp_supported_v&& is_int_supported_v; + is_fp_supported_v && is_int_supported_v; } // namespace detail } // namespace sparse diff --git 
a/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp b/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp index 2535e61f6..8686d35bc 100644 --- a/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp @@ -22,6 +22,7 @@ #include "oneapi/mkl/detail/export.hpp" #include "oneapi/mkl/sparse_blas/detail/helper_types.hpp" +#include "oneapi/mkl/sparse_blas/types.hpp" namespace oneapi::mkl::sparse::mklcpu { diff --git a/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp b/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp index bc0089c57..ee127c3f8 100644 --- a/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp @@ -20,7 +20,6 @@ #ifndef _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_ #define _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_ -#include "oneapi/mkl/sparse_blas/types.hpp" #include "oneapi/mkl/detail/backends.hpp" #include "oneapi/mkl/detail/backend_selector.hpp" diff --git a/include/oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp b/include/oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp index 1ca336b9b..eb3aaa5ff 100644 --- a/include/oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp @@ -22,6 +22,7 @@ #include "oneapi/mkl/detail/export.hpp" #include "oneapi/mkl/sparse_blas/detail/helper_types.hpp" +#include "oneapi/mkl/sparse_blas/types.hpp" namespace oneapi::mkl::sparse::mklgpu { diff --git a/include/oneapi/mkl/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp b/include/oneapi/mkl/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp index 00c01346f..d3b0d365f 100644 --- a/include/oneapi/mkl/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp +++ 
b/include/oneapi/mkl/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp @@ -20,7 +20,6 @@ #ifndef _ONEMKL_SPARSE_BLAS_DETAIL_MKLGPU_SPARSE_BLAS_CT_HPP_ #define _ONEMKL_SPARSE_BLAS_DETAIL_MKLGPU_SPARSE_BLAS_CT_HPP_ -#include "oneapi/mkl/sparse_blas/types.hpp" #include "oneapi/mkl/detail/backends.hpp" #include "oneapi/mkl/detail/backend_selector.hpp" diff --git a/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx b/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx index 03beaa4b4..4b701eb6f 100644 --- a/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx +++ b/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx @@ -20,72 +20,198 @@ // This file is meant to be included in each backend onemkl_sparse_blas_BACKEND.hpp files. // It is used to exports each symbol to the onemkl_sparse_blas_BACKEND library. -ONEMKL_EXPORT void init_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle); +// Dense vector +template +ONEMKL_EXPORT void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, + std::int64_t size, sycl::buffer val); +template +ONEMKL_EXPORT void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, + std::int64_t size, dataType *val); -ONEMKL_EXPORT sycl::event release_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle, +template +ONEMKL_EXPORT void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, + std::int64_t size, sycl::buffer val); +template +ONEMKL_EXPORT void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, + std::int64_t size, dataType *val); + +ONEMKL_EXPORT sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhandle, + const std::vector &dependencies = {}); + +// Dense matrix +template +ONEMKL_EXPORT void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + layout dense_layout, 
sycl::buffer val); +template +ONEMKL_EXPORT void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + layout dense_layout, dataType *val); + +template +ONEMKL_EXPORT void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t ld, layout dense_layout, + sycl::buffer val); +template +ONEMKL_EXPORT void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t ld, layout dense_layout, dataType *val); + +ONEMKL_EXPORT sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, + const std::vector &dependencies = {}); + +// COO matrix +template +ONEMKL_EXPORT void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, + sycl::buffer val); +template +ONEMKL_EXPORT void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, indexType *row_ind, indexType *col_ind, + dataType *val); + +template +ONEMKL_EXPORT void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, + sycl::buffer row_ind, + sycl::buffer col_ind, + sycl::buffer val); +template +ONEMKL_EXPORT void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ind, + indexType *col_ind, dataType *val); + +// CSR matrix +template +ONEMKL_EXPORT void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, sycl::buffer 
row_ptr, + sycl::buffer col_ind, + sycl::buffer val); +template +ONEMKL_EXPORT void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, indexType *row_ptr, indexType *col_ind, + dataType *val); + +template +ONEMKL_EXPORT void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, + sycl::buffer row_ptr, + sycl::buffer col_ind, + sycl::buffer val); +template +ONEMKL_EXPORT void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ptr, + indexType *col_ind, dataType *val); + +// Common sparse matrix functions +ONEMKL_EXPORT sycl::event release_sparse_matrix(sycl::queue &queue, matrix_handle_t smhandle, const std::vector &dependencies = {}); -template -ONEMKL_EXPORT std::enable_if_t> set_csr_data( - sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, intType nnz, - index_base index, sycl::buffer &row_ptr, sycl::buffer &col_ind, - sycl::buffer &val); +bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property); -template -ONEMKL_EXPORT std::enable_if_t, sycl::event> -set_csr_data(sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, - intType nnz, index_base index, intType *row_ptr, intType *col_ind, fpType *val, - const std::vector &dependencies = {}); +// SPMM +ONEMKL_EXPORT void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr); -ONEMKL_EXPORT sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, - matrix_handle_t handle, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t spmm_descr, + const std::vector &dependencies = {}); + +ONEMKL_EXPORT void spmm_buffer_size(sycl::queue 
&queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, std::size_t &temp_buffer_size); -ONEMKL_EXPORT sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, - transpose transpose_B, layout dense_matrix_layout, - const std::int64_t columns, matrix_handle_t handle, +ONEMKL_EXPORT void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, + const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, sycl::buffer workspace); + +ONEMKL_EXPORT sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void *workspace, const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val, - matrix_handle_t handle, +ONEMKL_EXPORT sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, + const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, + const std::vector &dependencies = {}); + +// SPMV +ONEMKL_EXPORT void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr); + +ONEMKL_EXPORT sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, + const std::vector &dependencies = {}); + +ONEMKL_EXPORT void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, 
matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t &temp_buffer_size); + +ONEMKL_EXPORT void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, sycl::buffer workspace); + +ONEMKL_EXPORT sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, + spmv_alg alg, spmv_descr_t spmv_descr, void *workspace, const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event optimize_trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, - diag diag_val, matrix_handle_t handle, +ONEMKL_EXPORT sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, + const std::vector &dependencies = {}); + +// SPSV +ONEMKL_EXPORT void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr); + +ONEMKL_EXPORT sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, + const std::vector &dependencies = {}); + +ONEMKL_EXPORT void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + std::size_t &temp_buffer_size); + +ONEMKL_EXPORT void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, 
dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + sycl::buffer workspace); + +ONEMKL_EXPORT sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, + spsv_descr_t spsv_descr, void *workspace, const std::vector &dependencies = {}); -template -ONEMKL_EXPORT std::enable_if_t> gemv( - sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, - sycl::buffer &x, const fpType beta, sycl::buffer &y); - -template -ONEMKL_EXPORT std::enable_if_t, sycl::event> gemv( - sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, - const fpType *x, const fpType beta, fpType *y, - const std::vector &dependencies = {}); - -template -ONEMKL_EXPORT std::enable_if_t> trsv( - sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, - matrix_handle_t A_handle, sycl::buffer &x, sycl::buffer &y); - -template -ONEMKL_EXPORT std::enable_if_t, sycl::event> trsv( - sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, - matrix_handle_t A_handle, const fpType *x, fpType *y, - const std::vector &dependencies = {}); - -template -ONEMKL_EXPORT std::enable_if_t> gemm( - sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, matrix_handle_t A_handle, sycl::buffer &B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, - sycl::buffer &C, const std::int64_t ldc); - -template -ONEMKL_EXPORT std::enable_if_t, sycl::event> gemm( - sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, matrix_handle_t A_handle, const fpType *B, const std::int64_t columns, - const std::int64_t ldb, const fpType beta, fpType *C, const std::int64_t ldc, - const std::vector &dependencies = {}); 
+ONEMKL_EXPORT sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + const std::vector &dependencies = {}); diff --git a/include/oneapi/mkl/sparse_blas/detail/operation_types.hpp b/include/oneapi/mkl/sparse_blas/detail/operation_types.hpp new file mode 100644 index 000000000..b79036830 --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/detail/operation_types.hpp @@ -0,0 +1,38 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_ +#define _ONEMKL_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_ + +namespace oneapi::mkl::sparse { + +// Each backend can create its own descriptor type or re-use the native descriptor types that will be reinterpret_cast'ed to the types below + +struct spmm_descr; +using spmm_descr_t = spmm_descr*; + +struct spmv_descr; +using spmv_descr_t = spmv_descr*; + +struct spsv_descr; +using spsv_descr_t = spsv_descr*; + +} // namespace oneapi::mkl::sparse + +#endif // _ONEMKL_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_ diff --git a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx index 41fe51c49..ca09d09d4 100644 --- a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx +++ b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx @@ -24,112 +24,307 @@ #error "BACKEND is not defined" #endif -inline void init_matrix_handle(backend_selector selector, - matrix_handle_t *p_handle) { - BACKEND::init_matrix_handle(selector.get_queue(), p_handle); +// Dense vector +template +std::enable_if_t> init_dense_vector( + backend_selector selector, dense_vector_handle_t *p_dvhandle, + std::int64_t size, sycl::buffer val) { + BACKEND::init_dense_vector(selector.get_queue(), p_dvhandle, size, val); +} +template +std::enable_if_t> init_dense_vector( + backend_selector selector, dense_vector_handle_t *p_dvhandle, + std::int64_t size, dataType *val) { + BACKEND::init_dense_vector(selector.get_queue(), p_dvhandle, size, val); +} + +template +std::enable_if_t> set_dense_vector_data( + backend_selector selector, dense_vector_handle_t dvhandle, std::int64_t size, + sycl::buffer val) { + BACKEND::set_dense_vector_data(selector.get_queue(), dvhandle, size, val); +} +template +std::enable_if_t> set_dense_vector_data( + backend_selector selector, dense_vector_handle_t dvhandle, std::int64_t size, + 
dataType *val) { + BACKEND::set_dense_vector_data(selector.get_queue(), dvhandle, size, val); +} + +inline sycl::event release_dense_vector(backend_selector selector, + dense_vector_handle_t dvhandle, + const std::vector &dependencies = {}) { + return BACKEND::release_dense_vector(selector.get_queue(), dvhandle, dependencies); +} + +// Dense matrix +template +std::enable_if_t> init_dense_matrix( + backend_selector selector, dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, + sycl::buffer val) { + BACKEND::init_dense_matrix(selector.get_queue(), p_dmhandle, num_rows, num_cols, ld, + dense_layout, val); +} +template +std::enable_if_t> init_dense_matrix( + backend_selector selector, dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, + dataType *val) { + BACKEND::init_dense_matrix(selector.get_queue(), p_dmhandle, num_rows, num_cols, ld, + dense_layout, val); +} + +template +std::enable_if_t> set_dense_matrix_data( + backend_selector selector, dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, + sycl::buffer val) { + BACKEND::set_dense_matrix_data(selector.get_queue(), dmhandle, num_rows, num_cols, ld, + dense_layout, val); +} +template +std::enable_if_t> set_dense_matrix_data( + backend_selector selector, dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, + dataType *val) { + BACKEND::set_dense_matrix_data(selector.get_queue(), dmhandle, num_rows, num_cols, ld, + dense_layout, val); +} + +inline sycl::event release_dense_matrix(backend_selector selector, + dense_matrix_handle_t dmhandle, + const std::vector &dependencies = {}) { + return BACKEND::release_dense_matrix(selector.get_queue(), dmhandle, dependencies); } -inline sycl::event release_matrix_handle(backend_selector selector, - 
matrix_handle_t *p_handle, +// COO matrix +template +std::enable_if_t> init_coo_matrix( + backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer val) { + BACKEND::init_coo_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, + row_ind, col_ind, val); +} +template +std::enable_if_t> init_coo_matrix( + backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ind, + indexType *col_ind, dataType *val) { + BACKEND::init_coo_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, + row_ind, col_ind, val); +} + +template +std::enable_if_t> set_coo_matrix_data( + backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer val) { + BACKEND::set_coo_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, + row_ind, col_ind, val); +} +template +std::enable_if_t> set_coo_matrix_data( + backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ind, + indexType *col_ind, dataType *val) { + BACKEND::set_coo_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, + row_ind, col_ind, val); +} + +// CSR matrix +template +std::enable_if_t> init_csr_matrix( + backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val) { + BACKEND::init_csr_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, + row_ptr, col_ind, val); +} +template +std::enable_if_t> init_csr_matrix( + backend_selector 
selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ptr, + indexType *col_ind, dataType *val) { + BACKEND::init_csr_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, + row_ptr, col_ind, val); +} + +template +std::enable_if_t> set_csr_matrix_data( + backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val) { + BACKEND::set_csr_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, + row_ptr, col_ind, val); +} +template +std::enable_if_t> set_csr_matrix_data( + backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ptr, + indexType *col_ind, dataType *val) { + BACKEND::set_csr_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, + row_ptr, col_ind, val); +} + +// Common sparse matrix functions +inline sycl::event release_sparse_matrix(backend_selector selector, + matrix_handle_t smhandle, const std::vector &dependencies = {}) { - return BACKEND::release_matrix_handle(selector.get_queue(), p_handle, dependencies); + return BACKEND::release_sparse_matrix(selector.get_queue(), smhandle, dependencies); } -template -std::enable_if_t> set_csr_data( - backend_selector selector, matrix_handle_t handle, intType num_rows, - intType num_cols, intType nnz, index_base index, sycl::buffer &row_ptr, - sycl::buffer &col_ind, sycl::buffer &val) { - BACKEND::set_csr_data(selector.get_queue(), handle, num_rows, num_cols, nnz, index, row_ptr, - col_ind, val); +inline bool set_matrix_property(backend_selector selector, + matrix_handle_t smhandle, matrix_property property) { + return BACKEND::set_matrix_property(selector.get_queue(), smhandle, property); } -template -std::enable_if_t, sycl::event> 
set_csr_data( - backend_selector selector, matrix_handle_t handle, intType num_rows, - intType num_cols, intType nnz, index_base index, intType *row_ptr, intType *col_ind, - fpType *val, const std::vector &dependencies = {}) { - return BACKEND::set_csr_data(selector.get_queue(), handle, num_rows, num_cols, nnz, index, - row_ptr, col_ind, val, dependencies); +// SPMM +inline void init_spmm_descr(backend_selector selector, + spmm_descr_t *p_spmm_descr) { + BACKEND::init_spmm_descr(selector.get_queue(), p_spmm_descr); } -inline sycl::event optimize_gemm(backend_selector selector, transpose transpose_A, - matrix_handle_t handle, - const std::vector &dependencies = {}) { - return BACKEND::optimize_gemm(selector.get_queue(), transpose_A, handle, dependencies); +inline sycl::event release_spmm_descr(backend_selector selector, + spmm_descr_t spmm_descr, + const std::vector &dependencies = {}) { + return BACKEND::release_spmm_descr(selector.get_queue(), spmm_descr, dependencies); } -inline sycl::event optimize_gemm(backend_selector selector, transpose transpose_A, - transpose transpose_B, layout dense_matrix_layout, - const std::int64_t columns, matrix_handle_t handle, +inline void spmm_buffer_size(backend_selector selector, + oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, + std::size_t &temp_buffer_size) { + BACKEND::spmm_buffer_size(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, + beta, C_handle, alg, spmm_descr, temp_buffer_size); +} + +inline void spmm_optimize(backend_selector selector, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, + const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, sycl::buffer 
workspace) { + BACKEND::spmm_optimize(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, + C_handle, alg, spmm_descr, workspace); +} + +inline sycl::event spmm_optimize(backend_selector selector, + oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void *workspace, const std::vector &dependencies = {}) { - return BACKEND::optimize_gemm(selector.get_queue(), transpose_A, transpose_B, - dense_matrix_layout, columns, handle, dependencies); + return BACKEND::spmm_optimize(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, + beta, C_handle, alg, spmm_descr, workspace, dependencies); +} + +inline sycl::event spmm(backend_selector selector, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, + const std::vector &dependencies = {}) { + return BACKEND::spmm(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, + C_handle, alg, spmm_descr, dependencies); +} + +// SPMV +inline void init_spmv_descr(backend_selector selector, + spmv_descr_t *p_spmv_descr) { + BACKEND::init_spmv_descr(selector.get_queue(), p_spmv_descr); +} + +inline sycl::event release_spmv_descr(backend_selector selector, + spmv_descr_t spmv_descr, + const std::vector &dependencies = {}) { + return BACKEND::release_spmv_descr(selector.get_queue(), spmv_descr, dependencies); +} + +inline void spmv_buffer_size(backend_selector selector, + oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t 
&temp_buffer_size) { + BACKEND::spmv_buffer_size(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, + y_handle, alg, spmv_descr, temp_buffer_size); +} + +inline void spmv_optimize(backend_selector selector, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, + sycl::buffer workspace) { + BACKEND::spmv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, + y_handle, alg, spmv_descr, workspace); } -inline sycl::event optimize_gemv(backend_selector selector, - transpose transpose_val, matrix_handle_t handle, +inline sycl::event spmv_optimize(backend_selector selector, + oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, void *workspace, const std::vector &dependencies = {}) { - return BACKEND::optimize_gemv(selector.get_queue(), transpose_val, handle, dependencies); + return BACKEND::spmv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, + beta, y_handle, alg, spmv_descr, workspace, dependencies); +} + +inline sycl::event spmv(backend_selector selector, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, + const std::vector &dependencies = {}) { + return BACKEND::spmv(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, + y_handle, alg, spmv_descr, dependencies); +} + +// SPSV +inline void init_spsv_descr(backend_selector selector, + spsv_descr_t *p_spsv_descr) { + BACKEND::init_spsv_descr(selector.get_queue(), p_spsv_descr); +} + +inline sycl::event release_spsv_descr(backend_selector selector, + 
spsv_descr_t spsv_descr, + const std::vector &dependencies = {}) { + return BACKEND::release_spsv_descr(selector.get_queue(), spsv_descr, dependencies); } -inline sycl::event optimize_trsv(backend_selector selector, uplo uplo_val, - transpose transpose_val, diag diag_val, matrix_handle_t handle, +inline void spsv_buffer_size(backend_selector selector, + oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + std::size_t &temp_buffer_size) { + BACKEND::spsv_buffer_size(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, + y_handle, alg, spsv_descr, temp_buffer_size); +} + +inline void spsv_optimize(backend_selector selector, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + sycl::buffer workspace) { + BACKEND::spsv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, y_handle, + alg, spsv_descr, workspace); +} + +inline sycl::event spsv_optimize(backend_selector selector, + oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, + spsv_descr_t spsv_descr, void *workspace, const std::vector &dependencies = {}) { - return BACKEND::optimize_trsv(selector.get_queue(), uplo_val, transpose_val, diag_val, handle, - dependencies); -} - -template -std::enable_if_t> gemv( - backend_selector selector, transpose transpose_val, const fpType alpha, - matrix_handle_t A_handle, sycl::buffer &x, const fpType beta, - sycl::buffer &y) { - BACKEND::gemv(selector.get_queue(), transpose_val, alpha, A_handle, x, beta, y); -} - -template -std::enable_if_t, sycl::event> gemv( - backend_selector selector, transpose transpose_val, const fpType alpha, - 
matrix_handle_t A_handle, const fpType *x, const fpType beta, fpType *y, - const std::vector &dependencies = {}) { - return BACKEND::gemv(selector.get_queue(), transpose_val, alpha, A_handle, x, beta, y, - dependencies); -} - -template -std::enable_if_t> trsv( - backend_selector selector, uplo uplo_val, transpose transpose_val, - diag diag_val, matrix_handle_t A_handle, sycl::buffer &x, - sycl::buffer &y) { - BACKEND::trsv(selector.get_queue(), uplo_val, transpose_val, diag_val, A_handle, x, y); -} - -template -std::enable_if_t, sycl::event> trsv( - backend_selector selector, uplo uplo_val, transpose transpose_val, - diag diag_val, matrix_handle_t A_handle, const fpType *x, fpType *y, - const std::vector &dependencies = {}) { - return BACKEND::trsv(selector.get_queue(), uplo_val, transpose_val, diag_val, A_handle, x, y, - dependencies); -} - -template -std::enable_if_t> gemm( - backend_selector selector, layout dense_matrix_layout, transpose transpose_A, - transpose transpose_B, const fpType alpha, matrix_handle_t A_handle, sycl::buffer &B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, - sycl::buffer &C, const std::int64_t ldc) { - BACKEND::gemm(selector.get_queue(), dense_matrix_layout, transpose_A, transpose_B, alpha, - A_handle, B, columns, ldb, beta, C, ldc); -} - -template -std::enable_if_t, sycl::event> gemm( - backend_selector selector, layout dense_matrix_layout, transpose transpose_A, - transpose transpose_B, const fpType alpha, matrix_handle_t A_handle, const fpType *B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, fpType *C, - const std::int64_t ldc, const std::vector &dependencies = {}) { - return BACKEND::gemm(selector.get_queue(), dense_matrix_layout, transpose_A, transpose_B, alpha, - A_handle, B, columns, ldb, beta, C, ldc, dependencies); + return BACKEND::spsv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, + y_handle, alg, spsv_descr, workspace, dependencies); +} + 
+inline sycl::event spsv(backend_selector selector, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + const std::vector &dependencies = {}) { + return BACKEND::spsv(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, y_handle, + alg, spsv_descr, dependencies); } diff --git a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp index 131e0545a..86a00f507 100644 --- a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp @@ -20,81 +20,186 @@ #ifndef _ONEMKL_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_ #define _ONEMKL_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_ +#include "oneapi/mkl/sparse_blas/detail/helper_types.hpp" #include "oneapi/mkl/sparse_blas/types.hpp" namespace oneapi { namespace mkl { namespace sparse { -void init_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle); - -sycl::event release_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle, +// Dense vector +template +std::enable_if_t> init_dense_vector( + sycl::queue &queue, dense_vector_handle_t *p_dvhandle, std::int64_t size, + sycl::buffer val); +template +std::enable_if_t> init_dense_vector( + sycl::queue &queue, dense_vector_handle_t *p_dvhandle, std::int64_t size, dataType *val); + +template +std::enable_if_t> set_dense_vector_data( + sycl::queue &queue, dense_vector_handle_t dvhandle, std::int64_t size, + sycl::buffer val); +template +std::enable_if_t> set_dense_vector_data( + sycl::queue &queue, dense_vector_handle_t dvhandle, std::int64_t size, dataType *val); + +sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhandle, + const std::vector &dependencies = {}); + +// Dense matrix +template +std::enable_if_t> init_dense_matrix( + sycl::queue &queue, dense_matrix_handle_t 
*p_dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val); +template +std::enable_if_t> init_dense_matrix( + sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType *val); + +template +std::enable_if_t> set_dense_matrix_data( + sycl::queue &queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val); +template +std::enable_if_t> set_dense_matrix_data( + sycl::queue &queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType *val); + +sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, + const std::vector &dependencies = {}); + +// COO matrix +template +std::enable_if_t> init_coo_matrix( + sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer val); +template +std::enable_if_t> init_coo_matrix( + sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ind, indexType *col_ind, dataType *val); + +template +std::enable_if_t> set_coo_matrix_data( + sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer val); +template +std::enable_if_t> set_coo_matrix_data( + sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ind, indexType *col_ind, dataType *val); + +// CSR matrix +template +std::enable_if_t> init_csr_matrix( + sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, 
std::int64_t num_cols, + std::int64_t nnz, index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val); +template +std::enable_if_t> init_csr_matrix( + sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ptr, indexType *col_ind, dataType *val); + +template +std::enable_if_t> set_csr_matrix_data( + sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val); +template +std::enable_if_t> set_csr_matrix_data( + sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ptr, indexType *col_ind, dataType *val); + +// Common sparse matrix functions +sycl::event release_sparse_matrix(sycl::queue &queue, matrix_handle_t smhandle, const std::vector &dependencies = {}); -template -std::enable_if_t> set_csr_data( - sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, intType nnz, - index_base index, sycl::buffer &row_ptr, sycl::buffer &col_ind, - sycl::buffer &val); +bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property); -template -std::enable_if_t, sycl::event> set_csr_data( - sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, intType nnz, - index_base index, intType *row_ptr, intType *col_ind, fpType *val, - const std::vector &dependencies = {}); +// SPMM +void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr); -sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, matrix_handle_t handle, - const std::vector &dependencies = {}); +sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t spmm_descr, + const std::vector &dependencies = {}); -sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, 
transpose transpose_B, - layout dense_matrix_layout, const std::int64_t columns, - matrix_handle_t handle, - const std::vector &dependencies = {}); +void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, + std::size_t &temp_buffer_size); + +void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, + spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer workspace); -sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val, matrix_handle_t handle, +sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, + const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void *workspace, const std::vector &dependencies = {}); -sycl::event optimize_trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, - matrix_handle_t handle, +sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, + spmm_alg alg, spmm_descr_t spmm_descr, + const std::vector &dependencies = {}); + +// SPMV +void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr); + +sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, + const std::vector &dependencies = {}); + +void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view 
A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t &temp_buffer_size); + +void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, sycl::buffer workspace); + +sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, + void *workspace, const std::vector &dependencies = {}); + +sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, const std::vector &dependencies = {}); + +// SPSV +void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr); + +sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, + const std::vector &dependencies = {}); + +void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + std::size_t &temp_buffer_size); + +void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + sycl::buffer workspace); + +sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + 
dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, void *workspace, const std::vector &dependencies = {}); -template -std::enable_if_t> gemv( - sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, - sycl::buffer &x, const fpType beta, sycl::buffer &y); - -template -std::enable_if_t, sycl::event> gemv( - sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, - const fpType *x, const fpType beta, fpType *y, - const std::vector &dependencies = {}); - -template -std::enable_if_t> trsv(sycl::queue &queue, uplo uplo_val, - transpose transpose_val, diag diag_val, - matrix_handle_t A_handle, - sycl::buffer &x, - sycl::buffer &y); - -template -std::enable_if_t, sycl::event> trsv( - sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, - matrix_handle_t A_handle, const fpType *x, fpType *y, - const std::vector &dependencies = {}); - -template -std::enable_if_t> gemm( - sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, matrix_handle_t A_handle, sycl::buffer &B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, - sycl::buffer &C, const std::int64_t ldc); - -template -std::enable_if_t, sycl::event> gemm( - sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, matrix_handle_t A_handle, const fpType *B, const std::int64_t columns, - const std::int64_t ldb, const fpType beta, fpType *C, const std::int64_t ldc, - const std::vector &dependencies = {}); +sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + const std::vector &dependencies = {}); } // namespace sparse } // namespace mkl diff --git 
a/include/oneapi/mkl/sparse_blas/matrix_view.hpp b/include/oneapi/mkl/sparse_blas/matrix_view.hpp new file mode 100644 index 000000000..08762c5d7 --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/matrix_view.hpp @@ -0,0 +1,51 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_MATRIX_VIEW_HPP_ +#define _ONEMKL_SPARSE_BLAS_MATRIX_VIEW_HPP_ + +#include "oneapi/mkl/types.hpp" + +namespace oneapi { +namespace mkl { +namespace sparse { + +enum class matrix_descr { + general, + symmetric, + hermitian, + triangular, + diagonal, +}; + +struct matrix_view { + matrix_descr type_view = matrix_descr::general; + uplo uplo_view = uplo::lower; + diag diag_view = diag::nonunit; + + matrix_view() = default; + + matrix_view(matrix_descr type_view) : type_view(type_view) {} +}; + +} // namespace sparse +} // namespace mkl +} // namespace oneapi + +#endif // _ONEMKL_SPARSE_BLAS_MATRIX_VIEW_HPP_ diff --git a/include/oneapi/mkl/sparse_blas/types.hpp b/include/oneapi/mkl/sparse_blas/types.hpp index 406c7dd1f..d4aea3e88 100644 --- a/include/oneapi/mkl/sparse_blas/types.hpp +++ b/include/oneapi/mkl/sparse_blas/types.hpp @@ -20,22 +20,50 @@ #ifndef _ONEMKL_SPARSE_BLAS_TYPES_HPP_ 
#define _ONEMKL_SPARSE_BLAS_TYPES_HPP_ -#if __has_include() -#include -#else -#include -#endif - -#include - #include "oneapi/mkl/types.hpp" -#include "detail/helper_types.hpp" +#include "matrix_view.hpp" +#include "detail/handles.hpp" +#include "detail/operation_types.hpp" + +/** + * @file Include and define the sparse types that are common between closed-source MKL API and oneMKL API. +*/ namespace oneapi { namespace mkl { namespace sparse { -using matrix_handle_t = detail::matrix_handle*; +enum class matrix_property { + symmetric, + sorted, +}; + +enum class spmm_alg { + default_alg, + no_optimize_alg, + coo_alg1, + coo_alg2, + coo_alg3, + coo_alg4, + csr_alg1, + csr_alg2, + csr_alg3, +}; + +enum class spmv_alg { + default_alg, + no_optimize_alg, + coo_alg1, + coo_alg2, + csr_alg1, + csr_alg2, + csr_alg3, +}; + +enum class spsv_alg { + default_alg, + no_optimize_alg, +}; } // namespace sparse } // namespace mkl diff --git a/src/sparse_blas/CMakeLists.txt b/src/sparse_blas/CMakeLists.txt index b93902f49..b01cc63fd 100644 --- a/src/sparse_blas/CMakeLists.txt +++ b/src/sparse_blas/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/sparse_blas/backends/CMakeLists.txt b/src/sparse_blas/backends/CMakeLists.txt index ef606c6e1..294040808 100644 --- a/src/sparse_blas/backends/CMakeLists.txt +++ b/src/sparse_blas/backends/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
diff --git a/src/sparse_blas/backends/backend_wrappers.cxx b/src/sparse_blas/backends/backend_wrappers.cxx index 2c8161249..fff743e82 100644 --- a/src/sparse_blas/backends/backend_wrappers.cxx +++ b/src/sparse_blas/backends/backend_wrappers.cxx @@ -1,21 +1,21 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ /* This file lists functions matching those required by sparse_blas_function_table_t in @@ -35,51 +35,81 @@ Changes to this file should be matched to changes in sparse_blas/function_table. function template instantiations must be added to backend_sparse_blas_instantiations.cxx. */ +#define REPEAT_FOR_EACH_FP_TYPE(DEFINE_MACRO) \ + DEFINE_MACRO() \ + DEFINE_MACRO() \ + DEFINE_MACRO() \ + DEFINE_MACRO() + +#define REPEAT_FOR_EACH_FP_AND_INT_TYPE(DEFINE_MACRO) \ + REPEAT_FOR_EACH_FP_TYPE(DEFINE_MACRO) \ + REPEAT_FOR_EACH_FP_TYPE(DEFINE_MACRO) + // clang-format off -oneapi::mkl::sparse::BACKEND::init_matrix_handle, -oneapi::mkl::sparse::BACKEND::release_matrix_handle, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::optimize_gemm, -oneapi::mkl::sparse::BACKEND::optimize_gemm, -oneapi::mkl::sparse::BACKEND::optimize_gemv, -oneapi::mkl::sparse::BACKEND::optimize_trsv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, 
-oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, +// Dense vector +#define LIST_DENSE_VECTOR_FUNCS() \ +oneapi::mkl::sparse::BACKEND::init_dense_vector, \ +oneapi::mkl::sparse::BACKEND::init_dense_vector, \ +oneapi::mkl::sparse::BACKEND::set_dense_vector_data, \ +oneapi::mkl::sparse::BACKEND::set_dense_vector_data, +REPEAT_FOR_EACH_FP_TYPE(LIST_DENSE_VECTOR_FUNCS) +#undef LIST_DENSE_VECTOR_FUNCS +oneapi::mkl::sparse::BACKEND::release_dense_vector, + +// Dense matrix +#define LIST_DENSE_MATRIX_FUNCS() \ +oneapi::mkl::sparse::BACKEND::init_dense_matrix, \ +oneapi::mkl::sparse::BACKEND::init_dense_matrix, \ +oneapi::mkl::sparse::BACKEND::set_dense_matrix_data, \ +oneapi::mkl::sparse::BACKEND::set_dense_matrix_data, +REPEAT_FOR_EACH_FP_TYPE(LIST_DENSE_MATRIX_FUNCS) +#undef LIST_DENSE_MATRIX_FUNCS +oneapi::mkl::sparse::BACKEND::release_dense_matrix, + +// COO matrix +#define LIST_COO_MATRIX_FUNCS() \ +oneapi::mkl::sparse::BACKEND::init_coo_matrix, \ +oneapi::mkl::sparse::BACKEND::init_coo_matrix, \ +oneapi::mkl::sparse::BACKEND::set_coo_matrix_data, \ +oneapi::mkl::sparse::BACKEND::set_coo_matrix_data, +REPEAT_FOR_EACH_FP_AND_INT_TYPE(LIST_COO_MATRIX_FUNCS) +#undef LIST_COO_MATRIX_FUNCS + +// CSR matrix +#define LIST_CSR_MATRIX_FUNCS() \ +oneapi::mkl::sparse::BACKEND::init_csr_matrix, \ 
+oneapi::mkl::sparse::BACKEND::init_csr_matrix, \ +oneapi::mkl::sparse::BACKEND::set_csr_matrix_data, \ +oneapi::mkl::sparse::BACKEND::set_csr_matrix_data, +REPEAT_FOR_EACH_FP_AND_INT_TYPE(LIST_CSR_MATRIX_FUNCS) +#undef LIST_CSR_MATRIX_FUNCS + +// Common sparse matrix functions +oneapi::mkl::sparse::BACKEND::release_sparse_matrix, +oneapi::mkl::sparse::BACKEND::set_matrix_property, + +// SPMM +oneapi::mkl::sparse::BACKEND::init_spmm_descr, +oneapi::mkl::sparse::BACKEND::release_spmm_descr, +oneapi::mkl::sparse::BACKEND::spmm_buffer_size, +oneapi::mkl::sparse::BACKEND::spmm_optimize, +oneapi::mkl::sparse::BACKEND::spmm_optimize, +oneapi::mkl::sparse::BACKEND::spmm, + +// SPMV +oneapi::mkl::sparse::BACKEND::init_spmv_descr, +oneapi::mkl::sparse::BACKEND::release_spmv_descr, +oneapi::mkl::sparse::BACKEND::spmv_buffer_size, +oneapi::mkl::sparse::BACKEND::spmv_optimize, +oneapi::mkl::sparse::BACKEND::spmv_optimize, +oneapi::mkl::sparse::BACKEND::spmv, + +// SPSV +oneapi::mkl::sparse::BACKEND::init_spsv_descr, +oneapi::mkl::sparse::BACKEND::release_spsv_descr, +oneapi::mkl::sparse::BACKEND::spsv_buffer_size, +oneapi::mkl::sparse::BACKEND::spsv_optimize, +oneapi::mkl::sparse::BACKEND::spsv_optimize, +oneapi::mkl::sparse::BACKEND::spsv, + // clang-format on diff --git a/src/sparse_blas/backends/mkl_common/mkl_basic.cxx b/src/sparse_blas/backends/mkl_common/mkl_basic.cxx deleted file mode 100644 index fd3b1563a..000000000 --- a/src/sparse_blas/backends/mkl_common/mkl_basic.cxx +++ /dev/null @@ -1,62 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -void init_matrix_handle(sycl::queue & /*queue*/, detail::matrix_handle **p_handle) { - oneapi::mkl::sparse::init_matrix_handle(detail::get_handle(p_handle)); -} - -sycl::event release_matrix_handle(sycl::queue &queue, detail::matrix_handle **p_handle, - const std::vector &dependencies) { - return oneapi::mkl::sparse::release_matrix_handle(queue, detail::get_handle(p_handle), - dependencies); -} - -template -std::enable_if_t> set_csr_data( - sycl::queue &queue, detail::matrix_handle *handle, intType num_rows, intType num_cols, - intType /*nnz*/, index_base index, sycl::buffer &row_ptr, - sycl::buffer &col_ind, sycl::buffer &val) { - oneapi::mkl::sparse::set_csr_data(queue, detail::get_handle(handle), num_rows, num_cols, index, - row_ptr, col_ind, val); -} - -template -std::enable_if_t, sycl::event> set_csr_data( - sycl::queue &queue, detail::matrix_handle *handle, intType num_rows, intType num_cols, - intType /*nnz*/, index_base index, intType *row_ptr, intType *col_ind, fpType *val, - const std::vector &dependencies) { - return oneapi::mkl::sparse::set_csr_data(queue, detail::get_handle(handle), num_rows, num_cols, - index, row_ptr, col_ind, val, dependencies); -} - -#define INSTANTIATE_SET_CSR_DATA(FP_TYPE, INT_TYPE) \ - template std::enable_if_t> \ - set_csr_data( \ - sycl::queue & queue, detail::matrix_handle * handle, INT_TYPE num_rows, INT_TYPE num_cols, \ - INT_TYPE nnz, index_base index, sycl::buffer & row_ptr, \ - 
sycl::buffer & col_ind, sycl::buffer & val); \ - template std::enable_if_t, sycl::event> \ - set_csr_data(sycl::queue & queue, detail::matrix_handle * handle, \ - INT_TYPE num_rows, INT_TYPE num_cols, INT_TYPE nnz, \ - index_base index, INT_TYPE * row_ptr, INT_TYPE * col_ind, \ - FP_TYPE * val, const std::vector &dependencies) - -FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_SET_CSR_DATA); - -#undef INSTANTIATE_SET_CSR_DATA diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx new file mode 100644 index 000000000..38d102768 --- /dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx @@ -0,0 +1,386 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +// Dense vector +template +void init_dense_vector(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_vector_handle_t *p_dvhandle, std::int64_t size, + sycl::buffer val) { + *p_dvhandle = new oneapi::mkl::sparse::dense_vector_handle(val, size); +} + +template +void init_dense_vector(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_vector_handle_t *p_dvhandle, std::int64_t size, + fpType *val) { + *p_dvhandle = new oneapi::mkl::sparse::dense_vector_handle(val, size); +} + +template +void set_dense_vector_data(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t size, + sycl::buffer val) { + dvhandle->size = size; + dvhandle->set_buffer(val); +} + +template +void set_dense_vector_data(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t size, + fpType *val) { + dvhandle->size = size; + dvhandle->set_usm_ptr(val); +} + +#define INSTANTIATE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ + template std::enable_if_t> init_dense_vector( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ + std::int64_t size, sycl::buffer val); \ + template std::enable_if_t> init_dense_vector( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ + std::int64_t size, FP_TYPE *val); \ + template std::enable_if_t> set_dense_vector_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ + std::int64_t size, sycl::buffer val); \ + template std::enable_if_t> set_dense_vector_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ + std::int64_t size, FP_TYPE *val) +FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_VECTOR_FUNCS); +#undef INSTANTIATE_DENSE_VECTOR_FUNCS + +sycl::event release_dense_vector(sycl::queue &queue, + oneapi::mkl::sparse::dense_vector_handle_t dvhandle, + const std::vector &dependencies) { + return 
detail::submit_release(queue, dvhandle, dependencies); +} + +// Dense matrix +template +void init_dense_matrix(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + oneapi::mkl::layout dense_layout, sycl::buffer val) { + *p_dmhandle = + new oneapi::mkl::sparse::dense_matrix_handle(val, num_rows, num_cols, ld, dense_layout); +} + +template +void init_dense_matrix(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + oneapi::mkl::layout dense_layout, fpType *val) { + *p_dmhandle = + new oneapi::mkl::sparse::dense_matrix_handle(val, num_rows, num_cols, ld, dense_layout); +} + +template +void set_dense_matrix_data(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + oneapi::mkl::layout dense_layout, sycl::buffer val) { + dmhandle->num_rows = num_rows; + dmhandle->num_cols = num_cols; + dmhandle->ld = ld; + dmhandle->dense_layout = dense_layout; + dmhandle->set_buffer(val); +} + +template +void set_dense_matrix_data(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + oneapi::mkl::layout dense_layout, fpType *val) { + dmhandle->num_rows = num_rows; + dmhandle->num_cols = num_cols; + dmhandle->ld = ld; + dmhandle->dense_layout = dense_layout; + dmhandle->set_usm_ptr(val); +} + +#define INSTANTIATE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ + template std::enable_if_t> init_dense_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, sycl::buffer val); \ + template std::enable_if_t> init_dense_matrix( \ + sycl::queue & queue, 
oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, FP_TYPE *val); \ + template std::enable_if_t> set_dense_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, sycl::buffer val); \ + template std::enable_if_t> set_dense_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, FP_TYPE *val) +FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_MATRIX_FUNCS); +#undef INSTANTIATE_DENSE_MATRIX_FUNCS + +sycl::event release_dense_matrix(sycl::queue &queue, + oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, + const std::vector &dependencies) { + return detail::submit_release(queue, dmhandle, dependencies); +} + +// COO matrix +template +void init_coo_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + oneapi::mkl::index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer val) { + oneapi::mkl::sparse::matrix_handle_t mkl_handle; + oneapi::mkl::sparse::init_matrix_handle(&mkl_handle); + auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ind, col_ind, val); + // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. 
+ oneapi::mkl::sparse::set_coo_data(queue, mkl_handle, static_cast(num_rows), + static_cast(num_cols), static_cast(nnz), + index, internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer()); + *p_smhandle = reinterpret_cast(internal_smhandle); +} + +template +void init_coo_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + oneapi::mkl::index_base index, intType *row_ind, intType *col_ind, + fpType *val) { + oneapi::mkl::sparse::matrix_handle_t mkl_handle; + oneapi::mkl::sparse::init_matrix_handle(&mkl_handle); + auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ind, col_ind, val); + auto event = oneapi::mkl::sparse::set_coo_data( + queue, mkl_handle, static_cast(num_rows), static_cast(num_cols), + static_cast(nnz), index, row_ind, col_ind, val); + event.wait_and_throw(); + *p_smhandle = reinterpret_cast(internal_smhandle); +} + +template +void check_can_reset(const std::string &function_name, + detail::sparse_matrix_handle *internal_smhandle) { + if (internal_smhandle->get_value_type() != detail::get_data_type()) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, + "Incompatible data types expected " + + data_type_to_str(internal_smhandle->get_value_type()) + " but got " + + data_type_to_str(detail::get_data_type())); + } + if (internal_smhandle->get_int_type() != detail::get_data_type()) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, + "Incompatible data types expected " + + data_type_to_str(internal_smhandle->get_int_type()) + " but got " + + data_type_to_str(detail::get_data_type())); + } + if (!internal_smhandle->can_be_reset) { + throw mkl::unimplemented( + "sparse_blas/mkl", function_name, + "Resetting the matrix handle's data after it was used in a computation is not supported."); + } +} + +template +void
set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + oneapi::mkl::index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer val) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + check_can_reset(__FUNCTION__, internal_smhandle); + internal_smhandle->row_container.set_buffer(row_ind); + internal_smhandle->col_container.set_buffer(col_ind); + internal_smhandle->value_container.set_buffer(val); + // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. + oneapi::mkl::sparse::set_coo_data(queue, internal_smhandle->backend_handle, + static_cast(num_rows), + static_cast(num_cols), static_cast(nnz), + index, internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer()); +} + +template +void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + oneapi::mkl::index_base index, intType *row_ind, intType *col_ind, + fpType *val) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + check_can_reset(__FUNCTION__, internal_smhandle); + internal_smhandle->row_container.set_usm_ptr(row_ind); + internal_smhandle->col_container.set_usm_ptr(col_ind); + internal_smhandle->value_container.set_usm_ptr(val); + auto event = oneapi::mkl::sparse::set_coo_data( + queue, internal_smhandle->backend_handle, static_cast(num_rows), + static_cast(num_cols), static_cast(nnz), index, row_ind, col_ind, val); + event.wait_and_throw(); +} + +#define INSTANTIATE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + template std::enable_if_t> \ + init_coo_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, 
std::int64_t nnz, \ + oneapi::mkl::index_base index, sycl::buffer row_ind, \ + sycl::buffer col_ind, sycl::buffer val); \ + template std::enable_if_t> \ + init_coo_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val); \ + template std::enable_if_t> \ + set_coo_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + sycl::buffer row_ind, sycl::buffer col_ind, \ + sycl::buffer val); \ + template std::enable_if_t> \ + set_coo_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE *row_ind, \ + INT_TYPE *col_ind, FP_TYPE *val) +FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_COO_MATRIX_FUNCS); +#undef INSTANTIATE_COO_MATRIX_FUNCS + +// CSR matrix +template +void init_csr_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, + oneapi::mkl::index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val) { + oneapi::mkl::sparse::matrix_handle_t mkl_handle; + oneapi::mkl::sparse::init_matrix_handle(&mkl_handle); + auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ptr, col_ind, val); + // The backend deduces nnz from row_ptr. + // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. 
+ oneapi::mkl::sparse::set_csr_data(queue, mkl_handle, static_cast(num_rows), + static_cast(num_cols), index, + internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer()); + *p_smhandle = reinterpret_cast(internal_smhandle); +} + +template +void init_csr_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, + oneapi::mkl::index_base index, intType *row_ptr, intType *col_ind, + fpType *val) { + oneapi::mkl::sparse::matrix_handle_t mkl_handle; + oneapi::mkl::sparse::init_matrix_handle(&mkl_handle); + auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ptr, col_ind, val); + // The backend deduces nnz from row_ptr. + auto event = oneapi::mkl::sparse::set_csr_data( + queue, mkl_handle, static_cast(num_rows), static_cast(num_cols), index, + row_ptr, col_ind, val); + event.wait_and_throw(); + *p_smhandle = reinterpret_cast(internal_smhandle); +} + +template +void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, + oneapi::mkl::index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + check_can_reset(__FUNCTION__, internal_smhandle); + internal_smhandle->row_container.set_buffer(row_ptr); + internal_smhandle->col_container.set_buffer(col_ind); + internal_smhandle->value_container.set_buffer(val); + // The backend deduces nnz from row_ptr. + // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. 
+ oneapi::mkl::sparse::set_csr_data(queue, internal_smhandle->backend_handle, + static_cast(num_rows), + static_cast(num_cols), index, + internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer()); +} + +template +void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, + oneapi::mkl::index_base index, intType *row_ptr, intType *col_ind, + fpType *val) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + check_can_reset(__FUNCTION__, internal_smhandle); + internal_smhandle->row_container.set_usm_ptr(row_ptr); + internal_smhandle->col_container.set_usm_ptr(col_ind); + internal_smhandle->value_container.set_usm_ptr(val); + // The backend deduces nnz from row_ptr. + auto event = oneapi::mkl::sparse::set_csr_data( + queue, internal_smhandle->backend_handle, static_cast(num_rows), + static_cast(num_cols), index, row_ptr, col_ind, val); + event.wait_and_throw(); +} + +#define INSTANTIATE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + template std::enable_if_t> \ + init_csr_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, sycl::buffer row_ptr, \ + sycl::buffer col_ind, sycl::buffer val); \ + template std::enable_if_t> \ + init_csr_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val); \ + template std::enable_if_t> \ + set_csr_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + sycl::buffer row_ptr, 
sycl::buffer col_ind, \ + sycl::buffer val); \ + template std::enable_if_t> \ + set_csr_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE *row_ptr, \ + INT_TYPE *col_ind, FP_TYPE *val) +FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_CSR_MATRIX_FUNCS); +#undef INSTANTIATE_CSR_MATRIX_FUNCS + +// Common sparse matrix functions +sycl::event release_sparse_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + const std::vector &dependencies) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + // Asynchronously release the backend's handle followed by the internal handle. + auto event = oneapi::mkl::sparse::release_matrix_handle( + queue, &internal_smhandle->backend_handle, dependencies); + return detail::submit_release(queue, internal_smhandle, event); +} + +bool set_matrix_property(sycl::queue & /*queue*/, oneapi::mkl::sparse::matrix_handle_t smhandle, + oneapi::mkl::sparse::matrix_property property) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + // Store the matrix property internally for better error checking + internal_smhandle->set_matrix_property(property); + // Set the matrix property on the backend handle + // Backend and oneMKL interface types for the property don't match + switch (property) { + case oneapi::mkl::sparse::matrix_property::symmetric: + oneapi::mkl::sparse::set_matrix_property(internal_smhandle->backend_handle, + oneapi::mkl::sparse::property::symmetric); + return true; + case oneapi::mkl::sparse::matrix_property::sorted: + oneapi::mkl::sparse::set_matrix_property(internal_smhandle->backend_handle, + oneapi::mkl::sparse::property::sorted); + return true; + default: return false; + } +} diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.hpp b/src/sparse_blas/backends/mkl_common/mkl_handles.hpp new file mode 100644 index 000000000..9a46b6533 --- 
/dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_handles.hpp @@ -0,0 +1,76 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_ +#define _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_ + +// MKLCPU and MKLGPU backends include +// This include defines its own oneapi::mkl::sparse namespace with some of the types that are used here: matrix_handle_t, index_base, transpose, uplo, diag. +#include + +#include "sparse_blas/generic_container.hpp" + +namespace oneapi::mkl::sparse { + +// Complete the definition of incomplete types dense_vector_handle and dense_matrix_handle as there is no conflict with the oneMKL backends. 
+ +struct dense_vector_handle : public detail::generic_dense_vector_handle { + template + dense_vector_handle(T* value_ptr, std::int64_t size) + : detail::generic_dense_vector_handle(nullptr, value_ptr, size) {} + + template + dense_vector_handle(const sycl::buffer value_buffer, std::int64_t size) + : detail::generic_dense_vector_handle(nullptr, value_buffer, size) {} +}; + +struct dense_matrix_handle : public detail::generic_dense_matrix_handle { + template + dense_matrix_handle(T* value_ptr, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + layout dense_layout) + : detail::generic_dense_matrix_handle(nullptr, value_ptr, num_rows, num_cols, ld, + dense_layout) {} + + template + dense_matrix_handle(const sycl::buffer value_buffer, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout) + : detail::generic_dense_matrix_handle(nullptr, value_buffer, num_rows, num_cols, + ld, dense_layout) {} +}; + +} // namespace oneapi::mkl::sparse + +namespace oneapi::mkl::sparse::detail { + +/** + * Internal sparse_matrix_handle type for MKL backends. + * Here \p matrix_handle_t is the type of the backend's handle. + * The user-facing incomplete type matrix_handle_t must be kept incomplete. + * Internally matrix_handle_t is reinterpret_cast as oneapi::mkl::sparse::detail::sparse_matrix_handle which holds another matrix_handle_t for the backend handle. 
+ */ +using sparse_matrix_handle = detail::generic_sparse_handle; + +/// Cast to oneMKL's interface handle type +inline auto get_internal_handle(matrix_handle_t handle) { + return reinterpret_cast(handle); +} + +} // namespace oneapi::mkl::sparse::detail + +#endif // _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_ diff --git a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp index da5235ee0..316c4d7dc 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp +++ b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp @@ -1,56 +1,101 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. * -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ -// MKLCPU and MKLGPU backends include -// This include defines its own oneapi::mkl::sparse namespace with some of the types that are used here: matrix_handle_t, index_base, transpose, uolo, diag. -#include +#ifndef _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HELPER_HPP_ +#define _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HELPER_HPP_ -// Includes are set up so that oneapi::mkl::sparse namespace refers to the MKLCPU and MKLGPU backends namespace (oneMKL product) -// in this file. -// oneapi::mkl::sparse::detail namespace refers to the oneMKL interface namespace. +#if __has_include() +#include +#else +#include +#endif +#include "oneapi/mkl/exceptions.hpp" #include "oneapi/mkl/sparse_blas/detail/helper_types.hpp" +#include "sparse_blas/enum_data_types.hpp" +#include "sparse_blas/macros.hpp" + namespace oneapi::mkl::sparse::detail { -inline auto get_handle(detail::matrix_handle **handle) { - return reinterpret_cast(handle); +/// Return whether a pointer is accessible on the host +template +inline bool is_ptr_accessible_on_host(sycl::queue &queue, const T *host_or_device_ptr) { + auto alloc_type = sycl::get_pointer_type(host_or_device_ptr, queue.get_context()); + // Note sycl::usm::alloc::host may not be accessible on the host according to SYCL specification. 
+    return alloc_type == sycl::usm::alloc::shared;
+}
+
+/// Throw an exception if the scalar is not accessible on the host
+template <typename T>
+void check_ptr_is_host_accessible(const std::string &function_name, const std::string &scalar_name,
+                                  sycl::queue &queue, const T *host_or_device_ptr) {
+    if (!is_ptr_accessible_on_host(queue, host_or_device_ptr)) {
+        throw mkl::invalid_argument(
+            "sparse_blas", function_name,
+            "Scalar " + scalar_name + " must be accessible on the host for buffer functions.");
+    }
 }
 
-inline auto get_handle(detail::matrix_handle *handle) {
-    return reinterpret_cast(handle);
+/// Return a scalar on the host from a pointer to host or device memory
+/// Used for USM functions
+template <typename T>
+inline T get_scalar(sycl::queue &queue, const T *host_or_device_ptr) {
+    if (is_ptr_accessible_on_host(queue, host_or_device_ptr)) {
+        return *host_or_device_ptr;
+    }
+    T scalar;
+    auto event = queue.copy(host_or_device_ptr, &scalar, 1);
+    event.wait_and_throw();
+    return scalar;
 }
 
+/// Merge multiple event dependencies into one
+inline sycl::event collapse_dependencies(sycl::queue &queue,
+                                         const std::vector<sycl::event> &dependencies) {
+    if (dependencies.empty()) {
+        return {};
+    }
+    else if (dependencies.size() == 1) {
+        return dependencies[0];
+    }
+
+    return queue.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(dependencies);
+        cgh.host_task([=]() {});
+    });
+}
+
+/// Convert \p value_type to template type argument and use it to call \p op_functor.
+#define DISPATCH_MKL_OPERATION(function_name, value_type, op_functor, ...)
\ + switch (value_type) { \ + case detail::data_type::real_fp32: return op_functor(__VA_ARGS__); \ + case detail::data_type::real_fp64: return op_functor(__VA_ARGS__); \ + case detail::data_type::complex_fp32: return op_functor>(__VA_ARGS__); \ + case detail::data_type::complex_fp64: \ + return op_functor>(__VA_ARGS__); \ + default: \ + throw oneapi::mkl::exception( \ + "sparse_blas", function_name, \ + "Internal error: unsupported type " + data_type_to_str(value_type)); \ + } + } // namespace oneapi::mkl::sparse::detail -#define FOR_EACH_FP_TYPE(INSTANTIATE_MACRO) \ - INSTANTIATE_MACRO(float); \ - INSTANTIATE_MACRO(double); \ - INSTANTIATE_MACRO(std::complex); \ - INSTANTIATE_MACRO(std::complex) - -#define FOR_EACH_FP_AND_INT_TYPE_HELPER(INSTANTIATE_MACRO, INT_TYPE) \ - INSTANTIATE_MACRO(float, INT_TYPE); \ - INSTANTIATE_MACRO(double, INT_TYPE); \ - INSTANTIATE_MACRO(std::complex, INT_TYPE); \ - INSTANTIATE_MACRO(std::complex, INT_TYPE) - -#define FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_MACRO) \ - FOR_EACH_FP_AND_INT_TYPE_HELPER(INSTANTIATE_MACRO, std::int32_t); \ - FOR_EACH_FP_AND_INT_TYPE_HELPER(INSTANTIATE_MACRO, std::int64_t) +#endif // _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HELPER_HPP_ diff --git a/src/sparse_blas/backends/mkl_common/mkl_operations.cxx b/src/sparse_blas/backends/mkl_common/mkl_operations.cxx deleted file mode 100644 index ba6960341..000000000 --- a/src/sparse_blas/backends/mkl_common/mkl_operations.cxx +++ /dev/null @@ -1,170 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -sycl::event optimize_gemm(sycl::queue& queue, transpose /*transpose_A*/, - detail::matrix_handle* /*handle*/, - const std::vector& dependencies) { - // TODO: Call to optimize_gemm with 2024.1 oneMKL release - // Return an event depending on the dependencies - return queue.submit([=](sycl::handler& cgh) { - cgh.depends_on(dependencies); - cgh.host_task([=]() { /* Empty kernel */ }); - }); -} - -sycl::event optimize_gemm(sycl::queue& queue, transpose /*transpose_A*/, transpose /*transpose_B*/, - layout /*dense_matrix_layout*/, const std::int64_t /*columns*/, - detail::matrix_handle* /*handle*/, - const std::vector& dependencies) { - // TODO: Call to optimize_gemm with 2024.1 oneMKL release - // Return an event depending on the dependencies - return queue.submit([=](sycl::handler& cgh) { - cgh.depends_on(dependencies); - cgh.host_task([=]() { /* Empty kernel */ }); - }); -} - -sycl::event optimize_gemv(sycl::queue& queue, transpose transpose_val, - detail::matrix_handle* handle, - const std::vector& dependencies) { - return oneapi::mkl::sparse::optimize_gemv(queue, transpose_val, detail::get_handle(handle), - dependencies); -} - -sycl::event optimize_trsv(sycl::queue& queue, uplo uplo_val, transpose transpose_val, diag diag_val, - detail::matrix_handle* handle, - const std::vector& dependencies) { - // TODO: Remove this if condition once Intel oneMKL adds support for trans/conjtrans to optimize_trsv - if (transpose_val != 
transpose::nontrans) { - throw mkl::unimplemented("sparse_blas/backends/mkl", __FUNCTION__, - "Transposed or conjugate trsv is not supported"); - } - return oneapi::mkl::sparse::optimize_trsv(queue, uplo_val, transpose_val, diag_val, - detail::get_handle(handle), dependencies); -} - -template -std::enable_if_t> gemv( - sycl::queue& queue, transpose transpose_val, const fpType alpha, - detail::matrix_handle* A_handle, sycl::buffer& x, const fpType beta, - sycl::buffer& y) { - oneapi::mkl::sparse::gemv(queue, transpose_val, alpha, detail::get_handle(A_handle), x, beta, y); -} - -template -std::enable_if_t, sycl::event> gemv( - sycl::queue& queue, transpose transpose_val, const fpType alpha, - detail::matrix_handle* A_handle, const fpType* x, const fpType beta, fpType* y, - const std::vector& dependencies) { - return oneapi::mkl::sparse::gemv(queue, transpose_val, alpha, detail::get_handle(A_handle), x, beta, y, - dependencies); -} - -template -std::enable_if_t> trsv(sycl::queue& queue, uplo uplo_val, - transpose transpose_val, diag diag_val, - detail::matrix_handle* A_handle, - sycl::buffer& x, - sycl::buffer& y) { - // TODO: Remove this if condition once Intel oneMKL adds support for trans/conjtrans to trsv - if (transpose_val != transpose::nontrans) { - throw mkl::unimplemented("sparse_blas/backends/mkl", __FUNCTION__, - "Transposed or conjugate trsv is not supported"); - } - oneapi::mkl::sparse::trsv(queue, uplo_val, transpose_val, diag_val, - detail::get_handle(A_handle), x, y); -} - -template -std::enable_if_t, sycl::event> trsv( - sycl::queue& queue, uplo uplo_val, transpose transpose_val, diag diag_val, - detail::matrix_handle* A_handle, const fpType* x, fpType* y, - const std::vector& dependencies) { - // TODO: Remove this if condition once Intel oneMKL adds support for trans/conjtrans to trsv - if (transpose_val != transpose::nontrans) { - throw mkl::unimplemented("sparse_blas/backends/mkl", __FUNCTION__, - "Transposed or conjugate trsv is not supported"); - 
} - // TODO: Remove const_cast in future oneMKL release - return oneapi::mkl::sparse::trsv(queue, uplo_val, transpose_val, diag_val, - detail::get_handle(A_handle), const_cast(x), y, - dependencies); -} - -template -std::enable_if_t> gemm( - sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, detail::matrix_handle* A_handle, sycl::buffer& B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, - sycl::buffer& C, const std::int64_t ldc) { - oneapi::mkl::sparse::gemm(queue, dense_matrix_layout, transpose_A, transpose_B, alpha, - detail::get_handle(A_handle), B, columns, ldb, beta, C, ldc); -} - -template -std::enable_if_t, sycl::event> gemm( - sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, detail::matrix_handle* A_handle, const fpType* B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, fpType* C, - const std::int64_t ldc, const std::vector& dependencies) { - // TODO: Remove const_cast in future oneMKL release - return oneapi::mkl::sparse::gemm(queue, dense_matrix_layout, transpose_A, transpose_B, alpha, - detail::get_handle(A_handle), const_cast(B), columns, - ldb, beta, C, ldc, dependencies); -} - -#define INSTANTIATE_GEMV(FP_TYPE) \ - template std::enable_if_t> gemv( \ - sycl::queue& queue, transpose transpose_val, const FP_TYPE alpha, \ - detail::matrix_handle* A_handle, sycl::buffer& x, const FP_TYPE beta, \ - sycl::buffer& y); \ - template std::enable_if_t, sycl::event> gemv( \ - sycl::queue& queue, transpose transpose_val, const FP_TYPE alpha, \ - detail::matrix_handle* A_handle, const FP_TYPE* x, const FP_TYPE beta, FP_TYPE* y, \ - const std::vector& dependencies) - -#define INSTANTIATE_TRSV(FP_TYPE) \ - template std::enable_if_t> trsv( \ - sycl::queue& queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ - detail::matrix_handle* A_handle, sycl::buffer& x, \ - sycl::buffer& 
y); \ - template std::enable_if_t, sycl::event> trsv( \ - sycl::queue& queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ - detail::matrix_handle* A_handle, const FP_TYPE* x, FP_TYPE* y, \ - const std::vector& dependencies) - -#define INSTANTIATE_GEMM(FP_TYPE) \ - template std::enable_if_t> gemm( \ - sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, \ - transpose transpose_B, const FP_TYPE alpha, detail::matrix_handle* A_handle, \ - sycl::buffer& B, const std::int64_t columns, const std::int64_t ldb, \ - const FP_TYPE beta, sycl::buffer& C, const std::int64_t ldc); \ - template std::enable_if_t, sycl::event> gemm( \ - sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, \ - transpose transpose_B, const FP_TYPE alpha, detail::matrix_handle* A_handle, \ - const FP_TYPE* B, const std::int64_t columns, const std::int64_t ldb, const FP_TYPE beta, \ - FP_TYPE* C, const std::int64_t ldc, const std::vector& dependencies) - -FOR_EACH_FP_TYPE(INSTANTIATE_GEMV); -FOR_EACH_FP_TYPE(INSTANTIATE_TRSV); -FOR_EACH_FP_TYPE(INSTANTIATE_GEMM); - -#undef INSTANTIATE_GEMV -#undef INSTANTIATE_TRSV -#undef INSTANTIATE_GEMM diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx new file mode 100644 index 000000000..e870341ff --- /dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -0,0 +1,168 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. 
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*
+**************************************************************************/
+
+// The operation descriptor is not needed as long as the backend does not have an equivalent type and does not support external workspace.
+using spmm_descr = void *;
+
+void init_spmm_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spmm_descr_t *p_spmm_descr) {
+    *p_spmm_descr = nullptr;
+}
+
+sycl::event release_spmm_descr(sycl::queue &queue, oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/,
+                               const std::vector<sycl::event> &dependencies) {
+    return detail::collapse_dependencies(queue, dependencies);
+}
+
+void check_valid_spmm(const std::string function_name, sycl::queue &queue,
+                      oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view,
+                      oneapi::mkl::sparse::matrix_handle_t A_handle,
+                      oneapi::mkl::sparse::dense_matrix_handle_t B_handle,
+                      oneapi::mkl::sparse::dense_matrix_handle_t C_handle, const void *alpha,
+                      const void *beta) {
+    THROW_IF_NULLPTR(function_name, A_handle);
+    THROW_IF_NULLPTR(function_name, B_handle);
+    THROW_IF_NULLPTR(function_name, C_handle);
+
+    auto internal_A_handle = detail::get_internal_handle(A_handle);
+    detail::check_all_containers_compatible(function_name, internal_A_handle, B_handle, C_handle);
+    if (internal_A_handle->all_use_buffer()) {
+        detail::check_ptr_is_host_accessible("spmm", "alpha", queue, alpha);
+        detail::check_ptr_is_host_accessible("spmm", "beta", queue, beta);
+    }
+    if (B_handle->dense_layout != C_handle->dense_layout) {
+        throw mkl::invalid_argument("sparse_blas", function_name,
+                                    "B and C matrices must use the same layout.");
+    }
+
+    if (A_view.type_view != oneapi::mkl::sparse::matrix_descr::general) {
throw mkl::invalid_argument("sparse_blas", function_name, + "Matrix view's type must be `matrix_descr::general`."); + } + + if (A_view.diag_view != oneapi::mkl::diag::nonunit) { + throw mkl::invalid_argument("sparse_blas", function_name, + "Matrix's diag_view must be `nonunit`."); + } + +#if BACKEND == gpu + if (opA == oneapi::mkl::transpose::conjtrans && + internal_A_handle->has_matrix_property(oneapi::mkl::sparse::matrix_property::symmetric)) { + throw mkl::unimplemented("sparse_blas/mklgpu", function_name, + "spmm does not support conjtrans with the symmetric property."); + } +#else + (void)opA; +#endif // BACKEND +} + +void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose /*opB*/, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg /*alg*/, + oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, + std::size_t &temp_buffer_size) { + // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
+ check_valid_spmm(__FUNCTION__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + temp_buffer_size = 0; +} + +void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose /*opB*/, + const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, + sycl::buffer /*workspace*/) { + check_valid_spmm(__FUNCTION__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + if (alg == oneapi::mkl::sparse::spmm_alg::no_optimize_alg) { + return; + } + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + // TODO: Add support for spmm_optimize once the close-source oneMKL backend supports it. +} + +sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose /*opB*/, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, void * /*workspace*/, + const std::vector &dependencies) { + check_valid_spmm(__FUNCTION__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + if (alg == oneapi::mkl::sparse::spmm_alg::no_optimize_alg) { + return detail::collapse_dependencies(queue, dependencies); + } + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + // TODO: Add support for spmm_optimize once the close-source oneMKL backend supports it. 
+ return detail::collapse_dependencies(queue, dependencies); +} + +template +sycl::event internal_spmm(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, + oneapi::mkl::sparse::matrix_view /*A_view*/, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg /*alg*/, + oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, + const std::vector &dependencies) { + T cast_alpha = *static_cast(alpha); + T cast_beta = *static_cast(beta); + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + auto layout = B_handle->dense_layout; + auto columns = C_handle->num_cols; + auto ldb = B_handle->ld; + auto ldc = C_handle->ld; + if (internal_A_handle->all_use_buffer()) { + oneapi::mkl::sparse::gemm(queue, layout, opA, opB, cast_alpha, + internal_A_handle->backend_handle, B_handle->get_buffer(), + columns, ldb, cast_beta, C_handle->get_buffer(), ldc); + // Dependencies are not used for buffers + return {}; + } + else { + return oneapi::mkl::sparse::gemm(queue, layout, opA, opB, cast_alpha, + internal_A_handle->backend_handle, + B_handle->get_usm_ptr(), columns, ldb, cast_beta, + C_handle->get_usm_ptr(), ldc, dependencies); + } +} + +sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, + const std::vector &dependencies) { + check_valid_spmm(__FUNCTION__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + auto value_type = 
detail::get_internal_handle(A_handle)->get_value_type(); + DISPATCH_MKL_OPERATION("spmm", value_type, internal_spmm, queue, opA, opB, alpha, A_view, + A_handle, B_handle, beta, C_handle, alg, spmm_descr, dependencies); +} diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx new file mode 100644 index 000000000..73efe4e7d --- /dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -0,0 +1,197 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +// The operation descriptor is not needed as long as the backend does not have an equivalent type and does not support external workspace. 
+using spmv_descr = void *; + +void init_spmv_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spmv_descr_t *p_spmv_descr) { + *p_spmv_descr = nullptr; +} + +sycl::event release_spmv_descr(sycl::queue &queue, oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, + const std::vector &dependencies) { + return detail::collapse_dependencies(queue, dependencies); +} + +void check_valid_spmv(const std::string function_name, sycl::queue &queue, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, const void *alpha, + const void *beta) { + THROW_IF_NULLPTR(function_name, A_handle); + THROW_IF_NULLPTR(function_name, x_handle); + THROW_IF_NULLPTR(function_name, y_handle); + + auto internal_A_handle = detail::get_internal_handle(A_handle); + detail::check_all_containers_compatible(function_name, internal_A_handle, x_handle, y_handle); + if (internal_A_handle->all_use_buffer()) { + detail::check_ptr_is_host_accessible("spmv", "alpha", queue, alpha); + detail::check_ptr_is_host_accessible("spmv", "beta", queue, beta); + } + if (A_view.type_view == oneapi::mkl::sparse::matrix_descr::diagonal) { + throw mkl::invalid_argument("sparse_blas", function_name, + "Matrix view's type cannot be diagonal."); + } + + if (A_view.type_view != oneapi::mkl::sparse::matrix_descr::triangular && + A_view.diag_view != oneapi::mkl::diag::nonunit) { + throw mkl::invalid_argument( + "sparse_blas", function_name, + "`unit` diag_view can only be used with a triangular type_view."); + } +} + +void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose /*opA*/, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg /*alg*/, + 
oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, + std::size_t &temp_buffer_size) { + // TODO: Add support for external workspace once the close-source oneMKL backend supports it. + check_valid_spmv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, beta); + temp_buffer_size = 0; +} + +void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, + sycl::buffer /*workspace*/) { + check_valid_spmv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, beta); + if (alg == oneapi::mkl::sparse::spmv_alg::no_optimize_alg) { + return; + } + sycl::event event; + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + if (A_view.type_view == matrix_descr::triangular) { + event = oneapi::mkl::sparse::optimize_trmv(queue, A_view.uplo_view, opA, A_view.diag_view, + internal_A_handle->backend_handle); + } + else if (A_view.type_view == matrix_descr::symmetric || + A_view.type_view == matrix_descr::hermitian) { + // No optimize_symv currently + return; + } + else { + event = oneapi::mkl::sparse::optimize_gemv(queue, opA, internal_A_handle->backend_handle); + } + // spmv_optimize is not asynchronous for buffers as the backend optimize functions don't take buffers. 
+ event.wait_and_throw(); +} + +sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, void * /*workspace*/, + const std::vector &dependencies) { + check_valid_spmv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, beta); + if (alg == oneapi::mkl::sparse::spmv_alg::no_optimize_alg) { + return detail::collapse_dependencies(queue, dependencies); + } + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + if (A_view.type_view == matrix_descr::triangular) { + return oneapi::mkl::sparse::optimize_trmv(queue, A_view.uplo_view, opA, A_view.diag_view, + internal_A_handle->backend_handle, dependencies); + } + else if (A_view.type_view == matrix_descr::symmetric || + A_view.type_view == matrix_descr::hermitian) { + return detail::collapse_dependencies(queue, dependencies); + } + else { + return oneapi::mkl::sparse::optimize_gemv(queue, opA, internal_A_handle->backend_handle, + dependencies); + } +} + +template +sycl::event internal_spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg /*alg*/, + oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, + const std::vector &dependencies) { + T cast_alpha = *static_cast(alpha); + T cast_beta = *static_cast(beta); + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + auto backend_handle = 
internal_A_handle->backend_handle; + if (internal_A_handle->all_use_buffer()) { + auto x_buffer = x_handle->get_buffer(); + auto y_buffer = y_handle->get_buffer(); + if (A_view.type_view == matrix_descr::triangular) { + oneapi::mkl::sparse::trmv(queue, A_view.uplo_view, opA, A_view.diag_view, cast_alpha, + backend_handle, x_buffer, cast_beta, y_buffer); + } + else if (A_view.type_view == matrix_descr::symmetric || + A_view.type_view == matrix_descr::hermitian) { + oneapi::mkl::sparse::symv(queue, A_view.uplo_view, cast_alpha, backend_handle, x_buffer, + cast_beta, y_buffer); + } + else { + oneapi::mkl::sparse::gemv(queue, opA, cast_alpha, backend_handle, x_buffer, cast_beta, + y_buffer); + } + // Dependencies are not used for buffers + return {}; + } + else { + auto x_usm = x_handle->get_usm_ptr(); + auto y_usm = y_handle->get_usm_ptr(); + if (A_view.type_view == matrix_descr::triangular) { + return oneapi::mkl::sparse::trmv(queue, A_view.uplo_view, opA, A_view.diag_view, + cast_alpha, backend_handle, x_usm, cast_beta, y_usm, + dependencies); + } + else if (A_view.type_view == matrix_descr::symmetric || + A_view.type_view == matrix_descr::hermitian) { + return oneapi::mkl::sparse::symv(queue, A_view.uplo_view, cast_alpha, backend_handle, + x_usm, cast_beta, y_usm, dependencies); + } + else { + return oneapi::mkl::sparse::gemv(queue, opA, cast_alpha, backend_handle, x_usm, + cast_beta, y_usm, dependencies); + } + } +} + +sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, + const std::vector &dependencies) { + check_valid_spmv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, beta); + auto value_type = 
detail::get_internal_handle(A_handle)->get_value_type(); + DISPATCH_MKL_OPERATION("spmv", value_type, internal_spmv, queue, opA, alpha, A_view, A_handle, + x_handle, beta, y_handle, alg, spmv_descr, dependencies); +} diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx new file mode 100644 index 000000000..bd8094f90 --- /dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -0,0 +1,150 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +// The operation descriptor is not needed as long as the backend does not have an equivalent type and does not support external workspace. 
+using spsv_descr = void *; + +void init_spsv_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spsv_descr_t *p_spsv_descr) { + *p_spsv_descr = nullptr; +} + +sycl::event release_spsv_descr(sycl::queue &queue, oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, + const std::vector &dependencies) { + return detail::collapse_dependencies(queue, dependencies); +} + +void check_valid_spsv(const std::string function_name, sycl::queue &queue, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, const void *alpha, + oneapi::mkl::sparse::spsv_alg alg) { + THROW_IF_NULLPTR(function_name, A_handle); + THROW_IF_NULLPTR(function_name, x_handle); + THROW_IF_NULLPTR(function_name, y_handle); + + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg && + !internal_A_handle->has_matrix_property(oneapi::mkl::sparse::matrix_property::sorted)) { + throw mkl::unimplemented( + "sparse_blas/mkl", function_name, + "`no_optimize_alg` is not supported unless A_handle has the property `matrix_property::sorted`."); + } + + detail::check_all_containers_compatible(function_name, internal_A_handle, x_handle, y_handle); + if (A_view.type_view != matrix_descr::triangular) { + throw mkl::invalid_argument("sparse_blas", function_name, + "Matrix view's type must be `matrix_descr::triangular`."); + } + + if (internal_A_handle->all_use_buffer()) { + detail::check_ptr_is_host_accessible("spsv", "alpha", queue, alpha); + } +} + +void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose /*opA*/, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + 
oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, + std::size_t &temp_buffer_size) { + // TODO: Add support for external workspace once the close-source oneMKL backend supports it. + check_valid_spsv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + temp_buffer_size = 0; +} + +void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, + sycl::buffer /*workspace*/) { + check_valid_spsv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg) { + return; + } + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + auto event = oneapi::mkl::sparse::optimize_trsv(queue, A_view.uplo_view, opA, A_view.diag_view, + internal_A_handle->backend_handle); + // spsv_optimize is not asynchronous for buffers as the backend optimize functions don't take buffers. 
+ event.wait_and_throw(); +} + +sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, void * /*workspace*/, + const std::vector &dependencies) { + check_valid_spsv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg) { + return detail::collapse_dependencies(queue, dependencies); + } + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + return oneapi::mkl::sparse::optimize_trsv(queue, A_view.uplo_view, opA, A_view.diag_view, + internal_A_handle->backend_handle, dependencies); +} + +template +sycl::event internal_spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg /*alg*/, + oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, + const std::vector &dependencies) { + T cast_alpha = *static_cast(alpha); + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + if (internal_A_handle->all_use_buffer()) { + oneapi::mkl::sparse::trsv(queue, A_view.uplo_view, opA, A_view.diag_view, cast_alpha, + internal_A_handle->backend_handle, x_handle->get_buffer(), + y_handle->get_buffer()); + // Dependencies are not used for buffers + return {}; + } + else { + return oneapi::mkl::sparse::trsv(queue, A_view.uplo_view, opA, A_view.diag_view, cast_alpha, + internal_A_handle->backend_handle, + x_handle->get_usm_ptr(), 
y_handle->get_usm_ptr(), + dependencies); + } +} + +sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, + const std::vector &dependencies) { + check_valid_spsv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); + DISPATCH_MKL_OPERATION("spsv", value_type, internal_spsv, queue, opA, alpha, A_view, A_handle, + x_handle, y_handle, alg, spsv_descr, dependencies); +} diff --git a/src/sparse_blas/backends/mklcpu/CMakeLists.txt b/src/sparse_blas/backends/mklcpu/CMakeLists.txt index cfcf9cf3d..c851db8bc 100644 --- a/src/sparse_blas/backends/mklcpu/CMakeLists.txt +++ b/src/sparse_blas/backends/mklcpu/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -24,8 +24,10 @@ include(WarningsUtils) add_library(${LIB_NAME}) add_library(${LIB_OBJ} OBJECT - mklcpu_basic.cpp - mklcpu_operations.cpp + mklcpu_handles.cpp + mklcpu_spmm.cpp + mklcpu_spmv.cpp + mklcpu_spsv.cpp $<$: mklcpu_wrappers.cpp> ) add_dependencies(onemkl_backend_libs_sparse_blas ${LIB_NAME}) diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_basic.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_basic.cpp deleted file mode 100644 index 9ab29ee92..000000000 --- a/src/sparse_blas/backends/mklcpu/mklcpu_basic.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include "../mkl_common/mkl_helper.hpp" - -#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" - -namespace oneapi::mkl::sparse::mklcpu { - -#include "../mkl_common/mkl_basic.cxx" - -} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_handles.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_handles.cpp new file mode 100644 index 000000000..a6ea51629 --- /dev/null +++ b/src/sparse_blas/backends/mklcpu/mklcpu_handles.cpp @@ -0,0 +1,29 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" + +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" + +namespace oneapi::mkl::sparse::mklcpu { + +#include "sparse_blas/backends/mkl_common/mkl_handles.cxx" + +} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp deleted file mode 100644 index e636b1816..000000000 --- a/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include "../mkl_common/mkl_helper.hpp" - -#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" - -namespace oneapi::mkl::sparse::mklcpu { - -#include "../mkl_common/mkl_operations.cxx" - -} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_spmm.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_spmm.cpp new file mode 100644 index 000000000..de34dcb4d --- /dev/null +++ b/src/sparse_blas/backends/mklcpu/mklcpu_spmm.cpp @@ -0,0 +1,30 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" +#include "sparse_blas/macros.hpp" +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" + +#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" + +namespace oneapi::mkl::sparse::mklcpu { + +#include "sparse_blas/backends/mkl_common/mkl_spmm.cxx" + +} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_spmv.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_spmv.cpp new file mode 100644 index 000000000..ffbba2f5e --- /dev/null +++ b/src/sparse_blas/backends/mklcpu/mklcpu_spmv.cpp @@ -0,0 +1,30 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" +#include "sparse_blas/macros.hpp" +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" + +#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" + +namespace oneapi::mkl::sparse::mklcpu { + +#include "sparse_blas/backends/mkl_common/mkl_spmv.cxx" + +} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_spsv.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_spsv.cpp new file mode 100644 index 000000000..aff9b4abc --- /dev/null +++ b/src/sparse_blas/backends/mklcpu/mklcpu_spsv.cpp @@ -0,0 +1,30 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" +#include "sparse_blas/macros.hpp" +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" + +#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" + +namespace oneapi::mkl::sparse::mklcpu { + +#include "sparse_blas/backends/mkl_common/mkl_spsv.cxx" + +} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp index 40f75c60c..1a6217684 100644 --- a/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp +++ b/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp @@ -1,21 +1,21 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
+* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. * -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ #include "oneapi/mkl/sparse_blas/types.hpp" diff --git a/src/sparse_blas/backends/mklgpu/CMakeLists.txt b/src/sparse_blas/backends/mklgpu/CMakeLists.txt index a31794547..b83a39297 100644 --- a/src/sparse_blas/backends/mklgpu/CMakeLists.txt +++ b/src/sparse_blas/backends/mklgpu/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,8 +24,10 @@ include(WarningsUtils) add_library(${LIB_NAME}) add_library(${LIB_OBJ} OBJECT - mklgpu_basic.cpp - mklgpu_operations.cpp + mklgpu_handles.cpp + mklgpu_spmm.cpp + mklgpu_spmv.cpp + mklgpu_spsv.cpp $<$: mklgpu_wrappers.cpp> ) add_dependencies(onemkl_backend_libs_sparse_blas ${LIB_NAME}) diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_basic.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_basic.cpp deleted file mode 100644 index 8df24f8da..000000000 --- a/src/sparse_blas/backends/mklgpu/mklgpu_basic.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include "../mkl_common/mkl_helper.hpp" - -#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" - -namespace oneapi::mkl::sparse::mklgpu { - -#include "../mkl_common/mkl_basic.cxx" - -} // namespace oneapi::mkl::sparse::mklgpu diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_handles.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_handles.cpp new file mode 100644 index 000000000..7cb9853a7 --- /dev/null +++ b/src/sparse_blas/backends/mklgpu/mklgpu_handles.cpp @@ -0,0 +1,29 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" + +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" + +namespace oneapi::mkl::sparse::mklgpu { + +#include "sparse_blas/backends/mkl_common/mkl_handles.cxx" + +} // namespace oneapi::mkl::sparse::mklgpu diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp deleted file mode 100644 index 439dc4eea..000000000 --- a/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include "../mkl_common/mkl_helper.hpp" - -#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" - -namespace oneapi::mkl::sparse::mklgpu { - -#include "../mkl_common/mkl_operations.cxx" - -} // namespace oneapi::mkl::sparse::mklgpu diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_spmm.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_spmm.cpp new file mode 100644 index 000000000..ca2338787 --- /dev/null +++ b/src/sparse_blas/backends/mklgpu/mklgpu_spmm.cpp @@ -0,0 +1,30 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" +#include "sparse_blas/macros.hpp" +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" + +#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" + +namespace oneapi::mkl::sparse::mklgpu { + +#include "sparse_blas/backends/mkl_common/mkl_spmm.cxx" + +} // namespace oneapi::mkl::sparse::mklgpu diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_spmv.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_spmv.cpp new file mode 100644 index 000000000..6a7087a86 --- /dev/null +++ b/src/sparse_blas/backends/mklgpu/mklgpu_spmv.cpp @@ -0,0 +1,30 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" +#include "sparse_blas/macros.hpp" +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" + +#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" + +namespace oneapi::mkl::sparse::mklgpu { + +#include "sparse_blas/backends/mkl_common/mkl_spmv.cxx" + +} // namespace oneapi::mkl::sparse::mklgpu diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_spsv.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_spsv.cpp new file mode 100644 index 000000000..b42d4539f --- /dev/null +++ b/src/sparse_blas/backends/mklgpu/mklgpu_spsv.cpp @@ -0,0 +1,30 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" +#include "sparse_blas/macros.hpp" +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" + +#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" + +namespace oneapi::mkl::sparse::mklgpu { + +#include "sparse_blas/backends/mkl_common/mkl_spsv.cxx" + +} // namespace oneapi::mkl::sparse::mklgpu diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp index 346b13540..4a261f64e 100644 --- a/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp +++ b/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp @@ -1,21 +1,21 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
+* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. * -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ #include "oneapi/mkl/sparse_blas/types.hpp" diff --git a/src/sparse_blas/enum_data_types.hpp b/src/sparse_blas/enum_data_types.hpp new file mode 100644 index 000000000..7114482ee --- /dev/null +++ b/src/sparse_blas/enum_data_types.hpp @@ -0,0 +1,69 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#ifndef _ONEMKL_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_ +#define _ONEMKL_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_ + +#include + +namespace oneapi::mkl::sparse::detail { + +enum data_type { none, int32, int64, real_fp32, real_fp64, complex_fp32, complex_fp64 }; + +inline std::string data_type_to_str(data_type data_type) { + switch (data_type) { + case none: return "none"; + case int32: return "int32"; + case int64: return "int64"; + case real_fp32: return "real_fp32"; + case real_fp64: return "real_fp64"; + case complex_fp32: return "complex_fp32"; + case complex_fp64: return "complex_fp64"; + default: return "unknown"; + } +} + +template +data_type get_data_type() { + if constexpr (std::is_same_v) { + return data_type::int32; + } + else if constexpr (std::is_same_v) { + return data_type::int64; + } + else if constexpr (std::is_same_v) { + return data_type::real_fp32; + } + else if constexpr (std::is_same_v) { + return data_type::real_fp64; + } + else if constexpr (std::is_same_v>) { + return data_type::complex_fp32; + } + else if constexpr (std::is_same_v>) { + return data_type::complex_fp64; + } + else { + static_assert(false, "Unsupported type"); + } +} + +} // namespace oneapi::mkl::sparse::detail + +#endif // _ONEMKL_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_ diff --git a/src/sparse_blas/function_table.hpp b/src/sparse_blas/function_table.hpp index 57279fb3f..2fc5d4d28 100644 --- a/src/sparse_blas/function_table.hpp +++ b/src/sparse_blas/function_table.hpp @@ -1,21 +1,21 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at * -* (*Licensed under the Apache License, Version 2.0 )(the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ #ifndef _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_ #define _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_ @@ -23,87 +23,245 @@ #include "oneapi/mkl/sparse_blas/types.hpp" #include "sparse_blas/macros.hpp" -#define DEFINE_SET_CSR_DATA(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ - void (*set_csr_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t handle, INT_TYPE num_rows, \ - INT_TYPE num_cols, INT_TYPE nnz, oneapi::mkl::index_base index, \ - sycl::buffer & row_ptr, sycl::buffer & col_ind, \ - sycl::buffer & val); \ - sycl::event (*set_csr_data_usm##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t handle, INT_TYPE num_rows, \ - INT_TYPE num_cols, INT_TYPE nnz, oneapi::mkl::index_base index, INT_TYPE * row_ptr, \ - INT_TYPE * col_ind, FP_TYPE * val, const std::vector &dependencies) - -#define DEFINE_GEMV(FP_TYPE, FP_SUFFIX) \ - void (*gemv_buffer##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::transpose transpose_val, const FP_TYPE alpha, \ - oneapi::mkl::sparse::matrix_handle_t A_handle, sycl::buffer &x, \ - const FP_TYPE beta, sycl::buffer &y); \ - sycl::event (*gemv_usm##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::transpose transpose_val, const FP_TYPE alpha, \ - oneapi::mkl::sparse::matrix_handle_t A_handle, const FP_TYPE *x, const FP_TYPE beta, \ - FP_TYPE *y, const std::vector &dependencies) - -#define DEFINE_TRSV(FP_TYPE, FP_SUFFIX) \ - void (*trsv_buffer##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::uplo uplo_val, oneapi::mkl::transpose transpose_val, \ - oneapi::mkl::diag diag_val, oneapi::mkl::sparse::matrix_handle_t A_handle, \ - sycl::buffer & x, sycl::buffer & y); \ - sycl::event (*trsv_usm##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::uplo uplo_val, 
oneapi::mkl::transpose transpose_val, \ - oneapi::mkl::diag diag_val, oneapi::mkl::sparse::matrix_handle_t A_handle, \ - const FP_TYPE *x, FP_TYPE *y, const std::vector &dependencies) - -#define DEFINE_GEMM(FP_TYPE, FP_SUFFIX) \ - void (*gemm_buffer##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::layout dense_matrix_layout, \ - oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, \ - const FP_TYPE alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, \ - sycl::buffer &B, const std::int64_t columns, const std::int64_t ldb, \ - const FP_TYPE beta, sycl::buffer &C, const std::int64_t ldc); \ - sycl::event (*gemm_usm##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::layout dense_matrix_layout, \ - oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, \ - const FP_TYPE alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, const FP_TYPE *B, \ - const std::int64_t columns, const std::int64_t ldb, const FP_TYPE beta, FP_TYPE *C, \ - const std::int64_t ldc, const std::vector &dependencies) +// Dense vector +#define DEFINE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ + void (*init_dense_vector_buffer##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ + std::int64_t size, sycl::buffer val); \ + void (*init_dense_vector_usm##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ + std::int64_t size, FP_TYPE *val); \ + void (*set_dense_vector_data_buffer##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ + std::int64_t size, sycl::buffer val); \ + void (*set_dense_vector_data_usm##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ + std::int64_t size, FP_TYPE *val) + +// Dense matrix +#define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ + void (*init_dense_matrix_buffer##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ + 
std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, sycl::buffer val); \ + void (*init_dense_matrix_usm##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, FP_TYPE *val); \ + void (*set_dense_matrix_data_buffer##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, sycl::buffer val); \ + void (*set_dense_matrix_data_usm##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, FP_TYPE *val) + +// COO matrix +#define DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + void (*init_coo_matrix_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, sycl::buffer row_ind, \ + sycl::buffer col_ind, sycl::buffer val); \ + void (*init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val); \ + void (*set_coo_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + sycl::buffer row_ind, sycl::buffer col_ind, \ + sycl::buffer val); \ + void (*set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, 
std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE *row_ind, \ + INT_TYPE *col_ind, FP_TYPE *val) + +// CSR matrix +#define DEFINE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + void (*init_csr_matrix_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, sycl::buffer row_ptr, \ + sycl::buffer col_ind, sycl::buffer val); \ + void (*init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val); \ + void (*set_csr_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + sycl::buffer row_ptr, sycl::buffer col_ind, \ + sycl::buffer val); \ + void (*set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE *row_ptr, \ + INT_TYPE *col_ind, FP_TYPE *val) typedef struct { int version; - void (*init_matrix_handle)(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_handle); - sycl::event (*release_matrix_handle)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t *p_handle, + // Dense vector + FOR_EACH_FP_TYPE(DEFINE_DENSE_VECTOR_FUNCS); + sycl::event (*release_dense_vector)(sycl::queue &queue, + oneapi::mkl::sparse::dense_vector_handle_t dvhandle, + const std::vector &dependencies); + + // Dense matrix + FOR_EACH_FP_TYPE(DEFINE_DENSE_MATRIX_FUNCS); + sycl::event (*release_dense_matrix)(sycl::queue 
&queue, + oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, + const std::vector &dependencies); + + // COO matrix + FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); + + // CSR matrix + FOR_EACH_FP_AND_INT_TYPE(DEFINE_CSR_MATRIX_FUNCS); + + // Common sparse matrix functions + sycl::event (*release_sparse_matrix)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t smhandle, const std::vector &dependencies); - FOR_EACH_FP_AND_INT_TYPE(DEFINE_SET_CSR_DATA); - - // optimize_* - sycl::event (*optimize_gemm_v1)(sycl::queue &queue, oneapi::mkl::transpose transpose_A, - oneapi::mkl::sparse::matrix_handle_t handle, - const std::vector &dependencies); - sycl::event (*optimize_gemm_v2)(sycl::queue &queue, oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, - oneapi::mkl::layout dense_matrix_layout, - const std::int64_t columns, - oneapi::mkl::sparse::matrix_handle_t handle, - const std::vector &dependencies); - sycl::event (*optimize_gemv)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, - oneapi::mkl::sparse::matrix_handle_t handle, - const std::vector &dependencies); - sycl::event (*optimize_trsv)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t handle, - const std::vector &dependencies); - - FOR_EACH_FP_TYPE(DEFINE_GEMV); - FOR_EACH_FP_TYPE(DEFINE_TRSV); - FOR_EACH_FP_TYPE(DEFINE_GEMM); + bool (*set_matrix_property)(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + oneapi::mkl::sparse::matrix_property property); + + // SPMM + void (*init_spmm_descr)(sycl::queue &queue, oneapi::mkl::sparse::spmm_descr_t *p_spmm_descr); + + sycl::event (*release_spmm_descr)(sycl::queue &queue, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, + const std::vector &dependencies); + + void (*spmm_buffer_size)(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, + 
oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, + std::size_t &temp_buffer_size); + + void (*spmm_optimize_buffer)( + sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, sycl::buffer workspace); + + sycl::event (*spmm_optimize_usm)(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, + const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, void *workspace, + const std::vector &dependencies); + + sycl::event (*spmm)(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, + const std::vector &dependencies); + + // SPMV + void (*init_spmv_descr)(sycl::queue &queue, oneapi::mkl::sparse::spmv_descr_t *p_spmv_descr); + + sycl::event (*release_spmv_descr)(sycl::queue &queue, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, + const std::vector &dependencies); 
+ + void (*spmv_buffer_size)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, + std::size_t &temp_buffer_size); + + void (*spmv_optimize_buffer)( + sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, sycl::buffer workspace); + + sycl::event (*spmv_optimize_usm)(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, void *workspace, + const std::vector &dependencies); + + sycl::event (*spmv)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, + const std::vector &dependencies); + + // SPSV + void (*init_spsv_descr)(sycl::queue &queue, oneapi::mkl::sparse::spsv_descr_t *p_spsv_descr); + + sycl::event (*release_spsv_descr)(sycl::queue &queue, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, + const std::vector 
&dependencies); + + void (*spsv_buffer_size)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, + std::size_t &temp_buffer_size); + + void (*spsv_optimize_buffer)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, + sycl::buffer workspace); + + sycl::event (*spsv_optimize_usm)(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, void *workspace, + const std::vector &dependencies); + + sycl::event (*spsv)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, + const std::vector &dependencies); } sparse_blas_function_table_t; -#undef DEFINE_SET_CSR_DATA -#undef DEFINE_GEMV -#undef DEFINE_TRSV -#undef DEFINE_GEMM +#undef DEFINE_DENSE_VECTOR_FUNCS +#undef DEFINE_DENSE_MATRIX_FUNCS +#undef DEFINE_COO_MATRIX_FUNCS +#undef DEFINE_CSR_MATRIX_FUNCS #endif // 
_ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_ diff --git a/src/sparse_blas/generic_container.hpp b/src/sparse_blas/generic_container.hpp new file mode 100644 index 000000000..46732722d --- /dev/null +++ b/src/sparse_blas/generic_container.hpp @@ -0,0 +1,330 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_ +#define _ONEMKL_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_ + +#include +#include + +#if __has_include() +#include +#else +#include +#endif + +#include "oneapi/mkl/sparse_blas/types.hpp" +#include "enum_data_types.hpp" + +namespace oneapi::mkl::sparse::detail { + +/// Represent a non-templated container for USM or buffer. +struct generic_container { + // USM pointer, nullptr if the provided data is a buffer. + void* usm_ptr; + + // Buffer pointer, nullptr if the provided data is a USM pointer. + // The buffer is needed to properly handle the dependencies when the handle is used. + // Use a void* type for the buffer to avoid using template arguments in every function using data handles. + // Using reinterpret does not solve the issue as the returned buffer needs the type of the original buffer for the aligned_allocator. 
+ std::shared_ptr buffer_ptr; + + // Underlying USM or buffer data type + data_type data_type; + + generic_container() : usm_ptr(nullptr), buffer_ptr(), data_type(data_type::none) {} + + template + generic_container(T* ptr) : usm_ptr(ptr), + buffer_ptr(), + data_type(get_data_type()) {} + + template + generic_container(const sycl::buffer buffer) + : usm_ptr(nullptr), + buffer_ptr(std::make_shared>(buffer)), + data_type(get_data_type()) {} + + template + void set_usm_ptr(T* ptr) { + usm_ptr = ptr; + data_type = get_data_type(); + } + + template + void set_buffer_untyped(const sycl::buffer buffer) { + buffer_ptr = std::make_shared>(buffer); + // Do not set data_type if T is meant as a generic byte type. + } + + template + void set_buffer(const sycl::buffer buffer) { + set_buffer_untyped(buffer); + data_type = get_data_type(); + } + + template + T* get_usm_ptr() { + return static_cast(usm_ptr); + } + + template + auto& get_buffer() { + return *reinterpret_cast*>(buffer_ptr.get()); + } +}; + +/// Common type for dense vector and matrix handles +template +struct generic_dense_handle { + BackendHandleT backend_handle; + + generic_container value_container; + + template + generic_dense_handle(BackendHandleT backend_handle, T* value_ptr) + : backend_handle(backend_handle), + value_container(generic_container(value_ptr)) {} + + template + generic_dense_handle(BackendHandleT backend_handle, const sycl::buffer value_buffer) + : backend_handle(backend_handle), + value_container(value_buffer) {} + + bool all_use_buffer() const { + return static_cast(value_container.buffer_ptr); + } + + data_type get_value_type() const { + return value_container.data_type; + } + + data_type get_int_type() const { + return data_type::none; + } + + template + T* get_usm_ptr() { + return value_container.get_usm_ptr(); + } + + template + auto& get_buffer() { + return value_container.get_buffer(); + } + + template + void set_usm_ptr(T* ptr) { + value_container.set_usm_ptr(ptr); + } + + template + 
void set_buffer(const sycl::buffer buffer) { + value_container.set_buffer(buffer); + } +}; + +/// Generic dense_vector_handle used by all backends +template +struct generic_dense_vector_handle : public detail::generic_dense_handle { + std::int64_t size; + + template + generic_dense_vector_handle(BackendHandleT backend_handle, T* value_ptr, std::int64_t size) + : generic_dense_handle(backend_handle, value_ptr), + size(size) {} + + template + generic_dense_vector_handle(BackendHandleT backend_handle, + const sycl::buffer value_buffer, std::int64_t size) + : generic_dense_handle(backend_handle, value_buffer), + size(size) { + if (value_buffer.size() < static_cast(size)) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", "init_dense_vector", + "Buffer size too small, expected at least " + std::to_string(size) + " but got " + + std::to_string(value_buffer.size()) + " elements."); + } + } +}; + +/// Generic dense_matrix_handle used by all backends +template +struct generic_dense_matrix_handle : public detail::generic_dense_handle { + std::int64_t num_rows; + std::int64_t num_cols; + std::int64_t ld; + oneapi::mkl::layout dense_layout; + + template + generic_dense_matrix_handle(BackendHandleT backend_handle, T* value_ptr, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout) + : generic_dense_handle(backend_handle, value_ptr), + num_rows(num_rows), + num_cols(num_cols), + ld(ld), + dense_layout(dense_layout) {} + + template + generic_dense_matrix_handle(BackendHandleT backend_handle, + const sycl::buffer value_buffer, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout) + : generic_dense_handle(backend_handle, value_buffer), + num_rows(num_rows), + num_cols(num_cols), + ld(ld), + dense_layout(dense_layout) { + std::size_t minimum_size = static_cast( + (dense_layout == oneapi::mkl::layout::row_major ? 
num_rows : num_cols) * ld); + if (value_buffer.size() < minimum_size) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", "init_dense_matrix", + "Buffer size too small, expected at least " + std::to_string(minimum_size) + + " but got " + std::to_string(value_buffer.size()) + " elements."); + } + } +}; + +/// Generic sparse_matrix_handle used by all backends +template +struct generic_sparse_handle { + BackendHandleT backend_handle; + + generic_container row_container; + generic_container col_container; + generic_container value_container; + + std::int32_t properties_mask; + bool can_be_reset; + + template + generic_sparse_handle(BackendHandleT backend_handle, intType* row_ptr, intType* col_ptr, + fpType* value_ptr) + : backend_handle(backend_handle), + row_container(generic_container(row_ptr)), + col_container(generic_container(col_ptr)), + value_container(generic_container(value_ptr)), + properties_mask(0), + can_be_reset(true) {} + + template + generic_sparse_handle(BackendHandleT backend_handle, const sycl::buffer row_buffer, + const sycl::buffer col_buffer, + const sycl::buffer value_buffer) + : backend_handle(backend_handle), + row_container(row_buffer), + col_container(col_buffer), + value_container(value_buffer), + properties_mask(0), + can_be_reset(true) {} + + bool all_use_buffer() const { + return static_cast(value_container.buffer_ptr) && + static_cast(row_container.buffer_ptr) && + static_cast(col_container.buffer_ptr); + } + + data_type get_value_type() const { + return value_container.data_type; + } + + data_type get_int_type() const { + return row_container.data_type; + } + + void set_matrix_property(oneapi::mkl::sparse::matrix_property property) { + properties_mask |= matrix_property_to_mask(property); + } + + bool has_matrix_property(oneapi::mkl::sparse::matrix_property property) { + return properties_mask & matrix_property_to_mask(property); + } + +private: + std::int32_t matrix_property_to_mask(oneapi::mkl::sparse::matrix_property property) 
{ + switch (property) { + case oneapi::mkl::sparse::matrix_property::symmetric: return 1 << 0; + case oneapi::mkl::sparse::matrix_property::sorted: return 1 << 1; + default: + throw oneapi::mkl::invalid_argument( + "sparse_blas", "set_matrix_property", + "Unsupported matrix property " + std::to_string(static_cast(property))); + } + } +}; + +/** + * Check that all internal containers use the same container. +*/ +template +void check_all_containers_use_buffers(const std::string& function_name, + ContainerT first_internal_container, + Ts... internal_containers) { + bool first_use_buffer = first_internal_container->all_use_buffer(); + for (const auto internal_container : { internal_containers... }) { + if (internal_container->all_use_buffer() != first_use_buffer) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, + "Incompatible container types. All inputs and outputs must use the same container: buffer or USM"); + } + } +} + +/** + * Check that all internal containers use the same container type, data type and integer type. + * The integer type can be 'none' if the internal container does not store any integer (i.e. for dense handles). + * The first internal container is used to determine what container and types the other internal containers should use. +*/ +template +void check_all_containers_compatible(const std::string& function_name, + ContainerT first_internal_container, + Ts... internal_containers) { + check_all_containers_use_buffers(function_name, first_internal_container, + internal_containers...); + data_type first_value_type = first_internal_container->get_value_type(); + data_type first_int_type = first_internal_container->get_int_type(); + for (const auto internal_container : { internal_containers... 
}) { + const data_type other_value_type = internal_container->get_value_type(); + if (other_value_type != first_value_type) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, + "Incompatible data types expected " + data_type_to_str(first_value_type) + + " but got " + data_type_to_str(other_value_type)); + } + const data_type other_int_type = internal_container->get_int_type(); + if (other_int_type != data_type::none && other_int_type != first_int_type) { + throw oneapi::mkl::invalid_argument("sparse_blas", function_name, + "Incompatible integer types expected " + + data_type_to_str(first_int_type) + " but got " + + data_type_to_str(other_int_type)); + } + } +} + +template +sycl::event submit_release(sycl::queue& queue, T* ptr, const DependenciesT& dependencies) { + return queue.submit([&](sycl::handler& cgh) { + cgh.depends_on(dependencies); + cgh.host_task([=]() { delete ptr; }); + }); +} + +} // namespace oneapi::mkl::sparse::detail + +#endif // _ONEMKL_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_ diff --git a/src/sparse_blas/macros.hpp b/src/sparse_blas/macros.hpp index a4ef88e35..7eba01390 100644 --- a/src/sparse_blas/macros.hpp +++ b/src/sparse_blas/macros.hpp @@ -36,4 +36,10 @@ FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int32_t, _i32); \ FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int64_t, _i64) +#define THROW_IF_NULLPTR(FUNC_NAME, PTR) \ + if (!(PTR)) { \ + throw mkl::uninitialized("sparse_blas", FUNC_NAME, \ + std::string(#PTR) + " must not be nullptr."); \ + } + #endif // _ONEMKL_SPARSE_BLAS_MACROS_HPP_ diff --git a/src/sparse_blas/sparse_blas_loader.cpp b/src/sparse_blas/sparse_blas_loader.cpp index 95da6df9c..cdc3ae6b2 100644 --- a/src/sparse_blas/sparse_blas_loader.cpp +++ b/src/sparse_blas/sparse_blas_loader.cpp @@ -1,21 +1,21 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. 
+/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ #include "oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp" @@ -30,133 +30,326 @@ static oneapi::mkl::detail::table_initializer function_tables; -void init_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle) { +// Dense vector +#define DEFINE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ + template <> \ + void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, \ + std::int64_t size, sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_dense_vector_buffer##FP_SUFFIX(queue, p_dvhandle, size, val); \ + } \ + template <> \ + void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, \ + std::int64_t size, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_dense_vector_usm##FP_SUFFIX(queue, p_dvhandle, size, val); \ + } \ + template <> \ + void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, \ + std::int64_t size, sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_dense_vector_data_buffer##FP_SUFFIX(queue, dvhandle, size, \ + val); \ + } \ + template <> \ + void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, \ + std::int64_t size, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_dense_vector_data_usm##FP_SUFFIX(queue, dvhandle, size, val); \ + } +FOR_EACH_FP_TYPE(DEFINE_DENSE_VECTOR_FUNCS); +#undef DEFINE_DENSE_VECTOR_FUNCS + +sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhandle, + const std::vector &dependencies) { auto libkey = get_device_id(queue); - function_tables[libkey].init_matrix_handle(queue, p_handle); + return function_tables[libkey].release_dense_vector(queue, dvhandle, 
dependencies); } -sycl::event release_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle, - const std::vector &dependencies) { +// Dense matrix +#define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ + template <> \ + void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + layout dense_layout, sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_dense_matrix_buffer##FP_SUFFIX( \ + queue, p_dmhandle, num_rows, num_cols, ld, dense_layout, val); \ + } \ + template <> \ + void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + layout dense_layout, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_dense_matrix_usm##FP_SUFFIX(queue, p_dmhandle, num_rows, \ + num_cols, ld, dense_layout, val); \ + } \ + template <> \ + void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + layout dense_layout, sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_dense_matrix_data_buffer##FP_SUFFIX( \ + queue, dmhandle, num_rows, num_cols, ld, dense_layout, val); \ + } \ + template <> \ + void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + layout dense_layout, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_dense_matrix_data_usm##FP_SUFFIX( \ + queue, dmhandle, num_rows, num_cols, ld, dense_layout, val); \ + } +FOR_EACH_FP_TYPE(DEFINE_DENSE_MATRIX_FUNCS); +#undef DEFINE_DENSE_MATRIX_FUNCS + +sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, + const std::vector &dependencies) { auto libkey = get_device_id(queue); - 
return function_tables[libkey].release_matrix_handle(queue, p_handle, dependencies); + return function_tables[libkey].release_dense_matrix(queue, dmhandle, dependencies); } -#define DEFINE_SET_CSR_DATA(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ +// COO matrix +#define DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + template <> \ + void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + sycl::buffer row_ind, sycl::buffer col_ind, \ + sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_coo_matrix_buffer##FP_SUFFIX##INT_SUFFIX( \ + queue, p_smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ + } \ + template <> \ + void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX( \ + queue, p_smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ + } \ template <> \ - void set_csr_data(sycl::queue &queue, matrix_handle_t handle, INT_TYPE num_rows, \ - INT_TYPE num_cols, INT_TYPE nnz, index_base index, \ - sycl::buffer &row_ptr, sycl::buffer &col_ind, \ - sycl::buffer &val) { \ + void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + sycl::buffer row_ind, sycl::buffer col_ind, \ + sycl::buffer val) { \ auto libkey = get_device_id(queue); \ - function_tables[libkey].set_csr_data_buffer##FP_SUFFIX##INT_SUFFIX( \ - queue, handle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ + function_tables[libkey].set_coo_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX( \ + queue, smhandle, num_rows, num_cols, nnz, index, 
row_ind, col_ind, val); \ } \ template <> \ - sycl::event set_csr_data(sycl::queue &queue, matrix_handle_t handle, INT_TYPE num_rows, \ - INT_TYPE num_cols, INT_TYPE nnz, index_base index, INT_TYPE *row_ptr, \ - INT_TYPE *col_ind, FP_TYPE *val, \ - const std::vector &dependencies) { \ + void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val) { \ auto libkey = get_device_id(queue); \ - return function_tables[libkey].set_csr_data_usm##FP_SUFFIX##INT_SUFFIX( \ - queue, handle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val, dependencies); \ + function_tables[libkey].set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX( \ + queue, smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ } +FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); +#undef DEFINE_COO_MATRIX_FUNCS -FOR_EACH_FP_AND_INT_TYPE(DEFINE_SET_CSR_DATA) -#undef DEFINE_SET_CSR_DATA +// CSR matrix +#define DEFINE_INIT_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + template <> \ + void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + sycl::buffer row_ptr, sycl::buffer col_ind, \ + sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_csr_matrix_buffer##FP_SUFFIX##INT_SUFFIX( \ + queue, p_smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ + } \ + template <> \ + void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX( \ + queue, p_smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ + } \ + 
template <> \ + void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + sycl::buffer row_ptr, sycl::buffer col_ind, \ + sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_csr_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX( \ + queue, smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ + } \ + template <> \ + void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX( \ + queue, smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ + } +FOR_EACH_FP_AND_INT_TYPE(DEFINE_INIT_CSR_MATRIX_FUNCS); +#undef DEFINE_INIT_CSR_MATRIX_FUNCS -sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, matrix_handle_t handle, - const std::vector &dependencies) { +// Common sparse matrix functions +sycl::event release_sparse_matrix(sycl::queue &queue, matrix_handle_t smhandle, + const std::vector &dependencies) { auto libkey = get_device_id(queue); - return function_tables[libkey].optimize_gemm_v1(queue, transpose_A, handle, dependencies); + return function_tables[libkey].release_sparse_matrix(queue, smhandle, dependencies); } -sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, transpose transpose_B, - layout dense_matrix_layout, const std::int64_t columns, - matrix_handle_t handle, const std::vector &dependencies) { +bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property) { auto libkey = get_device_id(queue); - return function_tables[libkey].optimize_gemm_v2( - queue, transpose_A, transpose_B, dense_matrix_layout, columns, handle, dependencies); + return 
function_tables[libkey].set_matrix_property(queue, smhandle, property); } -sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val, matrix_handle_t handle, +// SPMM +void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr) { + auto libkey = get_device_id(queue); + function_tables[libkey].init_spmm_descr(queue, p_spmm_descr); +} + +sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t spmm_descr, + const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].release_spmm_descr(queue, spmm_descr, dependencies); +} + +void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, + std::size_t &temp_buffer_size) { + auto libkey = get_device_id(queue); + function_tables[libkey].spmm_buffer_size(queue, opA, opB, alpha, A_view, A_handle, B_handle, + beta, C_handle, alg, spmm_descr, temp_buffer_size); +} + +void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, + spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer workspace) { + auto libkey = get_device_id(queue); + function_tables[libkey].spmm_optimize_buffer(queue, opA, opB, alpha, A_view, A_handle, B_handle, + beta, C_handle, alg, spmm_descr, workspace); +} + +sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, + const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void *workspace, const std::vector &dependencies) { auto libkey = 
get_device_id(queue); - return function_tables[libkey].optimize_gemv(queue, transpose_val, handle, dependencies); + return function_tables[libkey].spmm_optimize_usm(queue, opA, opB, alpha, A_view, A_handle, + B_handle, beta, C_handle, alg, spmm_descr, + workspace, dependencies); } -sycl::event optimize_trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, - matrix_handle_t handle, const std::vector &dependencies) { +sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, + spmm_alg alg, spmm_descr_t spmm_descr, + const std::vector &dependencies) { auto libkey = get_device_id(queue); - return function_tables[libkey].optimize_trsv(queue, uplo_val, transpose_val, diag_val, handle, - dependencies); + return function_tables[libkey].spmm(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, + C_handle, alg, spmm_descr, dependencies); } -#define DEFINE_GEMV(FP_TYPE, FP_SUFFIX) \ - template <> \ - void gemv(sycl::queue &queue, transpose transpose_val, const FP_TYPE alpha, \ - matrix_handle_t A_handle, sycl::buffer &x, const FP_TYPE beta, \ - sycl::buffer &y) { \ - auto libkey = get_device_id(queue); \ - function_tables[libkey].gemv_buffer##FP_SUFFIX(queue, transpose_val, alpha, A_handle, x, \ - beta, y); \ - } \ - template <> \ - sycl::event gemv(sycl::queue &queue, transpose transpose_val, const FP_TYPE alpha, \ - matrix_handle_t A_handle, const FP_TYPE *x, const FP_TYPE beta, FP_TYPE *y, \ - const std::vector &dependencies) { \ - auto libkey = get_device_id(queue); \ - return function_tables[libkey].gemv_usm##FP_SUFFIX(queue, transpose_val, alpha, A_handle, \ - x, beta, y, dependencies); \ - } +// SPMV +void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr) { + auto libkey = get_device_id(queue); + 
function_tables[libkey].init_spmv_descr(queue, p_spmv_descr); +} -FOR_EACH_FP_TYPE(DEFINE_GEMV) -#undef DEFINE_GEMV - -#define DEFINE_TRSV(FP_TYPE, FP_SUFFIX) \ - template <> \ - void trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ - matrix_handle_t A_handle, sycl::buffer &x, \ - sycl::buffer &y) { \ - auto libkey = get_device_id(queue); \ - function_tables[libkey].trsv_buffer##FP_SUFFIX(queue, uplo_val, transpose_val, diag_val, \ - A_handle, x, y); \ - } \ - template <> \ - sycl::event trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ - matrix_handle_t A_handle, const FP_TYPE *x, FP_TYPE *y, \ - const std::vector &dependencies) { \ - auto libkey = get_device_id(queue); \ - return function_tables[libkey].trsv_usm##FP_SUFFIX( \ - queue, uplo_val, transpose_val, diag_val, A_handle, x, y, dependencies); \ - } +sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, + const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].release_spmv_descr(queue, spmv_descr, dependencies); +} -FOR_EACH_FP_TYPE(DEFINE_TRSV) -#undef DEFINE_TRSV - -#define DEFINE_GEMM(FP_TYPE, FP_SUFFIX) \ - template <> \ - void gemm(sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, \ - transpose transpose_B, const FP_TYPE alpha, matrix_handle_t A_handle, \ - sycl::buffer &B, const std::int64_t columns, const std::int64_t ldb, \ - const FP_TYPE beta, sycl::buffer &C, const std::int64_t ldc) { \ - auto libkey = get_device_id(queue); \ - function_tables[libkey].gemm_buffer##FP_SUFFIX(queue, dense_matrix_layout, transpose_A, \ - transpose_B, alpha, A_handle, B, columns, \ - ldb, beta, C, ldc); \ - } \ - template <> \ - sycl::event gemm(sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, \ - transpose transpose_B, const FP_TYPE alpha, matrix_handle_t A_handle, \ - const FP_TYPE *B, const std::int64_t columns, const std::int64_t ldb, \ 
- const FP_TYPE beta, FP_TYPE *C, const std::int64_t ldc, \ - const std::vector &dependencies) { \ - auto libkey = get_device_id(queue); \ - return function_tables[libkey].gemm_usm##FP_SUFFIX( \ - queue, dense_matrix_layout, transpose_A, transpose_B, alpha, A_handle, B, columns, \ - ldb, beta, C, ldc, dependencies); \ - } +void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t &temp_buffer_size) { + auto libkey = get_device_id(queue); + function_tables[libkey].spmv_buffer_size(queue, opA, alpha, A_view, A_handle, x_handle, beta, + y_handle, alg, spmv_descr, temp_buffer_size); +} + +void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, sycl::buffer workspace) { + auto libkey = get_device_id(queue); + function_tables[libkey].spmv_optimize_buffer(queue, opA, alpha, A_view, A_handle, x_handle, + beta, y_handle, alg, spmv_descr, workspace); +} -FOR_EACH_FP_TYPE(DEFINE_GEMM) -#undef DEFINE_GEMM +sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, + void *workspace, const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].spmv_optimize_usm(queue, opA, alpha, A_view, A_handle, x_handle, + beta, y_handle, alg, spmv_descr, workspace, + dependencies); +} + +sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, 
dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].spmv(queue, opA, alpha, A_view, A_handle, x_handle, beta, + y_handle, alg, spmv_descr, dependencies); +} + +// SPSV +void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr) { + auto libkey = get_device_id(queue); + function_tables[libkey].init_spsv_descr(queue, p_spsv_descr); +} + +sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, + const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].release_spsv_descr(queue, spsv_descr, dependencies); +} + +void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + std::size_t &temp_buffer_size) { + auto libkey = get_device_id(queue); + function_tables[libkey].spsv_buffer_size(queue, opA, alpha, A_view, A_handle, x_handle, + y_handle, alg, spsv_descr, temp_buffer_size); +} + +void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + sycl::buffer workspace) { + auto libkey = get_device_id(queue); + function_tables[libkey].spsv_optimize_buffer(queue, opA, alpha, A_view, A_handle, x_handle, + y_handle, alg, spsv_descr, workspace); +} + +sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, void *workspace, + const std::vector &dependencies) { + auto libkey = 
get_device_id(queue); + return function_tables[libkey].spsv_optimize_usm(queue, opA, alpha, A_view, A_handle, x_handle, + y_handle, alg, spsv_descr, workspace, + dependencies); +} + +sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].spsv(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, + alg, spsv_descr, dependencies); +} } // namespace oneapi::mkl::sparse diff --git a/tests/unit_tests/sparse_blas/CMakeLists.txt b/tests/unit_tests/sparse_blas/CMakeLists.txt index 2c46cd38c..65e12c981 100644 --- a/tests/unit_tests/sparse_blas/CMakeLists.txt +++ b/tests/unit_tests/sparse_blas/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp new file mode 100644 index 000000000..7949342d3 --- /dev/null +++ b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp @@ -0,0 +1,230 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _COMMON_SPARSE_REFERENCE_HPP__ +#define _COMMON_SPARSE_REFERENCE_HPP__ + +#include +#include +#include + +#include "oneapi/mkl.hpp" + +#include "test_common.hpp" + +template +inline T conjugate(T) { + static_assert(false, "Unsupported type"); +} +template <> +inline float conjugate(float t) { + return t; +} +template <> +inline double conjugate(double t) { + return t; +} +template <> +inline std::complex conjugate(std::complex t) { + return std::conj(t); +} +template <> +inline std::complex conjugate(std::complex t) { + return std::conj(t); +} + +template +inline T opVal(const T t, const bool isConj) { + return (isConj ? 
conjugate(t) : t); +}; + +template +void do_csr_transpose(const oneapi::mkl::transpose opA, intType *ia_t, intType *ja_t, fpType *a_t, + intType a_nrows, intType a_ncols, intType indexing, accIntType &ia, + accIntType &ja, accFpType &a, const bool structOnlyFlag = false) { + const bool isConj = (opA == oneapi::mkl::transpose::conjtrans); + + // initialize ia_t to zero + for (intType i = 0; i < a_ncols + 1; ++i) { + ia_t[i] = 0; + } + + // fill ia_t with counts of columns + for (intType i = 0; i < a_nrows; ++i) { + const intType st = ia[i] - indexing; + const intType en = ia[i + 1] - indexing; + for (intType j = st; j < en; ++j) { + const intType col = ja[j] - indexing; + ia_t[col + 1]++; + } + } + // prefix sum to get official ia_t counts + ia_t[0] = indexing; + for (intType i = 0; i < a_ncols; ++i) { + ia_t[i + 1] += ia_t[i]; + } + + // second pass through data to fill transpose structure + for (intType i = 0; i < a_nrows; ++i) { + const intType st = ia[i] - indexing; + const intType en = ia[i + 1] - indexing; + for (intType j = st; j < en; ++j) { + const intType col = ja[j] - indexing; + const intType j_in_a_t = ia_t[col] - indexing; + ia_t[col]++; + ja_t[j_in_a_t] = i + indexing; + if (!structOnlyFlag) { + const fpType val = a[j]; + a_t[j_in_a_t] = opVal(val, isConj); + } + } + } + + // adjust ia_t back to original state after filling structure + for (intType i = a_ncols; i > 0; --i) { + ia_t[i] = ia_t[i - 1]; + } + ia_t[0] = indexing; +} + +// Transpose the given sparse matrix if needed +template +auto sparse_transpose_if_needed(const intType *ia, const intType *ja, const fpType *a, + intType a_nrows, intType a_ncols, std::size_t nnz, intType indexing, + oneapi::mkl::transpose transpose_val) { + std::vector iopa; + std::vector jopa; + std::vector opa; + if (transpose_val == oneapi::mkl::transpose::nontrans) { + iopa.assign(ia, ia + a_nrows + 1); + jopa.assign(ja, ja + nnz); + opa.assign(a, a + nnz); + } + else if (transpose_val == oneapi::mkl::transpose::trans 
|| + transpose_val == oneapi::mkl::transpose::conjtrans) { + iopa.resize(static_cast(a_ncols + 1)); + jopa.resize(nnz); + opa.resize(nnz); + do_csr_transpose(transpose_val, iopa.data(), jopa.data(), opa.data(), a_nrows, a_ncols, + indexing, ia, ja, a); + } + else { + throw std::runtime_error("unsupported transpose_val=" + + std::to_string(static_cast(transpose_val))); + } + return std::make_tuple(iopa, jopa, opa); +} + +template +auto dense_transpose_if_needed(const fpType *x, std::size_t outer_size, std::size_t inner_size, + std::size_t ld, oneapi::mkl::transpose transpose_val) { + std::vector opx; + if (transpose_val == oneapi::mkl::transpose::nontrans) { + opx.assign(x, x + outer_size * ld); + } + else { + opx.resize(outer_size * ld); + for (std::size_t i = 0; i < outer_size; ++i) { + for (std::size_t j = 0; j < inner_size; ++j) { + opx[i + j * ld] = x[i * ld + j]; + } + } + } + return opx; +} + +/// Convert the sparse matrix in the given format to a dense matrix A in row major layout applied with A_view. 
+template +std::vector sparse_to_dense(sparse_matrix_format_t format, const intType *ia, + const intType *ja, const fpType *a, std::size_t a_nrows, + std::size_t a_ncols, std::size_t nnz, intType indexing, + oneapi::mkl::transpose transpose_val, + oneapi::mkl::sparse::matrix_view A_view) { + oneapi::mkl::sparse::matrix_descr type_view = A_view.type_view; + oneapi::mkl::uplo uplo_val = A_view.uplo_view; + const bool is_symmetric_or_hermitian_view = + type_view == oneapi::mkl::sparse::matrix_descr::symmetric || + type_view == oneapi::mkl::sparse::matrix_descr::hermitian; + // Matrices are not conjugated if they are symmetric + const bool apply_conjugate = + !is_symmetric_or_hermitian_view && transpose_val == oneapi::mkl::transpose::conjtrans; + std::vector dense_a(a_nrows * a_ncols, fpType(0)); + + auto write_to_dense_if_needed = [&](std::size_t a_idx, std::size_t row, std::size_t col) { + if ((type_view == oneapi::mkl::sparse::matrix_descr::triangular || + is_symmetric_or_hermitian_view) && + ((uplo_val == oneapi::mkl::uplo::lower && col > row) || + (uplo_val == oneapi::mkl::uplo::upper && col < row))) { + // Read only the upper or lower part of the sparse matrix + return; + } + if (type_view == oneapi::mkl::sparse::matrix_descr::diagonal && col != row) { + // Read only the diagonal + return; + } + // Do not transpose symmetric matrices to simplify the propagation of the symmetric values + std::size_t dense_a_idx = + (!is_symmetric_or_hermitian_view && transpose_val != oneapi::mkl::transpose::nontrans) + ? 
col * a_nrows + row + : row * a_ncols + col; + fpType val = opVal(a[a_idx], apply_conjugate); + dense_a[dense_a_idx] = val; + }; + + if (format == sparse_matrix_format_t::CSR) { + for (std::size_t row = 0; row < a_nrows; row++) { + for (intType i = ia[row] - indexing; i < ia[row + 1] - indexing; i++) { + std::size_t iu = static_cast(i); + std::size_t col = static_cast(ja[iu] - indexing); + write_to_dense_if_needed(iu, row, col); + } + } + } + else if (format == sparse_matrix_format_t::COO) { + for (std::size_t i = 0; i < nnz; i++) { + std::size_t row = static_cast(ia[i] - indexing); + std::size_t col = static_cast(ja[i] - indexing); + write_to_dense_if_needed(i, row, col); + } + } + + // Write unit diagonal + if (A_view.diag_view == oneapi::mkl::diag::unit && a_nrows == a_ncols) { + for (std::size_t i = 0; i < a_nrows; i++) { + dense_a[i * a_nrows + i] = fpType(1); + } + } + + // Propagate the rest of the symmetric matrix + if (is_symmetric_or_hermitian_view) { + for (std::size_t i = 0; i < a_nrows; ++i) { + for (std::size_t j = i + 1; j < a_ncols; ++j) { + if (uplo_val == oneapi::mkl::uplo::lower) { + dense_a[i * a_ncols + j] = dense_a[j * a_nrows + i]; + } + else { + dense_a[j * a_nrows + i] = dense_a[i * a_ncols + j]; + } + } + } + } + return dense_a; +} + +#endif // _COMMON_SPARSE_REFERENCE_HPP__ diff --git a/tests/unit_tests/sparse_blas/include/sparse_reference.hpp b/tests/unit_tests/sparse_blas/include/sparse_reference.hpp deleted file mode 100644 index ffb876f11..000000000 --- a/tests/unit_tests/sparse_blas/include/sparse_reference.hpp +++ /dev/null @@ -1,297 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#ifndef _SPARSE_REFERENCE_HPP__ -#define _SPARSE_REFERENCE_HPP__ - -#include -#include -#include - -#include "oneapi/mkl.hpp" - -#include "test_common.hpp" - -template -inline T conjugate(T) { - static_assert(false, "Unsupported type"); -} -template <> -inline float conjugate(float t) { - return t; -} -template <> -inline double conjugate(double t) { - return t; -} -template <> -inline std::complex conjugate(std::complex t) { - return std::conj(t); -} -template <> -inline std::complex conjugate(std::complex t) { - return std::conj(t); -} - -template -inline T opVal(const T t, const bool isConj) { - return (isConj ? 
conjugate(t) : t); -}; - -template -void do_csr_transpose(const oneapi::mkl::transpose opA, intType *ia_t, intType *ja_t, fpType *a_t, - intType a_nrows, intType a_ncols, intType a_ind, accIntType &ia, - accIntType &ja, accFpType &a, const bool structOnlyFlag = false) { - const bool isConj = (opA == oneapi::mkl::transpose::conjtrans); - - // initialize ia_t to zero - for (intType i = 0; i < a_ncols + 1; ++i) { - ia_t[i] = 0; - } - - // fill ia_t with counts of columns - for (intType i = 0; i < a_nrows; ++i) { - const intType st = ia[i] - a_ind; - const intType en = ia[i + 1] - a_ind; - for (intType j = st; j < en; ++j) { - const intType col = ja[j] - a_ind; - ia_t[col + 1]++; - } - } - // prefix sum to get official ia_t counts - ia_t[0] = a_ind; - for (intType i = 0; i < a_ncols; ++i) { - ia_t[i + 1] += ia_t[i]; - } - - // second pass through data to fill transpose structure - for (intType i = 0; i < a_nrows; ++i) { - const intType st = ia[i] - a_ind; - const intType en = ia[i + 1] - a_ind; - for (intType j = st; j < en; ++j) { - const intType col = ja[j] - a_ind; - const intType j_in_a_t = ia_t[col] - a_ind; - ia_t[col]++; - ja_t[j_in_a_t] = i + a_ind; - if (!structOnlyFlag) { - const fpType val = a[j]; - a_t[j_in_a_t] = opVal(val, isConj); - } - } - } - - // adjust ia_t back to original state after filling structure - for (intType i = a_ncols; i > 0; --i) { - ia_t[i] = ia_t[i - 1]; - } - ia_t[0] = a_ind; -} - -// Transpose the given sparse matrix if needed -template -auto sparse_transpose_if_needed(const intType *ia, const intType *ja, const fpType *a, - intType a_nrows, intType a_ncols, std::size_t nnz, intType a_ind, - oneapi::mkl::transpose transpose_val) { - std::vector iopa; - std::vector jopa; - std::vector opa; - if (transpose_val == oneapi::mkl::transpose::nontrans) { - iopa.assign(ia, ia + a_nrows + 1); - jopa.assign(ja, ja + nnz); - opa.assign(a, a + nnz); - } - else if (transpose_val == oneapi::mkl::transpose::trans || - transpose_val == 
oneapi::mkl::transpose::conjtrans) { - iopa.resize(static_cast(a_ncols + 1)); - jopa.resize(nnz); - opa.resize(nnz); - do_csr_transpose(transpose_val, iopa.data(), jopa.data(), opa.data(), a_nrows, a_ncols, - a_ind, ia, ja, a); - } - else { - throw std::runtime_error("unsupported transpose_val=" + - std::to_string(static_cast(transpose_val))); - } - return std::make_tuple(iopa, jopa, opa); -} - -template -auto dense_transpose_if_needed(const fpType *x, std::size_t outer_size, std::size_t inner_size, - std::size_t ld, oneapi::mkl::transpose transpose_val) { - std::vector opx; - if (transpose_val == oneapi::mkl::transpose::nontrans) { - opx.assign(x, x + outer_size * ld); - } - else { - opx.resize(outer_size * ld); - for (std::size_t i = 0; i < outer_size; ++i) { - for (std::size_t j = 0; j < inner_size; ++j) { - opx[i + j * ld] = x[i * ld + j]; - } - } - } - return opx; -} - -/// Return the dense matrix A in row major layout. -/// Diagonal values are overwritten with 1s if diag_val is unit. -template -std::vector sparse_to_dense(const intType *ia, const intType *ja, const fpType *a, - std::size_t a_nrows, std::size_t a_ncols, intType a_ind, - oneapi::mkl::transpose transpose_val, - oneapi::mkl::diag diag_val) { - std::vector dense_a(a_nrows * a_ncols, fpType(0)); - for (std::size_t row = 0; row < a_nrows; row++) { - for (intType i = ia[row] - a_ind; i < ia[row + 1] - a_ind; i++) { - std::size_t iu = static_cast(i); - std::size_t col = static_cast(ja[iu] - a_ind); - std::size_t dense_a_idx = transpose_val != oneapi::mkl::transpose::nontrans - ? 
col * a_nrows + row - : row * a_ncols + col; - fpType val = a[iu]; - if constexpr (complex_info::is_complex) { - if (transpose_val == oneapi::mkl::transpose::conjtrans) { - val = std::conj(val); - } - } - dense_a[dense_a_idx] = val; - } - } - if (diag_val == oneapi::mkl::diag::unit) { - for (std::size_t i = 0; i < a_nrows; ++i) { - dense_a[i * a_ncols + i] = set_fp_value()(1.f, 0.f); - } - } - return dense_a; -} - -template -void prepare_reference_gemv_data(const intType *ia, const intType *ja, const fpType *a, - intType a_nrows, intType a_ncols, intType a_nnz, intType a_ind, - oneapi::mkl::transpose opA, fpType alpha, fpType beta, - const fpType *x, fpType *y_ref) { - std::size_t opa_nrows = - static_cast((opA == oneapi::mkl::transpose::nontrans) ? a_nrows : a_ncols); - const std::size_t nnz = static_cast(a_nnz); - auto [iopa, jopa, opa] = - sparse_transpose_if_needed(ia, ja, a, a_nrows, a_ncols, nnz, a_ind, opA); - - // - // do GEMV operation - // - // y_ref <- alpha * op(A) * x + beta * y_ref - // - for (std::size_t row = 0; row < opa_nrows; row++) { - fpType tmp = 0; - for (intType i = iopa[row] - a_ind; i < iopa[row + 1] - a_ind; i++) { - std::size_t iu = static_cast(i); - std::size_t x_ind = static_cast(jopa[iu] - a_ind); - tmp += opa[iu] * x[x_ind]; - } - - y_ref[row] = alpha * tmp + beta * y_ref[row]; - } -} - -template -void prepare_reference_gemm_data(const intType *ia, const intType *ja, const fpType *a, - intType a_nrows, intType a_ncols, intType c_ncols, intType a_nnz, - intType a_ind, oneapi::mkl::layout dense_matrix_layout, - oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - fpType alpha, fpType beta, intType ldb, intType ldc, - const fpType *b, fpType *c_ref) { - std::size_t opa_nrows = - static_cast((opA == oneapi::mkl::transpose::nontrans) ? a_nrows : a_ncols); - std::size_t opa_ncols = - static_cast((opA == oneapi::mkl::transpose::nontrans) ? 
a_ncols : a_nrows); - const std::size_t nnz = static_cast(a_nnz); - const std::size_t ldb_u = static_cast(ldb); - const std::size_t ldc_u = static_cast(ldc); - auto [iopa, jopa, opa] = - sparse_transpose_if_needed(ia, ja, a, a_nrows, a_ncols, nnz, a_ind, opA); - - std::size_t b_outer_size = static_cast(opa_ncols); - std::size_t b_inner_size = static_cast(c_ncols); - if (dense_matrix_layout == oneapi::mkl::layout::col_major) { - std::swap(b_outer_size, b_inner_size); - } - auto opb = dense_transpose_if_needed(b, b_outer_size, b_inner_size, ldb_u, opB); - - // - // do GEMM operation - // - // C <- alpha * opA(A) * opB(B) + beta * C - // - if (dense_matrix_layout == oneapi::mkl::layout::row_major) { - for (std::size_t row = 0; row < opa_nrows; row++) { - for (std::size_t col = 0; col < static_cast(c_ncols); col++) { - fpType tmp = 0; - for (std::size_t i = static_cast(iopa[row] - a_ind); - i < static_cast(iopa[row + 1] - a_ind); i++) { - tmp += opa[i] * opb[static_cast(jopa[i] - a_ind) * ldb_u + col]; - } - fpType &c = c_ref[row * ldc_u + col]; - c = alpha * tmp + beta * c; - } - } - } - else { - for (std::size_t col = 0; col < static_cast(c_ncols); col++) { - for (std::size_t row = 0; row < opa_nrows; row++) { - fpType tmp = 0; - for (std::size_t i = static_cast(iopa[row] - a_ind); - i < static_cast(iopa[row + 1] - a_ind); i++) { - tmp += opa[i] * opb[static_cast(jopa[i] - a_ind) + col * ldb_u]; - } - fpType &c = c_ref[row + col * ldc_u]; - c = alpha * tmp + beta * c; - } - } - } -} - -template -void prepare_reference_trsv_data(const intType *ia, const intType *ja, const fpType *a, intType m, - intType a_ind, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose opA, oneapi::mkl::diag diag_val, - const fpType *x, fpType *y_ref) { - std::size_t mu = static_cast(m); - auto dense_a = sparse_to_dense(ia, ja, a, mu, mu, a_ind, opA, diag_val); - - // - // do TRSV operation - // - // y_ref <- op(A)^-1 * x - // - // Compute each element of the reference one after the other 
starting from 0 (resp. the end) for a lower (resp. upper) triangular matrix. - // A matrix is considered lowered if it is lower and not transposed or upper and transposed. - const bool is_lower = - (uplo_val == oneapi::mkl::uplo::lower) == (opA == oneapi::mkl::transpose::nontrans); - for (std::size_t row = 0; row < mu; row++) { - std::size_t uplo_row = is_lower ? row : (mu - 1 - row); - fpType rhs = x[uplo_row]; - for (std::size_t col = 0; col < row; col++) { - std::size_t uplo_col = is_lower ? col : (mu - 1 - col); - rhs -= dense_a[uplo_row * mu + uplo_col] * y_ref[uplo_col]; - } - y_ref[uplo_row] = rhs / dense_a[uplo_row * mu + uplo_row]; - } -} - -#endif // _SPARSE_REFERENCE_HPP__ diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp index fd1e91a47..b8d3fe4f2 100644 --- a/tests/unit_tests/sparse_blas/include/test_common.hpp +++ b/tests/unit_tests/sparse_blas/include/test_common.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright 2024 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include #include +#include #if __has_include() #include @@ -53,6 +54,18 @@ struct complex_info> { static const bool is_complex = true; }; +enum sparse_matrix_format_t { + CSR, + COO, +}; + +static std::vector> test_matrix_properties{ + { oneapi::mkl::sparse::matrix_property::sorted }, + { oneapi::mkl::sparse::matrix_property::symmetric }, + { oneapi::mkl::sparse::matrix_property::sorted, + oneapi::mkl::sparse::matrix_property::symmetric } +}; + void print_error_code(sycl::exception const &e); // Catch asynchronous exceptions. 
@@ -70,17 +83,18 @@ struct exception_handler_t { } }; +struct UsmDeleter { + sycl::queue q; + UsmDeleter(sycl::queue _q) : q(_q) {} + void operator()(void *ptr) { + sycl::free(ptr, q); + } +}; + // Use a unique_ptr to automatically free device memory on unique_ptr destruction. template auto malloc_device_uptr(sycl::queue q, std::size_t num_elts) { - struct Deleter { - sycl::queue q; - Deleter(sycl::queue _q) : q(_q) {} - void operator()(T *ptr) { - sycl::free(ptr, q); - } - }; - return std::unique_ptr(sycl::malloc_device(num_elts, q), Deleter(q)); + return std::unique_ptr(sycl::malloc_device(num_elts, q), UsmDeleter(q)); } // SYCL buffer creation helper. @@ -90,6 +104,24 @@ sycl::buffer make_buffer(const vec &v) { return buf; } +template +void copy_host_to_buffer(sycl::queue queue, const std::vector &src, sycl::buffer dst) { + queue.submit([&](sycl::handler &cgh) { + auto dst_acc = dst.template get_access( + cgh, sycl::range<1>(src.size())); + cgh.copy(src.data(), dst_acc); + }); +} + +template +void fill_buffer_to_0(sycl::queue queue, sycl::buffer dst) { + queue.submit([&](sycl::handler &cgh) { + auto dst_acc = dst.template get_access( + cgh, sycl::range<1>(dst.size())); + cgh.fill(dst_acc, T(0)); + }); +} + template struct set_fp_value { inline fpType operator()(fpType real, fpType /*imag*/) { @@ -138,6 +170,9 @@ void rand_matrix(std::vector &m, oneapi::mkl::layout layout_val, std::si if (layout_val == oneapi::mkl::layout::col_major) { std::swap(outer_size, inner_size); } + if (inner_size > ld) { + throw std::runtime_error("Expected inner_size <= ld"); + } m.resize(outer_size * ld); rand_scalar rand; for (std::size_t i = 0; i < outer_size; ++i) { @@ -151,69 +186,256 @@ void rand_matrix(std::vector &m, oneapi::mkl::layout layout_val, std::si } } -// Creating the 3arrays CSR representation (ia, ja, values) -// of general random sparse matrix -// with density (0 < density <= 1.0) -// -0.5 <= value < 0.5 -// require_diagonal means all diagonal entries guaranteed to 
be nonzero +/// Generate random value in the range [-0.5, 0.5] +/// The amplitude is guaranteed to be >= 0.1 if is_diag is true +template +fpType generate_data(bool is_diag) { + rand_scalar rand_data; + if (is_diag) { + // Guarantee an amplitude >= 0.1 + fpType sign = (std::rand() % 2) * 2 - 1; + return rand_data(0.1, 0.5) * sign; + } + return rand_data(-0.5, 0.5); +} + +/// Populate the 3 arrays of a random sparse matrix in CSR representation (ia, ja, values) +/// with the given density in range [0, 1] and values in range [-0.5, 0.5]. +/// ja is sorted. +/// require_diagonal means all diagonal entries guaranteed to be nonzero. template -intType generate_random_matrix(const intType nrows, const intType ncols, const double density_val, - intType indexing, std::vector &ia, std::vector &ja, - std::vector &a, bool require_diagonal = false) { +intType generate_random_csr_matrix(const intType nrows, const intType ncols, + const double density_val, intType indexing, + std::vector &ia, std::vector &ja, + std::vector &a, bool is_symmetric, + bool require_diagonal = false) { intType nnz = 0; rand_scalar rand_density; - rand_scalar rand_data; ia.push_back(indexing); // starting index of row0. for (intType i = 0; i < nrows; i++) { - ia.push_back(nnz + indexing); // ending index of row_i. 
- for (intType j = 0; j < ncols; j++) { - const bool is_diag = require_diagonal && i == j; - if (is_diag || (rand_density(0.0, 1.0) <= density_val)) { - fpType val; - if (is_diag) { - // Guarantee an amplitude >= 0.1 - fpType sign = (std::rand() % 2) * 2 - 1; - val = rand_data(0.1, 0.5) * sign; + if (is_symmetric) { + // Fill the lower triangular part based on the previously filled upper triangle + // This ensures that the ja indices are always sorted + for (intType j = 0; j < i; ++j) { + // Check if element at row j and column i has been added, assuming ja is sorted + intType row_offset_j = ia[static_cast(j)]; + intType num_elts_row_j = ia.at(static_cast(j) + 1) - row_offset_j; + intType ja_idx = 0; + while (ja_idx < num_elts_row_j && + ja[static_cast(row_offset_j + ja_idx)] < i) { + ++ja_idx; } - else { - val = rand_data(-0.5, 0.5); + auto symmetric_idx = static_cast(row_offset_j + ja_idx); + if (ja_idx < num_elts_row_j && ja[symmetric_idx] == i) { + a.push_back(a[symmetric_idx]); + ja.push_back(j + indexing); + nnz++; } - a.push_back(val); + } + } + // Loop through the upper triangular to fill a symmetric matrix + const intType j_start = is_symmetric ? i : 0; + for (intType j = j_start; j < ncols; j++) { + const bool is_diag = require_diagonal && i == j; + if (is_diag || (rand_density(0.0, 1.0) <= density_val)) { + a.push_back(generate_data(is_diag)); ja.push_back(j + indexing); nnz++; } } - ia[static_cast(i) + 1] = nnz + indexing; + ia.push_back(nnz + indexing); // ending index of row_i } return nnz; } -// Shuffle the 3arrays CSR representation (ia, ja, values) -// of any sparse matrix and set values serially from 0..nnz. -// Intended for use with sorting. +/// Populate the 3 arrays of a random sparse matrix in COO representation (ia, ja, values) +/// with the given density in range [0, 1] and values in range [-0.5, 0.5]. +/// Indices are sorted by row (ia) then by column (ja). +/// require_diagonal means all diagonal entries guaranteed to be nonzero. 
+template +intType generate_random_coo_matrix(const intType nrows, const intType ncols, + const double density_val, intType indexing, + std::vector &ia, std::vector &ja, + std::vector &a, bool is_symmetric, + bool require_diagonal = false) { + rand_scalar rand_density; + + for (intType i = 0; i < nrows; i++) { + if (is_symmetric) { + // Fill the lower triangular part based on the previously filled upper triangle + // This ensures that the ja indices are always sorted + for (intType j = 0; j < i; ++j) { + // Check if element at row j and column i has been added, assuming ia and ja are sorted + std::size_t idx = 0; + while (idx < ia.size() && ia[idx] - indexing <= j && ja[idx] - indexing < i) { + ++idx; + } + if (idx < ia.size() && ia[idx] - indexing == j && ja[idx] - indexing == i) { + a.push_back(a[idx]); + ia.push_back(i + indexing); + ja.push_back(j + indexing); + } + } + } + // Loop through the upper triangular to fill a symmetric matrix + const intType j_start = is_symmetric ? i : 0; + for (intType j = j_start; j < ncols; j++) { + const bool is_diag = require_diagonal && i == j; + if (is_diag || (rand_density(0.0, 1.0) <= density_val)) { + a.push_back(generate_data(is_diag)); + ia.push_back(i + indexing); + ja.push_back(j + indexing); + } + } + } + return static_cast(a.size()); +} + +// Populate the 3 arrays of a random sparse matrix in CSR or COO representation +// with the given density in range [0, 1] and values in range [-0.5, 0.5]. 
+// require_diagonal means all diagonal entries guaranteed to be nonzero template -void shuffle_data(const intType *ia, intType *ja, fpType *a, const std::size_t nrows) { - // - // shuffle indices according to random seed - // - intType indexing = ia[0]; - for (std::size_t i = 0; i < nrows; ++i) { - intType nnz_row = ia[i + 1] - ia[i]; - for (intType j = ia[i] - indexing; j < ia[i + 1] - indexing; ++j) { - intType q = ia[i] - indexing + std::rand() % (nnz_row); - // swap element i and q - std::swap(ja[q], ja[j]); - std::swap(a[q], a[j]); +intType generate_random_matrix(sparse_matrix_format_t format, const intType nrows, + const intType ncols, const double density_val, intType indexing, + std::vector &ia, std::vector &ja, + std::vector &a, bool is_symmetric, + bool require_diagonal = false) { + ia.clear(); + ja.clear(); + a.clear(); + if (format == sparse_matrix_format_t::CSR) { + return generate_random_csr_matrix(nrows, ncols, density_val, indexing, ia, ja, a, + is_symmetric, require_diagonal); + } + else if (format == sparse_matrix_format_t::COO) { + return generate_random_coo_matrix(nrows, ncols, density_val, indexing, ia, ja, a, + is_symmetric, require_diagonal); + } + throw std::runtime_error("Unsupported sparse format"); +} + +/// Shuffle the 3arrays CSR or COO representation (ia, ja, values) +/// of any sparse matrix. +/// In CSR format, the elements within a row are shuffled without changing ia. +/// In COO format, all the elements are shuffled. 
+template +void shuffle_sparse_matrix(sparse_matrix_format_t format, intType indexing, intType *ia, + intType *ja, fpType *a, intType nnz, std::size_t nrows) { + if (format == sparse_matrix_format_t::CSR) { + for (std::size_t i = 0; i < nrows; ++i) { + intType nnz_row = ia[i + 1] - ia[i]; + for (intType j = ia[i] - indexing; j < ia[i + 1] - indexing; ++j) { + intType q = ia[i] - indexing + std::rand() % nnz_row; + // Swap elements j and q + std::swap(ja[q], ja[j]); + std::swap(a[q], a[j]); + } } } + else if (format == sparse_matrix_format_t::COO) { + for (std::size_t i = 0; i < static_cast(nnz); ++i) { + intType q = std::rand() % nnz; + // Swap elements i and q + std::swap(ia[q], ia[i]); + std::swap(ja[q], ja[i]); + std::swap(a[q], a[i]); + } + } + else { + throw oneapi::mkl::exception("sparse_blas", "shuffle_sparse_matrix", + "Internal error: unsupported format"); + } +} + +/// Initialize a sparse matrix specified by the given format +template +void init_sparse_matrix(sycl::queue &queue, sparse_matrix_format_t format, + oneapi::mkl::sparse::matrix_handle_t *p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, + ContainerIndexT rows, ContainerIndexT cols, ContainerValueT vals) { + if (format == sparse_matrix_format_t::CSR) { + CALL_RT_OR_CT(oneapi::mkl::sparse::init_csr_matrix, queue, p_smhandle, num_rows, num_cols, + nnz, index, rows, cols, vals); + } + else if (format == sparse_matrix_format_t::COO) { + CALL_RT_OR_CT(oneapi::mkl::sparse::init_coo_matrix, queue, p_smhandle, num_rows, num_cols, + nnz, index, rows, cols, vals); + } + else { + throw oneapi::mkl::exception("sparse_blas", "init_sparse_matrix", + "Internal error: unsupported format"); + } +} + +/// Reset the data of a sparse matrix specified by the given format +template +void set_matrix_data(sycl::queue &queue, sparse_matrix_format_t format, + oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, + std::int64_t num_cols, 
std::int64_t nnz, oneapi::mkl::index_base index, + ContainerIndexT rows, ContainerIndexT cols, ContainerValueT vals) { + if (format == sparse_matrix_format_t::CSR) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_csr_matrix_data, queue, smhandle, num_rows, num_cols, + nnz, index, rows, cols, vals); + } + else if (format == sparse_matrix_format_t::COO) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_coo_matrix_data, queue, smhandle, num_rows, num_cols, + nnz, index, rows, cols, vals); + } + else { + throw oneapi::mkl::exception("sparse_blas", "set_matrix_data", + "Internal error: unsupported format"); + } +} + +template +inline void free_handles(sycl::queue &queue, const std::vector dependencies, + HandlesT &&...handles) { + // Fold expression so that handles expands to each value one after the other. + ( + [&] { + if (!handles) { + return; + } + sycl::event event; + if constexpr (std::is_same_v) { + CALL_RT_OR_CT(event = oneapi::mkl::sparse::release_dense_vector, queue, handles, + dependencies); + } + else if constexpr (std::is_same_v) { + CALL_RT_OR_CT(event = oneapi::mkl::sparse::release_dense_matrix, queue, handles, + dependencies); + } + else if constexpr (std::is_same_v) { + CALL_RT_OR_CT(event = oneapi::mkl::sparse::release_sparse_matrix, queue, handles, + dependencies); + } + event.wait(); + }(), + ...); +} + +template +inline void free_handles(sycl::queue &queue, HandlesT &&...handles) { + free_handles(queue, {}, handles...); +} + +template +inline void wait_and_free_handles(sycl::queue &queue, HandlesT &&...handles) { + queue.wait(); + free_handles(queue, handles...); } -inline void wait_and_free(sycl::queue &main_queue, oneapi::mkl::sparse::matrix_handle_t *p_handle) { - main_queue.wait(); - sycl::event ev_release; - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, p_handle); - ev_release.wait(); +inline bool require_square_matrix( + oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties) { + const bool 
is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + return A_view.type_view != oneapi::mkl::sparse::matrix_descr::general || is_symmetric; } template diff --git a/tests/unit_tests/sparse_blas/include/test_spmm.hpp b/tests/unit_tests/sparse_blas/include/test_spmm.hpp new file mode 100644 index 000000000..8216067a5 --- /dev/null +++ b/tests/unit_tests/sparse_blas/include/test_spmm.hpp @@ -0,0 +1,276 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _TEST_SPMM_HPP__ +#define _TEST_SPMM_HPP__ + +#if __has_include() +#include +#else +#include +#endif + +#include "oneapi/mkl.hpp" +#include "oneapi/mkl/detail/config.hpp" + +#include "common_sparse_reference.hpp" +#include "test_common.hpp" +#include "test_helper.hpp" + +#include + +/** + * Helper function to run tests in different configuration. 
+ * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param format Sparse matrix format to use + * @param non_default_algorithms Algorithms compatible with the given format, other than default_alg + * @param transpose_A Transpose value for the A matrix + * @param transpose_B Transpose value for the B matrix + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + * + * The test functions will use different sizes and leading dimensions if the configuration implies a symmetric matrix. + */ +template +void test_helper_with_format_with_transpose( + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + sparse_matrix_format_t format, + const std::vector &non_default_algorithms, + oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, int &num_passed, + int &num_skipped) { + double density_A_matrix = 0.8; + fpType fp_zero = set_fp_value()(0.f, 0.f); + fpType fp_one = set_fp_value()(1.f, 0.f); + oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; + oneapi::mkl::layout col_major = oneapi::mkl::layout::col_major; + int nrows_A = 4, ncols_A = 6, ncols_C = 5; + int ldb = transpose_A == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A; + int ldc = transpose_A == oneapi::mkl::transpose::nontrans ? 
nrows_A : ncols_A; + oneapi::mkl::sparse::spmm_alg default_alg = oneapi::mkl::sparse::spmm_alg::default_alg; + oneapi::mkl::sparse::matrix_view default_A_view; + std::set no_properties; + bool no_reset_data = false; + + // Basic test + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Reset data + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, true), + num_passed, num_skipped); + // Test index_base 1 + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, + oneapi::mkl::index_base::one, col_major, transpose_A, transpose_B, fp_one, + fp_zero, ldb, ldc, default_alg, default_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Test non-default alpha + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, set_fp_value()(2.f, 1.5f), + fp_zero, ldb, ldc, default_alg, default_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Test non-default beta + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, + set_fp_value()(3.2f, 1.f), ldb, ldc, default_alg, default_A_view, + no_properties, no_reset_data), + num_passed, num_skipped); + // Test 0 alpha + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_zero, fp_one, ldb, ldc, + default_alg, default_A_view, 
no_properties, no_reset_data), + num_passed, num_skipped); + // Test 0 alpha and beta + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_zero, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test non-default ldb + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb + 5, ldc, + default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test non-default ldc + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc + 6, + default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test row major layout + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + oneapi::mkl::layout::row_major, transpose_A, transpose_B, fp_one, fp_zero, + ncols_C, ncols_C, default_alg, default_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Test int64 indices + long long_nrows_A = 27, long_ncols_A = 13, long_ncols_C = 6; + long long_ldb = transpose_A == oneapi::mkl::transpose::nontrans ? long_ncols_A : long_nrows_A; + long long_ldc = transpose_A == oneapi::mkl::transpose::nontrans ? 
long_nrows_A : long_ncols_A; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i64(dev, format, long_nrows_A, long_ncols_A, long_ncols_C, density_A_matrix, + index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, long_ldb, + long_ldc, default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test other algorithms + for (auto alg : non_default_algorithms) { + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, alg, + default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + } + // Test matrix properties + for (auto properties : test_matrix_properties) { + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, + default_alg, default_A_view, properties, no_reset_data), + num_passed, num_skipped); + } +} + +/** + * Helper function to test combination of transpose vals. + * Only test \p conjtrans if \p fpType is complex. 
+ * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param format Sparse matrix format to use + * @param non_default_algorithms Algorithms compatible with the given format, other than default_alg + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper_with_format( + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + sparse_matrix_format_t format, + const std::vector &non_default_algorithms, int &num_passed, + int &num_skipped) { + std::vector transpose_vals{ oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::trans }; + if (complex_info::is_complex) { + transpose_vals.push_back(oneapi::mkl::transpose::conjtrans); + } + for (auto transpose_A : transpose_vals) { + for (auto transpose_B : transpose_vals) { + test_helper_with_format_with_transpose( + test_functor_i32, test_functor_i64, dev, format, non_default_algorithms, + transpose_A, transpose_B, num_passed, num_skipped); + } + } +} + +/** + * Helper function to test multiple sparse matrix format and choose valid algorithms. 
+ * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, + sycl::device *dev, int &num_passed, int &num_skipped) { + test_helper_with_format( + test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR, + { oneapi::mkl::sparse::spmm_alg::no_optimize_alg, oneapi::mkl::sparse::spmm_alg::csr_alg1, + oneapi::mkl::sparse::spmm_alg::csr_alg2, oneapi::mkl::sparse::spmm_alg::csr_alg3 }, + num_passed, num_skipped); + test_helper_with_format( + test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::COO, + { oneapi::mkl::sparse::spmm_alg::no_optimize_alg, oneapi::mkl::sparse::spmm_alg::coo_alg1, + oneapi::mkl::sparse::spmm_alg::coo_alg2, oneapi::mkl::sparse::spmm_alg::coo_alg3, + oneapi::mkl::sparse::spmm_alg::coo_alg4 }, + num_passed, num_skipped); +} + +/// Compute spmm reference as a dense operation +template +void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType *ia, + const intType *ja, const fpType *a, intType a_nrows, + intType a_ncols, intType c_ncols, intType a_nnz, intType indexing, + oneapi::mkl::layout dense_matrix_layout, + oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + fpType alpha, fpType beta, intType ldb, intType ldc, + const fpType *b, oneapi::mkl::sparse::matrix_view A_view, + fpType *c_ref) { + std::size_t a_nrows_u = static_cast(a_nrows); + std::size_t a_ncols_u = static_cast(a_ncols); + std::size_t c_ncols_u = static_cast(c_ncols); + std::size_t opa_nrows = (opA == oneapi::mkl::transpose::nontrans) ? a_nrows_u : a_ncols_u; + std::size_t opa_ncols = (opA == oneapi::mkl::transpose::nontrans) ? 
a_ncols_u : a_nrows_u; + const std::size_t nnz = static_cast(a_nnz); + const std::size_t ldb_u = static_cast(ldb); + const std::size_t ldc_u = static_cast(ldc); + // dense_opa is always row major + auto dense_opa = + sparse_to_dense(format, ia, ja, a, a_nrows_u, a_ncols_u, nnz, indexing, opA, A_view); + + std::size_t b_outer_size = static_cast(opa_ncols); + std::size_t b_inner_size = c_ncols_u; + if (dense_matrix_layout == oneapi::mkl::layout::col_major) { + std::swap(b_outer_size, b_inner_size); + } + auto dense_opb = dense_transpose_if_needed(b, b_outer_size, b_inner_size, ldb_u, opB); + + // Return the linear index to access a dense matrix from + auto dense_linear_idx = [=](std::size_t row, std::size_t col, std::size_t ld) { + return (dense_matrix_layout == oneapi::mkl::layout::row_major) ? row * ld + col + : col * ld + row; + }; + + // + // do SPMM operation + // + // C <- alpha * opA(A) * opB(B) + beta * C + // + for (std::size_t row = 0; row < opa_nrows; row++) { + for (std::size_t col = 0; col < c_ncols_u; col++) { + fpType acc = 0; + for (std::size_t i = 0; i < opa_ncols; i++) { + acc += dense_opa[row * opa_ncols + i] * dense_opb[dense_linear_idx(i, col, ldb_u)]; + } + fpType &c = c_ref[dense_linear_idx(row, col, ldc_u)]; + c = alpha * acc + beta * c; + } + } +} + +#endif // _TEST_SPMM_HPP__ diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp new file mode 100644 index 000000000..eee9ec124 --- /dev/null +++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp @@ -0,0 +1,236 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _TEST_SPMV_HPP__ +#define _TEST_SPMV_HPP__ + +#if __has_include() +#include +#else +#include +#endif + +#include "oneapi/mkl.hpp" +#include "oneapi/mkl/detail/config.hpp" + +#include "common_sparse_reference.hpp" +#include "test_common.hpp" +#include "test_helper.hpp" + +#include + +/** + * Helper function to run tests in different configuration. + * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param format Sparse matrix format to use + * @param non_default_algorithms Algorithms compatible with the given format, other than default_alg + * @param transpose_val Transpose value for the input matrix + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + * + * The test functions will use different sizes if the configuration implies a symmetric matrix. 
+ */ +template +void test_helper_with_format( + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + sparse_matrix_format_t format, + const std::vector &non_default_algorithms, + oneapi::mkl::transpose transpose_val, int &num_passed, int &num_skipped) { + double density_A_matrix = 0.8; + fpType fp_zero = set_fp_value()(0.f, 0.f); + fpType fp_one = set_fp_value()(1.f, 0.f); + int nrows_A = 4, ncols_A = 6; + oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; + oneapi::mkl::sparse::spmv_alg default_alg = oneapi::mkl::sparse::spmv_alg::default_alg; + oneapi::mkl::sparse::matrix_view default_A_view; + std::set no_properties; + bool no_reset_data = false; + + // Basic test + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, default_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Reset data + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, default_A_view, no_properties, true), + num_passed, num_skipped); + // Test index_base 1 + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, + oneapi::mkl::index_base::one, transpose_val, fp_one, fp_zero, default_alg, + default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test non-default alpha + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + set_fp_value()(2.f, 1.5f), fp_zero, default_alg, default_A_view, + no_properties, no_reset_data), + num_passed, num_skipped); + // Test non-default beta + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, set_fp_value()(3.2f, 1.f), default_alg, default_A_view, + no_properties, 
no_reset_data), + num_passed, num_skipped); + // Test 0 alpha + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_zero, fp_one, default_alg, default_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Test 0 alpha and beta + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_zero, fp_zero, default_alg, default_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Test int64 indices + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i64(dev, format, 27L, 13L, density_A_matrix, index_zero, transpose_val, fp_one, + fp_one, default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Lower triangular + oneapi::mkl::sparse::matrix_view triangular_A_view( + oneapi::mkl::sparse::matrix_descr::triangular); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, triangular_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Upper triangular + triangular_A_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, triangular_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Lower triangular unit diagonal + oneapi::mkl::sparse::matrix_view triangular_unit_A_view( + oneapi::mkl::sparse::matrix_descr::triangular); + triangular_unit_A_view.diag_view = oneapi::mkl::diag::unit; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, triangular_unit_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Upper triangular unit diagonal + triangular_A_view.uplo_view 
= oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, triangular_unit_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Lower symmetric or hermitian + oneapi::mkl::sparse::matrix_view symmetric_view( + complex_info::is_complex ? oneapi::mkl::sparse::matrix_descr::hermitian + : oneapi::mkl::sparse::matrix_descr::symmetric); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, symmetric_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Upper symmetric or hermitian + symmetric_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, symmetric_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Test other algorithms + for (auto alg : non_default_algorithms) { + EXPECT_TRUE_OR_FUTURE_SKIP(test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, + index_zero, transpose_val, fp_one, fp_zero, alg, + default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + } + // Test matrix properties + for (auto properties : test_matrix_properties) { + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + transpose_val, fp_one, fp_zero, default_alg, default_A_view, + properties, no_reset_data), + num_passed, num_skipped); + } +} + +/** + * Helper function to test multiple sparse matrix format and choose valid algorithms. 
+ * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param transpose_val Transpose value for the input matrix + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, + sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, + int &num_skipped) { + test_helper_with_format( + test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR, + { oneapi::mkl::sparse::spmv_alg::no_optimize_alg, oneapi::mkl::sparse::spmv_alg::csr_alg1, + oneapi::mkl::sparse::spmv_alg::csr_alg2, oneapi::mkl::sparse::spmv_alg::csr_alg3 }, + transpose_val, num_passed, num_skipped); + test_helper_with_format( + test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::COO, + { oneapi::mkl::sparse::spmv_alg::no_optimize_alg, oneapi::mkl::sparse::spmv_alg::coo_alg1, + oneapi::mkl::sparse::spmv_alg::coo_alg2 }, + transpose_val, num_passed, num_skipped); +} + +/// Compute spmv reference as a dense operation +template +void prepare_reference_spmv_data(sparse_matrix_format_t format, const intType *ia, + const intType *ja, const fpType *a, intType a_nrows, + intType a_ncols, intType a_nnz, intType indexing, + oneapi::mkl::transpose opA, fpType alpha, fpType beta, + const fpType *x, oneapi::mkl::sparse::matrix_view A_view, + fpType *y_ref) { + std::size_t a_nrows_u = static_cast(a_nrows); + std::size_t a_ncols_u = static_cast(a_ncols); + std::size_t opa_nrows = (opA == oneapi::mkl::transpose::nontrans) ? a_nrows_u : a_ncols_u; + std::size_t opa_ncols = (opA == oneapi::mkl::transpose::nontrans) ? 
a_ncols_u : a_nrows_u; + const std::size_t nnz = static_cast(a_nnz); + auto dense_opa = + sparse_to_dense(format, ia, ja, a, a_nrows_u, a_ncols_u, nnz, indexing, opA, A_view); + + // + // do SPMV operation + // + // y_ref <- alpha * op(A) * x + beta * y_ref + // + for (std::size_t row = 0; row < opa_nrows; row++) { + fpType acc = 0; + for (std::size_t col = 0; col < opa_ncols; col++) { + acc += dense_opa[row * opa_ncols + col] * x[col]; + } + y_ref[row] = alpha * acc + beta * y_ref[row]; + } +} + +#endif // _TEST_SPMV_HPP__ diff --git a/tests/unit_tests/sparse_blas/include/test_spsv.hpp b/tests/unit_tests/sparse_blas/include/test_spsv.hpp new file mode 100644 index 000000000..3354dffaa --- /dev/null +++ b/tests/unit_tests/sparse_blas/include/test_spsv.hpp @@ -0,0 +1,191 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _TEST_SPSV_HPP__ +#define _TEST_SPSV_HPP__ + +#if __has_include() +#include +#else +#include +#endif + +#include "oneapi/mkl.hpp" +#include "oneapi/mkl/detail/config.hpp" + +#include "common_sparse_reference.hpp" +#include "test_common.hpp" +#include "test_helper.hpp" + +#include + +/** + * Helper function to run tests in different configuration. 
+ * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param transpose_val Transpose value for the input matrix + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper_with_format(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, + sycl::device *dev, sparse_matrix_format_t format, + oneapi::mkl::transpose transpose_val, int &num_passed, + int &num_skipped) { + double density_A_matrix = 0.144; + fpType alpha = set_fp_value()(1.f, 0.f); + int m = 277; + oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; + oneapi::mkl::sparse::spsv_alg default_alg = oneapi::mkl::sparse::spsv_alg::default_alg; + oneapi::mkl::sparse::spsv_alg no_optimize_alg = oneapi::mkl::sparse::spsv_alg::no_optimize_alg; + oneapi::mkl::sparse::matrix_view default_A_view(oneapi::mkl::sparse::matrix_descr::triangular); + oneapi::mkl::sparse::matrix_view upper_A_view(oneapi::mkl::sparse::matrix_descr::triangular); + upper_A_view.uplo_view = oneapi::mkl::uplo::upper; + std::set no_properties; + bool no_reset_data = false; + + // Basic test + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Reset data + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + default_alg, default_A_view, no_properties, true), + num_passed, num_skipped); + // Test index_base 1 + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, oneapi::mkl::index_base::one, + transpose_val, alpha, default_alg, default_A_view, no_properties, + no_reset_data), + num_passed, 
num_skipped); + // Test upper triangular matrix + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + default_alg, upper_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test lower triangular unit diagonal matrix + oneapi::mkl::sparse::matrix_view triangular_unit_A_view( + oneapi::mkl::sparse::matrix_descr::triangular); + triangular_unit_A_view.diag_view = oneapi::mkl::diag::unit; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + default_alg, triangular_unit_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test upper triangular unit diagonal matrix + triangular_unit_A_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + default_alg, triangular_unit_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test non-default alpha + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, + set_fp_value()(2.f, 1.5f), default_alg, default_A_view, + no_properties, no_reset_data), + num_passed, num_skipped); + // Test int64 indices + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i64(dev, format, 15L, density_A_matrix, index_zero, transpose_val, alpha, + default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test lower no_optimize_alg + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + no_optimize_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test upper no_optimize_alg + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + no_optimize_alg, upper_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test 
matrix properties + for (auto properties : test_matrix_properties) { + // Basic test with matrix properties + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + default_alg, default_A_view, properties, no_reset_data), + num_passed, num_skipped); + // Test lower no_optimize_alg with matrix properties + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + no_optimize_alg, default_A_view, properties, no_reset_data), + num_passed, num_skipped); + } +} + +/** + * Helper function to test multiple sparse matrix format. + * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param transpose_val Transpose value for the input matrix + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, + sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, + int &num_skipped) { + test_helper_with_format(test_functor_i32, test_functor_i64, dev, + sparse_matrix_format_t::CSR, transpose_val, num_passed, + num_skipped); + test_helper_with_format(test_functor_i32, test_functor_i64, dev, + sparse_matrix_format_t::COO, transpose_val, num_passed, + num_skipped); +} + +/// Compute spsv reference as a dense operation +template +void prepare_reference_spsv_data(sparse_matrix_format_t format, const intType *ia, + const intType *ja, const fpType *a, intType m, intType nnz, + intType indexing, oneapi::mkl::transpose opA, const fpType *x, + fpType alpha, oneapi::mkl::sparse::matrix_view A_view, + fpType *y_ref) { + std::size_t mu = static_cast(m); + auto dense_opa = sparse_to_dense(format, ia, ja, a, mu, mu, 
static_cast(nnz), + indexing, opA, A_view); + + // + // do SPSV operation + // + // y_ref <- op(A)^-1 * x + // + // Compute each element of the reference one after the other starting from 0 (resp. the end) for a lower (resp. upper) triangular matrix. + // A matrix is considered lowered if it is lower and not transposed or upper and transposed. + const bool is_lower = + (A_view.uplo_view == oneapi::mkl::uplo::lower) == (opA == oneapi::mkl::transpose::nontrans); + for (std::size_t row = 0; row < mu; row++) { + std::size_t uplo_row = is_lower ? row : (mu - 1 - row); + fpType rhs = alpha * x[uplo_row]; + for (std::size_t col = 0; col < row; col++) { + std::size_t uplo_col = is_lower ? col : (mu - 1 - col); + rhs -= dense_opa[uplo_row * mu + uplo_col] * y_ref[uplo_col]; + } + y_ref[uplo_row] = rhs / dense_opa[uplo_row * mu + uplo_row]; + } +} + +#endif // _TEST_SPSV_HPP__ diff --git a/tests/unit_tests/sparse_blas/source/CMakeLists.txt b/tests/unit_tests/sparse_blas/source/CMakeLists.txt index 3a1fcb288..a9271bfa2 100644 --- a/tests/unit_tests/sparse_blas/source/CMakeLists.txt +++ b/tests/unit_tests/sparse_blas/source/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,12 +18,12 @@ #=============================================================================== set(SPBLAS_SOURCES - "sparse_gemm_buffer.cpp" - "sparse_gemm_usm.cpp" - "sparse_gemv_buffer.cpp" - "sparse_gemv_usm.cpp" - "sparse_trsv_buffer.cpp" - "sparse_trsv_usm.cpp" + "sparse_spmm_buffer.cpp" + "sparse_spmm_usm.cpp" + "sparse_spmv_buffer.cpp" + "sparse_spmv_usm.cpp" + "sparse_spsv_buffer.cpp" + "sparse_spsv_usm.cpp" ) include(WarningsUtils) diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemm_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemm_buffer.cpp deleted file mode 100644 index cc6fae6db..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_gemm_buffer.cpp +++ /dev/null @@ -1,302 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType nrows_A, intType ncols_A, intType ncols_C, - double density_A_matrix, oneapi::mkl::index_base index, - oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb, intType ldc, - bool opt_1_input, bool opt_2_inputs) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; - std::size_t opa_nrows = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? nrows_A : ncols_A); - std::size_t opa_ncols = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? 
ncols_A : nrows_A); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - intType nnz = generate_random_matrix(nrows_A, ncols_A, density_A_matrix, - int_index, ia_host, ja_host, a_host); - - // Input and output dense vectors - std::vector b_host, c_host; - rand_matrix(b_host, dense_matrix_layout, opa_ncols, static_cast(ncols_C), - static_cast(ldb)); - rand_matrix(c_host, dense_matrix_layout, opa_nrows, static_cast(ncols_C), - static_cast(ldc)); - std::vector c_ref_host(c_host); - - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows_A)); - - auto ia_buf = make_buffer(ia_host); - auto ja_buf = make_buffer(ja_host); - auto a_buf = make_buffer(a_host); - auto b_buf = make_buffer(b_host); - auto c_buf = make_buffer(c_host); - - sycl::event ev_release; - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - try { - CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(oneapi::mkl::sparse::set_csr_data, main_queue, handle, nrows_A, ncols_A, nnz, - index, ia_buf, ja_buf, a_buf); - - if (opt_1_input) { - CALL_RT_OR_CT(oneapi::mkl::sparse::optimize_gemm, main_queue, transpose_A, handle); - } - - if (opt_2_inputs) { - CALL_RT_OR_CT(oneapi::mkl::sparse::optimize_gemm, main_queue, transpose_A, transpose_B, - dense_matrix_layout, static_cast(ncols_C), handle); - } - - CALL_RT_OR_CT(oneapi::mkl::sparse::gemm, main_queue, dense_matrix_layout, transpose_A, - transpose_B, alpha, handle, b_buf, ncols_C, ldb, beta, c_buf, ldc); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse GEMV:\n" - << e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - 
catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse GEMV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_gemm_data(ia_host.data(), ja_host.data(), a_host.data(), nrows_A, ncols_A, - ncols_C, nnz, int_index, dense_matrix_layout, transpose_A, - transpose_B, alpha, beta, ldb, ldc, b_host.data(), - c_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. - auto c_acc = c_buf.template get_host_access(sycl::read_only); - bool valid = check_equal_vector(c_acc, c_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseGemmBufferTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. - * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_A Transpose value for the A matrix - * @param transpose_B Transpose value for the B matrix - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, int &num_passed, int &num_skipped) { - double density_A_matrix = 0.8; - fpType fp_zero = set_fp_value()(0.f, 0.f); - fpType fp_one = set_fp_value()(1.f, 0.f); - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - oneapi::mkl::layout col_major = oneapi::mkl::layout::col_major; - int nrows_A = 4, ncols_A = 6, ncols_C = 5; - int ldb = transpose_A == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A; - int ldc = transpose_A == oneapi::mkl::transpose::nontrans ? 
nrows_A : ncols_A; - bool no_opt_1_input = false; - bool opt_2_inputs = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, oneapi::mkl::index_base::one, - col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test non-default alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, set_fp_value()(2.f, 1.5f), fp_zero, ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test non-default beta - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, set_fp_value()(3.2f, 1.f), ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test 0 alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_zero, fp_one, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test 0 alpha and beta - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_zero, fp_zero, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test non-default ldb - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb + 5, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test non-default ldc - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, 
index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc + 6, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test row major layout - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - oneapi::mkl::layout::row_major, transpose_A, transpose_B, fp_one, fp_zero, ncols_C, - ncols_C, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test int64 indices - long long_nrows_A = 27, long_ncols_A = 13, long_ncols_C = 6; - long long_ldb = transpose_A == oneapi::mkl::transpose::nontrans ? long_ncols_A : long_nrows_A; - long long_ldc = transpose_A == oneapi::mkl::transpose::nontrans ? long_nrows_A : long_ncols_A; - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, long_nrows_A, long_ncols_A, long_ncols_C, density_A_matrix, - index_zero, col_major, transpose_A, transpose_B, fp_one, - fp_zero, long_ldb, long_ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Use optimize_gemm with only the sparse gemm input - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, true, false), - num_passed, num_skipped); - // Use the 2 optimize_gemm versions - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, true, true), - num_passed, num_skipped); - // Do not use optimize_gemm - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, false, false), - num_passed, num_skipped); -} - -/** - * Helper function to test combination of transpose vals. - * Only test \p conjtrans if \p fpType is complex. 
- * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -void test_helper_transpose(sycl::device *dev, int &num_passed, int &num_skipped) { - std::vector transpose_vals{ oneapi::mkl::transpose::nontrans, - oneapi::mkl::transpose::trans }; - if (complex_info::is_complex) { - transpose_vals.push_back(oneapi::mkl::transpose::conjtrans); - } - for (auto transpose_A : transpose_vals) { - for (auto transpose_B : transpose_vals) { - test_helper(dev, transpose_A, transpose_B, num_passed, num_skipped); - } - } -} - -TEST_P(SparseGemmBufferTests, RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemmBufferTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemmBufferTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseGemmBufferTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseGemmBufferTestSuite, SparseGemmBufferTests, - testing::ValuesIn(devices), ::DeviceNamePrint()); - -} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemm_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemm_usm.cpp deleted file mode 100644 index 3850f3b99..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_gemm_usm.cpp +++ /dev/null @@ -1,330 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType nrows_A, intType ncols_A, intType ncols_C, - double density_A_matrix, oneapi::mkl::index_base index, - oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb, intType ldc, - bool opt_1_input, bool opt_2_inputs) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; - std::size_t opa_nrows = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? nrows_A : ncols_A); - std::size_t opa_ncols = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? 
ncols_A : nrows_A); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - intType nnz = generate_random_matrix(nrows_A, ncols_A, density_A_matrix, - int_index, ia_host, ja_host, a_host); - - // Input and output dense vectors - std::vector b_host, c_host; - rand_matrix(b_host, dense_matrix_layout, opa_ncols, static_cast(ncols_C), - static_cast(ldb)); - rand_matrix(c_host, dense_matrix_layout, opa_nrows, static_cast(ncols_C), - static_cast(ldc)); - std::vector c_ref_host(c_host); - - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows_A)); - - auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); - auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); - auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); - auto b_usm_uptr = malloc_device_uptr(main_queue, b_host.size()); - auto c_usm_uptr = malloc_device_uptr(main_queue, c_host.size()); - - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *b_usm = b_usm_uptr.get(); - fpType *c_usm = c_usm_uptr.get(); - - std::vector mat_dependencies; - std::vector gemm_dependencies; - // Copy host to device - mat_dependencies.push_back( - main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); - gemm_dependencies.push_back( - main_queue.memcpy(b_usm, b_host.data(), b_host.size() * sizeof(fpType))); - gemm_dependencies.push_back( - main_queue.memcpy(c_usm, c_host.data(), c_host.size() * sizeof(fpType))); - - sycl::event ev_copy, ev_release; - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - try { - sycl::event event; - 
CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(event = oneapi::mkl::sparse::set_csr_data, main_queue, handle, nrows_A, - ncols_A, nnz, index, ia_usm, ja_usm, a_usm, mat_dependencies); - - if (opt_1_input) { - CALL_RT_OR_CT(event = oneapi::mkl::sparse::optimize_gemm, main_queue, transpose_A, - handle, { event }); - } - - if (opt_2_inputs) { - CALL_RT_OR_CT(event = oneapi::mkl::sparse::optimize_gemm, main_queue, transpose_A, - transpose_B, dense_matrix_layout, static_cast(ncols_C), - handle, { event }); - } - - gemm_dependencies.push_back(event); - CALL_RT_OR_CT(event = oneapi::mkl::sparse::gemm, main_queue, dense_matrix_layout, - transpose_A, transpose_B, alpha, handle, b_usm, ncols_C, ldb, beta, c_usm, - ldc, gemm_dependencies); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle, - { event }); - - ev_copy = main_queue.memcpy(c_host.data(), c_usm, c_host.size() * sizeof(fpType), event); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse GEMV:\n" - << e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse GEMV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_gemm_data(ia_host.data(), ja_host.data(), a_host.data(), nrows_A, ncols_A, - ncols_C, nnz, int_index, dense_matrix_layout, transpose_A, - transpose_B, alpha, beta, ldb, ldc, b_host.data(), - c_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. 
- ev_copy.wait_and_throw(); - bool valid = check_equal_vector(c_host, c_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseGemmUsmTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. - * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_A Transpose value for the A matrix - * @param transpose_B Transpose value for the B matrix - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, int &num_passed, int &num_skipped) { - double density_A_matrix = 0.8; - fpType fp_zero = set_fp_value()(0.f, 0.f); - fpType fp_one = set_fp_value()(1.f, 0.f); - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - oneapi::mkl::layout col_major = oneapi::mkl::layout::col_major; - int nrows_A = 4, ncols_A = 6, ncols_C = 5; - int ldb = transpose_A == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A; - int ldc = transpose_A == oneapi::mkl::transpose::nontrans ? 
nrows_A : ncols_A; - bool no_opt_1_input = false; - bool opt_2_inputs = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, oneapi::mkl::index_base::one, - col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test non-default alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, set_fp_value()(2.f, 1.5f), fp_zero, ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test non-default beta - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, set_fp_value()(3.2f, 1.f), ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test 0 alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_zero, fp_one, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test 0 alpha and beta - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_zero, fp_zero, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test non-default ldb - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb + 5, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test non-default ldc - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, 
index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc + 6, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test row major layout - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - oneapi::mkl::layout::row_major, transpose_A, transpose_B, fp_one, fp_zero, ncols_C, - ncols_C, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test int64 indices - long long_nrows_A = 27, long_ncols_A = 13, long_ncols_C = 6; - long long_ldb = transpose_A == oneapi::mkl::transpose::nontrans ? long_ncols_A : long_nrows_A; - long long_ldc = transpose_A == oneapi::mkl::transpose::nontrans ? long_nrows_A : long_ncols_A; - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, long_nrows_A, long_ncols_A, long_ncols_C, density_A_matrix, - index_zero, col_major, transpose_A, transpose_B, fp_one, - fp_zero, long_ldb, long_ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Use optimize_gemm with only the sparse gemm input - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, true, false), - num_passed, num_skipped); - // Use the 2 optimize_gemm versions - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, true, true), - num_passed, num_skipped); - // Do not use optimize_gemm - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, false, false), - num_passed, num_skipped); -} - -/** - * Helper function to test combination of transpose vals. - * Only test \p conjtrans if \p fpType is complex. 
- * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -auto test_helper_transpose(sycl::device *dev, int &num_passed, int &num_skipped) { - std::vector transpose_vals{ oneapi::mkl::transpose::nontrans, - oneapi::mkl::transpose::trans }; - if (complex_info::is_complex) { - transpose_vals.push_back(oneapi::mkl::transpose::conjtrans); - } - for (auto transpose_A : transpose_vals) { - for (auto transpose_B : transpose_vals) { - test_helper(dev, transpose_A, transpose_B, num_passed, num_skipped); - } - } -} - -TEST_P(SparseGemmUsmTests, RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemmUsmTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemmUsmTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseGemmUsmTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseGemmUsmTestSuite, SparseGemmUsmTests, testing::ValuesIn(devices), - ::DeviceNamePrint()); - -} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp deleted file mode 100644 index b6506ed5f..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matrix, - oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, - fpType beta, bool use_optimize) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; - std::size_t opa_nrows = - static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? nrows : ncols); - std::size_t opa_ncols = - static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? ncols : nrows); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - intType nnz = generate_random_matrix(nrows, ncols, density_A_matrix, int_index, - ia_host, ja_host, a_host); - - // Input and output dense vectors - // The input `x` and the input-output `y` are both initialized to random values on host and device. 
- std::vector x_host, y_host; - rand_vector(x_host, opa_ncols); - rand_vector(y_host, opa_nrows); - std::vector y_ref_host(y_host); - - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows)); - - auto ia_buf = make_buffer(ia_host); - auto ja_buf = make_buffer(ja_host); - auto a_buf = make_buffer(a_host); - auto x_buf = make_buffer(x_host); - auto y_buf = make_buffer(y_host); - - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - sycl::event ev_release; - try { - CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(oneapi::mkl::sparse::set_csr_data, main_queue, handle, nrows, ncols, nnz, - index, ia_buf, ja_buf, a_buf); - - if (use_optimize) { - CALL_RT_OR_CT(oneapi::mkl::sparse::optimize_gemv, main_queue, transpose_val, handle); - } - - CALL_RT_OR_CT(oneapi::mkl::sparse::gemv, main_queue, transpose_val, alpha, handle, x_buf, - beta, y_buf); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse GEMV:\n" - << e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse GEMV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_gemv_data(ia_host.data(), ja_host.data(), a_host.data(), nrows, ncols, nnz, - int_index, transpose_val, alpha, beta, x_host.data(), - y_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. 
- auto y_acc = y_buf.template get_host_access(sycl::read_only); - bool valid = check_equal_vector(y_acc, y_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseGemvBufferTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. - * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_val Transpose value for the input matrix - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, - int &num_skipped) { - double density_A_matrix = 0.8; - fpType fp_zero = set_fp_value()(0.f, 0.f); - fpType fp_one = set_fp_value()(1.f, 0.f); - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - bool use_optimize = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, use_optimize), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, oneapi::mkl::index_base::one, - transpose_val, fp_one, fp_zero, use_optimize), - num_passed, num_skipped); - // Test non-default alpha - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, - set_fp_value()(2.f, 1.5f), fp_zero, use_optimize), - num_passed, num_skipped); - // Test non-default beta - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, - set_fp_value()(3.2f, 1.f), use_optimize), - num_passed, num_skipped); - // Test 0 alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_zero, fp_one, use_optimize), - num_passed, num_skipped); - // Test 0 alpha and beta - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, 
index_zero, transpose_val, fp_zero, - fp_zero, use_optimize), - num_passed, num_skipped); - // Test int64 indices - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 27L, 13L, density_A_matrix, index_zero, transpose_val, - fp_one, fp_one, use_optimize), - num_passed, num_skipped); - // Test without optimize_gemv - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, false), - num_passed, num_skipped); -} - -TEST_P(SparseGemvBufferTests, RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemvBufferTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemvBufferTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseGemvBufferTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseGemvBufferTestSuite, SparseGemvBufferTests, - testing::ValuesIn(devices), ::DeviceNamePrint()); - -} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp deleted file mode 100644 index 582e0c6f4..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matrix, - oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, - fpType beta, bool use_optimize) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; - std::size_t opa_nrows = - static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? nrows : ncols); - std::size_t opa_ncols = - static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? ncols : nrows); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - intType nnz = generate_random_matrix(nrows, ncols, density_A_matrix, int_index, - ia_host, ja_host, a_host); - - // Input and output dense vectors - // The input `x` and the input-output `y` are both initialized to random values on host and device. 
- std::vector x_host, y_host; - rand_vector(x_host, opa_ncols); - rand_vector(y_host, opa_nrows); - std::vector y_ref_host(y_host); - - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows)); - - auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); - auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); - auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); - auto x_usm_uptr = malloc_device_uptr(main_queue, x_host.size()); - auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); - - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *x_usm = x_usm_uptr.get(); - fpType *y_usm = y_usm_uptr.get(); - - std::vector mat_dependencies; - std::vector gemv_dependencies; - // Copy host to device - mat_dependencies.push_back( - main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); - gemv_dependencies.push_back( - main_queue.memcpy(x_usm, x_host.data(), x_host.size() * sizeof(fpType))); - gemv_dependencies.push_back( - main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); - - sycl::event ev_copy, ev_release; - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - try { - sycl::event event; - CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(event = oneapi::mkl::sparse::set_csr_data, main_queue, handle, nrows, ncols, - nnz, index, ia_usm, ja_usm, a_usm, mat_dependencies); - - if (use_optimize) { - CALL_RT_OR_CT(event = oneapi::mkl::sparse::optimize_gemv, main_queue, transpose_val, - handle, { event }); - } - - gemv_dependencies.push_back(event); - CALL_RT_OR_CT(event = 
oneapi::mkl::sparse::gemv, main_queue, transpose_val, alpha, handle, - x_usm, beta, y_usm, gemv_dependencies); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle, - { event }); - - ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), event); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse GEMV:\n" - << e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse GEMV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_gemv_data(ia_host.data(), ja_host.data(), a_host.data(), nrows, ncols, nnz, - int_index, transpose_val, alpha, beta, x_host.data(), - y_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. - ev_copy.wait_and_throw(); - bool valid = check_equal_vector(y_host, y_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseGemvUsmTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. 
- * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_val Transpose value for the input matrix - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, - int &num_skipped) { - double density_A_matrix = 0.8; - fpType fp_zero = set_fp_value()(0.f, 0.f); - fpType fp_one = set_fp_value()(1.f, 0.f); - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - bool use_optimize = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, use_optimize), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, oneapi::mkl::index_base::one, - transpose_val, fp_one, fp_zero, use_optimize), - num_passed, num_skipped); - // Test non-default alpha - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, - set_fp_value()(2.f, 1.5f), fp_zero, use_optimize), - num_passed, num_skipped); - // Test non-default beta - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, - set_fp_value()(3.2f, 1.f), use_optimize), - num_passed, num_skipped); - // Test 0 alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_zero, fp_one, use_optimize), - num_passed, num_skipped); - // Test 0 alpha and beta - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_zero, - fp_zero, use_optimize), - num_passed, num_skipped); - // Test int64 indices - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 27L, 13L, density_A_matrix, index_zero, transpose_val, - fp_one, fp_one, use_optimize), - num_passed, num_skipped); - // Test without optimize_gemv - 
EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, false), - num_passed, num_skipped); -} - -TEST_P(SparseGemvUsmTests, RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemvUsmTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemvUsmTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseGemvUsmTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseGemvUsmTestSuite, SparseGemvUsmTests, testing::ValuesIn(devices), - ::DeviceNamePrint()); - -} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp new file mode 100644 index 000000000..b655aaf37 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp @@ -0,0 +1,247 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. 
+* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spmm.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, + intType ncols_C, double density_A_matrix, oneapi::mkl::index_base index, + oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A, + oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb, + intType ldc, oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data) { + sycl::queue main_queue(*dev, exception_handler_t()); + + if (require_square_matrix(A_view, matrix_properties)) { + ncols_A = nrows_A; + ncols_C = nrows_A; + ldb = nrows_A; + ldc = nrows_A; + } + std::size_t opa_nrows = static_cast( + transpose_A == oneapi::mkl::transpose::nontrans ? nrows_A : ncols_A); + std::size_t opa_ncols = static_cast( + transpose_A == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A); + intType indexing = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + intType nnz = + generate_random_matrix(format, nrows_A, ncols_A, density_A_matrix, + indexing, ia_host, ja_host, a_host, is_symmetric); + + // Input and output dense vectors + std::vector b_host, c_host; + rand_matrix(b_host, dense_matrix_layout, opa_ncols, static_cast(ncols_C), + static_cast(ldb)); + rand_matrix(c_host, dense_matrix_layout, opa_nrows, static_cast(ncols_C), + static_cast(ldc)); + std::vector c_ref_host(c_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + static_cast(nrows_A)); + } + + auto ia_buf = make_buffer(ia_host); + auto ja_buf = make_buffer(ja_host); + auto a_buf = make_buffer(a_host); + auto b_buf = make_buffer(b_host); + auto c_buf = make_buffer(c_host); + + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_matrix_handle_t B_handle = nullptr; + oneapi::mkl::sparse::dense_matrix_handle_t C_handle = nullptr; + oneapi::mkl::sparse::spmm_descr_t descr = nullptr; + try { + init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_buf, + ja_buf, a_buf); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); + } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &B_handle, + static_cast(opa_ncols), ncols_C, ldb, dense_matrix_layout, + b_buf); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &C_handle, + static_cast(opa_nrows), ncols_C, ldc, dense_matrix_layout, + c_buf); + + 
CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmm_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + workspace_size); + sycl::buffer workspace_buf((sycl::range<1>(workspace_size))); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + workspace_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B, &alpha, + A_view, A_handle, B_handle, &beta, C_handle, alg, descr); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + format, nrows_A, ncols_A, density_A_matrix, indexing, ia_host, ja_host, a_host, + is_symmetric); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), + a_host.data(), nnz, static_cast(nrows_A)); + } + if (reset_nnz > nnz) { + ia_buf = make_buffer(ia_host); + ja_buf = make_buffer(ja_host); + a_buf = make_buffer(a_host); + } + else { + copy_host_to_buffer(main_queue, ia_host, ia_buf); + copy_host_to_buffer(main_queue, ja_host, ja_buf); + copy_host_to_buffer(main_queue, a_host, a_buf); + } + nnz = reset_nnz; + fill_buffer_to_0(main_queue, c_buf); + set_matrix_data(main_queue, format, A_handle, nrows_A, ncols_A, nnz, index, ia_buf, + ja_buf, a_buf); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A, + transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, + descr, workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_buf = sycl::buffer((sycl::range<1>(workspace_size_2))); + } + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + workspace_buf); + + 
CALL_RT_OR_CT(oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B, &alpha, + A_view, A_handle, B_handle, &beta, C_handle, alg, descr); + } + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous SYCL exception during sparse SPMM:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, B_handle, C_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmm_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPMM:\n" << error.what() << std::endl; + return 0; + } + CALL_RT_OR_CT(oneapi::mkl::sparse::release_spmm_descr, main_queue, descr); + free_handles(main_queue, A_handle, B_handle, C_handle); + + // Compute reference. + prepare_reference_spmm_data(format, ia_host.data(), ja_host.data(), a_host.data(), nrows_A, + ncols_A, ncols_C, nnz, indexing, dense_matrix_layout, transpose_A, + transpose_B, alpha, beta, ldb, ldc, b_host.data(), A_view, + c_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + auto c_acc = c_buf.template get_host_access(sycl::read_only); + bool valid = check_equal_vector(c_acc, c_ref_host); + + return static_cast(valid); +} + +class SparseSpmmBufferTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpmmBufferTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +TEST_P(SparseSpmmBufferTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmmBufferTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmmBufferTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpmmBufferTestSuite, SparseSpmmBufferTests, + testing::ValuesIn(devices), ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp new file mode 100644 index 000000000..6f29ca9b1 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp @@ -0,0 +1,285 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spmm.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, + intType ncols_C, double density_A_matrix, oneapi::mkl::index_base index, + oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A, + oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb, + intType ldc, oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data) { + sycl::queue main_queue(*dev, exception_handler_t()); + + if (require_square_matrix(A_view, matrix_properties)) { + ncols_A = nrows_A; + ncols_C = nrows_A; + ldb = nrows_A; + ldc = nrows_A; + } + std::size_t opa_nrows = static_cast( + transpose_A == oneapi::mkl::transpose::nontrans ? nrows_A : ncols_A); + std::size_t opa_ncols = static_cast( + transpose_A == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A); + intType indexing = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + intType nnz = + generate_random_matrix(format, nrows_A, ncols_A, density_A_matrix, + indexing, ia_host, ja_host, a_host, is_symmetric); + + // Input and output dense vectors + std::vector b_host, c_host; + rand_matrix(b_host, dense_matrix_layout, opa_ncols, static_cast(ncols_C), + static_cast(ldb)); + rand_matrix(c_host, dense_matrix_layout, opa_nrows, static_cast(ncols_C), + static_cast(ldc)); + std::vector c_ref_host(c_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + static_cast(nrows_A)); + } + + auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); + auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); + auto b_usm_uptr = malloc_device_uptr(main_queue, b_host.size()); + auto c_usm_uptr = malloc_device_uptr(main_queue, c_host.size()); + + intType *ia_usm = ia_usm_uptr.get(); + intType *ja_usm = ja_usm_uptr.get(); + fpType *a_usm = a_usm_uptr.get(); + fpType *b_usm = b_usm_uptr.get(); + fpType *c_usm = c_usm_uptr.get(); + + std::vector mat_dependencies; + std::vector spmm_dependencies; + // Copy host to device + mat_dependencies.push_back( + main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); + spmm_dependencies.push_back( + main_queue.memcpy(b_usm, 
b_host.data(), b_host.size() * sizeof(fpType))); + spmm_dependencies.push_back( + main_queue.memcpy(c_usm, c_host.data(), c_host.size() * sizeof(fpType))); + + sycl::event ev_copy, ev_spmm; + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_matrix_handle_t B_handle = nullptr; + oneapi::mkl::sparse::dense_matrix_handle_t C_handle = nullptr; + oneapi::mkl::sparse::spmm_descr_t descr = nullptr; + std::unique_ptr workspace_usm(nullptr, UsmDeleter(main_queue)); + try { + init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_usm, + ja_usm, a_usm); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); + } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &B_handle, + static_cast(opa_ncols), ncols_C, ldb, dense_matrix_layout, + b_usm); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &C_handle, + static_cast(opa_nrows), ncols_C, ldc, dense_matrix_layout, + c_usm); + + CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmm_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + workspace_size); + workspace_usm = malloc_device_uptr(main_queue, workspace_size); + + sycl::event ev_opt; + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A, + transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + workspace_usm.get(), mat_dependencies); + + spmm_dependencies.push_back(ev_opt); + CALL_RT_OR_CT(ev_spmm = oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + spmm_dependencies); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + format, nrows_A, ncols_A, density_A_matrix, indexing, ia_host, 
ja_host, a_host, + is_symmetric); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), + a_host.data(), nnz, static_cast(nrows_A)); + } + if (reset_nnz > nnz) { + ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); + ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); + ia_usm = ia_usm_uptr.get(); + ja_usm = ja_usm_uptr.get(); + a_usm = a_usm_uptr.get(); + } + nnz = reset_nnz; + + mat_dependencies.clear(); + mat_dependencies.push_back(main_queue.memcpy( + ia_usm, ia_host.data(), ia_host.size() * sizeof(intType), ev_spmm)); + mat_dependencies.push_back(main_queue.memcpy( + ja_usm, ja_host.data(), ja_host.size() * sizeof(intType), ev_spmm)); + mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType), ev_spmm)); + mat_dependencies.push_back( + main_queue.memcpy(c_usm, c_host.data(), c_host.size() * sizeof(fpType), ev_spmm)); + set_matrix_data(main_queue, format, A_handle, nrows_A, ncols_A, nnz, index, ia_usm, + ja_usm, a_usm); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A, + transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, + descr, workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_usm = malloc_device_uptr(main_queue, workspace_size_2); + } + + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A, + transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, + descr, workspace_usm.get(), mat_dependencies); + + CALL_RT_OR_CT(ev_spmm = oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + { ev_opt }); + } + + ev_copy = main_queue.memcpy(c_host.data(), c_usm, c_host.size() * sizeof(fpType), ev_spmm); + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous 
SYCL exception during sparse SPMM:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, B_handle, C_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmm_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPMM:\n" << error.what() << std::endl; + return 0; + } + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmm_descr, main_queue, descr, + { ev_spmm }); + ev_release_descr.wait_and_throw(); + free_handles(main_queue, { ev_spmm }, A_handle, B_handle, C_handle); + + // Compute reference. + prepare_reference_spmm_data(format, ia_host.data(), ja_host.data(), a_host.data(), nrows_A, + ncols_A, ncols_C, nnz, indexing, dense_matrix_layout, transpose_A, + transpose_B, alpha, beta, ldb, ldc, b_host.data(), A_view, + c_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + ev_copy.wait_and_throw(); + bool valid = check_equal_vector(c_host, c_ref_host); + + return static_cast(valid); +} + +class SparseSpmmUsmTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpmmUsmTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +TEST_P(SparseSpmmUsmTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmmUsmTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmmUsmTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpmmUsmTestSuite, SparseSpmmUsmTests, testing::ValuesIn(devices), + ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp new file mode 100644 index 000000000..ac834c91c --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp @@ -0,0 +1,247 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spmv.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, + double density_A_matrix, oneapi::mkl::index_base index, + oneapi::mkl::transpose transpose_val, fpType alpha, fpType beta, + oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data) { + sycl::queue main_queue(*dev, exception_handler_t()); + + if (require_square_matrix(A_view, matrix_properties)) { + ncols_A = nrows_A; + } + std::size_t opa_nrows = static_cast( + transpose_val == oneapi::mkl::transpose::nontrans ? nrows_A : ncols_A); + std::size_t opa_ncols = static_cast( + transpose_val == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A); + intType indexing = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + intType nnz = + generate_random_matrix(format, nrows_A, ncols_A, density_A_matrix, + indexing, ia_host, ja_host, a_host, is_symmetric); + + // Input and output dense vectors + // The input `x` and the input-output `y` are both initialized to random values on host and device. + std::vector x_host, y_host; + rand_vector(x_host, opa_ncols); + rand_vector(y_host, opa_nrows); + std::vector y_ref_host(y_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + static_cast(nrows_A)); + } + + auto ia_buf = make_buffer(ia_host); + auto ja_buf = make_buffer(ja_host); + auto a_buf = make_buffer(a_host); + auto x_buf = make_buffer(x_host); + auto y_buf = make_buffer(y_host); + + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::spmv_descr_t descr = nullptr; + try { + init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_buf, + ja_buf, a_buf); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); + } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle, + static_cast(x_host.size()), x_buf); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle, + static_cast(y_host.size()), y_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmv_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + 
CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_size); + sycl::buffer workspace_buf((sycl::range<1>(workspace_size))); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, &beta, y_handle, alg, descr, workspace_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, &beta, y_handle, alg, descr); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + format, nrows_A, ncols_A, density_A_matrix, indexing, ia_host, ja_host, a_host, + is_symmetric); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), + a_host.data(), nnz, static_cast(nrows_A)); + } + if (reset_nnz > nnz) { + ia_buf = make_buffer(ia_host); + ja_buf = make_buffer(ja_host); + a_buf = make_buffer(a_host); + } + else { + copy_host_to_buffer(main_queue, ia_host, ia_buf); + copy_host_to_buffer(main_queue, ja_host, ja_buf); + copy_host_to_buffer(main_queue, a_host, a_buf); + } + fill_buffer_to_0(main_queue, y_buf); + nnz = reset_nnz; + set_matrix_data(main_queue, format, A_handle, nrows_A, ncols_A, nnz, index, ia_buf, + ja_buf, a_buf); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, &beta, y_handle, alg, descr, + workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_buf = sycl::buffer((sycl::range<1>(workspace_size_2))); + } + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, &beta, y_handle, alg, descr); + } + } + catch (const sycl::exception &e) { + std::cout << "Caught 
synchronous SYCL exception during sparse SPMV:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmv_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPMV:\n" << error.what() << std::endl; + return 0; + } + CALL_RT_OR_CT(oneapi::mkl::sparse::release_spmv_descr, main_queue, descr); + free_handles(main_queue, A_handle, x_handle, y_handle); + + // Compute reference. + prepare_reference_spmv_data(format, ia_host.data(), ja_host.data(), a_host.data(), nrows_A, + ncols_A, nnz, indexing, transpose_val, alpha, beta, x_host.data(), + A_view, y_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + auto y_acc = y_buf.template get_host_access(sycl::read_only); + bool valid = check_equal_vector(y_acc, y_ref_host); + + return static_cast(valid); +} + +class SparseSpmvBufferTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpmvBufferTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +TEST_P(SparseSpmvBufferTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmvBufferTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmvBufferTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpmvBufferTestSuite, SparseSpmvBufferTests, + testing::ValuesIn(devices), ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp new file mode 100644 index 000000000..122b395d9 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp @@ -0,0 +1,285 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spmv.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, + double density_A_matrix, oneapi::mkl::index_base index, + oneapi::mkl::transpose transpose_val, fpType alpha, fpType beta, + oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data) { + sycl::queue main_queue(*dev, exception_handler_t()); + + if (require_square_matrix(A_view, matrix_properties)) { + ncols_A = nrows_A; + } + std::size_t opa_nrows = static_cast( + transpose_val == oneapi::mkl::transpose::nontrans ? 
nrows_A : ncols_A); + std::size_t opa_ncols = static_cast( + transpose_val == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A); + intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1; + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + intType nnz = + generate_random_matrix(format, nrows_A, ncols_A, density_A_matrix, + indexing, ia_host, ja_host, a_host, is_symmetric); + + // Input and output dense vectors + // The input `x` and the input-output `y` are both initialized to random values on host and device. + std::vector x_host, y_host; + rand_vector(x_host, opa_ncols); + rand_vector(y_host, opa_nrows); + std::vector y_ref_host(y_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + static_cast(nrows_A)); + } + + auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); + auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); + auto x_usm_uptr = malloc_device_uptr(main_queue, x_host.size()); + auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); + + intType *ia_usm = ia_usm_uptr.get(); + intType *ja_usm = ja_usm_uptr.get(); + fpType *a_usm = a_usm_uptr.get(); + fpType *x_usm = x_usm_uptr.get(); + fpType *y_usm = y_usm_uptr.get(); + + std::vector mat_dependencies; + std::vector spmv_dependencies; + // Copy host to device + mat_dependencies.push_back( + main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); + 
mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); + spmv_dependencies.push_back( + main_queue.memcpy(x_usm, x_host.data(), x_host.size() * sizeof(fpType))); + spmv_dependencies.push_back( + main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); + + sycl::event ev_copy, ev_spmv; + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::spmv_descr_t descr = nullptr; + std::unique_ptr workspace_usm(nullptr, UsmDeleter(main_queue)); + try { + init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_usm, + ja_usm, a_usm); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); + } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle, + static_cast(x_host.size()), x_usm); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle, + static_cast(y_host.size()), y_usm); + + CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmv_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_size); + workspace_usm = malloc_device_uptr(main_queue, workspace_size); + + sycl::event ev_opt; + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, + &alpha, A_view, A_handle, x_handle, &beta, y_handle, alg, descr, + workspace_usm.get(), mat_dependencies); + + spmv_dependencies.push_back(ev_opt); + CALL_RT_OR_CT(ev_spmv = oneapi::mkl::sparse::spmv, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, &beta, y_handle, alg, descr, spmv_dependencies); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + 
format, nrows_A, ncols_A, density_A_matrix, indexing, ia_host, ja_host, a_host, + is_symmetric); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), + a_host.data(), nnz, static_cast(nrows_A)); + } + if (reset_nnz > nnz) { + ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); + ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); + ia_usm = ia_usm_uptr.get(); + ja_usm = ja_usm_uptr.get(); + a_usm = a_usm_uptr.get(); + } + nnz = reset_nnz; + + mat_dependencies.clear(); + mat_dependencies.push_back(main_queue.memcpy( + ia_usm, ia_host.data(), ia_host.size() * sizeof(intType), ev_spmv)); + mat_dependencies.push_back(main_queue.memcpy( + ja_usm, ja_host.data(), ja_host.size() * sizeof(intType), ev_spmv)); + mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType), ev_spmv)); + mat_dependencies.push_back( + main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType), ev_spmv)); + set_matrix_data(main_queue, format, A_handle, nrows_A, ncols_A, nnz, index, ia_usm, + ja_usm, a_usm); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, &beta, y_handle, alg, descr, + workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_usm = malloc_device_uptr(main_queue, workspace_size_2); + } + + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, + &alpha, A_view, A_handle, x_handle, &beta, y_handle, alg, descr, + workspace_usm.get(), mat_dependencies); + + CALL_RT_OR_CT(ev_spmv = oneapi::mkl::sparse::spmv, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, &beta, y_handle, alg, descr, { ev_opt }); + } + + ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), ev_spmv); + } + catch (const sycl::exception &e) { + 
std::cout << "Caught synchronous SYCL exception during sparse SPMV:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmv_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPMV:\n" << error.what() << std::endl; + return 0; + } + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmv_descr, main_queue, descr, + { ev_spmv }); + ev_release_descr.wait_and_throw(); + free_handles(main_queue, { ev_spmv }, A_handle, x_handle, y_handle); + + // Compute reference. + prepare_reference_spmv_data(format, ia_host.data(), ja_host.data(), a_host.data(), nrows_A, + ncols_A, nnz, indexing, transpose_val, alpha, beta, x_host.data(), + A_view, y_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + ev_copy.wait_and_throw(); + bool valid = check_equal_vector(y_host, y_ref_host); + + return static_cast(valid); +} + +class SparseSpmvUsmTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpmvUsmTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +TEST_P(SparseSpmvUsmTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmvUsmTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmvUsmTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spmv, test_spmv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpmvUsmTestSuite, SparseSpmvUsmTests, testing::ValuesIn(devices), + ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp new file mode 100644 index 000000000..1878b179f --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp @@ -0,0 +1,242 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spsv.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, double density_A_matrix, + oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, + oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data) { + sycl::queue main_queue(*dev, exception_handler_t()); + + intType indexing = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; + const std::size_t mu = static_cast(m); + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + // Set non-zero values to the diagonal, except if the matrix is viewed as a unit matrix. + const bool require_diagonal = + !(A_view.type_view == oneapi::mkl::sparse::matrix_descr::diagonal && + A_view.diag_view == oneapi::mkl::diag::unit); + intType nnz = + generate_random_matrix(format, m, m, density_A_matrix, indexing, ia_host, + ja_host, a_host, is_symmetric, require_diagonal); + + // Input dense vector. + // The input `x` is initialized to random values on host and device. + std::vector x_host; + rand_vector(x_host, mu); + + // Output and reference dense vectors. + // They are both initialized with a dummy value to catch more errors. 
+ std::vector y_host(mu, -2.0f); + std::vector y_ref_host(y_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + mu); + } + + auto ia_buf = make_buffer(ia_host); + auto ja_buf = make_buffer(ja_host); + auto a_buf = make_buffer(a_host); + auto x_buf = make_buffer(x_host); + auto y_buf = make_buffer(y_host); + + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::spsv_descr_t descr = nullptr; + try { + init_sparse_matrix(main_queue, format, &A_handle, m, m, nnz, index, ia_buf, ja_buf, a_buf); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); + } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle, m, x_buf); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle, m, y_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::init_spsv_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, y_handle, alg, descr, workspace_size); + sycl::buffer workspace_buf((sycl::range<1>(workspace_size))); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, y_handle, alg, descr, workspace_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, y_handle, alg, descr); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + format, m, m, density_A_matrix, indexing, ia_host, ja_host, a_host, is_symmetric, + require_diagonal); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), 
ja_host.data(), + a_host.data(), nnz, mu); + } + if (reset_nnz > nnz) { + ia_buf = make_buffer(ia_host); + ja_buf = make_buffer(ja_host); + a_buf = make_buffer(a_host); + } + else { + copy_host_to_buffer(main_queue, ia_host, ia_buf); + copy_host_to_buffer(main_queue, ja_host, ja_buf); + copy_host_to_buffer(main_queue, a_host, a_buf); + } + fill_buffer_to_0(main_queue, y_buf); + nnz = reset_nnz; + set_matrix_data(main_queue, format, A_handle, m, m, nnz, index, ia_buf, ja_buf, a_buf); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, y_handle, alg, descr, workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_buf = sycl::buffer((sycl::range<1>(workspace_size_2))); + } + + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, y_handle, alg, descr, workspace_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, y_handle, alg, descr); + } + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous SYCL exception during sparse SPSV:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spsv_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPSV:\n" << error.what() << std::endl; + return 0; + } + CALL_RT_OR_CT(oneapi::mkl::sparse::release_spsv_descr, main_queue, descr); + free_handles(main_queue, A_handle, x_handle, y_handle); + + // Compute reference. 
+ prepare_reference_spsv_data(format, ia_host.data(), ja_host.data(), a_host.data(), m, nnz, + indexing, transpose_val, x_host.data(), alpha, A_view, + y_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + auto y_acc = y_buf.template get_host_access(sycl::read_only); + bool valid = check_equal_vector(y_acc, y_ref_host); + + return static_cast(valid); +} + +class SparseSpsvBufferTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpsvBufferTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpsvBufferTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +TEST_P(SparseSpsvBufferTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpsvBufferTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpsvBufferTestSuite, SparseSpsvBufferTests, + testing::ValuesIn(devices), ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp new file mode 100644 index 000000000..f6d4f5db7 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp @@ -0,0 +1,280 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spsv.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, double density_A_matrix, + oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, + oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data) { + sycl::queue main_queue(*dev, exception_handler_t()); + + intType indexing = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; + const std::size_t mu = static_cast(m); + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + // Set non-zero values to the diagonal, except if the matrix is viewed as a unit matrix. + const bool require_diagonal = + !(A_view.type_view == oneapi::mkl::sparse::matrix_descr::diagonal && + A_view.diag_view == oneapi::mkl::diag::unit); + intType nnz = + generate_random_matrix(format, m, m, density_A_matrix, indexing, ia_host, + ja_host, a_host, is_symmetric, require_diagonal); + + // Input dense vector. + // The input `x` is initialized to random values on host and device. + std::vector x_host; + rand_vector(x_host, mu); + + // Output and reference dense vectors. + // They are both initialized with a dummy value to catch more errors. 
+ std::vector y_host(mu, -2.0f); + std::vector y_ref_host(y_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + mu); + } + + auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); + auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); + auto x_usm_uptr = malloc_device_uptr(main_queue, x_host.size()); + auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); + + intType *ia_usm = ia_usm_uptr.get(); + intType *ja_usm = ja_usm_uptr.get(); + fpType *a_usm = a_usm_uptr.get(); + fpType *x_usm = x_usm_uptr.get(); + fpType *y_usm = y_usm_uptr.get(); + + std::vector mat_dependencies; + std::vector spsv_dependencies; + // Copy host to device + mat_dependencies.push_back( + main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); + spsv_dependencies.push_back( + main_queue.memcpy(x_usm, x_host.data(), x_host.size() * sizeof(fpType))); + spsv_dependencies.push_back( + main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); + + sycl::event ev_copy, ev_spsv; + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::spsv_descr_t descr = nullptr; + std::unique_ptr workspace_usm(nullptr, UsmDeleter(main_queue)); + try { + init_sparse_matrix(main_queue, format, &A_handle, m, m, nnz, index, ia_usm, ja_usm, a_usm); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); 
+ } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle, m, x_usm); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle, m, y_usm); + + CALL_RT_OR_CT(oneapi::mkl::sparse::init_spsv_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, y_handle, alg, descr, workspace_size); + workspace_usm = malloc_device_uptr(main_queue, workspace_size); + + sycl::event ev_opt; + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, + &alpha, A_view, A_handle, x_handle, y_handle, alg, descr, workspace_usm.get(), + mat_dependencies); + + spsv_dependencies.push_back(ev_opt); + CALL_RT_OR_CT(ev_spsv = oneapi::mkl::sparse::spsv, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, y_handle, alg, descr, spsv_dependencies); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + format, m, m, density_A_matrix, indexing, ia_host, ja_host, a_host, is_symmetric, + require_diagonal); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), + a_host.data(), nnz, mu); + } + if (reset_nnz > nnz) { + ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); + ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); + ia_usm = ia_usm_uptr.get(); + ja_usm = ja_usm_uptr.get(); + a_usm = a_usm_uptr.get(); + } + nnz = reset_nnz; + + mat_dependencies.clear(); + mat_dependencies.push_back(main_queue.memcpy( + ia_usm, ia_host.data(), ia_host.size() * sizeof(intType), ev_spsv)); + mat_dependencies.push_back(main_queue.memcpy( + ja_usm, ja_host.data(), ja_host.size() * sizeof(intType), ev_spsv)); + mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType), ev_spsv)); + mat_dependencies.push_back( + 
main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType), ev_spsv)); + set_matrix_data(main_queue, format, A_handle, m, m, nnz, index, ia_usm, ja_usm, a_usm); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, y_handle, alg, descr, workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_usm = malloc_device_uptr(main_queue, workspace_size_2); + } + + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, + &alpha, A_view, A_handle, x_handle, y_handle, alg, descr, + workspace_usm.get(), mat_dependencies); + + CALL_RT_OR_CT(ev_spsv = oneapi::mkl::sparse::spsv, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, y_handle, alg, descr, { ev_opt }); + } + + ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), ev_spsv); + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous SYCL exception during sparse SPSV:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spsv_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPSV:\n" << error.what() << std::endl; + return 0; + } + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spsv_descr, main_queue, descr, + { ev_spsv }); + ev_release_descr.wait_and_throw(); + free_handles(main_queue, { ev_spsv }, A_handle, x_handle, y_handle); + + // Compute reference. 
+ prepare_reference_spsv_data(format, ia_host.data(), ja_host.data(), a_host.data(), m, nnz, + indexing, transpose_val, x_host.data(), alpha, A_view, + y_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + ev_copy.wait_and_throw(); + bool valid = check_equal_vector(y_host, y_ref_host); + + return static_cast(valid); +} + +class SparseSpsvUsmTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpsvUsmTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpsvUsmTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +TEST_P(SparseSpsvUsmTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpsvUsmTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpsvUsmTestSuite, SparseSpsvUsmTests, testing::ValuesIn(devices), + ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_trsv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_trsv_buffer.cpp deleted file mode 100644 index 00ec6e5ed..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_trsv_buffer.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType m, double density_A_matrix, oneapi::mkl::index_base index, - oneapi::mkl::uplo uplo_val, oneapi::mkl::transpose transpose_val, - oneapi::mkl::diag diag_val, bool use_optimize) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; - const std::size_t mu = static_cast(m); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - // Always require values to be present in the diagonal of the sparse matrix. - // The values set in the matrix don't need to be 1s even if diag_val is unit. - const bool require_diagonal = true; - intType nnz = generate_random_matrix( - m, m, density_A_matrix, int_index, ia_host, ja_host, a_host, require_diagonal); - - // Input dense vector. - // The input `x` is initialized to random values on host and device. - std::vector x_host; - rand_vector(x_host, mu); - - // Output and reference dense vectors. - // They are both initialized with a dummy value to catch more errors. - std::vector y_host(mu, -2.0f); - std::vector y_ref_host(y_host); - - // Intel oneMKL does not support unsorted data if - // `sparse::optimize_trsv()` is not called first. - if (use_optimize) { - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), mu); - } - - auto ia_buf = make_buffer(ia_host); - auto ja_buf = make_buffer(ja_host); - auto a_buf = make_buffer(a_host); - auto x_buf = make_buffer(x_host); - auto y_buf = make_buffer(y_host); - - sycl::event ev_release; - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - try { - CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(oneapi::mkl::sparse::set_csr_data, main_queue, handle, m, m, nnz, index, - ia_buf, ja_buf, a_buf); - - if (use_optimize) { - CALL_RT_OR_CT(oneapi::mkl::sparse::optimize_trsv, main_queue, uplo_val, transpose_val, - diag_val, handle); - } - - CALL_RT_OR_CT(oneapi::mkl::sparse::trsv, main_queue, uplo_val, transpose_val, diag_val, - handle, x_buf, y_buf); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse TRSV:\n" - << 
e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse TRSV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_trsv_data(ia_host.data(), ja_host.data(), a_host.data(), m, int_index, - uplo_val, transpose_val, diag_val, x_host.data(), - y_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. - auto y_acc = y_buf.template get_host_access(sycl::read_only); - bool valid = check_equal_vector(y_acc, y_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseTrsvBufferTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. - * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_val Transpose value for the input matrix - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -auto test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, - int &num_skipped) { - double density_A_matrix = 0.144; - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - oneapi::mkl::uplo lower = oneapi::mkl::uplo::lower; - oneapi::mkl::diag nonunit = oneapi::mkl::diag::nonunit; - int m = 277; - bool use_optimize = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, index_zero, lower, - transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, oneapi::mkl::index_base::one, - lower, transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - // Test upper triangular 
matrix - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, oneapi::mkl::uplo::upper, transpose_val, - nonunit, use_optimize), - num_passed, num_skipped); - // Test unit diagonal matrix - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, index_zero, lower, - transpose_val, oneapi::mkl::diag::unit, use_optimize), - num_passed, num_skipped); - // Temporarily disable trsv using long indices on GPU - if (!dev->is_gpu()) { - // Test int64 indices - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 15L, density_A_matrix, index_zero, lower, - transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - } - // Test lower without optimize_trsv - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, lower, transpose_val, nonunit, false), - num_passed, num_skipped); - // Test upper without optimize_trsv - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, oneapi::mkl::uplo::upper, transpose_val, - nonunit, false), - num_passed, num_skipped); -} - -TEST_P(SparseTrsvBufferTests, RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseTrsvBufferTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseTrsvBufferTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseTrsvBufferTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseTrsvBufferTestSuite, SparseTrsvBufferTests, - testing::ValuesIn(devices), ::DeviceNamePrint()); - -} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_trsv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_trsv_usm.cpp deleted file mode 100644 index 8292395fb..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_trsv_usm.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType m, double density_A_matrix, oneapi::mkl::index_base index, - oneapi::mkl::uplo uplo_val, oneapi::mkl::transpose transpose_val, - oneapi::mkl::diag diag_val, bool use_optimize) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; - const std::size_t mu = static_cast(m); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - const bool require_diagonal = diag_val == oneapi::mkl::diag::nonunit; - intType nnz = generate_random_matrix( - m, m, density_A_matrix, int_index, ia_host, ja_host, a_host, require_diagonal); - - // Input dense vector. - // The input `x` is initialized to random values on host and device. - std::vector x_host; - rand_vector(x_host, mu); - - // Output and reference dense vectors. - // They are both initialized with a dummy value to catch more errors. - std::vector y_host(mu, -2.0f); - std::vector y_ref_host(y_host); - - // Intel oneMKL does not support unsorted data if - // `sparse::optimize_trsv()` is not called first. 
- if (use_optimize) { - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), mu); - } - - auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); - auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); - auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); - auto x_usm_uptr = malloc_device_uptr(main_queue, x_host.size()); - auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); - - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *x_usm = x_usm_uptr.get(); - fpType *y_usm = y_usm_uptr.get(); - - std::vector mat_dependencies; - std::vector trsv_dependencies; - // Copy host to device - mat_dependencies.push_back( - main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); - trsv_dependencies.push_back( - main_queue.memcpy(x_usm, x_host.data(), x_host.size() * sizeof(fpType))); - trsv_dependencies.push_back( - main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); - - sycl::event ev_copy, ev_release; - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - try { - sycl::event event; - CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(event = oneapi::mkl::sparse::set_csr_data, main_queue, handle, m, m, nnz, - index, ia_usm, ja_usm, a_usm, mat_dependencies); - - if (use_optimize) { - CALL_RT_OR_CT(event = oneapi::mkl::sparse::optimize_trsv, main_queue, uplo_val, - transpose_val, diag_val, handle, { event }); - } - - trsv_dependencies.push_back(event); - CALL_RT_OR_CT(event = oneapi::mkl::sparse::trsv, main_queue, uplo_val, transpose_val, - diag_val, handle, x_usm, y_usm, 
trsv_dependencies); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle, - { event }); - - ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), event); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse TRSV:\n" - << e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse TRSV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_trsv_data(ia_host.data(), ja_host.data(), a_host.data(), m, int_index, - uplo_val, transpose_val, diag_val, x_host.data(), - y_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. - ev_copy.wait_and_throw(); - bool valid = check_equal_vector(y_host, y_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseTrsvUsmTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. 
- * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_val Transpose value for the input matrix - */ -template -void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, - int &num_skipped) { - double density_A_matrix = 0.144; - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - oneapi::mkl::uplo lower = oneapi::mkl::uplo::lower; - oneapi::mkl::diag nonunit = oneapi::mkl::diag::nonunit; - int m = 277; - bool use_optimize = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, index_zero, lower, - transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, oneapi::mkl::index_base::one, - lower, transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - // Test upper triangular matrix - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, oneapi::mkl::uplo::upper, transpose_val, - nonunit, use_optimize), - num_passed, num_skipped); - // Test unit diagonal matrix - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, index_zero, lower, - transpose_val, oneapi::mkl::diag::unit, use_optimize), - num_passed, num_skipped); - // Temporarily disable trsv using long indices on GPU - if (!dev->is_gpu()) { - // Test int64 indices - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 15L, density_A_matrix, index_zero, lower, - transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - } - // Test lower without optimize_trsv - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, lower, transpose_val, nonunit, false), - num_passed, num_skipped); - // Test upper without optimize_trsv - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, oneapi::mkl::uplo::upper, transpose_val, - nonunit, false), - num_passed, num_skipped); -} - -TEST_P(SparseTrsvUsmTests, 
RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseTrsvUsmTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseTrsvUsmTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseTrsvUsmTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseTrsvUsmTestSuite, SparseTrsvUsmTests, testing::ValuesIn(devices), - ::DeviceNamePrint()); - -} // anonymous namespace From 3a967be15db13c299636ed1cf8d24fee0bbde142 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 7 Jun 2024 08:25:07 +0100 Subject: [PATCH 02/39] Do not allow changing data types of dense handles --- .../backends/mkl_common/mkl_handles.cxx | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx index 38d102768..f3ff5afa2 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx @@ -32,10 +32,23 @@ void init_dense_vector(sycl::queue & /*queue*/, *p_dvhandle = new oneapi::mkl::sparse::dense_vector_handle(val, size); } +template +void check_can_reset_value_handle(const std::string &function_name, + InternalHandleT *internal_handle) { + if (internal_handle->get_value_type() != detail::get_data_type()) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, + "Incompatible data types expected " + + data_type_to_str(internal_handle->get_value_type()) + " but got " + + data_type_to_str(detail::get_data_type())); + } +} + template void set_dense_vector_data(sycl::queue & /*queue*/, 
oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t size, sycl::buffer val) { + check_can_reset_value_handle(__FUNCTION__, dvhandle); dvhandle->size = size; dvhandle->set_buffer(val); } @@ -44,6 +57,7 @@ template void set_dense_vector_data(sycl::queue & /*queue*/, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t size, fpType *val) { + check_can_reset_value_handle(__FUNCTION__, dvhandle); dvhandle->size = size; dvhandle->set_usm_ptr(val); } @@ -94,6 +108,7 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, oneapi::mkl::layout dense_layout, sycl::buffer val) { + check_can_reset_value_handle(__FUNCTION__, dmhandle); dmhandle->num_rows = num_rows; dmhandle->num_cols = num_cols; dmhandle->ld = ld; @@ -106,6 +121,7 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, oneapi::mkl::layout dense_layout, fpType *val) { + check_can_reset_value_handle(__FUNCTION__, dmhandle); dmhandle->num_rows = num_rows; dmhandle->num_cols = num_cols; dmhandle->ld = ld; @@ -173,15 +189,9 @@ void init_coo_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p } template -void check_can_reset(const std::string &function_name, - detail::sparse_matrix_handle *internal_smhandle) { - if (internal_smhandle->get_value_type() != detail::get_data_type()) { - throw oneapi::mkl::invalid_argument( - "sparse_blas", function_name, - "Incompatible data types expected " + - data_type_to_str(internal_smhandle->get_value_type()) + " but got " + - data_type_to_str(detail::get_data_type())); - } +void check_can_reset_sparse_handle(const std::string &function_name, + detail::sparse_matrix_handle *internal_smhandle) { + check_can_reset_value_handle(function_name, internal_smhandle); if (internal_smhandle->get_int_type() != 
detail::get_data_type()) { throw oneapi::mkl::invalid_argument( "sparse_blas", function_name, @@ -202,7 +212,7 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - check_can_reset(__FUNCTION__, internal_smhandle); + check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle); internal_smhandle->row_container.set_buffer(row_ind); internal_smhandle->col_container.set_buffer(col_ind); internal_smhandle->value_container.set_buffer(val); @@ -221,7 +231,7 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, intType *row_ind, intType *col_ind, fpType *val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - check_can_reset(__FUNCTION__, internal_smhandle); + check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle); internal_smhandle->row_container.set_usm_ptr(row_ind); internal_smhandle->col_container.set_usm_ptr(col_ind); internal_smhandle->value_container.set_usm_ptr(val); @@ -298,7 +308,7 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - check_can_reset(__FUNCTION__, internal_smhandle); + check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle); internal_smhandle->row_container.set_buffer(row_ptr); internal_smhandle->col_container.set_buffer(col_ind); internal_smhandle->value_container.set_buffer(val); @@ -318,7 +328,7 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, intType *row_ptr, intType *col_ind, fpType *val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - check_can_reset(__FUNCTION__, internal_smhandle); + 
check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle); internal_smhandle->row_container.set_usm_ptr(row_ptr); internal_smhandle->col_container.set_usm_ptr(col_ind); internal_smhandle->value_container.set_usm_ptr(val); From 9667522fd89dfc8c83e45a801ba3d34617779b57 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 7 Jun 2024 08:25:11 +0100 Subject: [PATCH 03/39] Add check container type is not changed --- .../backends/mkl_common/mkl_handles.cxx | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx index f3ff5afa2..0a80130f2 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx @@ -34,7 +34,7 @@ void init_dense_vector(sycl::queue & /*queue*/, template void check_can_reset_value_handle(const std::string &function_name, - InternalHandleT *internal_handle) { + InternalHandleT *internal_handle, bool expect_buffer) { if (internal_handle->get_value_type() != detail::get_data_type()) { throw oneapi::mkl::invalid_argument( "sparse_blas", function_name, @@ -42,13 +42,17 @@ void check_can_reset_value_handle(const std::string &function_name, data_type_to_str(internal_handle->get_value_type()) + " but got " + data_type_to_str(detail::get_data_type())); } + if (internal_handle->all_use_buffer() != expect_buffer) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, "Cannot change the container type between buffer or USM"); + } } template void set_dense_vector_data(sycl::queue & /*queue*/, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t size, sycl::buffer val) { - check_can_reset_value_handle(__FUNCTION__, dvhandle); + check_can_reset_value_handle(__FUNCTION__, dvhandle, true); dvhandle->size = size; dvhandle->set_buffer(val); } @@ -57,7 +61,7 @@ template void set_dense_vector_data(sycl::queue & /*queue*/, 
oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t size, fpType *val) { - check_can_reset_value_handle(__FUNCTION__, dvhandle); + check_can_reset_value_handle(__FUNCTION__, dvhandle, false); dvhandle->size = size; dvhandle->set_usm_ptr(val); } @@ -108,7 +112,7 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, oneapi::mkl::layout dense_layout, sycl::buffer val) { - check_can_reset_value_handle(__FUNCTION__, dmhandle); + check_can_reset_value_handle(__FUNCTION__, dmhandle, true); dmhandle->num_rows = num_rows; dmhandle->num_cols = num_cols; dmhandle->ld = ld; @@ -121,7 +125,7 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, oneapi::mkl::layout dense_layout, fpType *val) { - check_can_reset_value_handle(__FUNCTION__, dmhandle); + check_can_reset_value_handle(__FUNCTION__, dmhandle, false); dmhandle->num_rows = num_rows; dmhandle->num_cols = num_cols; dmhandle->ld = ld; @@ -190,8 +194,9 @@ void init_coo_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p template void check_can_reset_sparse_handle(const std::string &function_name, - detail::sparse_matrix_handle *internal_smhandle) { - check_can_reset_value_handle(function_name, internal_smhandle); + detail::sparse_matrix_handle *internal_smhandle, + bool expect_buffer) { + check_can_reset_value_handle(function_name, internal_smhandle, expect_buffer); if (internal_smhandle->get_int_type() != detail::get_data_type()) { throw oneapi::mkl::invalid_argument( "sparse_blas", function_name, @@ -212,7 +217,7 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - 
check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle); + check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle, true); internal_smhandle->row_container.set_buffer(row_ind); internal_smhandle->col_container.set_buffer(col_ind); internal_smhandle->value_container.set_buffer(val); @@ -231,7 +236,7 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, intType *row_ind, intType *col_ind, fpType *val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle); + check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle, false); internal_smhandle->row_container.set_usm_ptr(row_ind); internal_smhandle->col_container.set_usm_ptr(col_ind); internal_smhandle->value_container.set_usm_ptr(val); @@ -308,7 +313,7 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle); + check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle, true); internal_smhandle->row_container.set_buffer(row_ptr); internal_smhandle->col_container.set_buffer(col_ind); internal_smhandle->value_container.set_buffer(val); @@ -328,7 +333,7 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, intType *row_ptr, intType *col_ind, fpType *val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle); + check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle, false); internal_smhandle->row_container.set_usm_ptr(row_ptr); internal_smhandle->col_container.set_usm_ptr(col_ind); internal_smhandle->value_container.set_usm_ptr(val); From c64b57c7cb74ed4af6d2fa1864520152c658ab5a 
Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 7 Jun 2024 08:25:14 +0100 Subject: [PATCH 04/39] Fix is_ptr_accessible_on_host --- src/sparse_blas/backends/mkl_common/mkl_helper.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp index 316c4d7dc..1562c240b 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp +++ b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp @@ -39,17 +39,18 @@ template inline bool is_ptr_accessible_on_host(sycl::queue &queue, const T *host_or_device_ptr) { auto alloc_type = sycl::get_pointer_type(host_or_device_ptr, queue.get_context()); // Note sycl::usm::alloc::host may not be accessible on the host according to SYCL specification. - return alloc_type == sycl::usm::alloc::shared; + // sycl::usm::alloc::unknown is returned if the pointer is not a USM allocation which is assumed to be a normal host pointer. + return alloc_type == sycl::usm::alloc::shared || alloc_type == sycl::usm::alloc::unknown; } /// Throw an exception if the scalar is not accessible in the host template void check_ptr_is_host_accessible(const std::string &function_name, const std::string &scalar_name, sycl::queue &queue, const T *host_or_device_ptr) { - if (is_ptr_accessible_on_host(queue, host_or_device_ptr)) { + if (!is_ptr_accessible_on_host(queue, host_or_device_ptr)) { throw mkl::invalid_argument( "sparse_blas", function_name, - "Scalar " + scalar_name + "must be accessible on the host for buffer functions."); + "Scalar " + scalar_name + " must be accessible on the host for buffer functions."); } } From 02483e74b7ffc4022d6bb8633eb593a84a1ea9b9 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 7 Jun 2024 08:25:18 +0100 Subject: [PATCH 05/39] Check workspace container is compatible with the handles --- src/sparse_blas/backends/mkl_common/mkl_spmm.cxx | 10 ++++++++-- 
src/sparse_blas/backends/mkl_common/mkl_spmv.cxx | 10 ++++++++-- src/sparse_blas/backends/mkl_common/mkl_spsv.cxx | 10 ++++++++-- src/sparse_blas/generic_container.hpp | 10 +++++++--- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index e870341ff..6e3038122 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -94,10 +94,13 @@ void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl:: oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, sycl::buffer /*workspace*/) { check_valid_spmm(__FUNCTION__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (!internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__FUNCTION__); + } if (alg == oneapi::mkl::sparse::spmm_alg::no_optimize_alg) { return; } - auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; // TODO: Add support for spmm_optimize once the close-source oneMKL backend supports it. } @@ -112,10 +115,13 @@ sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, void * /*workspace*/, const std::vector &dependencies) { check_valid_spmm(__FUNCTION__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__FUNCTION__); + } if (alg == oneapi::mkl::sparse::spmm_alg::no_optimize_alg) { return detail::collapse_dependencies(queue, dependencies); } - auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; // TODO: Add support for spmm_optimize once the close-source oneMKL backend supports it. 
return detail::collapse_dependencies(queue, dependencies); diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index 73efe4e7d..6950dc700 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -80,11 +80,14 @@ void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, sycl::buffer /*workspace*/) { check_valid_spmv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, beta); + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (!internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__FUNCTION__); + } if (alg == oneapi::mkl::sparse::spmv_alg::no_optimize_alg) { return; } sycl::event event; - auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; if (A_view.type_view == matrix_descr::triangular) { event = oneapi::mkl::sparse::optimize_trmv(queue, A_view.uplo_view, opA, A_view.diag_view, @@ -111,10 +114,13 @@ sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, void * /*workspace*/, const std::vector &dependencies) { check_valid_spmv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, beta); + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__FUNCTION__); + } if (alg == oneapi::mkl::sparse::spmv_alg::no_optimize_alg) { return detail::collapse_dependencies(queue, dependencies); } - auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; if (A_view.type_view == matrix_descr::triangular) { return oneapi::mkl::sparse::optimize_trmv(queue, A_view.uplo_view, opA, A_view.diag_view, diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx 
b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index bd8094f90..8fef1339d 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -80,10 +80,13 @@ void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, sycl::buffer /*workspace*/) { check_valid_spsv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (!internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__FUNCTION__); + } if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg) { return; } - auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; auto event = oneapi::mkl::sparse::optimize_trsv(queue, A_view.uplo_view, opA, A_view.diag_view, internal_A_handle->backend_handle); @@ -100,10 +103,13 @@ sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, void * /*workspace*/, const std::vector &dependencies) { check_valid_spsv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__FUNCTION__); + } if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg) { return detail::collapse_dependencies(queue, dependencies); } - auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; return oneapi::mkl::sparse::optimize_trsv(queue, A_view.uplo_view, opA, A_view.diag_view, internal_A_handle->backend_handle, dependencies); diff --git a/src/sparse_blas/generic_container.hpp b/src/sparse_blas/generic_container.hpp index 46732722d..53bd50837 100644 --- a/src/sparse_blas/generic_container.hpp +++ b/src/sparse_blas/generic_container.hpp @@ 
-269,6 +269,12 @@ struct generic_sparse_handle { } }; +inline void throw_incompatible_container(const std::string& function_name) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, + "Incompatible container types. All inputs and outputs must use the same container: buffer or USM"); +} + /** * Check that all internal containers use the same container. */ @@ -279,9 +285,7 @@ void check_all_containers_use_buffers(const std::string& function_name, bool first_use_buffer = first_internal_container->all_use_buffer(); for (const auto internal_container : { internal_containers... }) { if (internal_container->all_use_buffer() != first_use_buffer) { - throw oneapi::mkl::invalid_argument( - "sparse_blas", function_name, - "Incompatible container types. All inputs and outputs must use the same container: buffer or USM"); + throw_incompatible_container(function_name); } } } From 5cb4518cf33971938b3f509ece4e9d1df118f97b Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 7 Jun 2024 08:25:22 +0100 Subject: [PATCH 06/39] Fix example static_cast --- .../sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp index 6b14881fe..4a8a4280b 100644 --- a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp +++ b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp @@ -148,8 +148,8 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // Create and initialize dense vector handles oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; - oneapi::mkl::sparse::init_dense_vector(main_queue, &x_handle, static_cast(sizevec), x); - oneapi::mkl::sparse::init_dense_vector(main_queue, &y_handle, static_cast(sizevec), y); + 
oneapi::mkl::sparse::init_dense_vector(main_queue, &x_handle, static_cast(sizevec), x); + oneapi::mkl::sparse::init_dense_vector(main_queue, &y_handle, static_cast(sizevec), y); // Create operation descriptor oneapi::mkl::sparse::spmv_descr_t descr = nullptr; From 7190c6ad8446060643022fe163f57b4c1d324cb3 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 7 Jun 2024 08:25:26 +0100 Subject: [PATCH 07/39] Disallow symmetric/hermitian conjtrans configurations for spmv --- .../backends/mkl_common/mkl_spmv.cxx | 22 ++++++++---- .../include/common_sparse_reference.hpp | 4 +-- .../sparse_blas/include/test_spmv.hpp | 34 ++++++++++--------- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index 6950dc700..7f809c75e 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -30,7 +30,7 @@ sycl::event release_spmv_descr(sycl::queue &queue, oneapi::mkl::sparse::spmv_des } void check_valid_spmv(const std::string function_name, sycl::queue &queue, - oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, const void *alpha, @@ -51,14 +51,22 @@ void check_valid_spmv(const std::string function_name, sycl::queue &queue, } if (A_view.type_view != oneapi::mkl::sparse::matrix_descr::triangular && - A_view.diag_view != oneapi::mkl::diag::nonunit) { + A_view.diag_view == oneapi::mkl::diag::unit) { throw mkl::invalid_argument( "sparse_blas", function_name, "`unit` diag_view can only be used with a triangular type_view."); } + + if ((A_view.type_view == oneapi::mkl::sparse::matrix_descr::symmetric || + A_view.type_view == oneapi::mkl::sparse::matrix_descr::hermitian) && + opA == 
oneapi::mkl::transpose::conjtrans) { + throw mkl::invalid_argument( + "sparse_blas", function_name, + "Symmetric or Hermitian matrix cannot be conjugated with `conjtrans`."); + } } -void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose /*opA*/, const void *alpha, +void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, @@ -67,7 +75,7 @@ void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose /*opA*/, const oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, std::size_t &temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. - check_valid_spmv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, beta); + check_valid_spmv(__FUNCTION__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); temp_buffer_size = 0; } @@ -79,7 +87,7 @@ void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, sycl::buffer /*workspace*/) { - check_valid_spmv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, beta); + check_valid_spmv(__FUNCTION__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__FUNCTION__); @@ -113,7 +121,7 @@ sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, void * /*workspace*/, const std::vector &dependencies) { - check_valid_spmv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, beta); + check_valid_spmv(__FUNCTION__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, 
beta); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__FUNCTION__); @@ -196,7 +204,7 @@ sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alp oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, const std::vector &dependencies) { - check_valid_spmv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, beta); + check_valid_spmv(__FUNCTION__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); DISPATCH_MKL_OPERATION("spmv", value_type, internal_spmv, queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, dependencies); diff --git a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp index 7949342d3..d8b11e6b7 100644 --- a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp +++ b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp @@ -161,9 +161,7 @@ std::vector sparse_to_dense(sparse_matrix_format_t format, const intType const bool is_symmetric_or_hermitian_view = type_view == oneapi::mkl::sparse::matrix_descr::symmetric || type_view == oneapi::mkl::sparse::matrix_descr::hermitian; - // Matrices are not conjugated if they are symmetric - const bool apply_conjugate = - !is_symmetric_or_hermitian_view && transpose_val == oneapi::mkl::transpose::conjtrans; + const bool apply_conjugate = transpose_val == oneapi::mkl::transpose::conjtrans; std::vector dense_a(a_nrows * a_ncols, fpType(0)); auto write_to_dense_if_needed = [&](std::size_t a_idx, std::size_t row, std::size_t col) { diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp index eee9ec124..70738dd02 100644 --- 
a/tests/unit_tests/sparse_blas/include/test_spmv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp @@ -143,22 +143,24 @@ void test_helper_with_format( fp_one, fp_zero, default_alg, triangular_unit_A_view, no_properties, no_reset_data), num_passed, num_skipped); - // Lower symmetric or hermitian - oneapi::mkl::sparse::matrix_view symmetric_view( - complex_info::is_complex ? oneapi::mkl::sparse::matrix_descr::hermitian - : oneapi::mkl::sparse::matrix_descr::symmetric); - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, - fp_one, fp_zero, default_alg, symmetric_view, no_properties, - no_reset_data), - num_passed, num_skipped); - // Upper symmetric or hermitian - symmetric_view.uplo_view = oneapi::mkl::uplo::upper; - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, - fp_one, fp_zero, default_alg, symmetric_view, no_properties, - no_reset_data), - num_passed, num_skipped); + if (transpose_val != oneapi::mkl::transpose::conjtrans) { + // Lower symmetric or hermitian + oneapi::mkl::sparse::matrix_view symmetric_view( + complex_info::is_complex ? 
oneapi::mkl::sparse::matrix_descr::hermitian + : oneapi::mkl::sparse::matrix_descr::symmetric); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + transpose_val, fp_one, fp_zero, default_alg, symmetric_view, + no_properties, no_reset_data), + num_passed, num_skipped); + // Upper symmetric or hermitian + symmetric_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + transpose_val, fp_one, fp_zero, default_alg, symmetric_view, + no_properties, no_reset_data), + num_passed, num_skipped); + } // Test other algorithms for (auto alg : non_default_algorithms) { EXPECT_TRUE_OR_FUTURE_SKIP(test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, From d68155b29e47c9b275031ed28c416658d0ace1ca Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 13 Jun 2024 13:03:28 +0100 Subject: [PATCH 08/39] Remove enable_if from template instantiations --- .../backends/mkl_common/mkl_handles.cxx | 80 +++++++++---------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx index 0a80130f2..c6f501027 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx @@ -66,18 +66,18 @@ void set_dense_vector_data(sycl::queue & /*queue*/, dvhandle->set_usm_ptr(val); } -#define INSTANTIATE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ - template std::enable_if_t> init_dense_vector( \ - sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ - std::int64_t size, sycl::buffer val); \ - template std::enable_if_t> init_dense_vector( \ - sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ - std::int64_t size, FP_TYPE *val); \ - template std::enable_if_t> set_dense_vector_data( \ - sycl::queue & queue, 
oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ - std::int64_t size, sycl::buffer val); \ - template std::enable_if_t> set_dense_vector_data( \ - sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ +#define INSTANTIATE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ + template void init_dense_vector( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ + std::int64_t size, sycl::buffer val); \ + template void init_dense_vector( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ + std::int64_t size, FP_TYPE *val); \ + template void set_dense_vector_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ + std::int64_t size, sycl::buffer val); \ + template void set_dense_vector_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ std::int64_t size, FP_TYPE *val) FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_VECTOR_FUNCS); #undef INSTANTIATE_DENSE_VECTOR_FUNCS @@ -133,22 +133,22 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, dmhandle->set_usm_ptr(val); } -#define INSTANTIATE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ - template std::enable_if_t> init_dense_matrix( \ - sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::mkl::layout dense_layout, sycl::buffer val); \ - template std::enable_if_t> init_dense_matrix( \ - sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::mkl::layout dense_layout, FP_TYPE *val); \ - template std::enable_if_t> set_dense_matrix_data( \ - sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::mkl::layout dense_layout, sycl::buffer val); \ - template std::enable_if_t> set_dense_matrix_data( \ - 
sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ - std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ +#define INSTANTIATE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ + template void init_dense_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, sycl::buffer val); \ + template void init_dense_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, FP_TYPE *val); \ + template void set_dense_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, sycl::buffer val); \ + template void set_dense_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ oneapi::mkl::layout dense_layout, FP_TYPE *val) FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_MATRIX_FUNCS); #undef INSTANTIATE_DENSE_MATRIX_FUNCS @@ -247,25 +247,21 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ } #define INSTANTIATE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ - template std::enable_if_t> \ - init_coo_matrix( \ + template void init_coo_matrix( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ oneapi::mkl::index_base index, sycl::buffer row_ind, \ sycl::buffer col_ind, sycl::buffer val); \ - template std::enable_if_t> \ - init_coo_matrix( \ + template void init_coo_matrix( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ 
oneapi::mkl::index_base index, INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val); \ - template std::enable_if_t> \ - set_coo_matrix_data( \ + template void set_coo_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ sycl::buffer row_ind, sycl::buffer col_ind, \ sycl::buffer val); \ - template std::enable_if_t> \ - set_coo_matrix_data( \ + template void set_coo_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE *row_ind, \ INT_TYPE *col_ind, FP_TYPE *val) @@ -345,25 +341,21 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ } #define INSTANTIATE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ - template std::enable_if_t> \ - init_csr_matrix( \ + template void init_csr_matrix( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ oneapi::mkl::index_base index, sycl::buffer row_ptr, \ sycl::buffer col_ind, sycl::buffer val); \ - template std::enable_if_t> \ - init_csr_matrix( \ + template void init_csr_matrix( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ oneapi::mkl::index_base index, INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val); \ - template std::enable_if_t> \ - set_csr_matrix_data( \ + template void set_csr_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ sycl::buffer row_ptr, sycl::buffer col_ind, \ sycl::buffer val); \ - template std::enable_if_t> \ - set_csr_matrix_data( \ + template void set_csr_matrix_data( \ sycl::queue & queue, 
oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE *row_ptr, \ INT_TYPE *col_ind, FP_TYPE *val) From 31d0a5f72eeea7647844875d72620360702d08a0 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 27 Jun 2024 17:08:15 +0100 Subject: [PATCH 09/39] More generic exception message for unimplemented exceptions --- src/sparse_blas/backends/mkl_common/mkl_spmm.cxx | 5 +++-- src/sparse_blas/backends/mkl_common/mkl_spsv.cxx | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index 6e3038122..5696dde55 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -63,8 +63,9 @@ void check_valid_spmm(const std::string function_name, sycl::queue &queue, #if BACKEND == gpu if (opA == oneapi::mkl::transpose::conjtrans && internal_A_handle->has_matrix_property(oneapi::mkl::sparse::matrix_property::symmetric)) { - throw mkl::unimplemented("sparse_blas/mklgpu", function_name, - "spmm does not support conjtrans with the symmetric property."); + throw mkl::unimplemented( + "sparse_blas", function_name, + "The backend does not support spmm using conjtrans and the symmetric property."); } #else (void)opA; diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index 8fef1339d..790ccc6d8 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -43,8 +43,8 @@ void check_valid_spsv(const std::string function_name, sycl::queue &queue, if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg && !internal_A_handle->has_matrix_property(oneapi::mkl::sparse::matrix_property::sorted)) { throw mkl::unimplemented( - "sparse_blas/mkl", function_name, - "`no_optimize_alg` is not supported unless A_handle 
has the property `matrix_property::sorted`."); + "sparse_blas", function_name, + "The backend does not support `no_optimize_alg` unless A_handle has the property `matrix_property::sorted`."); } detail::check_all_containers_compatible(function_name, internal_A_handle, x_handle, y_handle); From 42dba2e71ff25bc72666b434daf1c1ee06d1225d Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 27 Jun 2024 17:09:51 +0100 Subject: [PATCH 10/39] Force at least one element in random sparse matrices --- tests/unit_tests/sparse_blas/include/test_common.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp index b8d3fe4f2..2ece3ec9d 100644 --- a/tests/unit_tests/sparse_blas/include/test_common.hpp +++ b/tests/unit_tests/sparse_blas/include/test_common.hpp @@ -238,7 +238,8 @@ intType generate_random_csr_matrix(const intType nrows, const intType ncols, const intType j_start = is_symmetric ? i : 0; for (intType j = j_start; j < ncols; j++) { const bool is_diag = require_diagonal && i == j; - if (is_diag || (rand_density(0.0, 1.0) <= density_val)) { + const bool force_last_nnz = nnz == 0 && i == nrows - 1 && j == ncols - 1; + if (force_last_nnz || is_diag || (rand_density(0.0, 1.0) <= density_val)) { a.push_back(generate_data(is_diag)); ja.push_back(j + indexing); nnz++; @@ -282,7 +283,8 @@ intType generate_random_coo_matrix(const intType nrows, const intType ncols, const intType j_start = is_symmetric ? 
i : 0; for (intType j = j_start; j < ncols; j++) { const bool is_diag = require_diagonal && i == j; - if (is_diag || (rand_density(0.0, 1.0) <= density_val)) { + const bool force_last_nnz = a.size() == 0 && i == nrows - 1 && j == ncols - 1; + if (force_last_nnz || is_diag || (rand_density(0.0, 1.0) <= density_val)) { a.push_back(generate_data(is_diag)); ia.push_back(i + indexing); ja.push_back(j + indexing); From 3436abe51da87ffd09b36051df238f19be64fc23 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 27 Jun 2024 17:11:32 +0100 Subject: [PATCH 11/39] Test more sizes with spmm --- .../sparse_blas/include/test_spmm.hpp | 189 ++++++++++-------- 1 file changed, 107 insertions(+), 82 deletions(-) diff --git a/tests/unit_tests/sparse_blas/include/test_spmm.hpp b/tests/unit_tests/sparse_blas/include/test_spmm.hpp index 8216067a5..e1db30238 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmm.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmm.hpp @@ -63,101 +63,126 @@ void test_helper_with_format_with_transpose( fpType fp_one = set_fp_value()(1.f, 0.f); oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; oneapi::mkl::layout col_major = oneapi::mkl::layout::col_major; - int nrows_A = 4, ncols_A = 6, ncols_C = 5; - int ldb = transpose_A == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A; - int ldc = transpose_A == oneapi::mkl::transpose::nontrans ? 
nrows_A : ncols_A; oneapi::mkl::sparse::spmm_alg default_alg = oneapi::mkl::sparse::spmm_alg::default_alg; oneapi::mkl::sparse::matrix_view default_A_view; std::set no_properties; bool no_reset_data = false; - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, - default_alg, default_A_view, no_properties, no_reset_data), - num_passed, num_skipped); - // Reset data - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, - default_alg, default_A_view, no_properties, true), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, - oneapi::mkl::index_base::one, col_major, transpose_A, transpose_B, fp_one, - fp_zero, ldb, ldc, default_alg, default_A_view, no_properties, - no_reset_data), - num_passed, num_skipped); - // Test non-default alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - col_major, transpose_A, transpose_B, set_fp_value()(2.f, 1.5f), - fp_zero, ldb, ldc, default_alg, default_A_view, no_properties, - no_reset_data), - num_passed, num_skipped); - // Test non-default beta - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - col_major, transpose_A, transpose_B, fp_one, - set_fp_value()(3.2f, 1.f), ldb, ldc, default_alg, default_A_view, - no_properties, no_reset_data), - num_passed, num_skipped); - // Test 0 alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - col_major, transpose_A, transpose_B, fp_zero, fp_one, ldb, ldc, - default_alg, default_A_view, no_properties, 
no_reset_data), - num_passed, num_skipped); - // Test 0 alpha and beta - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - col_major, transpose_A, transpose_B, fp_zero, fp_zero, ldb, ldc, - default_alg, default_A_view, no_properties, no_reset_data), - num_passed, num_skipped); - // Test non-default ldb - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb + 5, ldc, - default_alg, default_A_view, no_properties, no_reset_data), - num_passed, num_skipped); - // Test non-default ldc - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc + 6, - default_alg, default_A_view, no_properties, no_reset_data), - num_passed, num_skipped); - // Test row major layout - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - oneapi::mkl::layout::row_major, transpose_A, transpose_B, fp_one, fp_zero, - ncols_C, ncols_C, default_alg, default_A_view, no_properties, - no_reset_data), - num_passed, num_skipped); - // Test int64 indices - long long_nrows_A = 27, long_ncols_A = 13, long_ncols_C = 6; - long long_ldb = transpose_A == oneapi::mkl::transpose::nontrans ? long_ncols_A : long_nrows_A; - long long_ldc = transpose_A == oneapi::mkl::transpose::nontrans ? 
long_nrows_A : long_ncols_A; - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i64(dev, format, long_nrows_A, long_ncols_A, long_ncols_C, density_A_matrix, - index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, long_ldb, - long_ldc, default_alg, default_A_view, no_properties, no_reset_data), - num_passed, num_skipped); - // Test other algorithms - for (auto alg : non_default_algorithms) { + { + int m = 4, k = 6, n = 5; + int nrows_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? k : m; + int ncols_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? m : k; + int nrows_B = (transpose_B != oneapi::mkl::transpose::nontrans) ? n : k; + int ncols_B = (transpose_B != oneapi::mkl::transpose::nontrans) ? k : n; + int nrows_C = m; + int ncols_C = n; + int ldb = nrows_B; + int ldc = nrows_C; + + // Basic test + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Reset data + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, true), + num_passed, num_skipped); + // Test index_base 1 + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, + oneapi::mkl::index_base::one, col_major, transpose_A, transpose_B, + fp_one, fp_zero, ldb, ldc, default_alg, default_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Test non-default alpha EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, alg, + col_major, transpose_A, transpose_B, set_fp_value()(2.f, 1.5f), + fp_zero, ldb, 
ldc, default_alg, default_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Test non-default beta + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, + set_fp_value()(3.2f, 1.f), ldb, ldc, default_alg, default_A_view, no_properties, no_reset_data), num_passed, num_skipped); + // Test 0 alpha + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_zero, fp_one, ldb, ldc, + default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test 0 alpha and beta + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_zero, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test non-default ldb + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb + 5, ldc, + default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test non-default ldc + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc + 6, + default_alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + // Test row major layout + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + oneapi::mkl::layout::row_major, transpose_A, transpose_B, fp_one, + fp_zero, ncols_B, ncols_C, default_alg, default_A_view, no_properties, + no_reset_data), + num_passed, num_skipped); + // Test int64 indices + long long_nrows_A = 
27, long_ncols_A = 13, long_ncols_C = 6; + auto [long_ldc, long_ldb] = swap_if_transposed(transpose_A, long_nrows_A, long_ncols_A); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i64(dev, format, long_nrows_A, long_ncols_A, long_ncols_C, + density_A_matrix, index_zero, col_major, transpose_A, transpose_B, + fp_one, fp_zero, long_ldb, long_ldc, default_alg, default_A_view, + no_properties, no_reset_data), + num_passed, num_skipped); + // Test other algorithms + for (auto alg : non_default_algorithms) { + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, + index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, + ldb, ldc, alg, default_A_view, no_properties, no_reset_data), + num_passed, num_skipped); + } + // Test matrix properties + for (auto properties : test_matrix_properties) { + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, + index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, + ldb, ldc, default_alg, default_A_view, properties, no_reset_data), + num_passed, num_skipped); + } } - // Test matrix properties - for (auto properties : test_matrix_properties) { + { + // Test different sizes + int m = 6, k = 2, n = 5; + int nrows_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? k : m; + int ncols_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? m : k; + int nrows_B = (transpose_B != oneapi::mkl::transpose::nontrans) ? 
n : k; + int nrows_C = m; + int ncols_C = n; + int ldb = nrows_B; + int ldc = nrows_C; EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, - default_alg, default_A_view, properties, no_reset_data), + default_alg, default_A_view, no_properties, no_reset_data), num_passed, num_skipped); } } From b0ecc40f724329646dc3f23b53207994a66fbe9b Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 27 Jun 2024 17:12:33 +0100 Subject: [PATCH 12/39] Use default beta for spmv with long indices --- tests/unit_tests/sparse_blas/include/test_spmv.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp index 70738dd02..a58f51657 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp @@ -110,7 +110,7 @@ void test_helper_with_format( // Test int64 indices EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i64(dev, format, 27L, 13L, density_A_matrix, index_zero, transpose_val, fp_one, - fp_one, default_alg, default_A_view, no_properties, no_reset_data), + fp_zero, default_alg, default_A_view, no_properties, no_reset_data), num_passed, num_skipped); // Lower triangular oneapi::mkl::sparse::matrix_view triangular_A_view( From 84fa7f820028fa58726c014c75ac5c7fa62eac9e Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 27 Jun 2024 17:14:04 +0100 Subject: [PATCH 13/39] Fix nnz in tests resetting data --- tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp | 2 +- tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp | 2 +- tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp | 2 +- tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp | 2 +- tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp | 2 +- tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp | 2 +- 6 
files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp index b655aaf37..a6b1a79ec 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp @@ -120,7 +120,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, is_symmetric); if (!is_sorted) { shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), - a_host.data(), nnz, static_cast(nrows_A)); + a_host.data(), reset_nnz, static_cast(nrows_A)); } if (reset_nnz > nnz) { ia_buf = make_buffer(ia_host); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp index 6f29ca9b1..fc0090bd0 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp @@ -145,7 +145,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, is_symmetric); if (!is_sorted) { shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), - a_host.data(), nnz, static_cast(nrows_A)); + a_host.data(), reset_nnz, static_cast(nrows_A)); } if (reset_nnz > nnz) { ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp index ac834c91c..395d812a1 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp @@ -110,7 +110,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, is_symmetric); if (!is_sorted) { shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), - a_host.data(), nnz, static_cast(nrows_A)); + a_host.data(), reset_nnz, static_cast(nrows_A)); } if (reset_nnz > nnz) { 
ia_buf = make_buffer(ia_host); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp index 122b395d9..55744aebe 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp @@ -135,7 +135,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, is_symmetric); if (!is_sorted) { shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), - a_host.data(), nnz, static_cast(nrows_A)); + a_host.data(), reset_nnz, static_cast(nrows_A)); } if (reset_nnz > nnz) { ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp index 1878b179f..38f8fd427 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp @@ -107,7 +107,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl require_diagonal); if (!is_sorted) { shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), - a_host.data(), nnz, mu); + a_host.data(), reset_nnz, mu); } if (reset_nnz > nnz) { ia_buf = make_buffer(ia_host); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp index f6d4f5db7..0832ed5ca 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp @@ -132,7 +132,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl require_diagonal); if (!is_sorted) { shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), - a_host.data(), nnz, mu); + a_host.data(), reset_nnz, mu); } if (reset_nnz > nnz) { ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); From 
a79c5af025b99a77cd2c98164345ed24614e601a Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 27 Jun 2024 17:19:58 +0100 Subject: [PATCH 14/39] Fix invalid accesses in tests --- .../include/common_sparse_reference.hpp | 32 +++++++++++------ .../sparse_blas/include/test_common.hpp | 36 +++++++++++++++---- .../sparse_blas/include/test_spmm.hpp | 13 +++---- .../sparse_blas/include/test_spmv.hpp | 3 +- .../sparse_blas/source/sparse_spmm_buffer.cpp | 13 +++---- .../sparse_blas/source/sparse_spmm_usm.cpp | 13 +++---- .../sparse_blas/source/sparse_spmv_buffer.cpp | 5 +-- .../sparse_blas/source/sparse_spmv_usm.cpp | 5 +-- 8 files changed, 68 insertions(+), 52 deletions(-) diff --git a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp index d8b11e6b7..62b213100 100644 --- a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp +++ b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp @@ -131,19 +131,29 @@ auto sparse_transpose_if_needed(const intType *ia, const intType *ja, const fpTy return std::make_tuple(iopa, jopa, opa); } +/// Reduce the leading dimension to the minimum and transpose the matrix if needed +/// The outputted matrix always uses row major layout template -auto dense_transpose_if_needed(const fpType *x, std::size_t outer_size, std::size_t inner_size, - std::size_t ld, oneapi::mkl::transpose transpose_val) { - std::vector opx; - if (transpose_val == oneapi::mkl::transpose::nontrans) { - opx.assign(x, x + outer_size * ld); +auto extract_dense_matrix(const fpType *x, std::size_t nrows, std::size_t ncols, std::size_t ld, + oneapi::mkl::transpose transpose_val, + oneapi::mkl::layout dense_matrix_layout) { + const bool is_row_major = dense_matrix_layout == oneapi::mkl::layout::row_major; + const bool is_transposed = transpose_val != oneapi::mkl::transpose::nontrans; + const bool apply_conjugate = transpose_val == oneapi::mkl::transpose::conjtrans; + 
const bool swap_ld = is_row_major != is_transposed; + if (swap_ld && ncols > ld) { + throw std::runtime_error("Expected ncols <= ld"); } - else { - opx.resize(outer_size * ld); - for (std::size_t i = 0; i < outer_size; ++i) { - for (std::size_t j = 0; j < inner_size; ++j) { - opx[i + j * ld] = x[i * ld + j]; - } + if (!swap_ld && nrows > ld) { + throw std::runtime_error("Expected nrows <= ld"); + } + + // Copy with a default leading dimension and transpose if needed + std::vector opx(nrows * ncols); + for (std::size_t i = 0; i < nrows; ++i) { + for (std::size_t j = 0; j < ncols; ++j) { + auto val = swap_ld ? x[i * ld + j] : x[j * ld + i]; + opx[i * ncols + j] = opVal(val, apply_conjugate); } } return opx; diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp index 2ece3ec9d..48d0b13ee 100644 --- a/tests/unit_tests/sparse_blas/include/test_common.hpp +++ b/tests/unit_tests/sparse_blas/include/test_common.hpp @@ -122,6 +122,31 @@ void fill_buffer_to_0(sycl::queue queue, sycl::buffer dst) { }); } +template +std::pair swap_if_cond(bool swap, XT x, YT y) { + if (swap) { + return { static_cast(y), static_cast(x) }; + } + else { + return { static_cast(x), static_cast(y) }; + } +} + +template +auto swap_if_cond(bool swap, T x, T y) { + return swap_if_cond(swap, x, y); +} + +template +auto swap_if_transposed(oneapi::mkl::transpose op, XT x, YT y) { + return swap_if_cond(op != oneapi::mkl::transpose::nontrans, x, y); +} + +template +auto swap_if_transposed(oneapi::mkl::transpose op, T x, T y) { + return swap_if_transposed(op, x, y); +} + template struct set_fp_value { inline fpType operator()(fpType real, fpType /*imag*/) { @@ -163,13 +188,12 @@ void rand_vector(std::vector &v, std::size_t n) { template void rand_matrix(std::vector &m, oneapi::mkl::layout layout_val, std::size_t nrows, - std::size_t ncols, std::size_t ld) { + std::size_t ncols, std::size_t ld, + oneapi::mkl::transpose transpose_val = 
oneapi::mkl::transpose::nontrans) { using fpRealType = typename complex_info::real_type; - std::size_t outer_size = nrows; - std::size_t inner_size = ncols; - if (layout_val == oneapi::mkl::layout::col_major) { - std::swap(outer_size, inner_size); - } + auto [op_nrows, op_cols] = swap_if_transposed(transpose_val, nrows, ncols); + auto [outer_size, inner_size] = + swap_if_cond(layout_val == oneapi::mkl::layout::row_major, op_cols, op_nrows); if (inner_size > ld) { throw std::runtime_error("Expected inner_size <= ld"); } diff --git a/tests/unit_tests/sparse_blas/include/test_spmm.hpp b/tests/unit_tests/sparse_blas/include/test_spmm.hpp index e1db30238..fc4cbb171 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmm.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmm.hpp @@ -259,8 +259,7 @@ void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType *i std::size_t a_nrows_u = static_cast(a_nrows); std::size_t a_ncols_u = static_cast(a_ncols); std::size_t c_ncols_u = static_cast(c_ncols); - std::size_t opa_nrows = (opA == oneapi::mkl::transpose::nontrans) ? a_nrows_u : a_ncols_u; - std::size_t opa_ncols = (opA == oneapi::mkl::transpose::nontrans) ? 
a_ncols_u : a_nrows_u; + auto [opa_nrows, opa_ncols] = swap_if_transposed(opA, a_nrows_u, a_ncols_u); const std::size_t nnz = static_cast(a_nnz); const std::size_t ldb_u = static_cast(ldb); const std::size_t ldc_u = static_cast(ldc); @@ -268,12 +267,8 @@ void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType *i auto dense_opa = sparse_to_dense(format, ia, ja, a, a_nrows_u, a_ncols_u, nnz, indexing, opA, A_view); - std::size_t b_outer_size = static_cast(opa_ncols); - std::size_t b_inner_size = c_ncols_u; - if (dense_matrix_layout == oneapi::mkl::layout::col_major) { - std::swap(b_outer_size, b_inner_size); - } - auto dense_opb = dense_transpose_if_needed(b, b_outer_size, b_inner_size, ldb_u, opB); + // dense_opb is always row major and not transposed + auto dense_opb = extract_dense_matrix(b, opa_ncols, c_ncols_u, ldb_u, opB, dense_matrix_layout); // Return the linear index to access a dense matrix from auto dense_linear_idx = [=](std::size_t row, std::size_t col, std::size_t ld) { @@ -290,7 +285,7 @@ void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType *i for (std::size_t col = 0; col < c_ncols_u; col++) { fpType acc = 0; for (std::size_t i = 0; i < opa_ncols; i++) { - acc += dense_opa[row * opa_ncols + i] * dense_opb[dense_linear_idx(i, col, ldb_u)]; + acc += dense_opa[row * opa_ncols + i] * dense_opb[i * c_ncols_u + col]; } fpType &c = c_ref[dense_linear_idx(row, col, ldc_u)]; c = alpha * acc + beta * c; diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp index a58f51657..5043c132f 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp @@ -215,8 +215,7 @@ void prepare_reference_spmv_data(sparse_matrix_format_t format, const intType *i fpType *y_ref) { std::size_t a_nrows_u = static_cast(a_nrows); std::size_t a_ncols_u = static_cast(a_ncols); - std::size_t opa_nrows = (opA == 
oneapi::mkl::transpose::nontrans) ? a_nrows_u : a_ncols_u; - std::size_t opa_ncols = (opA == oneapi::mkl::transpose::nontrans) ? a_ncols_u : a_nrows_u; + auto [opa_nrows, opa_ncols] = swap_if_transposed(opA, a_nrows_u, a_ncols_u); const std::size_t nnz = static_cast(a_nnz); auto dense_opa = sparse_to_dense(format, ia, ja, a, a_nrows_u, a_ncols_u, nnz, indexing, opA, A_view); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp index a6b1a79ec..a28992391 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp @@ -44,10 +44,8 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, ldb = nrows_A; ldc = nrows_A; } - std::size_t opa_nrows = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? nrows_A : ncols_A); - std::size_t opa_ncols = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A); + auto [opa_nrows, opa_ncols] = swap_if_transposed(transpose_A, nrows_A, ncols_A); + auto [opb_nrows, opb_ncols] = swap_if_transposed(transpose_B, opa_ncols, ncols_C); intType indexing = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != matrix_properties.cend(); @@ -65,7 +63,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, // Input and output dense vectors std::vector b_host, c_host; rand_matrix(b_host, dense_matrix_layout, opa_ncols, static_cast(ncols_C), - static_cast(ldb)); + static_cast(ldb), transpose_B); rand_matrix(c_host, dense_matrix_layout, opa_nrows, static_cast(ncols_C), static_cast(ldc)); std::vector c_ref_host(c_host); @@ -92,9 +90,8 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, for (auto property : matrix_properties) { CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); } - CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &B_handle, - static_cast(opa_ncols), ncols_C, ldb, dense_matrix_layout, - b_buf); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &B_handle, opb_nrows, + opb_ncols, ldb, dense_matrix_layout, b_buf); CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &C_handle, static_cast(opa_nrows), ncols_C, ldc, dense_matrix_layout, c_buf); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp index fc0090bd0..5c74a319b 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp @@ -44,10 +44,8 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, ldb = nrows_A; ldc = nrows_A; } - std::size_t opa_nrows = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? nrows_A : ncols_A); - std::size_t opa_ncols = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? 
ncols_A : nrows_A); + auto [opa_nrows, opa_ncols] = swap_if_transposed(transpose_A, nrows_A, ncols_A); + auto [opb_nrows, opb_ncols] = swap_if_transposed(transpose_B, opa_ncols, ncols_C); intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1; const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != matrix_properties.cend(); @@ -65,7 +63,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, // Input and output dense vectors std::vector b_host, c_host; rand_matrix(b_host, dense_matrix_layout, opa_ncols, static_cast(ncols_C), - static_cast(ldb)); + static_cast(ldb), transpose_B); rand_matrix(c_host, dense_matrix_layout, opa_nrows, static_cast(ncols_C), static_cast(ldc)); std::vector c_ref_host(c_host); @@ -114,9 +112,8 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, for (auto property : matrix_properties) { CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); } - CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &B_handle, - static_cast(opa_ncols), ncols_C, ldb, dense_matrix_layout, - b_usm); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &B_handle, opb_nrows, + opb_ncols, ldb, dense_matrix_layout, b_usm); CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &C_handle, static_cast(opa_nrows), ncols_C, ldc, dense_matrix_layout, c_usm); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp index 395d812a1..5b23ae3fd 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp @@ -39,10 +39,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, if (require_square_matrix(A_view, matrix_properties)) { ncols_A = nrows_A; } - std::size_t opa_nrows = static_cast( - transpose_val == 
oneapi::mkl::transpose::nontrans ? nrows_A : ncols_A); - std::size_t opa_ncols = static_cast( - transpose_val == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A); + auto [opa_nrows, opa_ncols] = swap_if_transposed(transpose_val, nrows_A, ncols_A); intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1; const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != matrix_properties.cend(); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp index 55744aebe..28e291bbe 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp @@ -39,10 +39,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, if (require_square_matrix(A_view, matrix_properties)) { ncols_A = nrows_A; } - std::size_t opa_nrows = static_cast( - transpose_val == oneapi::mkl::transpose::nontrans ? nrows_A : ncols_A); - std::size_t opa_ncols = static_cast( - transpose_val == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A); + auto [opa_nrows, opa_ncols] = swap_if_transposed(transpose_val, nrows_A, ncols_A); intType indexing = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != matrix_properties.cend(); From d4528466da3aa1608ddb2d60c5c8bff88306a7c4 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 28 Jun 2024 15:08:29 +0100 Subject: [PATCH 15/39] Test scalars on device memory --- .../backends/mkl_common/mkl_helper.hpp | 2 +- .../backends/mkl_common/mkl_spmm.cxx | 18 ++++-- .../backends/mkl_common/mkl_spmv.cxx | 30 ++++++---- .../backends/mkl_common/mkl_spsv.cxx | 6 +- .../sparse_blas/include/test_spmm.hpp | 44 ++++++++++----- .../sparse_blas/include/test_spmv.hpp | 52 +++++++++++------- .../sparse_blas/include/test_spsv.hpp | 55 +++++++++++-------- .../sparse_blas/source/sparse_spmm_buffer.cpp | 6 +- .../sparse_blas/source/sparse_spmm_usm.cpp | 14 ++++- .../sparse_blas/source/sparse_spmv_buffer.cpp | 6 +- .../sparse_blas/source/sparse_spmv_usm.cpp | 43 ++++++++++----- .../sparse_blas/source/sparse_spsv_buffer.cpp | 6 +- .../sparse_blas/source/sparse_spsv_usm.cpp | 36 ++++++++---- 13 files changed, 209 insertions(+), 109 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp index 1562c240b..021dabb51 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp +++ b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp @@ -57,7 +57,7 @@ void check_ptr_is_host_accessible(const std::string &function_name, const std::s /// Return a scalar on the host from a pointer to host or device memory /// Used for USM functions template -inline T get_scalar(sycl::queue &queue, const T *host_or_device_ptr) { +inline T get_scalar_on_host(sycl::queue &queue, const T *host_or_device_ptr) { if (is_ptr_accessible_on_host(queue, host_or_device_ptr)) { return *host_or_device_ptr; } diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index 5696dde55..56a0cedf7 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx 
+++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -45,6 +45,12 @@ void check_valid_spmm(const std::string function_name, sycl::queue &queue, detail::check_ptr_is_host_accessible("spmm", "alpha", queue, alpha); detail::check_ptr_is_host_accessible("spmm", "beta", queue, beta); } + if (detail::is_ptr_accessible_on_host(queue, alpha) != + detail::is_ptr_accessible_on_host(queue, beta)) { + throw mkl::invalid_argument( + "sparse_blas", function_name, + "Alpha and beta must both be placed on host memory or device memory."); + } if (B_handle->dense_layout != C_handle->dense_layout) { throw mkl::invalid_argument("sparse_blas", function_name, "B and C matrices must used the same layout."); @@ -138,8 +144,8 @@ sycl::event internal_spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::spmm_alg /*alg*/, oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, const std::vector &dependencies) { - T cast_alpha = *static_cast(alpha); - T cast_beta = *static_cast(beta); + T host_alpha = detail::get_scalar_on_host(queue, static_cast(alpha)); + T host_beta = detail::get_scalar_on_host(queue, static_cast(beta)); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; auto layout = B_handle->dense_layout; @@ -147,16 +153,16 @@ sycl::event internal_spmm(sycl::queue &queue, oneapi::mkl::transpose opA, auto ldb = B_handle->ld; auto ldc = C_handle->ld; if (internal_A_handle->all_use_buffer()) { - oneapi::mkl::sparse::gemm(queue, layout, opA, opB, cast_alpha, + oneapi::mkl::sparse::gemm(queue, layout, opA, opB, host_alpha, internal_A_handle->backend_handle, B_handle->get_buffer(), - columns, ldb, cast_beta, C_handle->get_buffer(), ldc); + columns, ldb, host_beta, C_handle->get_buffer(), ldc); // Dependencies are not used for buffers return {}; } else { - return oneapi::mkl::sparse::gemm(queue, layout, opA, opB, cast_alpha, + return oneapi::mkl::sparse::gemm(queue, layout, opA, opB, host_alpha, 
internal_A_handle->backend_handle, - B_handle->get_usm_ptr(), columns, ldb, cast_beta, + B_handle->get_usm_ptr(), columns, ldb, host_beta, C_handle->get_usm_ptr(), ldc, dependencies); } } diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index 7f809c75e..7f0e21208 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -45,6 +45,12 @@ void check_valid_spmv(const std::string function_name, sycl::queue &queue, detail::check_ptr_is_host_accessible("spmv", "alpha", queue, alpha); detail::check_ptr_is_host_accessible("spmv", "beta", queue, beta); } + if (detail::is_ptr_accessible_on_host(queue, alpha) != + detail::is_ptr_accessible_on_host(queue, beta)) { + throw mkl::invalid_argument( + "sparse_blas", function_name, + "Alpha and beta must both be placed on host memory or device memory."); + } if (A_view.type_view == oneapi::mkl::sparse::matrix_descr::diagonal) { throw mkl::invalid_argument("sparse_blas", function_name, "Matrix view's type cannot be diagonal."); @@ -153,8 +159,8 @@ sycl::event internal_spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spmv_alg /*alg*/, oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, const std::vector &dependencies) { - T cast_alpha = *static_cast(alpha); - T cast_beta = *static_cast(beta); + T host_alpha = detail::get_scalar_on_host(queue, static_cast(alpha)); + T host_beta = detail::get_scalar_on_host(queue, static_cast(beta)); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; auto backend_handle = internal_A_handle->backend_handle; @@ -162,16 +168,16 @@ sycl::event internal_spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const auto x_buffer = x_handle->get_buffer(); auto y_buffer = y_handle->get_buffer(); if (A_view.type_view == matrix_descr::triangular) { - oneapi::mkl::sparse::trmv(queue, A_view.uplo_view, opA, 
A_view.diag_view, cast_alpha, - backend_handle, x_buffer, cast_beta, y_buffer); + oneapi::mkl::sparse::trmv(queue, A_view.uplo_view, opA, A_view.diag_view, host_alpha, + backend_handle, x_buffer, host_beta, y_buffer); } else if (A_view.type_view == matrix_descr::symmetric || A_view.type_view == matrix_descr::hermitian) { - oneapi::mkl::sparse::symv(queue, A_view.uplo_view, cast_alpha, backend_handle, x_buffer, - cast_beta, y_buffer); + oneapi::mkl::sparse::symv(queue, A_view.uplo_view, host_alpha, backend_handle, x_buffer, + host_beta, y_buffer); } else { - oneapi::mkl::sparse::gemv(queue, opA, cast_alpha, backend_handle, x_buffer, cast_beta, + oneapi::mkl::sparse::gemv(queue, opA, host_alpha, backend_handle, x_buffer, host_beta, y_buffer); } // Dependencies are not used for buffers @@ -182,17 +188,17 @@ sycl::event internal_spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const auto y_usm = y_handle->get_usm_ptr(); if (A_view.type_view == matrix_descr::triangular) { return oneapi::mkl::sparse::trmv(queue, A_view.uplo_view, opA, A_view.diag_view, - cast_alpha, backend_handle, x_usm, cast_beta, y_usm, + host_alpha, backend_handle, x_usm, host_beta, y_usm, dependencies); } else if (A_view.type_view == matrix_descr::symmetric || A_view.type_view == matrix_descr::hermitian) { - return oneapi::mkl::sparse::symv(queue, A_view.uplo_view, cast_alpha, backend_handle, - x_usm, cast_beta, y_usm, dependencies); + return oneapi::mkl::sparse::symv(queue, A_view.uplo_view, host_alpha, backend_handle, + x_usm, host_beta, y_usm, dependencies); } else { - return oneapi::mkl::sparse::gemv(queue, opA, cast_alpha, backend_handle, x_usm, - cast_beta, y_usm, dependencies); + return oneapi::mkl::sparse::gemv(queue, opA, host_alpha, backend_handle, x_usm, + host_beta, y_usm, dependencies); } } } diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index 790ccc6d8..f1e5ebc01 100644 --- 
a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -124,18 +124,18 @@ sycl::event internal_spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spsv_alg /*alg*/, oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, const std::vector &dependencies) { - T cast_alpha = *static_cast(alpha); + T host_alpha = detail::get_scalar_on_host(queue, static_cast(alpha)); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; if (internal_A_handle->all_use_buffer()) { - oneapi::mkl::sparse::trsv(queue, A_view.uplo_view, opA, A_view.diag_view, cast_alpha, + oneapi::mkl::sparse::trsv(queue, A_view.uplo_view, opA, A_view.diag_view, host_alpha, internal_A_handle->backend_handle, x_handle->get_buffer(), y_handle->get_buffer()); // Dependencies are not used for buffers return {}; } else { - return oneapi::mkl::sparse::trsv(queue, A_view.uplo_view, opA, A_view.diag_view, cast_alpha, + return oneapi::mkl::sparse::trsv(queue, A_view.uplo_view, opA, A_view.diag_view, host_alpha, internal_A_handle->backend_handle, x_handle->get_usm_ptr(), y_handle->get_usm_ptr(), dependencies); diff --git a/tests/unit_tests/sparse_blas/include/test_spmm.hpp b/tests/unit_tests/sparse_blas/include/test_spmm.hpp index fc4cbb171..049d58b88 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmm.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmm.hpp @@ -67,6 +67,7 @@ void test_helper_with_format_with_transpose( oneapi::mkl::sparse::matrix_view default_A_view; std::set no_properties; bool no_reset_data = false; + bool no_scalars_on_device = false; { int m = 4, k = 6, n = 5; @@ -83,65 +84,77 @@ void test_helper_with_format_with_transpose( EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, - default_alg, default_A_view, no_properties, no_reset_data), 
+ default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Reset data EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, - default_alg, default_A_view, no_properties, true), + default_alg, default_A_view, no_properties, true, + no_scalars_on_device), + num_passed, num_skipped); + // Test alpha and beta on the device + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, no_reset_data, true), num_passed, num_skipped); // Test index_base 1 EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, oneapi::mkl::index_base::one, col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, default_alg, default_A_view, no_properties, - no_reset_data), + no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Test non-default alpha EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, transpose_B, set_fp_value()(2.f, 1.5f), fp_zero, ldb, ldc, default_alg, default_A_view, no_properties, - no_reset_data), + no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Test non-default beta EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, transpose_B, fp_one, set_fp_value()(3.2f, 1.f), ldb, ldc, default_alg, - default_A_view, no_properties, no_reset_data), + default_A_view, no_properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Test 0 alpha EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, 
col_major, transpose_A, transpose_B, fp_zero, fp_one, ldb, ldc, - default_alg, default_A_view, no_properties, no_reset_data), + default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Test 0 alpha and beta EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, transpose_B, fp_zero, fp_zero, ldb, ldc, - default_alg, default_A_view, no_properties, no_reset_data), + default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Test non-default ldb EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb + 5, ldc, - default_alg, default_A_view, no_properties, no_reset_data), + default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Test non-default ldc EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc + 6, - default_alg, default_A_view, no_properties, no_reset_data), + default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Test row major layout EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, oneapi::mkl::layout::row_major, transpose_A, transpose_B, fp_one, fp_zero, ncols_B, ncols_C, default_alg, default_A_view, no_properties, - no_reset_data), + no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Test int64 indices long long_nrows_A = 27, long_ncols_A = 13, long_ncols_C = 6; @@ -150,14 +163,15 @@ void test_helper_with_format_with_transpose( test_functor_i64(dev, format, long_nrows_A, long_ncols_A, long_ncols_C, density_A_matrix, index_zero, 
col_major, transpose_A, transpose_B, fp_one, fp_zero, long_ldb, long_ldc, default_alg, default_A_view, - no_properties, no_reset_data), + no_properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Test other algorithms for (auto alg : non_default_algorithms) { EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, - ldb, ldc, alg, default_A_view, no_properties, no_reset_data), + ldb, ldc, alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); } // Test matrix properties @@ -165,7 +179,8 @@ void test_helper_with_format_with_transpose( EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, - ldb, ldc, default_alg, default_A_view, properties, no_reset_data), + ldb, ldc, default_alg, default_A_view, properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); } } @@ -182,7 +197,8 @@ void test_helper_with_format_with_transpose( EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, - default_alg, default_A_view, no_properties, no_reset_data), + default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); } } diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp index 5043c132f..43599e9d3 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp @@ -65,52 +65,61 @@ void test_helper_with_format( oneapi::mkl::sparse::matrix_view default_A_view; std::set no_properties; bool no_reset_data = false; + bool no_scalars_on_device = false; // Basic test 
EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, - fp_one, fp_zero, default_alg, default_A_view, no_properties, - no_reset_data), + fp_one, fp_zero, default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Reset data EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, - fp_one, fp_zero, default_alg, default_A_view, no_properties, true), + fp_one, fp_zero, default_alg, default_A_view, no_properties, true, + no_scalars_on_device), + num_passed, num_skipped); + // Test alpha and beta on the device + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, default_A_view, no_properties, no_reset_data, + true), num_passed, num_skipped); // Test index_base 1 EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, oneapi::mkl::index_base::one, transpose_val, fp_one, fp_zero, default_alg, - default_A_view, no_properties, no_reset_data), + default_A_view, no_properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Test non-default alpha EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, set_fp_value()(2.f, 1.5f), fp_zero, default_alg, default_A_view, - no_properties, no_reset_data), + no_properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Test non-default beta EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, fp_one, set_fp_value()(3.2f, 1.f), default_alg, default_A_view, - no_properties, no_reset_data), + no_properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Test 0 alpha EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, 
nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, - fp_zero, fp_one, default_alg, default_A_view, no_properties, - no_reset_data), + fp_zero, fp_one, default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Test 0 alpha and beta EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, fp_zero, fp_zero, default_alg, default_A_view, no_properties, - no_reset_data), + no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Test int64 indices EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i64(dev, format, 27L, 13L, density_A_matrix, index_zero, transpose_val, fp_one, - fp_zero, default_alg, default_A_view, no_properties, no_reset_data), + fp_zero, default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Lower triangular oneapi::mkl::sparse::matrix_view triangular_A_view( @@ -118,14 +127,14 @@ void test_helper_with_format( EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, default_alg, triangular_A_view, no_properties, - no_reset_data), + no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Upper triangular triangular_A_view.uplo_view = oneapi::mkl::uplo::upper; EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, default_alg, triangular_A_view, no_properties, - no_reset_data), + no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Lower triangular unit diagonal oneapi::mkl::sparse::matrix_view triangular_unit_A_view( @@ -134,14 +143,14 @@ void test_helper_with_format( EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, default_alg, triangular_unit_A_view, no_properties, - no_reset_data), + 
no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Upper triangular unit diagonal triangular_A_view.uplo_view = oneapi::mkl::uplo::upper; EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, default_alg, triangular_unit_A_view, no_properties, - no_reset_data), + no_reset_data, no_scalars_on_device), num_passed, num_skipped); if (transpose_val != oneapi::mkl::transpose::conjtrans) { // Lower symmetric or hermitian @@ -151,29 +160,30 @@ void test_helper_with_format( EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, default_alg, symmetric_view, - no_properties, no_reset_data), + no_properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Upper symmetric or hermitian symmetric_view.uplo_view = oneapi::mkl::uplo::upper; EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, default_alg, symmetric_view, - no_properties, no_reset_data), + no_properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); } // Test other algorithms for (auto alg : non_default_algorithms) { - EXPECT_TRUE_OR_FUTURE_SKIP(test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, - index_zero, transpose_val, fp_one, fp_zero, alg, - default_A_view, no_properties, no_reset_data), - num_passed, num_skipped); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + transpose_val, fp_one, fp_zero, alg, default_A_view, no_properties, + no_reset_data, no_scalars_on_device), + num_passed, num_skipped); } // Test matrix properties for (auto properties : test_matrix_properties) { EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, default_alg, 
default_A_view, - properties, no_reset_data), + properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); } } diff --git a/tests/unit_tests/sparse_blas/include/test_spsv.hpp b/tests/unit_tests/sparse_blas/include/test_spsv.hpp index 3354dffaa..bdf9210f8 100644 --- a/tests/unit_tests/sparse_blas/include/test_spsv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spsv.hpp @@ -62,74 +62,85 @@ void test_helper_with_format(testFunctorI32 test_functor_i32, testFunctorI64 tes upper_A_view.uplo_view = oneapi::mkl::uplo::upper; std::set no_properties; bool no_reset_data = false; + bool no_scalars_on_device = false; // Basic test + EXPECT_TRUE_OR_FUTURE_SKIP(test_functor_i32(dev, format, m, density_A_matrix, index_zero, + transpose_val, alpha, default_alg, default_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Reset data EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, - default_alg, default_A_view, no_properties, no_reset_data), + default_alg, default_A_view, no_properties, true, no_scalars_on_device), num_passed, num_skipped); - // Reset data + // Test alpha on the device EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, - default_alg, default_A_view, no_properties, true), + default_alg, default_A_view, no_properties, no_reset_data, true), num_passed, num_skipped); // Test index_base 1 EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, m, density_A_matrix, oneapi::mkl::index_base::one, transpose_val, alpha, default_alg, default_A_view, no_properties, - no_reset_data), + no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Test upper triangular matrix - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, - default_alg, upper_A_view, no_properties, no_reset_data), - num_passed, num_skipped); + 
EXPECT_TRUE_OR_FUTURE_SKIP(test_functor_i32(dev, format, m, density_A_matrix, index_zero, + transpose_val, alpha, default_alg, upper_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); // Test lower triangular unit diagonal matrix oneapi::mkl::sparse::matrix_view triangular_unit_A_view( oneapi::mkl::sparse::matrix_descr::triangular); triangular_unit_A_view.diag_view = oneapi::mkl::diag::unit; EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, - default_alg, triangular_unit_A_view, no_properties, no_reset_data), + default_alg, triangular_unit_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Test upper triangular unit diagonal matrix triangular_unit_A_view.uplo_view = oneapi::mkl::uplo::upper; EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, - default_alg, triangular_unit_A_view, no_properties, no_reset_data), + default_alg, triangular_unit_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Test non-default alpha EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, set_fp_value()(2.f, 1.5f), default_alg, default_A_view, - no_properties, no_reset_data), + no_properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Test int64 indices - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i64(dev, format, 15L, density_A_matrix, index_zero, transpose_val, alpha, - default_alg, default_A_view, no_properties, no_reset_data), - num_passed, num_skipped); + EXPECT_TRUE_OR_FUTURE_SKIP(test_functor_i64(dev, format, 15L, density_A_matrix, index_zero, + transpose_val, alpha, default_alg, default_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); // Test lower no_optimize_alg EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, 
m, density_A_matrix, index_zero, transpose_val, alpha, - no_optimize_alg, default_A_view, no_properties, no_reset_data), + no_optimize_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Test upper no_optimize_alg - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, - no_optimize_alg, upper_A_view, no_properties, no_reset_data), - num_passed, num_skipped); + EXPECT_TRUE_OR_FUTURE_SKIP(test_functor_i32(dev, format, m, density_A_matrix, index_zero, + transpose_val, alpha, no_optimize_alg, upper_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); // Test matrix properties for (auto properties : test_matrix_properties) { // Basic test with matrix properties EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, - default_alg, default_A_view, properties, no_reset_data), + default_alg, default_A_view, properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Test lower no_optimize_alg with matrix properties EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, - no_optimize_alg, default_A_view, properties, no_reset_data), + no_optimize_alg, default_A_view, properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); } } diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp index a28992391..815c2ecbd 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp @@ -35,7 +35,11 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ldc, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::matrix_view A_view, const std::set &matrix_properties, - bool reset_data) { + bool 
reset_data, bool test_scalar_on_device) { + if (test_scalar_on_device) { + // Scalars on the device is not planned to be supported with the buffer API + return 1; + } sycl::queue main_queue(*dev, exception_handler_t()); if (require_square_matrix(A_view, matrix_properties)) { diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp index 5c74a319b..766915537 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp @@ -35,7 +35,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ldc, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::matrix_view A_view, const std::set &matrix_properties, - bool reset_data) { + bool reset_data, bool test_scalar_on_device) { sycl::queue main_queue(*dev, exception_handler_t()); if (require_square_matrix(A_view, matrix_properties)) { @@ -79,6 +79,8 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); auto b_usm_uptr = malloc_device_uptr(main_queue, b_host.size()); auto c_usm_uptr = malloc_device_uptr(main_queue, c_host.size()); + auto alpha_usm_uptr = malloc_device_uptr(main_queue, 1); + auto beta_usm_uptr = malloc_device_uptr(main_queue, 1); intType *ia_usm = ia_usm_uptr.get(); intType *ja_usm = ja_usm_uptr.get(); @@ -100,6 +102,16 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, spmm_dependencies.push_back( main_queue.memcpy(c_usm, c_host.data(), c_host.size() * sizeof(fpType))); + fpType *alpha_host_or_usm_ptr = α + fpType *beta_host_or_usm_ptr = β + if (test_scalar_on_device) { + spmm_dependencies.push_back( + main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); + spmm_dependencies.push_back(main_queue.memcpy(beta_usm_uptr.get(), &beta, sizeof(fpType))); + alpha_host_or_usm_ptr = alpha_usm_uptr.get(); + 
beta_host_or_usm_ptr = beta_usm_uptr.get(); + } + sycl::event ev_copy, ev_spmm; oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; oneapi::mkl::sparse::dense_matrix_handle_t B_handle = nullptr; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp index 5b23ae3fd..12b449e61 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp @@ -33,7 +33,11 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, oneapi::mkl::transpose transpose_val, fpType alpha, fpType beta, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::matrix_view A_view, const std::set &matrix_properties, - bool reset_data) { + bool reset_data, bool test_scalar_on_device) { + if (test_scalar_on_device) { + // Scalars on the device is not planned to be supported with the buffer API + return 1; + } sycl::queue main_queue(*dev, exception_handler_t()); if (require_square_matrix(A_view, matrix_properties)) { diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp index 28e291bbe..85feacbda 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp @@ -33,7 +33,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, oneapi::mkl::transpose transpose_val, fpType alpha, fpType beta, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::matrix_view A_view, const std::set &matrix_properties, - bool reset_data) { + bool reset_data, bool test_scalar_on_device) { sycl::queue main_queue(*dev, exception_handler_t()); if (require_square_matrix(A_view, matrix_properties)) { @@ -72,6 +72,8 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); auto x_usm_uptr = 
malloc_device_uptr(main_queue, x_host.size()); auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); + auto alpha_usm_uptr = malloc_device_uptr(main_queue, 1); + auto beta_usm_uptr = malloc_device_uptr(main_queue, 1); intType *ia_usm = ia_usm_uptr.get(); intType *ja_usm = ja_usm_uptr.get(); @@ -93,6 +95,16 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, spmv_dependencies.push_back( main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); + fpType *alpha_host_or_usm_ptr = α + fpType *beta_host_or_usm_ptr = β + if (test_scalar_on_device) { + spmv_dependencies.push_back( + main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); + spmv_dependencies.push_back(main_queue.memcpy(beta_usm_uptr.get(), &beta, sizeof(fpType))); + alpha_host_or_usm_ptr = alpha_usm_uptr.get(); + beta_host_or_usm_ptr = beta_usm_uptr.get(); + } + sycl::event ev_copy, ev_spmv; oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; @@ -113,18 +125,20 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmv_descr, main_queue, &descr); std::size_t workspace_size = 0; - CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha, - A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_size); + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, descr, workspace_size); workspace_usm = malloc_device_uptr(main_queue, workspace_size); sycl::event ev_opt; CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, - &alpha, A_view, A_handle, x_handle, &beta, y_handle, alg, descr, - workspace_usm.get(), mat_dependencies); + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, 
descr, workspace_usm.get(), mat_dependencies); spmv_dependencies.push_back(ev_opt); - CALL_RT_OR_CT(ev_spmv = oneapi::mkl::sparse::spmv, main_queue, transpose_val, &alpha, - A_view, A_handle, x_handle, &beta, y_handle, alg, descr, spmv_dependencies); + CALL_RT_OR_CT(ev_spmv = oneapi::mkl::sparse::spmv, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, descr, spmv_dependencies); if (reset_data) { intType reset_nnz = generate_random_matrix( @@ -157,19 +171,20 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, ja_usm, a_usm); std::size_t workspace_size_2 = 0; - CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha, - A_view, A_handle, x_handle, &beta, y_handle, alg, descr, - workspace_size_2); + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, descr, workspace_size_2); if (workspace_size_2 > workspace_size) { workspace_usm = malloc_device_uptr(main_queue, workspace_size_2); } CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, - &alpha, A_view, A_handle, x_handle, &beta, y_handle, alg, descr, - workspace_usm.get(), mat_dependencies); + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, descr, workspace_usm.get(), mat_dependencies); - CALL_RT_OR_CT(ev_spmv = oneapi::mkl::sparse::spmv, main_queue, transpose_val, &alpha, - A_view, A_handle, x_handle, &beta, y_handle, alg, descr, { ev_opt }); + CALL_RT_OR_CT(ev_spmv = oneapi::mkl::sparse::spmv, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, descr, { ev_opt }); } ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), ev_spmv); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp 
b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp index 38f8fd427..c94c75ecf 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp @@ -32,7 +32,11 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::matrix_view A_view, const std::set &matrix_properties, - bool reset_data) { + bool reset_data, bool test_scalar_on_device) { + if (test_scalar_on_device) { + // Scalars on the device is not planned to be supported with the buffer API + return 1; + } sycl::queue main_queue(*dev, exception_handler_t()); intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp index 0832ed5ca..1bc025851 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp @@ -32,7 +32,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::matrix_view A_view, const std::set &matrix_properties, - bool reset_data) { + bool reset_data, bool test_scalar_on_device) { sycl::queue main_queue(*dev, exception_handler_t()); intType indexing = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; @@ -75,6 +75,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); auto x_usm_uptr = malloc_device_uptr(main_queue, x_host.size()); auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); + auto alpha_usm_uptr = malloc_device_uptr(main_queue, 1); intType *ia_usm = ia_usm_uptr.get(); intType *ja_usm = ja_usm_uptr.get(); @@ -96,6 +97,13 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl spsv_dependencies.push_back( main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); + fpType *alpha_host_or_usm_ptr = α + if (test_scalar_on_device) { + spsv_dependencies.push_back( + main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); + alpha_host_or_usm_ptr = alpha_usm_uptr.get(); + } + sycl::event ev_copy, ev_spsv; oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; @@ -113,18 +121,20 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl CALL_RT_OR_CT(oneapi::mkl::sparse::init_spsv_descr, main_queue, &descr); std::size_t workspace_size = 0; - CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha, - A_view, A_handle, x_handle, y_handle, alg, descr, workspace_size); + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, + workspace_size); workspace_usm = malloc_device_uptr(main_queue, workspace_size); sycl::event ev_opt; CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, - &alpha, A_view, A_handle, x_handle, y_handle, alg, descr, workspace_usm.get(), - mat_dependencies); + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, + workspace_usm.get(), mat_dependencies); spsv_dependencies.push_back(ev_opt); - CALL_RT_OR_CT(ev_spsv 
= oneapi::mkl::sparse::spsv, main_queue, transpose_val, &alpha, - A_view, A_handle, x_handle, y_handle, alg, descr, spsv_dependencies); + CALL_RT_OR_CT(ev_spsv = oneapi::mkl::sparse::spsv, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, + spsv_dependencies); if (reset_data) { intType reset_nnz = generate_random_matrix( @@ -156,18 +166,20 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl set_matrix_data(main_queue, format, A_handle, m, m, nnz, index, ia_usm, ja_usm, a_usm); std::size_t workspace_size_2 = 0; - CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha, - A_view, A_handle, x_handle, y_handle, alg, descr, workspace_size_2); + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, + workspace_size_2); if (workspace_size_2 > workspace_size) { workspace_usm = malloc_device_uptr(main_queue, workspace_size_2); } CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, - &alpha, A_view, A_handle, x_handle, y_handle, alg, descr, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, workspace_usm.get(), mat_dependencies); - CALL_RT_OR_CT(ev_spsv = oneapi::mkl::sparse::spsv, main_queue, transpose_val, &alpha, - A_view, A_handle, x_handle, y_handle, alg, descr, { ev_opt }); + CALL_RT_OR_CT(ev_spsv = oneapi::mkl::sparse::spsv, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, + { ev_opt }); } ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), ev_spsv); From 002d78867f238d0e357b1cd8bdd63bc6554a6463 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Tue, 2 Jul 2024 11:06:20 +0200 Subject: [PATCH 16/39] Add documentation for alpha and beta limitations --- docs/domains/sparse_linear_algebra.rst | 2 ++ 1 file changed, 2 
insertions(+) diff --git a/docs/domains/sparse_linear_algebra.rst b/docs/domains/sparse_linear_algebra.rst index eb39bd3f1..f1328c652 100644 --- a/docs/domains/sparse_linear_algebra.rst +++ b/docs/domains/sparse_linear_algebra.rst @@ -29,3 +29,5 @@ Known limitations as of Intel oneMKL product release 2024.1: ``oneapi::mkl::transpose::conjtrans`` and has the ``oneapi::mkl::sparse::matrix_property::symmetric`` property will throw an ``oneapi::mkl::unimplemented`` exception. +- Scalar parameters ``alpha`` and ``beta`` should be host pointers to prevent + synchronizations and copies to the host. From 5c37ee30a4975789bbe8ab7f35632d7e2b205292 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Tue, 2 Jul 2024 12:24:01 +0200 Subject: [PATCH 17/39] Reword and format mkl_handles comments --- src/sparse_blas/backends/mkl_common/mkl_handles.hpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.hpp b/src/sparse_blas/backends/mkl_common/mkl_handles.hpp index 9a46b6533..efadd72e7 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_handles.hpp +++ b/src/sparse_blas/backends/mkl_common/mkl_handles.hpp @@ -21,14 +21,16 @@ #define _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_ // MKLCPU and MKLGPU backends include -// This include defines its own oneapi::mkl::sparse namespace with some of the types that are used here: matrix_handle_t, index_base, transpose, uplo, diag. +// This include defines its own oneapi::mkl::sparse namespace with some of the +// types that are used here: matrix_handle_t, index_base, transpose, uplo, diag. #include #include "sparse_blas/generic_container.hpp" namespace oneapi::mkl::sparse { -// Complete the definition of incomplete types dense_vector_handle and dense_matrix_handle as there is no conflict with the oneMKL backends. +// Complete the definition of incomplete types dense_vector_handle and +// dense_matrix_handle as they don't exist in oneMKL backends yet. 
struct dense_vector_handle : public detail::generic_dense_vector_handle { template @@ -62,7 +64,9 @@ namespace oneapi::mkl::sparse::detail { * Internal sparse_matrix_handle type for MKL backends. * Here \p matrix_handle_t is the type of the backend's handle. * The user-facing incomplete type matrix_handle_t must be kept incomplete. - * Internally matrix_handle_t is reinterpret_cast as oneapi::mkl::sparse::detail::sparse_matrix_handle which holds another matrix_handle_t for the backend handle. + * Internally matrix_handle_t is reinterpret_cast as + * oneapi::mkl::sparse::detail::sparse_matrix_handle which holds another + * matrix_handle_t for the backend handle. */ using sparse_matrix_handle = detail::generic_sparse_handle; From 9dbd67bde71a9a2b19905509cf2d63117d0e87b7 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Tue, 2 Jul 2024 12:27:47 +0200 Subject: [PATCH 18/39] Replace __FUNCTION__ with __func__ --- .../backends/mkl_common/mkl_handles.cxx | 16 ++++++++-------- src/sparse_blas/backends/mkl_common/mkl_spmm.cxx | 12 ++++++------ src/sparse_blas/backends/mkl_common/mkl_spmv.cxx | 12 ++++++------ src/sparse_blas/backends/mkl_common/mkl_spsv.cxx | 12 ++++++------ 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx index c6f501027..49b2591fd 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx @@ -52,7 +52,7 @@ template void set_dense_vector_data(sycl::queue & /*queue*/, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t size, sycl::buffer val) { - check_can_reset_value_handle(__FUNCTION__, dvhandle, true); + check_can_reset_value_handle(__func__, dvhandle, true); dvhandle->size = size; dvhandle->set_buffer(val); } @@ -61,7 +61,7 @@ template void set_dense_vector_data(sycl::queue & /*queue*/, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t 
size, fpType *val) { - check_can_reset_value_handle(__FUNCTION__, dvhandle, false); + check_can_reset_value_handle(__func__, dvhandle, false); dvhandle->size = size; dvhandle->set_usm_ptr(val); } @@ -112,7 +112,7 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, oneapi::mkl::layout dense_layout, sycl::buffer val) { - check_can_reset_value_handle(__FUNCTION__, dmhandle, true); + check_can_reset_value_handle(__func__, dmhandle, true); dmhandle->num_rows = num_rows; dmhandle->num_cols = num_cols; dmhandle->ld = ld; @@ -125,7 +125,7 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, oneapi::mkl::layout dense_layout, fpType *val) { - check_can_reset_value_handle(__FUNCTION__, dmhandle, false); + check_can_reset_value_handle(__func__, dmhandle, false); dmhandle->num_rows = num_rows; dmhandle->num_cols = num_cols; dmhandle->ld = ld; @@ -217,7 +217,7 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle, true); + check_can_reset_sparse_handle(__func__, internal_smhandle, true); internal_smhandle->row_container.set_buffer(row_ind); internal_smhandle->col_container.set_buffer(col_ind); internal_smhandle->value_container.set_buffer(val); @@ -236,7 +236,7 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, intType *row_ind, intType *col_ind, fpType *val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle, false); + 
check_can_reset_sparse_handle(__func__, internal_smhandle, false); internal_smhandle->row_container.set_usm_ptr(row_ind); internal_smhandle->col_container.set_usm_ptr(col_ind); internal_smhandle->value_container.set_usm_ptr(val); @@ -309,7 +309,7 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle, true); + check_can_reset_sparse_handle(__func__, internal_smhandle, true); internal_smhandle->row_container.set_buffer(row_ptr); internal_smhandle->col_container.set_buffer(col_ind); internal_smhandle->value_container.set_buffer(val); @@ -329,7 +329,7 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ oneapi::mkl::index_base index, intType *row_ptr, intType *col_ind, fpType *val) { auto internal_smhandle = detail::get_internal_handle(smhandle); - check_can_reset_sparse_handle(__FUNCTION__, internal_smhandle, false); + check_can_reset_sparse_handle(__func__, internal_smhandle, false); internal_smhandle->row_container.set_usm_ptr(row_ptr); internal_smhandle->col_container.set_usm_ptr(col_ind); internal_smhandle->value_container.set_usm_ptr(val); diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index 56a0cedf7..7e5ba03b2 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -88,7 +88,7 @@ void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, std::size_t &temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
- check_valid_spmm(__FUNCTION__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + check_valid_spmm(__func__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); temp_buffer_size = 0; } @@ -100,10 +100,10 @@ void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl:: oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, sycl::buffer /*workspace*/) { - check_valid_spmm(__FUNCTION__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + check_valid_spmm(__func__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { - detail::throw_incompatible_container(__FUNCTION__); + detail::throw_incompatible_container(__func__); } if (alg == oneapi::mkl::sparse::spmm_alg::no_optimize_alg) { return; @@ -121,10 +121,10 @@ sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, void * /*workspace*/, const std::vector &dependencies) { - check_valid_spmm(__FUNCTION__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + check_valid_spmm(__func__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { - detail::throw_incompatible_container(__FUNCTION__); + detail::throw_incompatible_container(__func__); } if (alg == oneapi::mkl::sparse::spmm_alg::no_optimize_alg) { return detail::collapse_dependencies(queue, dependencies); @@ -174,7 +174,7 @@ sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::tr oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, const std::vector &dependencies) { - check_valid_spmm(__FUNCTION__, queue, opA, A_view, A_handle, 
B_handle, C_handle, alpha, beta); + check_valid_spmm(__func__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); DISPATCH_MKL_OPERATION("spmm", value_type, internal_spmm, queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, dependencies); diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index 7f0e21208..6a954feba 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -81,7 +81,7 @@ void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, std::size_t &temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. - check_valid_spmv(__FUNCTION__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); + check_valid_spmv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); temp_buffer_size = 0; } @@ -93,10 +93,10 @@ void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, sycl::buffer /*workspace*/) { - check_valid_spmv(__FUNCTION__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); + check_valid_spmv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { - detail::throw_incompatible_container(__FUNCTION__); + detail::throw_incompatible_container(__func__); } if (alg == oneapi::mkl::sparse::spmv_alg::no_optimize_alg) { return; @@ -127,10 +127,10 @@ sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, void * 
/*workspace*/, const std::vector &dependencies) { - check_valid_spmv(__FUNCTION__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); + check_valid_spmv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { - detail::throw_incompatible_container(__FUNCTION__); + detail::throw_incompatible_container(__func__); } if (alg == oneapi::mkl::sparse::spmv_alg::no_optimize_alg) { return detail::collapse_dependencies(queue, dependencies); @@ -210,7 +210,7 @@ sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alp oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, const std::vector &dependencies) { - check_valid_spmv(__FUNCTION__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); + check_valid_spmv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); DISPATCH_MKL_OPERATION("spmv", value_type, internal_spmv, queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, dependencies); diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index f1e5ebc01..f69cca852 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -67,7 +67,7 @@ void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose /*opA*/, const oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, std::size_t &temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
- check_valid_spsv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + check_valid_spsv(__func__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); temp_buffer_size = 0; } @@ -79,10 +79,10 @@ void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, sycl::buffer /*workspace*/) { - check_valid_spsv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + check_valid_spsv(__func__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { - detail::throw_incompatible_container(__FUNCTION__); + detail::throw_incompatible_container(__func__); } if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg) { return; @@ -102,10 +102,10 @@ sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, void * /*workspace*/, const std::vector &dependencies) { - check_valid_spsv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + check_valid_spsv(__func__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { - detail::throw_incompatible_container(__FUNCTION__); + detail::throw_incompatible_container(__func__); } if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg) { return detail::collapse_dependencies(queue, dependencies); @@ -149,7 +149,7 @@ sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alp oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, const std::vector &dependencies) { - check_valid_spsv(__FUNCTION__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + 
check_valid_spsv(__func__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); DISPATCH_MKL_OPERATION("spsv", value_type, internal_spsv, queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, dependencies); From c2b89f5580fb059a8070fb88cbe70574b59da799 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Wed, 3 Jul 2024 17:15:07 +0200 Subject: [PATCH 19/39] Allow to access host USM allocations on the host --- src/sparse_blas/backends/mkl_common/mkl_helper.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp index 021dabb51..d1303d949 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp +++ b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp @@ -38,9 +38,8 @@ namespace oneapi::mkl::sparse::detail { template inline bool is_ptr_accessible_on_host(sycl::queue &queue, const T *host_or_device_ptr) { auto alloc_type = sycl::get_pointer_type(host_or_device_ptr, queue.get_context()); - // Note sycl::usm::alloc::host may not be accessible on the host according to SYCL specification. - // sycl::usm::alloc::unknown is returned if the pointer is not a USM allocation which is assumed to be a normal host pointer. 
- return alloc_type == sycl::usm::alloc::shared || alloc_type == sycl::usm::alloc::unknown; + return alloc_type == sycl::usm::alloc::host || alloc_type == sycl::usm::alloc::shared || + alloc_type == sycl::usm::alloc::unknown; } /// Throw an exception if the scalar is not accessible in the host From d109332606bb7caed8737c73f26ab848708733a8 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 4 Jul 2024 10:53:18 +0200 Subject: [PATCH 20/39] Remove version from known limitations --- docs/domains/sparse_linear_algebra.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/domains/sparse_linear_algebra.rst b/docs/domains/sparse_linear_algebra.rst index f1328c652..8798303c9 100644 --- a/docs/domains/sparse_linear_algebra.rst +++ b/docs/domains/sparse_linear_algebra.rst @@ -12,7 +12,7 @@ sparse domain. OneMKL Intel CPU and GPU backends --------------------------------- -Known limitations as of Intel oneMKL product release 2024.1: +Currently known limitations: - All operations' algorithms except ``no_optimize_alg`` map to the default algorithm. From 9b9548aa98cb23b510a4907786350836430b25a7 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 4 Jul 2024 15:59:35 +0200 Subject: [PATCH 21/39] Disable spsv symmetric conjtrans --- docs/domains/sparse_linear_algebra.rst | 3 +++ .../backends/mkl_common/mkl_spmm.cxx | 5 +++- .../backends/mkl_common/mkl_spsv.cxx | 25 ++++++++++++++----- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/docs/domains/sparse_linear_algebra.rst b/docs/domains/sparse_linear_algebra.rst index 8798303c9..41b7c9d9e 100644 --- a/docs/domains/sparse_linear_algebra.rst +++ b/docs/domains/sparse_linear_algebra.rst @@ -29,5 +29,8 @@ Currently known limitations: ``oneapi::mkl::transpose::conjtrans`` and has the ``oneapi::mkl::sparse::matrix_property::symmetric`` property will throw an ``oneapi::mkl::unimplemented`` exception. 
+- Using ``spsv`` on Intel GPU with a sparse matrix that is +  ``oneapi::mkl::transpose::conjtrans`` will throw an +  ``oneapi::mkl::unimplemented`` exception. - Scalar parameters ``alpha`` and ``beta`` should be host pointers to prevent synchronizations and copies to the host. diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index 7e5ba03b2..aa292921e 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -67,7 +67,10 @@ void check_valid_spmm(const std::string function_name, sycl::queue &queue, } #if BACKEND == gpu - if (opA == oneapi::mkl::transpose::conjtrans && + detail::data_type data_type = internal_A_handle->get_value_type(); + if ((data_type == detail::data_type::complex_fp32 || + data_type == detail::data_type::complex_fp64) && + opA == oneapi::mkl::transpose::conjtrans && internal_A_handle->has_matrix_property(oneapi::mkl::sparse::matrix_property::symmetric)) { throw mkl::unimplemented( "sparse_blas", function_name, diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index f69cca852..ea8410a88 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -30,7 +30,7 @@ sycl::event release_spsv_descr(sycl::queue &queue, oneapi::mkl::sparse::spsv_des } void check_valid_spsv(const std::string function_name, sycl::queue &queue, - oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, const void *alpha, @@ -47,6 +47,19 @@ void check_valid_spsv(const std::string function_name, sycl::queue &queue, "The backend does not support `no_optimize_alg` unless A_handle has the property `matrix_property::sorted`."); }
+#if BACKEND == gpu + detail::data_type data_type = internal_A_handle->get_value_type(); + if ((data_type == detail::data_type::complex_fp32 || + data_type == detail::data_type::complex_fp64) && + opA == oneapi::mkl::transpose::conjtrans) { + throw mkl::unimplemented( + "sparse_blas", function_name, + "The backend does not support spsv using conjtrans."); + } +#else + (void)opA; +#endif // BACKEND + detail::check_all_containers_compatible(function_name, internal_A_handle, x_handle, y_handle); if (A_view.type_view != matrix_descr::triangular) { throw mkl::invalid_argument("sparse_blas", function_name, @@ -58,7 +71,7 @@ void check_valid_spsv(const std::string function_name, sycl::queue &queue, } } -void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose /*opA*/, const void *alpha, +void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, @@ -67,7 +80,7 @@ void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose /*opA*/, const oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, std::size_t &temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
- check_valid_spsv(__func__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + check_valid_spsv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, alg); temp_buffer_size = 0; } @@ -79,7 +92,7 @@ void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, sycl::buffer /*workspace*/) { - check_valid_spsv(__func__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + check_valid_spsv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, alg); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -102,7 +115,7 @@ sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, void * /*workspace*/, const std::vector &dependencies) { - check_valid_spsv(__func__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + check_valid_spsv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, alg); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -149,7 +162,7 @@ sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alp oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, const std::vector &dependencies) { - check_valid_spsv(__func__, queue, A_view, A_handle, x_handle, y_handle, alpha, alg); + check_valid_spsv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, alg); auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); DISPATCH_MKL_OPERATION("spsv", value_type, internal_spsv, queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, dependencies); 
From 82566e5026444717991adffbd1c809989ae79aea Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Tue, 9 Jul 2024 15:12:22 +0200 Subject: [PATCH 22/39] Test symmetric with complex types and hermitian and conjtrans with real types --- .../sparse_blas/include/test_spmm.hpp | 7 +-- .../sparse_blas/include/test_spmv.hpp | 61 ++++++++++++++++--- .../sparse_blas/source/sparse_spmv_buffer.cpp | 20 ++---- .../sparse_blas/source/sparse_spmv_usm.cpp | 20 ++---- 4 files changed, 61 insertions(+), 47 deletions(-) diff --git a/tests/unit_tests/sparse_blas/include/test_spmm.hpp b/tests/unit_tests/sparse_blas/include/test_spmm.hpp index 049d58b88..6188d4268 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmm.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmm.hpp @@ -205,7 +205,6 @@ void test_helper_with_format_with_transpose( /** * Helper function to test combination of transpose vals. - * Only test \p conjtrans if \p fpType is complex. * * @tparam fpType Complex or scalar, single or double precision type * @tparam testFunctorI32 Test functor for fpType and int32 @@ -223,10 +222,8 @@ void test_helper_with_format( const std::vector &non_default_algorithms, int &num_passed, int &num_skipped) { std::vector transpose_vals{ oneapi::mkl::transpose::nontrans, - oneapi::mkl::transpose::trans }; - if (complex_info::is_complex) { - transpose_vals.push_back(oneapi::mkl::transpose::conjtrans); - } + oneapi::mkl::transpose::trans, + oneapi::mkl::transpose::conjtrans }; for (auto transpose_A : transpose_vals) { for (auto transpose_B : transpose_vals) { test_helper_with_format_with_transpose( diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp index 43599e9d3..6ee256adb 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp @@ -51,7 +51,7 @@ * The test functions will use different sizes if the configuration implies a symmetric matrix. 
*/ template -void test_helper_with_format( +void test_helper_with_format_with_transpose( testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, sparse_matrix_format_t format, const std::vector &non_default_algorithms, @@ -153,22 +153,37 @@ void test_helper_with_format( no_reset_data, no_scalars_on_device), num_passed, num_skipped); if (transpose_val != oneapi::mkl::transpose::conjtrans) { - // Lower symmetric or hermitian + // Do not test conjtrans with symmetric or hermitian views as no backend supports it. + // Lower symmetric oneapi::mkl::sparse::matrix_view symmetric_view( - complex_info::is_complex ? oneapi::mkl::sparse::matrix_descr::hermitian - : oneapi::mkl::sparse::matrix_descr::symmetric); + oneapi::mkl::sparse::matrix_descr::symmetric); EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, default_alg, symmetric_view, no_properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); - // Upper symmetric or hermitian + // Upper symmetric symmetric_view.uplo_view = oneapi::mkl::uplo::upper; EXPECT_TRUE_OR_FUTURE_SKIP( test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, default_alg, symmetric_view, no_properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); + // Lower hermitian + oneapi::mkl::sparse::matrix_view hermitian_view( + oneapi::mkl::sparse::matrix_descr::hermitian); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + transpose_val, fp_one, fp_zero, default_alg, hermitian_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Upper hermitian + hermitian_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + transpose_val, fp_one, fp_zero, default_alg, 
hermitian_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); } // Test other algorithms for (auto alg : non_default_algorithms) { @@ -188,6 +203,34 @@ void test_helper_with_format( } } +/** + * Helper function to test combination of transpose vals. + * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param format Sparse matrix format to use + * @param non_default_algorithms Algorithms compatible with the given format, other than default_alg + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper_with_format( + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + sparse_matrix_format_t format, + const std::vector &non_default_algorithms, int &num_passed, + int &num_skipped) { + std::vector transpose_vals{ oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::trans, + oneapi::mkl::transpose::conjtrans }; + for (auto transpose_A : transpose_vals) { + test_helper_with_format_with_transpose(test_functor_i32, test_functor_i64, dev, + format, non_default_algorithms, transpose_A, + num_passed, num_skipped); + } +} + /** * Helper function to test multiple sparse matrix format and choose valid algorithms. 
* @@ -195,24 +238,22 @@ void test_helper_with_format( * @tparam testFunctorI32 Test functor for fpType and int32 * @tparam testFunctorI64 Test functor for fpType and int64 * @param dev Device to test - * @param transpose_val Transpose value for the input matrix * @param num_passed Increase the number of configurations passed * @param num_skipped Increase the number of configurations skipped */ template void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, - sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, - int &num_skipped) { + sycl::device *dev, int &num_passed, int &num_skipped) { test_helper_with_format( test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR, { oneapi::mkl::sparse::spmv_alg::no_optimize_alg, oneapi::mkl::sparse::spmv_alg::csr_alg1, oneapi::mkl::sparse::spmv_alg::csr_alg2, oneapi::mkl::sparse::spmv_alg::csr_alg3 }, - transpose_val, num_passed, num_skipped); + num_passed, num_skipped); test_helper_with_format( test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::COO, { oneapi::mkl::sparse::spmv_alg::no_optimize_alg, oneapi::mkl::sparse::spmv_alg::coo_alg1, oneapi::mkl::sparse::spmv_alg::coo_alg2 }, - transpose_val, num_passed, num_skipped); + num_passed, num_skipped); } /// Compute spmv reference as a dense operation diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp index 12b449e61..0ba5afb9c 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp @@ -184,9 +184,7 @@ TEST_P(SparseSpmvBufferTests, RealSinglePrecision) { using fpType = float; int num_passed = 0, num_skipped = 0; test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::trans, num_passed, num_skipped); + num_passed, 
num_skipped); if (num_skipped > 0) { // Mark that some tests were skipped GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped @@ -199,9 +197,7 @@ TEST_P(SparseSpmvBufferTests, RealDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(GetParam()); int num_passed = 0, num_skipped = 0; test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::trans, num_passed, num_skipped); + num_passed, num_skipped); if (num_skipped > 0) { // Mark that some tests were skipped GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped @@ -213,11 +209,7 @@ TEST_P(SparseSpmvBufferTests, ComplexSinglePrecision) { using fpType = std::complex; int num_passed = 0, num_skipped = 0; test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + num_passed, num_skipped); if (num_skipped > 0) { // Mark that some tests were skipped GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped @@ -230,11 +222,7 @@ TEST_P(SparseSpmvBufferTests, ComplexDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(GetParam()); int num_passed = 0, num_skipped = 0; test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + num_passed, num_skipped); if (num_skipped > 0) { // Mark that some tests were skipped GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped diff --git 
a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp index 85feacbda..fdeb57913 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp @@ -233,9 +233,7 @@ TEST_P(SparseSpmvUsmTests, RealSinglePrecision) { using fpType = float; int num_passed = 0, num_skipped = 0; test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::trans, num_passed, num_skipped); + num_passed, num_skipped); if (num_skipped > 0) { // Mark that some tests were skipped GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped @@ -248,9 +246,7 @@ TEST_P(SparseSpmvUsmTests, RealDoublePrecision) { CHECK_DOUBLE_ON_DEVICE(GetParam()); int num_passed = 0, num_skipped = 0; test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::trans, num_passed, num_skipped); + num_passed, num_skipped); if (num_skipped > 0) { // Mark that some tests were skipped GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped @@ -262,11 +258,7 @@ TEST_P(SparseSpmvUsmTests, ComplexSinglePrecision) { using fpType = std::complex; int num_passed = 0, num_skipped = 0; test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + num_passed, num_skipped); if (num_skipped > 0) { // Mark that some tests were skipped GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped @@ -279,11 +271,7 @@ TEST_P(SparseSpmvUsmTests, ComplexDoublePrecision) { 
CHECK_DOUBLE_ON_DEVICE(GetParam()); int num_passed = 0, num_skipped = 0; test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(test_spmv, test_spmv, GetParam(), - oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + num_passed, num_skipped); if (num_skipped > 0) { // Mark that some tests were skipped GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped From 0bac3d49c56c48499e661d0cd4199407cbaabcc9 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 12 Jul 2024 11:36:52 +0200 Subject: [PATCH 23/39] Merge operations in one file --- .../backends/mklcpu/CMakeLists.txt | 4 +-- ...{mklcpu_spmm.cpp => mklcpu_operations.cpp} | 2 ++ .../backends/mklcpu/mklcpu_spmv.cpp | 30 ------------------- .../backends/mklcpu/mklcpu_spsv.cpp | 30 ------------------- .../backends/mklgpu/CMakeLists.txt | 4 +-- ...{mklgpu_spmv.cpp => mklgpu_operations.cpp} | 2 ++ .../backends/mklgpu/mklgpu_spmm.cpp | 30 ------------------- .../backends/mklgpu/mklgpu_spsv.cpp | 30 ------------------- 8 files changed, 6 insertions(+), 126 deletions(-) rename src/sparse_blas/backends/mklcpu/{mklcpu_spmm.cpp => mklcpu_operations.cpp} (91%) delete mode 100644 src/sparse_blas/backends/mklcpu/mklcpu_spmv.cpp delete mode 100644 src/sparse_blas/backends/mklcpu/mklcpu_spsv.cpp rename src/sparse_blas/backends/mklgpu/{mklgpu_spmv.cpp => mklgpu_operations.cpp} (91%) delete mode 100644 src/sparse_blas/backends/mklgpu/mklgpu_spmm.cpp delete mode 100644 src/sparse_blas/backends/mklgpu/mklgpu_spsv.cpp diff --git a/src/sparse_blas/backends/mklcpu/CMakeLists.txt b/src/sparse_blas/backends/mklcpu/CMakeLists.txt index c851db8bc..e41cae268 100644 --- a/src/sparse_blas/backends/mklcpu/CMakeLists.txt +++ b/src/sparse_blas/backends/mklcpu/CMakeLists.txt @@ -25,9 +25,7 @@ include(WarningsUtils) add_library(${LIB_NAME}) 
add_library(${LIB_OBJ} OBJECT mklcpu_handles.cpp - mklcpu_spmm.cpp - mklcpu_spmv.cpp - mklcpu_spsv.cpp + mklcpu_operations.cpp $<$: mklcpu_wrappers.cpp> ) add_dependencies(onemkl_backend_libs_sparse_blas ${LIB_NAME}) diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_spmm.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp similarity index 91% rename from src/sparse_blas/backends/mklcpu/mklcpu_spmm.cpp rename to src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp index de34dcb4d..4e0242c2d 100644 --- a/src/sparse_blas/backends/mklcpu/mklcpu_spmm.cpp +++ b/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp @@ -26,5 +26,7 @@ namespace oneapi::mkl::sparse::mklcpu { #include "sparse_blas/backends/mkl_common/mkl_spmm.cxx" +#include "sparse_blas/backends/mkl_common/mkl_spmv.cxx" +#include "sparse_blas/backends/mkl_common/mkl_spsv.cxx" } // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_spmv.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_spmv.cpp deleted file mode 100644 index ffbba2f5e..000000000 --- a/src/sparse_blas/backends/mklcpu/mklcpu_spmv.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/*************************************************************************** -* Copyright (C) Codeplay Software Limited -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* For your convenience, a copy of the License has been included in this -* repository. -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-* -**************************************************************************/ - -#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" -#include "sparse_blas/macros.hpp" -#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" - -#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" - -namespace oneapi::mkl::sparse::mklcpu { - -#include "sparse_blas/backends/mkl_common/mkl_spmv.cxx" - -} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_spsv.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_spsv.cpp deleted file mode 100644 index aff9b4abc..000000000 --- a/src/sparse_blas/backends/mklcpu/mklcpu_spsv.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/*************************************************************************** -* Copyright (C) Codeplay Software Limited -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* For your convenience, a copy of the License has been included in this -* repository. -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-* -**************************************************************************/ - -#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" -#include "sparse_blas/macros.hpp" -#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" - -#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" - -namespace oneapi::mkl::sparse::mklcpu { - -#include "sparse_blas/backends/mkl_common/mkl_spsv.cxx" - -} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklgpu/CMakeLists.txt b/src/sparse_blas/backends/mklgpu/CMakeLists.txt index b83a39297..cd25babc2 100644 --- a/src/sparse_blas/backends/mklgpu/CMakeLists.txt +++ b/src/sparse_blas/backends/mklgpu/CMakeLists.txt @@ -25,9 +25,7 @@ include(WarningsUtils) add_library(${LIB_NAME}) add_library(${LIB_OBJ} OBJECT mklgpu_handles.cpp - mklgpu_spmm.cpp - mklgpu_spmv.cpp - mklgpu_spsv.cpp + mklgpu_operations.cpp $<$: mklgpu_wrappers.cpp> ) add_dependencies(onemkl_backend_libs_sparse_blas ${LIB_NAME}) diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_spmv.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp similarity index 91% rename from src/sparse_blas/backends/mklgpu/mklgpu_spmv.cpp rename to src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp index 6a7087a86..0c5a73fb0 100644 --- a/src/sparse_blas/backends/mklgpu/mklgpu_spmv.cpp +++ b/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp @@ -25,6 +25,8 @@ namespace oneapi::mkl::sparse::mklgpu { +#include "sparse_blas/backends/mkl_common/mkl_spmm.cxx" #include "sparse_blas/backends/mkl_common/mkl_spmv.cxx" +#include "sparse_blas/backends/mkl_common/mkl_spsv.cxx" } // namespace oneapi::mkl::sparse::mklgpu diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_spmm.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_spmm.cpp deleted file mode 100644 index ca2338787..000000000 --- a/src/sparse_blas/backends/mklgpu/mklgpu_spmm.cpp +++ /dev/null @@ -1,30 +0,0 @@ 
-/*************************************************************************** -* Copyright (C) Codeplay Software Limited -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* For your convenience, a copy of the License has been included in this -* repository. -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -**************************************************************************/ - -#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" -#include "sparse_blas/macros.hpp" -#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" - -#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" - -namespace oneapi::mkl::sparse::mklgpu { - -#include "sparse_blas/backends/mkl_common/mkl_spmm.cxx" - -} // namespace oneapi::mkl::sparse::mklgpu diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_spsv.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_spsv.cpp deleted file mode 100644 index b42d4539f..000000000 --- a/src/sparse_blas/backends/mklgpu/mklgpu_spsv.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/*************************************************************************** -* Copyright (C) Codeplay Software Limited -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* For your convenience, a copy of the License has been included in this -* repository. 
-* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -* -**************************************************************************/ - -#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" -#include "sparse_blas/macros.hpp" -#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" - -#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" - -namespace oneapi::mkl::sparse::mklgpu { - -#include "sparse_blas/backends/mkl_common/mkl_spsv.cxx" - -} // namespace oneapi::mkl::sparse::mklgpu From d04452ac2c020f5c0336d05290d402afeb123713 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 12 Jul 2024 11:41:57 +0200 Subject: [PATCH 24/39] Make get_data_type constexpr --- src/sparse_blas/enum_data_types.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sparse_blas/enum_data_types.hpp b/src/sparse_blas/enum_data_types.hpp index 7114482ee..26946facb 100644 --- a/src/sparse_blas/enum_data_types.hpp +++ b/src/sparse_blas/enum_data_types.hpp @@ -40,7 +40,7 @@ inline std::string data_type_to_str(data_type data_type) { } template -data_type get_data_type() { +constexpr data_type get_data_type() { if constexpr (std::is_same_v) { return data_type::int32; } From e8eac877710b5f802d6a9953914059417e074c2b Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Mon, 15 Jul 2024 18:20:03 +0200 Subject: [PATCH 25/39] Remove unused macro TEST_RUN_CT_SELECT --- tests/unit_tests/include/test_helper.hpp | 30 ++++++------------- .../sparse_blas/include/test_common.hpp | 2 +- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/tests/unit_tests/include/test_helper.hpp b/tests/unit_tests/include/test_helper.hpp index 7e0024195..ad215761f 100644 --- 
a/tests/unit_tests/include/test_helper.hpp +++ b/tests/unit_tests/include/test_helper.hpp @@ -203,7 +203,7 @@ TEST_RUN_PORTFFT_SELECT_NO_ARGS(q, func); \ } while (0); -#define TEST_RUN_CT_SELECT(q, func, ...) \ +#define TEST_RUN_BLAS_CT_SELECT(q, func, ...) \ do { \ if (CHECK_HOST_OR_CPU(q)) \ TEST_RUN_INTELCPU_SELECT(q, func, __VA_ARGS__); \ @@ -214,21 +214,15 @@ TEST_RUN_INTELGPU_SELECT(q, func, __VA_ARGS__); \ else if (vendor_id == NVIDIA_ID) { \ TEST_RUN_NVIDIAGPU_CUBLAS_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_NVIDIAGPU_CUSOLVER_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_NVIDIAGPU_CURAND_SELECT(q, func, __VA_ARGS__); \ } \ else if (vendor_id == AMD_ID) { \ TEST_RUN_AMDGPU_ROCBLAS_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_AMDGPU_ROCRAND_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_AMDGPU_ROCSOLVER_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_AMDGPU_ROCFFT_SELECT(q, func, __VA_ARGS__); \ } \ } \ TEST_RUN_PORTBLAS_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_PORTFFT_SELECT(q, func, __VA_ARGS__); \ } while (0); -#define TEST_RUN_BLAS_CT_SELECT(q, func, ...) \ +#define TEST_RUN_RNG_CT_SELECT(q, func, ...) \ do { \ if (CHECK_HOST_OR_CPU(q)) \ TEST_RUN_INTELCPU_SELECT(q, func, __VA_ARGS__); \ @@ -238,16 +232,15 @@ if (vendor_id == INTEL_ID) \ TEST_RUN_INTELGPU_SELECT(q, func, __VA_ARGS__); \ else if (vendor_id == NVIDIA_ID) { \ - TEST_RUN_NVIDIAGPU_CUBLAS_SELECT(q, func, __VA_ARGS__); \ + TEST_RUN_NVIDIAGPU_CURAND_SELECT(q, func, __VA_ARGS__); \ } \ else if (vendor_id == AMD_ID) { \ - TEST_RUN_AMDGPU_ROCBLAS_SELECT(q, func, __VA_ARGS__); \ + TEST_RUN_AMDGPU_ROCRAND_SELECT(q, func, __VA_ARGS__); \ } \ } \ - TEST_RUN_PORTBLAS_SELECT(q, func, __VA_ARGS__); \ } while (0); -#define TEST_RUN_RNG_CT_SELECT(q, func, ...) \ +#define TEST_RUN_LAPACK_CT_SELECT(q, func, ...) 
\ do { \ if (CHECK_HOST_OR_CPU(q)) \ TEST_RUN_INTELCPU_SELECT(q, func, __VA_ARGS__); \ @@ -257,28 +250,23 @@ if (vendor_id == INTEL_ID) \ TEST_RUN_INTELGPU_SELECT(q, func, __VA_ARGS__); \ else if (vendor_id == NVIDIA_ID) { \ - TEST_RUN_NVIDIAGPU_CURAND_SELECT(q, func, __VA_ARGS__); \ + TEST_RUN_NVIDIAGPU_CUSOLVER_SELECT(q, func, __VA_ARGS__); \ } \ else if (vendor_id == AMD_ID) { \ - TEST_RUN_AMDGPU_ROCRAND_SELECT(q, func, __VA_ARGS__); \ + TEST_RUN_AMDGPU_ROCSOLVER_SELECT(q, func, __VA_ARGS__); \ } \ } \ } while (0); -#define TEST_RUN_LAPACK_CT_SELECT(q, func, ...) \ +#define TEST_RUN_SPARSE_CT_SELECT(q, func, ...) \ do { \ if (CHECK_HOST_OR_CPU(q)) \ TEST_RUN_INTELCPU_SELECT(q, func, __VA_ARGS__); \ else if (q.get_device().is_gpu()) { \ unsigned int vendor_id = static_cast( \ q.get_device().get_info()); \ - if (vendor_id == INTEL_ID) \ + if (vendor_id == INTEL_ID) { \ TEST_RUN_INTELGPU_SELECT(q, func, __VA_ARGS__); \ - else if (vendor_id == NVIDIA_ID) { \ - TEST_RUN_NVIDIAGPU_CUSOLVER_SELECT(q, func, __VA_ARGS__); \ - } \ - else if (vendor_id == AMD_ID) { \ - TEST_RUN_AMDGPU_ROCSOLVER_SELECT(q, func, __VA_ARGS__); \ } \ } \ } while (0); diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp index 48d0b13ee..d579ff5fb 100644 --- a/tests/unit_tests/sparse_blas/include/test_common.hpp +++ b/tests/unit_tests/sparse_blas/include/test_common.hpp @@ -39,7 +39,7 @@ #ifdef CALL_RT_API #define CALL_RT_OR_CT(FUNC, QUEUE, ...) FUNC(QUEUE, __VA_ARGS__) #else -#define CALL_RT_OR_CT(FUNC, QUEUE, ...) TEST_RUN_CT_SELECT(QUEUE, FUNC, __VA_ARGS__); +#define CALL_RT_OR_CT(FUNC, QUEUE, ...) 
TEST_RUN_SPARSE_CT_SELECT(QUEUE, FUNC, __VA_ARGS__); #endif template From 2a59f228356de672ca30bc8ef00659d4459050d8 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Wed, 17 Jul 2024 12:08:35 +0200 Subject: [PATCH 26/39] clang-format --- .../run_time_dispatching/sparse_blas_spmv_usm.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp index 4a8a4280b..f93569a92 100644 --- a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp +++ b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp @@ -148,8 +148,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // Create and initialize dense vector handles oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; - oneapi::mkl::sparse::init_dense_vector(main_queue, &x_handle, static_cast(sizevec), x); - oneapi::mkl::sparse::init_dense_vector(main_queue, &y_handle, static_cast(sizevec), y); + oneapi::mkl::sparse::init_dense_vector(main_queue, &x_handle, + static_cast(sizevec), x); + oneapi::mkl::sparse::init_dense_vector(main_queue, &y_handle, + static_cast(sizevec), y); // Create operation descriptor oneapi::mkl::sparse::spmv_descr_t descr = nullptr; From 43f4669048634c40cef3dcd8c7e0c0615a71e8c0 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Thu, 18 Jul 2024 19:58:14 +0200 Subject: [PATCH 27/39] Take string as reference --- src/sparse_blas/backends/mkl_common/mkl_spmm.cxx | 2 +- src/sparse_blas/backends/mkl_common/mkl_spmv.cxx | 2 +- src/sparse_blas/backends/mkl_common/mkl_spsv.cxx | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index aa292921e..604db11a7 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ 
b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -29,7 +29,7 @@ sycl::event release_spmm_descr(sycl::queue &queue, oneapi::mkl::sparse::spmm_des return detail::collapse_dependencies(queue, dependencies); } -void check_valid_spmm(const std::string function_name, sycl::queue &queue, +void check_valid_spmm(const std::string &function_name, sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_matrix_handle_t B_handle, diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index 6a954feba..b35ad0847 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -29,7 +29,7 @@ sycl::event release_spmv_descr(sycl::queue &queue, oneapi::mkl::sparse::spmv_des return detail::collapse_dependencies(queue, dependencies); } -void check_valid_spmv(const std::string function_name, sycl::queue &queue, +void check_valid_spmv(const std::string &function_name, sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index ea8410a88..4ca4ee9d8 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -29,7 +29,7 @@ sycl::event release_spsv_descr(sycl::queue &queue, oneapi::mkl::sparse::spsv_des return detail::collapse_dependencies(queue, dependencies); } -void check_valid_spsv(const std::string function_name, sycl::queue &queue, +void check_valid_spsv(const std::string &function_name, sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, 
oneapi::mkl::sparse::dense_vector_handle_t x_handle, From 2f59edce37db07bd9eb897892572b136dded53cf Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 19 Jul 2024 11:20:46 +0200 Subject: [PATCH 28/39] Reduce number of calls to get_pointer_type --- .../backends/mkl_common/mkl_helper.hpp | 13 ++-- .../backends/mkl_common/mkl_spmm.cxx | 60 +++++++++++-------- .../backends/mkl_common/mkl_spmv.cxx | 47 ++++++++++----- .../backends/mkl_common/mkl_spsv.cxx | 35 +++++++---- 4 files changed, 97 insertions(+), 58 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp index d1303d949..c76af5cb6 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp +++ b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp @@ -43,10 +43,10 @@ inline bool is_ptr_accessible_on_host(sycl::queue &queue, const T *host_or_devic } /// Throw an exception if the scalar is not accessible in the host -template -void check_ptr_is_host_accessible(const std::string &function_name, const std::string &scalar_name, - sycl::queue &queue, const T *host_or_device_ptr) { - if (!is_ptr_accessible_on_host(queue, host_or_device_ptr)) { +inline void check_ptr_is_host_accessible(const std::string &function_name, + const std::string &scalar_name, + bool is_ptr_accessible_on_host) { + if (!is_ptr_accessible_on_host) { throw mkl::invalid_argument( "sparse_blas", function_name, "Scalar " + scalar_name + " must be accessible on the host for buffer functions."); @@ -56,8 +56,9 @@ void check_ptr_is_host_accessible(const std::string &function_name, const std::s /// Return a scalar on the host from a pointer to host or device memory /// Used for USM functions template -inline T get_scalar_on_host(sycl::queue &queue, const T *host_or_device_ptr) { - if (is_ptr_accessible_on_host(queue, host_or_device_ptr)) { +inline T get_scalar_on_host(sycl::queue &queue, const T *host_or_device_ptr, + bool is_ptr_accessible_on_host) { + if 
(is_ptr_accessible_on_host) { return *host_or_device_ptr; } T scalar; diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index 604db11a7..3c2a9f161 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -29,12 +29,12 @@ sycl::event release_spmm_descr(sycl::queue &queue, oneapi::mkl::sparse::spmm_des return detail::collapse_dependencies(queue, dependencies); } -void check_valid_spmm(const std::string &function_name, sycl::queue &queue, - oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view, +void check_valid_spmm(const std::string &function_name, oneapi::mkl::transpose opA, + oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_matrix_handle_t B_handle, - oneapi::mkl::sparse::dense_matrix_handle_t C_handle, const void *alpha, - const void *beta) { + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + bool is_alpha_host_accessible, bool is_beta_host_accessible) { THROW_IF_NULLPTR(function_name, A_handle); THROW_IF_NULLPTR(function_name, B_handle); THROW_IF_NULLPTR(function_name, C_handle); @@ -42,11 +42,10 @@ void check_valid_spmm(const std::string &function_name, sycl::queue &queue, auto internal_A_handle = detail::get_internal_handle(A_handle); detail::check_all_containers_compatible(function_name, internal_A_handle, B_handle, C_handle); if (internal_A_handle->all_use_buffer()) { - detail::check_ptr_is_host_accessible("spmm", "alpha", queue, alpha); - detail::check_ptr_is_host_accessible("spmm", "beta", queue, beta); + detail::check_ptr_is_host_accessible("spmm", "alpha", is_alpha_host_accessible); + detail::check_ptr_is_host_accessible("spmm", "beta", is_beta_host_accessible); } - if (detail::is_ptr_accessible_on_host(queue, alpha) != - detail::is_ptr_accessible_on_host(queue, beta)) { + if (is_alpha_host_accessible != is_beta_host_accessible) { throw 
mkl::invalid_argument( "sparse_blas", function_name, "Alpha and beta must both be placed on host memory or device memory."); @@ -91,7 +90,10 @@ void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, std::size_t &temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. - check_valid_spmm(__func__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, + is_beta_host_accessible); temp_buffer_size = 0; } @@ -103,7 +105,10 @@ void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl:: oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, sycl::buffer /*workspace*/) { - check_valid_spmm(__func__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, + is_beta_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -124,7 +129,10 @@ sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, void * /*workspace*/, const std::vector &dependencies) { - check_valid_spmm(__func__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = 
detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, + is_beta_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -138,17 +146,17 @@ sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, } template -sycl::event internal_spmm(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void *alpha, - oneapi::mkl::sparse::matrix_view /*A_view*/, - oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, - oneapi::mkl::sparse::dense_matrix_handle_t C_handle, - oneapi::mkl::sparse::spmm_alg /*alg*/, - oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, - const std::vector &dependencies) { - T host_alpha = detail::get_scalar_on_host(queue, static_cast(alpha)); - T host_beta = detail::get_scalar_on_host(queue, static_cast(beta)); +sycl::event internal_spmm( + sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, const void *alpha, + oneapi::mkl::sparse::matrix_view /*A_view*/, oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg /*alg*/, + oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, const std::vector &dependencies, + bool is_alpha_host_accessible, bool is_beta_host_accessible) { + T host_alpha = + detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); + T host_beta = + detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; auto layout = B_handle->dense_layout; @@ -177,8 +185,12 @@ sycl::event spmm(sycl::queue &queue, 
oneapi::mkl::transpose opA, oneapi::mkl::tr oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, const std::vector &dependencies) { - check_valid_spmm(__func__, queue, opA, A_view, A_handle, B_handle, C_handle, alpha, beta); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, + is_beta_host_accessible); auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); DISPATCH_MKL_OPERATION("spmm", value_type, internal_spmm, queue, opA, opB, alpha, A_view, - A_handle, B_handle, beta, C_handle, alg, spmm_descr, dependencies); + A_handle, B_handle, beta, C_handle, alg, spmm_descr, dependencies, + is_alpha_host_accessible, is_beta_host_accessible); } diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index b35ad0847..930e1ec87 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -29,12 +29,12 @@ sycl::event release_spmv_descr(sycl::queue &queue, oneapi::mkl::sparse::spmv_des return detail::collapse_dependencies(queue, dependencies); } -void check_valid_spmv(const std::string &function_name, sycl::queue &queue, - oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view, +void check_valid_spmv(const std::string &function_name, oneapi::mkl::transpose opA, + oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, - oneapi::mkl::sparse::dense_vector_handle_t y_handle, const void *alpha, - const void *beta) { + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + bool is_alpha_host_accessible, bool is_beta_host_accessible) { THROW_IF_NULLPTR(function_name, 
A_handle); THROW_IF_NULLPTR(function_name, x_handle); THROW_IF_NULLPTR(function_name, y_handle); @@ -42,11 +42,10 @@ void check_valid_spmv(const std::string &function_name, sycl::queue &queue, auto internal_A_handle = detail::get_internal_handle(A_handle); detail::check_all_containers_compatible(function_name, internal_A_handle, x_handle, y_handle); if (internal_A_handle->all_use_buffer()) { - detail::check_ptr_is_host_accessible("spmv", "alpha", queue, alpha); - detail::check_ptr_is_host_accessible("spmv", "beta", queue, beta); + detail::check_ptr_is_host_accessible("spmv", "alpha", is_alpha_host_accessible); + detail::check_ptr_is_host_accessible("spmv", "beta", is_beta_host_accessible); } - if (detail::is_ptr_accessible_on_host(queue, alpha) != - detail::is_ptr_accessible_on_host(queue, beta)) { + if (is_alpha_host_accessible != is_beta_host_accessible) { throw mkl::invalid_argument( "sparse_blas", function_name, "Alpha and beta must both be placed on host memory or device memory."); @@ -81,7 +80,10 @@ void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, std::size_t &temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
- check_valid_spmv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + is_beta_host_accessible); temp_buffer_size = 0; } @@ -93,7 +95,10 @@ void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, sycl::buffer /*workspace*/) { - check_valid_spmv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + is_beta_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -127,7 +132,10 @@ sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, void * /*workspace*/, const std::vector &dependencies) { - check_valid_spmv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + is_beta_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -158,9 +166,12 @@ sycl::event 
internal_spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg /*alg*/, oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, - const std::vector &dependencies) { - T host_alpha = detail::get_scalar_on_host(queue, static_cast(alpha)); - T host_beta = detail::get_scalar_on_host(queue, static_cast(beta)); + const std::vector &dependencies, + bool is_alpha_host_accessible, bool is_beta_host_accessible) { + T host_alpha = + detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); + T host_beta = + detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; auto backend_handle = internal_A_handle->backend_handle; @@ -210,8 +221,12 @@ sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alp oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, const std::vector &dependencies) { - check_valid_spmv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, beta); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + is_beta_host_accessible); auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); DISPATCH_MKL_OPERATION("spmv", value_type, internal_spmv, queue, opA, alpha, A_view, A_handle, - x_handle, beta, y_handle, alg, spmv_descr, dependencies); + x_handle, beta, y_handle, alg, spmv_descr, dependencies, + is_alpha_host_accessible, is_beta_host_accessible); } diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index 4ca4ee9d8..849919f12 100644 --- 
a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -29,12 +29,12 @@ sycl::event release_spsv_descr(sycl::queue &queue, oneapi::mkl::sparse::spsv_des return detail::collapse_dependencies(queue, dependencies); } -void check_valid_spsv(const std::string &function_name, sycl::queue &queue, - oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view, +void check_valid_spsv(const std::string &function_name, oneapi::mkl::transpose opA, + oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, - oneapi::mkl::sparse::dense_vector_handle_t y_handle, const void *alpha, - oneapi::mkl::sparse::spsv_alg alg) { + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + bool is_alpha_host_accessible, oneapi::mkl::sparse::spsv_alg alg) { THROW_IF_NULLPTR(function_name, A_handle); THROW_IF_NULLPTR(function_name, x_handle); THROW_IF_NULLPTR(function_name, y_handle); @@ -67,7 +67,7 @@ void check_valid_spsv(const std::string &function_name, sycl::queue &queue, } if (internal_A_handle->all_use_buffer()) { - detail::check_ptr_is_host_accessible("spsv", "alpha", queue, alpha); + detail::check_ptr_is_host_accessible("spsv", "alpha", is_alpha_host_accessible); } } @@ -80,7 +80,9 @@ void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, std::size_t &temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
- check_valid_spsv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, alg); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + alg); temp_buffer_size = 0; } @@ -92,7 +94,9 @@ void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, sycl::buffer /*workspace*/) { - check_valid_spsv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, alg); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + alg); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -115,7 +119,9 @@ sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, void * /*workspace*/, const std::vector &dependencies) { - check_valid_spsv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, alg); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + alg); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -136,8 +142,10 @@ sycl::event internal_spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg /*alg*/, oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, - const std::vector &dependencies) { - T host_alpha = detail::get_scalar_on_host(queue, static_cast(alpha)); + const 
std::vector &dependencies, + bool is_alpha_host_accessible) { + T host_alpha = + detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; if (internal_A_handle->all_use_buffer()) { @@ -162,8 +170,11 @@ sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alp oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, const std::vector &dependencies) { - check_valid_spsv(__func__, queue, opA, A_view, A_handle, x_handle, y_handle, alpha, alg); + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + alg); auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); DISPATCH_MKL_OPERATION("spsv", value_type, internal_spsv, queue, opA, alpha, A_view, A_handle, - x_handle, y_handle, alg, spsv_descr, dependencies); + x_handle, y_handle, alg, spsv_descr, dependencies, + is_alpha_host_accessible); } From cd1a71ad5681dd51925da5fd18a12ed60277a8c6 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Mon, 29 Jul 2024 18:08:02 +0200 Subject: [PATCH 29/39] Wait before freeing USM pointers --- tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp | 2 ++ tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp | 2 ++ tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp | 2 ++ 3 files changed, 6 insertions(+) diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp index 766915537..5778430a6 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp @@ -157,6 +157,8 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, a_host.data(), reset_nnz, 
static_cast(nrows_A)); } if (reset_nnz > nnz) { + // Wait before freeing usm pointers + ev_spmm.wait_and_throw(); ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp index fdeb57913..ded92a770 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp @@ -149,6 +149,8 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, a_host.data(), reset_nnz, static_cast(nrows_A)); } if (reset_nnz > nnz) { + // Wait before freeing usm pointers + ev_spmv.wait_and_throw(); ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp index 1bc025851..3b58db914 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp @@ -145,6 +145,8 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl a_host.data(), reset_nnz, mu); } if (reset_nnz > nnz) { + // Wait before freeing usm pointers + ev_spsv.wait_and_throw(); ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); From 95a902a3a011f919d5ca0feb82bf7c35522bd38d Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 16 Aug 2024 18:19:27 +0200 Subject: [PATCH 30/39] Move example static_cast --- .../run_time_dispatching/sparse_blas_spmv_usm.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff 
--git a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp index f93569a92..d87297600 100644 --- a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp +++ b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp @@ -93,6 +93,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { std::size_t sizeja = static_cast(27 * nrows); std::size_t sizeia = static_cast(nrows + 1); std::size_t sizevec = static_cast(nrows); + auto sizevec_i64 = static_cast(sizevec); ia = (intType *)sycl::malloc_shared(sizeia * sizeof(intType), main_queue); ja = (intType *)sycl::malloc_shared(sizeja * sizeof(intType), main_queue); @@ -148,10 +149,8 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // Create and initialize dense vector handles oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; - oneapi::mkl::sparse::init_dense_vector(main_queue, &x_handle, - static_cast(sizevec), x); - oneapi::mkl::sparse::init_dense_vector(main_queue, &y_handle, - static_cast(sizevec), y); + oneapi::mkl::sparse::init_dense_vector(main_queue, &x_handle, sizevec_i64, x); + oneapi::mkl::sparse::init_dense_vector(main_queue, &y_handle, sizevec_i64, y); // Create operation descriptor oneapi::mkl::sparse::spmv_descr_t descr = nullptr; From d76ca037dca01cf0c53959d3685665e928f08cf9 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Tue, 20 Aug 2024 10:32:51 +0200 Subject: [PATCH 31/39] Make buffer optimize functions asynchronous --- src/sparse_blas/backends/mkl_common/mkl_spmv.cxx | 7 ++----- src/sparse_blas/backends/mkl_common/mkl_spsv.cxx | 4 +--- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index 930e1ec87..13fa071af 100644 --- 
a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -106,10 +106,9 @@ void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a if (alg == oneapi::mkl::sparse::spmv_alg::no_optimize_alg) { return; } - sycl::event event; internal_A_handle->can_be_reset = false; if (A_view.type_view == matrix_descr::triangular) { - event = oneapi::mkl::sparse::optimize_trmv(queue, A_view.uplo_view, opA, A_view.diag_view, + oneapi::mkl::sparse::optimize_trmv(queue, A_view.uplo_view, opA, A_view.diag_view, internal_A_handle->backend_handle); } else if (A_view.type_view == matrix_descr::symmetric || @@ -118,10 +117,8 @@ void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a return; } else { - event = oneapi::mkl::sparse::optimize_gemv(queue, opA, internal_A_handle->backend_handle); + oneapi::mkl::sparse::optimize_gemv(queue, opA, internal_A_handle->backend_handle); } - // spmv_optimize is not asynchronous for buffers as the backend optimize functions don't take buffers. - event.wait_and_throw(); } sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index 849919f12..55d8a47fb 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -105,10 +105,8 @@ void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a return; } internal_A_handle->can_be_reset = false; - auto event = oneapi::mkl::sparse::optimize_trsv(queue, A_view.uplo_view, opA, A_view.diag_view, + oneapi::mkl::sparse::optimize_trsv(queue, A_view.uplo_view, opA, A_view.diag_view, internal_A_handle->backend_handle); - // spsv_optimize is not asynchronous for buffers as the backend optimize functions don't take buffers. 
- event.wait_and_throw(); } sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, From 0c34cfcf7c98982733126dd6b79dacd74b61f9dc Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Fri, 16 Aug 2024 18:54:57 +0200 Subject: [PATCH 32/39] Remove fill_buffer_to_0 --- tests/unit_tests/sparse_blas/include/test_common.hpp | 9 --------- .../unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp | 2 +- .../unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp | 2 +- .../unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp | 2 +- 4 files changed, 3 insertions(+), 12 deletions(-) diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp index d579ff5fb..725929476 100644 --- a/tests/unit_tests/sparse_blas/include/test_common.hpp +++ b/tests/unit_tests/sparse_blas/include/test_common.hpp @@ -113,15 +113,6 @@ void copy_host_to_buffer(sycl::queue queue, const std::vector &src, sycl::buf }); } -template -void fill_buffer_to_0(sycl::queue queue, sycl::buffer dst) { - queue.submit([&](sycl::handler &cgh) { - auto dst_acc = dst.template get_access( - cgh, sycl::range<1>(dst.size())); - cgh.fill(dst_acc, T(0)); - }); -} - template std::pair swap_if_cond(bool swap, XT x, YT y) { if (swap) { diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp index 933e27c76..b6f9e1185 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp @@ -134,7 +134,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, copy_host_to_buffer(main_queue, a_host, a_buf); } nnz = reset_nnz; - fill_buffer_to_0(main_queue, c_buf); + copy_host_to_buffer(main_queue, c_ref_host, c_buf); set_matrix_data(main_queue, format, A_handle, nrows_A, ncols_A, nnz, index, ia_buf, ja_buf, a_buf); diff --git 
a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp index 09f16b335..3d99f9e94 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp @@ -123,7 +123,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, copy_host_to_buffer(main_queue, ja_host, ja_buf); copy_host_to_buffer(main_queue, a_host, a_buf); } - fill_buffer_to_0(main_queue, y_buf); + copy_host_to_buffer(main_queue, y_ref_host, y_buf); nnz = reset_nnz; set_matrix_data(main_queue, format, A_handle, nrows_A, ncols_A, nnz, index, ia_buf, ja_buf, a_buf); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp index 13e6d5914..6b276dff4 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp @@ -123,7 +123,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl copy_host_to_buffer(main_queue, ja_host, ja_buf); copy_host_to_buffer(main_queue, a_host, a_buf); } - fill_buffer_to_0(main_queue, y_buf); + copy_host_to_buffer(main_queue, y_ref_host, y_buf); nnz = reset_nnz; set_matrix_data(main_queue, format, A_handle, m, m, nnz, index, ia_buf, ja_buf, a_buf); From aff9ee2c170bdaf1c57d2ffe31ad7bd1d05a85c0 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Tue, 20 Aug 2024 14:29:20 +0200 Subject: [PATCH 33/39] format with clang-format-9 --- .../mkl/sparse_blas/detail/helper_types.hpp | 2 +- .../backends/mkl_common/mkl_handles.cxx | 20 +++++++++---------- .../backends/mkl_common/mkl_spmv.cxx | 2 +- .../backends/mkl_common/mkl_spsv.cxx | 7 +++---- src/sparse_blas/function_table.hpp | 20 +++++++++---------- .../sparse_blas/include/test_common.hpp | 6 +++--- 6 files changed, 28 insertions(+), 29 deletions(-) diff --git 
a/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp b/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp index 75ee22211..ace216f00 100644 --- a/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp @@ -40,7 +40,7 @@ inline constexpr bool is_int_supported_v = template inline constexpr bool are_fp_int_supported_v = - is_fp_supported_v && is_int_supported_v; + is_fp_supported_v&& is_int_supported_v; } // namespace detail } // namespace sparse diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx index 49b2591fd..3ae84ca64 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx @@ -72,13 +72,13 @@ void set_dense_vector_data(sycl::queue & /*queue*/, std::int64_t size, sycl::buffer val); \ template void init_dense_vector( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ - std::int64_t size, FP_TYPE *val); \ + std::int64_t size, FP_TYPE * val); \ template void set_dense_vector_data( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ std::int64_t size, sycl::buffer val); \ template void set_dense_vector_data( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ - std::int64_t size, FP_TYPE *val) + std::int64_t size, FP_TYPE * val) FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_VECTOR_FUNCS); #undef INSTANTIATE_DENSE_VECTOR_FUNCS @@ -141,7 +141,7 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, template void init_dense_matrix( \ sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::mkl::layout dense_layout, FP_TYPE *val); \ + oneapi::mkl::layout dense_layout, FP_TYPE * val); \ template void set_dense_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ 
std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ @@ -149,7 +149,7 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, template void set_dense_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::mkl::layout dense_layout, FP_TYPE *val) + oneapi::mkl::layout dense_layout, FP_TYPE * val) FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_MATRIX_FUNCS); #undef INSTANTIATE_DENSE_MATRIX_FUNCS @@ -255,7 +255,7 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ template void init_coo_matrix( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::mkl::index_base index, INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val); \ + oneapi::mkl::index_base index, INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val); \ template void set_coo_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ @@ -263,8 +263,8 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ sycl::buffer val); \ template void set_coo_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE *row_ind, \ - INT_TYPE *col_ind, FP_TYPE *val) + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val) FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_COO_MATRIX_FUNCS); #undef INSTANTIATE_COO_MATRIX_FUNCS @@ -349,7 +349,7 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ template void init_csr_matrix( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t 
num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::mkl::index_base index, INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val); \ + oneapi::mkl::index_base index, INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val); \ template void set_csr_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ @@ -357,8 +357,8 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ sycl::buffer val); \ template void set_csr_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE *row_ptr, \ - INT_TYPE *col_ind, FP_TYPE *val) + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val) FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_CSR_MATRIX_FUNCS); #undef INSTANTIATE_CSR_MATRIX_FUNCS diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index 13fa071af..d97e7c59f 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -109,7 +109,7 @@ void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a internal_A_handle->can_be_reset = false; if (A_view.type_view == matrix_descr::triangular) { oneapi::mkl::sparse::optimize_trmv(queue, A_view.uplo_view, opA, A_view.diag_view, - internal_A_handle->backend_handle); + internal_A_handle->backend_handle); } else if (A_view.type_view == matrix_descr::symmetric || A_view.type_view == matrix_descr::hermitian) { diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index 55d8a47fb..718ab0f19 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ 
b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -52,9 +52,8 @@ void check_valid_spsv(const std::string &function_name, oneapi::mkl::transpose o if ((data_type == detail::data_type::complex_fp32 || data_type == detail::data_type::complex_fp64) && opA == oneapi::mkl::transpose::conjtrans) { - throw mkl::unimplemented( - "sparse_blas", function_name, - "The backend does not support spsv using conjtrans."); + throw mkl::unimplemented("sparse_blas", function_name, + "The backend does not support spsv using conjtrans."); } #else (void)opA; @@ -106,7 +105,7 @@ void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a } internal_A_handle->can_be_reset = false; oneapi::mkl::sparse::optimize_trsv(queue, A_view.uplo_view, opA, A_view.diag_view, - internal_A_handle->backend_handle); + internal_A_handle->backend_handle); } sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, diff --git a/src/sparse_blas/function_table.hpp b/src/sparse_blas/function_table.hpp index 2fc5d4d28..d1e3d8189 100644 --- a/src/sparse_blas/function_table.hpp +++ b/src/sparse_blas/function_table.hpp @@ -30,13 +30,13 @@ std::int64_t size, sycl::buffer val); \ void (*init_dense_vector_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ - std::int64_t size, FP_TYPE *val); \ + std::int64_t size, FP_TYPE * val); \ void (*set_dense_vector_data_buffer##FP_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ std::int64_t size, sycl::buffer val); \ void (*set_dense_vector_data_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ - std::int64_t size, FP_TYPE *val) + std::int64_t size, FP_TYPE * val) // Dense matrix #define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ @@ -47,7 +47,7 @@ void (*init_dense_matrix_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ std::int64_t 
num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::mkl::layout dense_layout, FP_TYPE *val); \ + oneapi::mkl::layout dense_layout, FP_TYPE * val); \ void (*set_dense_matrix_data_buffer##FP_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ @@ -55,7 +55,7 @@ void (*set_dense_matrix_data_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::mkl::layout dense_layout, FP_TYPE *val) + oneapi::mkl::layout dense_layout, FP_TYPE * val) // COO matrix #define DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ @@ -67,7 +67,7 @@ void (*init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::mkl::index_base index, INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val); \ + oneapi::mkl::index_base index, INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val); \ void (*set_coo_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ @@ -75,8 +75,8 @@ sycl::buffer val); \ void (*set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE *row_ind, \ - INT_TYPE *col_ind, FP_TYPE *val) + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val) // CSR matrix #define DEFINE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ @@ -88,7 +88,7 @@ void (*init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & 
queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::mkl::index_base index, INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val); \ + oneapi::mkl::index_base index, INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val); \ void (*set_csr_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ @@ -96,8 +96,8 @@ sycl::buffer val); \ void (*set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE *row_ptr, \ - INT_TYPE *col_ind, FP_TYPE *val) + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val) typedef struct { int version; diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp index 725929476..c11255a9a 100644 --- a/tests/unit_tests/sparse_blas/include/test_common.hpp +++ b/tests/unit_tests/sparse_blas/include/test_common.hpp @@ -407,7 +407,7 @@ void set_matrix_data(sycl::queue &queue, sparse_matrix_format_t format, template inline void free_handles(sycl::queue &queue, const std::vector dependencies, - HandlesT &&...handles) { + HandlesT &&... handles) { // Fold expression so that handles expands to each value one after the other. ( [&] { @@ -436,12 +436,12 @@ inline void free_handles(sycl::queue &queue, const std::vector depe } template -inline void free_handles(sycl::queue &queue, HandlesT &&...handles) { +inline void free_handles(sycl::queue &queue, HandlesT &&... 
handles) { free_handles(queue, {}, handles...); } template -inline void wait_and_free_handles(sycl::queue &queue, HandlesT &&...handles) { +inline void wait_and_free_handles(sycl::queue &queue, HandlesT &&... handles) { queue.wait(); free_handles(queue, handles...); } From c7a4420b1494eed0068b9cf8e38dd546cc404120 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Mon, 26 Aug 2024 15:27:45 +0200 Subject: [PATCH 34/39] Throw unsupported for spmv using symmetric or hermitian + conjtrans --- docs/domains/sparse_linear_algebra.rst | 4 ++ .../backends/mkl_common/mkl_spmv.cxx | 4 +- .../sparse_blas/include/test_spmv.hpp | 63 +++++++++---------- 3 files changed, 36 insertions(+), 35 deletions(-) diff --git a/docs/domains/sparse_linear_algebra.rst b/docs/domains/sparse_linear_algebra.rst index 41b7c9d9e..eab5afd56 100644 --- a/docs/domains/sparse_linear_algebra.rst +++ b/docs/domains/sparse_linear_algebra.rst @@ -29,6 +29,10 @@ Currently known limitations: ``oneapi::mkl::transpose::conjtrans`` and has the ``oneapi::mkl::sparse::matrix_property::symmetric`` property will throw an ``oneapi::mkl::unimplemented`` exception. +- Using ``spmv`` with a sparse matrix that is + ``oneapi::mkl::transpose::conjtrans`` with a ``type_view`` + ``matrix_descr::symmetric`` or ``matrix_descr::hermitian`` will throw an + ``oneapi::mkl::unimplemented`` exception. - Using ``spsv`` on Intel GPU with a sparse matrix that is ``oneapi::mkl::transpose::conjtrans`` and will throw an ``oneapi::mkl::unimplemented`` exception. 
diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index d97e7c59f..d5da0c433 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -65,9 +65,9 @@ void check_valid_spmv(const std::string &function_name, oneapi::mkl::transpose o if ((A_view.type_view == oneapi::mkl::sparse::matrix_descr::symmetric || A_view.type_view == oneapi::mkl::sparse::matrix_descr::hermitian) && opA == oneapi::mkl::transpose::conjtrans) { - throw mkl::invalid_argument( + throw mkl::unimplemented( "sparse_blas", function_name, - "Symmetric or Hermitian matrix cannot be conjugated with `conjtrans`."); + "The backend does not support Symmetric or Hermitian matrix with `conjtrans`."); } } diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp index 6ee256adb..5ff5a57db 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp @@ -152,39 +152,36 @@ void test_helper_with_format_with_transpose( fp_one, fp_zero, default_alg, triangular_unit_A_view, no_properties, no_reset_data, no_scalars_on_device), num_passed, num_skipped); - if (transpose_val != oneapi::mkl::transpose::conjtrans) { - // Do not test conjtrans with symmetric or hermitian views as no backend supports it. 
- // Lower symmetric - oneapi::mkl::sparse::matrix_view symmetric_view( - oneapi::mkl::sparse::matrix_descr::symmetric); - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, - transpose_val, fp_one, fp_zero, default_alg, symmetric_view, - no_properties, no_reset_data, no_scalars_on_device), - num_passed, num_skipped); - // Upper symmetric - symmetric_view.uplo_view = oneapi::mkl::uplo::upper; - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, - transpose_val, fp_one, fp_zero, default_alg, symmetric_view, - no_properties, no_reset_data, no_scalars_on_device), - num_passed, num_skipped); - // Lower hermitian - oneapi::mkl::sparse::matrix_view hermitian_view( - oneapi::mkl::sparse::matrix_descr::hermitian); - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, - transpose_val, fp_one, fp_zero, default_alg, hermitian_view, - no_properties, no_reset_data, no_scalars_on_device), - num_passed, num_skipped); - // Upper hermitian - hermitian_view.uplo_view = oneapi::mkl::uplo::upper; - EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, - transpose_val, fp_one, fp_zero, default_alg, hermitian_view, - no_properties, no_reset_data, no_scalars_on_device), - num_passed, num_skipped); - } + // Lower symmetric + oneapi::mkl::sparse::matrix_view symmetric_view( + oneapi::mkl::sparse::matrix_descr::symmetric); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + transpose_val, fp_one, fp_zero, default_alg, symmetric_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Upper symmetric + symmetric_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + 
transpose_val, fp_one, fp_zero, default_alg, symmetric_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Lower hermitian + oneapi::mkl::sparse::matrix_view hermitian_view( + oneapi::mkl::sparse::matrix_descr::hermitian); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + transpose_val, fp_one, fp_zero, default_alg, hermitian_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Upper hermitian + hermitian_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + transpose_val, fp_one, fp_zero, default_alg, hermitian_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); // Test other algorithms for (auto alg : non_default_algorithms) { EXPECT_TRUE_OR_FUTURE_SKIP( From 5f8e1838b612370e02e4dda55e43d938be731c6c Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Mon, 26 Aug 2024 18:42:20 +0200 Subject: [PATCH 35/39] Add checks that buffer_size and optimize functions are called before when possible --- .../backends/mkl_common/mkl_helper.hpp | 9 ++ .../backends/mkl_common/mkl_spmm.cxx | 91 +++++++++++++++---- .../backends/mkl_common/mkl_spmv.cxx | 86 ++++++++++++++---- .../backends/mkl_common/mkl_spsv.cxx | 80 ++++++++++++---- .../backends/mklcpu/mklcpu_operations.cpp | 7 +- .../backends/mklgpu/mklgpu_operations.cpp | 7 +- src/sparse_blas/matrix_view_comparison.hpp | 36 ++++++++ 7 files changed, 256 insertions(+), 60 deletions(-) create mode 100644 src/sparse_blas/matrix_view_comparison.hpp diff --git a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp index c76af5cb6..ca15c5b4f 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp +++ b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp @@ -97,6 +97,15 @@ inline sycl::event 
collapse_dependencies(sycl::queue &queue, "Internal error: unsupported type " + data_type_to_str(value_type)); \ } +#define CHECK_DESCR_MATCH(descr, argument, optimize_func_name) \ + do { \ + if (descr->last_optimized_##argument != argument) { \ + throw mkl::invalid_argument( \ + "sparse_blas", __func__, \ + #argument " argument must match with the previous call to " #optimize_func_name); \ + } \ + } while (0) + } // namespace oneapi::mkl::sparse::detail #endif // _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HELPER_HPP_ diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index 3c2a9f161..eb1b45ebf 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -17,16 +17,31 @@ * **************************************************************************/ -// The operation descriptor is not needed as long as the backend does not have an equivalent type and does not support external workspace. 
-using spmm_descr = void *; +namespace oneapi::mkl::sparse { + +struct spmm_descr { + bool buffer_size_called = false; + bool optimized_called = false; + oneapi::mkl::transpose last_optimized_opA; + oneapi::mkl::transpose last_optimized_opB; + oneapi::mkl::sparse::matrix_view last_optimized_A_view; + oneapi::mkl::sparse::matrix_handle_t last_optimized_A_handle; + oneapi::mkl::sparse::dense_matrix_handle_t last_optimized_B_handle; + oneapi::mkl::sparse::dense_matrix_handle_t last_optimized_C_handle; + oneapi::mkl::sparse::spmm_alg last_optimized_alg; +}; + +} // namespace oneapi::mkl::sparse + +namespace oneapi::mkl::sparse::BACKEND { void init_spmm_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spmm_descr_t *p_spmm_descr) { - *p_spmm_descr = nullptr; + *p_spmm_descr = new spmm_descr(); } -sycl::event release_spmm_descr(sycl::queue &queue, oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, +sycl::event release_spmm_descr(sycl::queue &queue, oneapi::mkl::sparse::spmm_descr_t spmm_descr, const std::vector &dependencies) { - return detail::collapse_dependencies(queue, dependencies); + return detail::submit_release(queue, spmm_descr, dependencies); } void check_valid_spmm(const std::string &function_name, oneapi::mkl::transpose opA, @@ -87,28 +102,50 @@ void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg /*alg*/, - oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, - std::size_t &temp_buffer_size) { + oneapi::mkl::sparse::spmm_descr_t spmm_descr, std::size_t &temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, is_beta_host_accessible); temp_buffer_size = 0; + spmm_descr->buffer_size_called = true; +} + +inline void common_spmm_optimize( + sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t spmm_descr) { + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, + is_beta_host_accessible); + if (!spmm_descr->buffer_size_called) { + throw mkl::uninitialized( + "sparse_blas", __func__, + "spmm_buffer_size must be called with the same arguments before spmm_optimize."); + } + spmm_descr->optimized_called = true; + spmm_descr->last_optimized_opA = opA; + spmm_descr->last_optimized_opB = opB; + spmm_descr->last_optimized_A_view = A_view; + spmm_descr->last_optimized_A_handle = A_handle; + spmm_descr->last_optimized_B_handle = B_handle; + spmm_descr->last_optimized_C_handle = C_handle; + spmm_descr->last_optimized_alg = alg; } -void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose /*opB*/, +void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, const void *alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, 
oneapi::mkl::sparse::dense_matrix_handle_t C_handle, - oneapi::mkl::sparse::spmm_alg alg, - oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, + oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, sycl::buffer /*workspace*/) { - bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); - bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); - check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, - is_beta_host_accessible); + common_spmm_optimize(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, + spmm_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -121,18 +158,16 @@ void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl:: } sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose /*opB*/, const void *alpha, + oneapi::mkl::transpose opB, const void *alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, - oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, void * /*workspace*/, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, void * /*workspace*/, const std::vector &dependencies) { - bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); - bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); - check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, - is_beta_host_accessible); + common_spmm_optimize(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, + spmm_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if 
(internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -189,8 +224,24 @@ sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::tr bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, is_beta_host_accessible); + + if (!spmm_descr->optimized_called) { + throw mkl::uninitialized( + "sparse_blas", __func__, + "spmm_optimize must be called with the same arguments before spmm."); + } + CHECK_DESCR_MATCH(spmm_descr, opA, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, opB, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, A_view, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, A_handle, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, B_handle, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, C_handle, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, alg, "spmm_optimize"); + auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); DISPATCH_MKL_OPERATION("spmm", value_type, internal_spmm, queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, dependencies, is_alpha_host_accessible, is_beta_host_accessible); } + +} // namespace oneapi::mkl::sparse::BACKEND diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index d5da0c433..4e5aeffdb 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -17,16 +17,30 @@ * **************************************************************************/ -// The operation descriptor is not needed as long as the backend does not have an equivalent type and does not support external workspace. 
-using spmv_descr = void *; +namespace oneapi::mkl::sparse { + +struct spmv_descr { + bool buffer_size_called = false; + bool optimized_called = false; + oneapi::mkl::transpose last_optimized_opA; + oneapi::mkl::sparse::matrix_view last_optimized_A_view; + oneapi::mkl::sparse::matrix_handle_t last_optimized_A_handle; + oneapi::mkl::sparse::dense_vector_handle_t last_optimized_x_handle; + oneapi::mkl::sparse::dense_vector_handle_t last_optimized_y_handle; + oneapi::mkl::sparse::spmv_alg last_optimized_alg; +}; + +} // namespace oneapi::mkl::sparse + +namespace oneapi::mkl::sparse::BACKEND { void init_spmv_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spmv_descr_t *p_spmv_descr) { - *p_spmv_descr = nullptr; + *p_spmv_descr = new spmv_descr(); } -sycl::event release_spmv_descr(sycl::queue &queue, oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, +sycl::event release_spmv_descr(sycl::queue &queue, oneapi::mkl::sparse::spmv_descr_t spmv_descr, const std::vector &dependencies) { - return detail::collapse_dependencies(queue, dependencies); + return detail::submit_release(queue, spmv_descr, dependencies); } void check_valid_spmv(const std::string &function_name, oneapi::mkl::transpose opA, @@ -77,14 +91,40 @@ void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg /*alg*/, - oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, - std::size_t &temp_buffer_size) { + oneapi::mkl::sparse::spmv_descr_t spmv_descr, std::size_t &temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, is_beta_host_accessible); temp_buffer_size = 0; + spmv_descr->buffer_size_called = true; +} + +inline void common_spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t spmv_descr) { + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + is_beta_host_accessible); + if (!spmv_descr->buffer_size_called) { + throw mkl::uninitialized( + "sparse_blas", __func__, + "spmv_buffer_size must be called with the same arguments before spmv_optimize."); + } + spmv_descr->optimized_called = true; + spmv_descr->last_optimized_opA = opA; + spmv_descr->last_optimized_A_view = A_view; + spmv_descr->last_optimized_A_handle = A_handle; + spmv_descr->last_optimized_x_handle = x_handle; + spmv_descr->last_optimized_y_handle = y_handle; + spmv_descr->last_optimized_alg = alg; } void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, @@ -92,13 +132,10 @@ void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, - oneapi::mkl::sparse::spmv_alg alg, - oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, 
+ oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, sycl::buffer /*workspace*/) { - bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); - bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); - check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, - is_beta_host_accessible); + common_spmv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, + spmv_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -127,12 +164,10 @@ sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, - oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, void * /*workspace*/, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, void * /*workspace*/, const std::vector &dependencies) { - bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); - bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); - check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, - is_beta_host_accessible); + common_spmv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, + spmv_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -222,8 +257,23 @@ sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alp bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, is_beta_host_accessible); + + if (!spmv_descr->optimized_called) { 
+ throw mkl::uninitialized( + "sparse_blas", __func__, + "spmv_optimize must be called with the same arguments before spmv."); + } + CHECK_DESCR_MATCH(spmv_descr, opA, "spmv_optimize"); + CHECK_DESCR_MATCH(spmv_descr, A_view, "spmv_optimize"); + CHECK_DESCR_MATCH(spmv_descr, A_handle, "spmv_optimize"); + CHECK_DESCR_MATCH(spmv_descr, x_handle, "spmv_optimize"); + CHECK_DESCR_MATCH(spmv_descr, y_handle, "spmv_optimize"); + CHECK_DESCR_MATCH(spmv_descr, alg, "spmv_optimize"); + auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); DISPATCH_MKL_OPERATION("spmv", value_type, internal_spmv, queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, dependencies, is_alpha_host_accessible, is_beta_host_accessible); } + +} // namespace oneapi::mkl::sparse::BACKEND diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index 718ab0f19..371fac38b 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -17,16 +17,30 @@ * **************************************************************************/ -// The operation descriptor is not needed as long as the backend does not have an equivalent type and does not support external workspace. 
-using spsv_descr = void *; +namespace oneapi::mkl::sparse { + +struct spsv_descr { + bool buffer_size_called = false; + bool optimized_called = false; + oneapi::mkl::transpose last_optimized_opA; + oneapi::mkl::sparse::matrix_view last_optimized_A_view; + oneapi::mkl::sparse::matrix_handle_t last_optimized_A_handle; + oneapi::mkl::sparse::dense_vector_handle_t last_optimized_x_handle; + oneapi::mkl::sparse::dense_vector_handle_t last_optimized_y_handle; + oneapi::mkl::sparse::spsv_alg last_optimized_alg; +}; + +} // namespace oneapi::mkl::sparse + +namespace oneapi::mkl::sparse::BACKEND { void init_spsv_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spsv_descr_t *p_spsv_descr) { - *p_spsv_descr = nullptr; + *p_spsv_descr = new spsv_descr(); } -sycl::event release_spsv_descr(sycl::queue &queue, oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, +sycl::event release_spsv_descr(sycl::queue &queue, oneapi::mkl::sparse::spsv_descr_t spsv_descr, const std::vector &dependencies) { - return detail::collapse_dependencies(queue, dependencies); + return detail::submit_release(queue, spsv_descr, dependencies); } void check_valid_spsv(const std::string &function_name, oneapi::mkl::transpose opA, @@ -76,13 +90,37 @@ void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, - oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, - std::size_t &temp_buffer_size) { + oneapi::mkl::sparse::spsv_descr_t spsv_descr, std::size_t &temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, alg); temp_buffer_size = 0; + spsv_descr->buffer_size_called = true; +} + +inline void common_spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr) { + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + alg); + if (!spsv_descr->buffer_size_called) { + throw mkl::uninitialized( + "sparse_blas", __func__, + "spsv_buffer_size must be called with the same arguments before spsv_optimize."); + } + spsv_descr->optimized_called = true; + spsv_descr->last_optimized_opA = opA; + spsv_descr->last_optimized_A_view = A_view; + spsv_descr->last_optimized_A_handle = A_handle; + spsv_descr->last_optimized_x_handle = x_handle; + spsv_descr->last_optimized_y_handle = y_handle; + spsv_descr->last_optimized_alg = alg; } void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, @@ -90,12 +128,9 @@ void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, - oneapi::mkl::sparse::spsv_alg alg, - oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, + oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, sycl::buffer /*workspace*/) { - bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); - check_valid_spsv(__func__, opA, A_view, 
A_handle, x_handle, y_handle, is_alpha_host_accessible, - alg); + common_spsv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -114,11 +149,9 @@ sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, - oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, void * /*workspace*/, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, void * /*workspace*/, const std::vector &dependencies) { - bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); - check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, - alg); + common_spsv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -170,8 +203,23 @@ sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alp bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, alg); + + if (!spsv_descr->optimized_called) { + throw mkl::uninitialized( + "sparse_blas", __func__, + "spsv_optimize must be called with the same arguments before spsv."); + } + CHECK_DESCR_MATCH(spsv_descr, opA, "spsv_optimize"); + CHECK_DESCR_MATCH(spsv_descr, A_view, "spsv_optimize"); + CHECK_DESCR_MATCH(spsv_descr, A_handle, "spsv_optimize"); + CHECK_DESCR_MATCH(spsv_descr, x_handle, "spsv_optimize"); + CHECK_DESCR_MATCH(spsv_descr, y_handle, "spsv_optimize"); + CHECK_DESCR_MATCH(spsv_descr, alg, "spsv_optimize"); + auto 
value_type = detail::get_internal_handle(A_handle)->get_value_type(); DISPATCH_MKL_OPERATION("spsv", value_type, internal_spsv, queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, dependencies, is_alpha_host_accessible); } + +} // namespace oneapi::mkl::sparse::BACKEND diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp index 4e0242c2d..0929a7ef4 100644 --- a/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp +++ b/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp @@ -17,16 +17,17 @@ * **************************************************************************/ +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" #include "sparse_blas/backends/mkl_common/mkl_helper.hpp" #include "sparse_blas/macros.hpp" -#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" +#include "sparse_blas/matrix_view_comparison.hpp" #include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" -namespace oneapi::mkl::sparse::mklcpu { +#define BACKEND mklcpu #include "sparse_blas/backends/mkl_common/mkl_spmm.cxx" #include "sparse_blas/backends/mkl_common/mkl_spmv.cxx" #include "sparse_blas/backends/mkl_common/mkl_spsv.cxx" -} // namespace oneapi::mkl::sparse::mklcpu +#undef BACKEND diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp index 0c5a73fb0..be5e0c0aa 100644 --- a/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp +++ b/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp @@ -17,16 +17,17 @@ * **************************************************************************/ +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" #include "sparse_blas/backends/mkl_common/mkl_helper.hpp" #include "sparse_blas/macros.hpp" -#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" +#include "sparse_blas/matrix_view_comparison.hpp" #include 
"oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" -namespace oneapi::mkl::sparse::mklgpu { +#define BACKEND mklgpu #include "sparse_blas/backends/mkl_common/mkl_spmm.cxx" #include "sparse_blas/backends/mkl_common/mkl_spmv.cxx" #include "sparse_blas/backends/mkl_common/mkl_spsv.cxx" -} // namespace oneapi::mkl::sparse::mklgpu +#undef BACKEND diff --git a/src/sparse_blas/matrix_view_comparison.hpp b/src/sparse_blas/matrix_view_comparison.hpp new file mode 100644 index 000000000..e01be7311 --- /dev/null +++ b/src/sparse_blas/matrix_view_comparison.hpp @@ -0,0 +1,36 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#ifndef _ONEMKL_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_ +#define _ONEMKL_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_ + +#include "oneapi/mkl/sparse_blas/matrix_view.hpp" + +inline bool operator==(const oneapi::mkl::sparse::matrix_view& lhs, + const oneapi::mkl::sparse::matrix_view& rhs) { + return lhs.type_view == rhs.type_view && lhs.uplo_view == rhs.uplo_view && + lhs.diag_view == rhs.diag_view; +} + +inline bool operator!=(const oneapi::mkl::sparse::matrix_view& lhs, + const oneapi::mkl::sparse::matrix_view& rhs) { + return !(lhs == rhs); +} + +#endif // _ONEMKL_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_ \ No newline at end of file From 6a533dfc03377262b0bf696706819a6421d1fc42 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Tue, 27 Aug 2024 11:06:19 +0200 Subject: [PATCH 36/39] clang-format-9 --- .../sparse_blas/include/test_spmv.hpp | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp index 5ff5a57db..f141db893 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp @@ -153,34 +153,32 @@ void test_helper_with_format_with_transpose( no_reset_data, no_scalars_on_device), num_passed, num_skipped); // Lower symmetric - oneapi::mkl::sparse::matrix_view symmetric_view( - oneapi::mkl::sparse::matrix_descr::symmetric); + oneapi::mkl::sparse::matrix_view symmetric_view(oneapi::mkl::sparse::matrix_descr::symmetric); EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, - transpose_val, fp_one, fp_zero, default_alg, symmetric_view, - no_properties, no_reset_data, no_scalars_on_device), + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, symmetric_view, 
no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Upper symmetric symmetric_view.uplo_view = oneapi::mkl::uplo::upper; EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, - transpose_val, fp_one, fp_zero, default_alg, symmetric_view, - no_properties, no_reset_data, no_scalars_on_device), + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, symmetric_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Lower hermitian - oneapi::mkl::sparse::matrix_view hermitian_view( - oneapi::mkl::sparse::matrix_descr::hermitian); + oneapi::mkl::sparse::matrix_view hermitian_view(oneapi::mkl::sparse::matrix_descr::hermitian); EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, - transpose_val, fp_one, fp_zero, default_alg, hermitian_view, - no_properties, no_reset_data, no_scalars_on_device), + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, hermitian_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Upper hermitian hermitian_view.uplo_view = oneapi::mkl::uplo::upper; EXPECT_TRUE_OR_FUTURE_SKIP( - test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, - transpose_val, fp_one, fp_zero, default_alg, hermitian_view, - no_properties, no_reset_data, no_scalars_on_device), + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, hermitian_view, no_properties, no_reset_data, + no_scalars_on_device), num_passed, num_skipped); // Test other algorithms for (auto alg : non_default_algorithms) { From 4f39b22bac1443e1d81e3bc1b53dba94d86612a0 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Tue, 27 Aug 2024 16:10:40 +0200 Subject: 
[PATCH 37/39] Move check for incompatible container earlier --- src/sparse_blas/backends/mkl_common/mkl_spmm.cxx | 8 ++++---- src/sparse_blas/backends/mkl_common/mkl_spmv.cxx | 8 ++++---- src/sparse_blas/backends/mkl_common/mkl_spsv.cxx | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index eb1b45ebf..857a1983c 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -144,12 +144,12 @@ void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl:: oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, sycl::buffer /*workspace*/) { - common_spmm_optimize(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, - spmm_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); } + common_spmm_optimize(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, + spmm_descr); if (alg == oneapi::mkl::sparse::spmm_alg::no_optimize_alg) { return; } @@ -166,12 +166,12 @@ sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, void * /*workspace*/, const std::vector &dependencies) { - common_spmm_optimize(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, - spmm_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); } + common_spmm_optimize(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, + spmm_descr); if (alg == oneapi::mkl::sparse::spmm_alg::no_optimize_alg) { return detail::collapse_dependencies(queue, 
dependencies); } diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index 4e5aeffdb..7ddd534d8 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -134,12 +134,12 @@ void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, sycl::buffer /*workspace*/) { - common_spmv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, - spmv_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); } + common_spmv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, + spmv_descr); if (alg == oneapi::mkl::sparse::spmv_alg::no_optimize_alg) { return; } @@ -166,12 +166,12 @@ sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, void * /*workspace*/, const std::vector &dependencies) { - common_spmv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, - spmv_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); } + common_spmv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, + spmv_descr); if (alg == oneapi::mkl::sparse::spmv_alg::no_optimize_alg) { return detail::collapse_dependencies(queue, dependencies); } diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index 371fac38b..078a5abac 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -130,11 +130,11 @@ void 
spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, sycl::buffer /*workspace*/) { - common_spsv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if (!internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); } + common_spsv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr); if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg) { return; } @@ -151,11 +151,11 @@ sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, void * /*workspace*/, const std::vector &dependencies) { - common_spsv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr); auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); } + common_spsv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr); if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg) { return detail::collapse_dependencies(queue, dependencies); } From 0ecb032dc02f141ad57800b591408aa680428cac Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Mon, 2 Sep 2024 10:55:24 +0200 Subject: [PATCH 38/39] Reword exception --- src/sparse_blas/backends/mkl_common/mkl_spmm.cxx | 10 ++++------ src/sparse_blas/backends/mkl_common/mkl_spmv.cxx | 10 ++++------ src/sparse_blas/backends/mkl_common/mkl_spsv.cxx | 10 ++++------ 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index 857a1983c..178e75dc1 100644 --- 
a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -123,9 +123,8 @@ inline void common_spmm_optimize( check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, is_beta_host_accessible); if (!spmm_descr->buffer_size_called) { - throw mkl::uninitialized( - "sparse_blas", __func__, - "spmm_buffer_size must be called with the same arguments before spmm_optimize."); + throw mkl::uninitialized("sparse_blas", __func__, + "spmm_buffer_size must be called before spmm_optimize."); } spmm_descr->optimized_called = true; spmm_descr->last_optimized_opA = opA; @@ -226,9 +225,8 @@ sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::tr is_beta_host_accessible); if (!spmm_descr->optimized_called) { - throw mkl::uninitialized( - "sparse_blas", __func__, - "spmm_optimize must be called with the same arguments before spmm."); + throw mkl::uninitialized("sparse_blas", __func__, + "spmm_optimize must be called before spmm."); } CHECK_DESCR_MATCH(spmm_descr, opA, "spmm_optimize"); CHECK_DESCR_MATCH(spmm_descr, opB, "spmm_optimize"); diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index 7ddd534d8..0c5d9492c 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -114,9 +114,8 @@ inline void common_spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, is_beta_host_accessible); if (!spmv_descr->buffer_size_called) { - throw mkl::uninitialized( - "sparse_blas", __func__, - "spmv_buffer_size must be called with the same arguments before spmv_optimize."); + throw mkl::uninitialized("sparse_blas", __func__, + "spmv_buffer_size must be called before spmv_optimize."); } spmv_descr->optimized_called = true; spmv_descr->last_optimized_opA = opA; @@ 
-259,9 +258,8 @@ sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alp is_beta_host_accessible); if (!spmv_descr->optimized_called) { - throw mkl::uninitialized( - "sparse_blas", __func__, - "spmv_optimize must be called with the same arguments before spmv."); + throw mkl::uninitialized("sparse_blas", __func__, + "spmv_optimize must be called before spmv."); } CHECK_DESCR_MATCH(spmv_descr, opA, "spmv_optimize"); CHECK_DESCR_MATCH(spmv_descr, A_view, "spmv_optimize"); diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index 078a5abac..f601049c6 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -110,9 +110,8 @@ inline void common_spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, alg); if (!spsv_descr->buffer_size_called) { - throw mkl::uninitialized( - "sparse_blas", __func__, - "spsv_buffer_size must be called with the same arguments before spsv_optimize."); + throw mkl::uninitialized("sparse_blas", __func__, + "spsv_buffer_size must be called before spsv_optimize."); } spsv_descr->optimized_called = true; spsv_descr->last_optimized_opA = opA; @@ -205,9 +204,8 @@ sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alp alg); if (!spsv_descr->optimized_called) { - throw mkl::uninitialized( - "sparse_blas", __func__, - "spsv_optimize must be called with the same arguments before spsv."); + throw mkl::uninitialized("sparse_blas", __func__, + "spsv_optimize must be called before spsv."); } CHECK_DESCR_MATCH(spsv_descr, opA, "spsv_optimize"); CHECK_DESCR_MATCH(spsv_descr, A_view, "spsv_optimize"); From b6f5a3a12d9041f9fd2d242bd5a16f12df8f98a7 Mon Sep 17 00:00:00 2001 From: "romain.biessy" Date: Wed, 4 Sep 2024 17:14:29 +0200 Subject: [PATCH 39/39] Improve function name in exceptions 
--- src/sparse_blas/backends/mkl_common/mkl_spmm.cxx | 6 +++--- src/sparse_blas/backends/mkl_common/mkl_spmv.cxx | 6 +++--- src/sparse_blas/backends/mkl_common/mkl_spsv.cxx | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index 178e75dc1..dad611252 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -120,10 +120,10 @@ inline void common_spmm_optimize( oneapi::mkl::sparse::spmm_descr_t spmm_descr) { bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); - check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, - is_beta_host_accessible); + check_valid_spmm("spmm_optimize", opA, A_view, A_handle, B_handle, C_handle, + is_alpha_host_accessible, is_beta_host_accessible); if (!spmm_descr->buffer_size_called) { - throw mkl::uninitialized("sparse_blas", __func__, + throw mkl::uninitialized("sparse_blas", "spmm_optimize", "spmm_buffer_size must be called before spmm_optimize."); } spmm_descr->optimized_called = true; diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index 0c5d9492c..d2332286b 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -111,10 +111,10 @@ inline void common_spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::spmv_descr_t spmv_descr) { bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); - check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, - is_beta_host_accessible); + check_valid_spmv("spmv_optimize", opA, 
A_view, A_handle, x_handle, y_handle, + is_alpha_host_accessible, is_beta_host_accessible); if (!spmv_descr->buffer_size_called) { - throw mkl::uninitialized("sparse_blas", __func__, + throw mkl::uninitialized("sparse_blas", "spmv_optimize", "spmv_buffer_size must be called before spmv_optimize."); } spmv_descr->optimized_called = true; diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index f601049c6..7ef5b3c39 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -107,10 +107,10 @@ inline void common_spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr) { bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); - check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, - alg); + check_valid_spsv("spsv_optimize", opA, A_view, A_handle, x_handle, y_handle, + is_alpha_host_accessible, alg); if (!spsv_descr->buffer_size_called) { - throw mkl::uninitialized("sparse_blas", __func__, + throw mkl::uninitialized("sparse_blas", "spsv_optimize", "spsv_buffer_size must be called before spsv_optimize."); } spsv_descr->optimized_called = true;