[SPARSE] Update oneMKL backends to match new sparse API
Rbiessy authored and GitHub Actions committed May 27, 2024
1 parent 90bc218 commit 3442318
Showing 70 changed files with 6,140 additions and 2,939 deletions.
31 changes: 31 additions & 0 deletions docs/domains/sparse_linear_algebra.rst
@@ -0,0 +1,31 @@
.. _onemkl_sparse_linear_algebra:

Sparse Linear Algebra
=====================

See the latest specification for the sparse domain `here
<https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onemkl/source/domains/spblas/spblas>`_.

This page documents implementation-specific or backend-specific details of the
sparse domain.

oneMKL Intel CPU and GPU backends
---------------------------------

Known limitations as of Intel oneMKL product release 2024.1:

- All operations' algorithms except ``no_optimize_alg`` map to the default
algorithm.
- The required external workspace size is always 0 bytes.
- ``oneapi::mkl::sparse::set_csr_data`` and
``oneapi::mkl::sparse::set_coo_data`` functions cannot be used on a handle
that has already been used for an operation or its optimize function. Doing so
will throw an ``oneapi::mkl::unimplemented`` exception.
- Using ``spsv`` with the ``oneapi::mkl::sparse::spsv_alg::no_optimize_alg``
  algorithm and a sparse matrix that does not have the
  ``oneapi::mkl::sparse::matrix_property::sorted`` property will throw an
  ``oneapi::mkl::unimplemented`` exception (see the sketch after this list).
- Using ``spmm`` on Intel GPU with ``oneapi::mkl::transpose::conjtrans`` and a
  sparse matrix that has the ``oneapi::mkl::sparse::matrix_property::symmetric``
  property will throw an ``oneapi::mkl::unimplemented`` exception.
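
The ``sorted`` requirement for ``spsv`` can be met by flagging the matrix handle
before the optimize step. The snippet below is a minimal sketch only: it assumes
the specification's ``oneapi::mkl::sparse::set_matrix_property`` entry point and
caller-provided CSR data (``ia``, ``ja``, ``a``), and it omits the dense vector
handles, ``spsv`` descriptor, and the ``spsv`` calls themselves.

.. code-block:: cpp

    #include <cstdint>

    #include <sycl/sycl.hpp>

    #include "oneapi/mkl.hpp"

    // Sketch: create a CSR handle and mark it as sorted so that spsv with
    // spsv_alg::no_optimize_alg is accepted by the mklcpu/mklgpu backends.
    // set_matrix_property is assumed to follow the oneMKL sparse specification.
    void flag_sorted_csr(sycl::queue &queue, std::int32_t nrows, std::int32_t nnz,
                         std::int32_t *ia, std::int32_t *ja, float *a) {
        oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
        oneapi::mkl::sparse::init_csr_matrix(queue, &A_handle, nrows, nrows, nnz,
                                             oneapi::mkl::index_base::zero, ia, ja, a);

        // Promise that column indices are sorted within each row; the spsv
        // no_optimize_alg path requires this property on these backends.
        oneapi::mkl::sparse::set_matrix_property(
            queue, A_handle, oneapi::mkl::sparse::matrix_property::sorted);

        // ... init_dense_vector / init_spsv_descr / spsv_buffer_size /
        // spsv_optimize / spsv would follow, mirroring the spmv examples below ...

        oneapi::mkl::sparse::release_sparse_matrix(queue, A_handle, {}).wait_and_throw();
    }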
1 change: 1 addition & 0 deletions docs/index.rst
@@ -30,4 +30,5 @@ Contents

onemkl-datatypes.rst
domains/dense_linear_algebra.rst
domains/sparse_linear_algebra.rst
create_new_backend.rst
6 changes: 3 additions & 3 deletions examples/sparse_blas/compile_time_dispatching/CMakeLists.txt
@@ -27,8 +27,8 @@ endif()
include(WarningsUtils)

foreach(backend ${SPARSE_BLAS_BACKENDS})
set(EXAMPLE_NAME example_sparse_blas_gemv_usm_${backend})
add_executable(${EXAMPLE_NAME} sparse_blas_gemv_usm_${backend}.cpp)
set(EXAMPLE_NAME example_sparse_blas_spmv_usm_${backend})
add_executable(${EXAMPLE_NAME} sparse_blas_spmv_usm_${backend}.cpp)
target_include_directories(${EXAMPLE_NAME}
PUBLIC ${PROJECT_SOURCE_DIR}/examples/include
PUBLIC ${PROJECT_SOURCE_DIR}/include
@@ -39,6 +39,6 @@ foreach(backend ${SPARSE_BLAS_BACKENDS})
target_link_libraries(${EXAMPLE_NAME} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_${backend})

# Register example as ctest
add_test(NAME sparse_blas/EXAMPLE/CT/sparse_blas_gemv_usm_${backend} COMMAND ${EXAMPLE_NAME})
add_test(NAME sparse_blas/EXAMPLE/CT/sparse_blas_spmv_usm_${backend} COMMAND ${EXAMPLE_NAME})
endforeach(backend)

@@ -20,7 +20,7 @@
/*
*
* Content:
* This example demonstrates use of DPCPP API oneapi::mkl::sparse::gemv
* This example demonstrates use of DPCPP API oneapi::mkl::sparse::spmv
* using unified shared memory to perform general sparse matrix-vector
* multiplication on an Intel CPU SYCL device.
*
@@ -32,7 +32,7 @@
*
*
* This example demonstrates only single precision (float) data type for
* gemv matrix data
* spmv matrix data
*
*
*******************************************************************************/
@@ -77,7 +77,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) {
}
catch (sycl::exception const &e) {
std::cout << "Caught asynchronous SYCL "
"exception during sparse::gemv:\n"
"exception during sparse::spmv:\n"
<< e.what() << std::endl;
}
}
@@ -128,7 +128,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) {
//

oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans;
std::cout << "\n\t\tsparse::gemv parameters:\n";
oneapi::mkl::sparse::spmv_alg alg = oneapi::mkl::sparse::spmv_alg::default_alg;
oneapi::mkl::sparse::matrix_view A_view;

std::cout << "\n\t\tsparse::spmv parameters:\n";
std::cout << "\t\t\ttransA = "
<< (transA == oneapi::mkl::transpose::nontrans
? "nontrans"
@@ -137,23 +137,49 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) {
std::cout << "\t\t\tnrows = " << nrows << std::endl;
std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl;

// create and initialize handle for a Sparse Matrix in CSR format
oneapi::mkl::sparse::matrix_handle_t handle = nullptr;

oneapi::mkl::sparse::init_matrix_handle(cpu_selector, &handle);

auto ev_set = oneapi::mkl::sparse::set_csr_data(cpu_selector, handle, nrows, nrows, nnz,
oneapi::mkl::index_base::zero, ia, ja, a);

auto ev_opt = oneapi::mkl::sparse::optimize_gemv(cpu_selector, transA, handle, { ev_set });

auto ev_gemv =
oneapi::mkl::sparse::gemv(cpu_selector, transA, alpha, handle, x, beta, y, { ev_opt });

auto ev_release =
oneapi::mkl::sparse::release_matrix_handle(cpu_selector, &handle, { ev_gemv });

ev_release.wait_and_throw();
// Create and initialize handle for a Sparse Matrix in CSR format
oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
oneapi::mkl::sparse::init_csr_matrix(cpu_selector, &A_handle, nrows, nrows, nnz,
oneapi::mkl::index_base::zero, ia, ja, a);

// Create and initialize dense vector handles
oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr;
oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr;
oneapi::mkl::sparse::init_dense_vector(cpu_selector, &x_handle, sizevec, x);
oneapi::mkl::sparse::init_dense_vector(cpu_selector, &y_handle, sizevec, y);

// Create operation descriptor
oneapi::mkl::sparse::spmv_descr_t descr = nullptr;
oneapi::mkl::sparse::init_spmv_descr(cpu_selector, &descr);

// Allocate external workspace
std::size_t workspace_size = 0;
oneapi::mkl::sparse::spmv_buffer_size(cpu_selector, transA, &alpha, A_view, A_handle, x_handle,
&beta, y_handle, alg, descr, workspace_size);
void *workspace = sycl::malloc_device(workspace_size, cpu_queue);

// Optimize spmv
auto ev_opt =
oneapi::mkl::sparse::spmv_optimize(cpu_selector, transA, &alpha, A_view, A_handle, x_handle,
&beta, y_handle, alg, descr, workspace);

// Run spmv
auto ev_spmv = oneapi::mkl::sparse::spmv(cpu_selector, transA, &alpha, A_view, A_handle,
x_handle, &beta, y_handle, alg, descr, { ev_opt });

// Release handles and descriptor
std::vector<sycl::event> release_events;
release_events.push_back(
oneapi::mkl::sparse::release_dense_vector(cpu_selector, x_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_dense_vector(cpu_selector, y_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_sparse_matrix(cpu_selector, A_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_spmv_descr(cpu_selector, descr, { ev_spmv }));
for (auto event : release_events) {
event.wait_and_throw();
}

//
// Post Processing
@@ -181,7 +210,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) {
good &= check_result(res[row], z[row], nrows, row);
}

std::cout << "\n\t\t sparse::gemv example " << (good ? "passed" : "failed") << "\n\tFinished"
std::cout << "\n\t\t sparse::spmv example " << (good ? "passed" : "failed") << "\n\tFinished"
<< std::endl;

free_vec(fp_ptr_vec, cpu_queue);
@@ -211,7 +240,7 @@ void print_example_banner() {
std::cout << "# and alpha, beta are floating point type precision scalars." << std::endl;
std::cout << "# " << std::endl;
std::cout << "# Using apis:" << std::endl;
std::cout << "# sparse::gemv" << std::endl;
std::cout << "# sparse::spmv" << std::endl;
std::cout << "# " << std::endl;
std::cout << "# Using single precision (float) data type" << std::endl;
std::cout << "# " << std::endl;
@@ -232,22 +261,22 @@ int main(int /*argc*/, char ** /*argv*/) {
// TODO: Add cuSPARSE compile-time dispatcher in this example once it is supported.
sycl::device cpu_dev(sycl::cpu_selector_v);

std::cout << "Running Sparse BLAS GEMV USM example on CPU device." << std::endl;
std::cout << "Running Sparse BLAS SPMV USM example on CPU device." << std::endl;
std::cout << "Device name is: " << cpu_dev.get_info<sycl::info::device::name>()
<< std::endl;
std::cout << "Running with single precision real data type:" << std::endl;

run_sparse_matrix_vector_multiply_example<float, std::int32_t>(cpu_dev);
std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl;
std::cout << "Sparse BLAS SPMV USM example ran OK." << std::endl;
}
catch (sycl::exception const &e) {
std::cerr << "Caught synchronous SYCL exception during Sparse GEMV:" << std::endl;
std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl;
std::cerr << "\t" << e.what() << std::endl;
std::cerr << "\tSYCL error code: " << e.code().value() << std::endl;
return 1;
}
catch (std::exception const &e) {
std::cerr << "Caught std::exception during Sparse GEMV:" << std::endl;
std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl;
std::cerr << "\t" << e.what() << std::endl;
return 1;
}
2 changes: 1 addition & 1 deletion examples/sparse_blas/run_time_dispatching/CMakeLists.txt
@@ -22,7 +22,7 @@
include(WarningsUtils)

# Build object from all example sources
set(SPARSE_BLAS_RT_SOURCES "sparse_blas_gemv_usm")
set(SPARSE_BLAS_RT_SOURCES "sparse_blas_spmv_usm")
# Set up for the right backend for run-time dispatching examples
# If users build more than one backend (e.g. mklcpu and mklgpu, or mklcpu and CUDA), they may need to
# overwrite SYCL_DEVICE_FILTER in their environment to run on the desired backend
@@ -20,7 +20,7 @@
/*
*
* Content:
* This example demonstrates use of DPCPP API oneapi::mkl::sparse::gemv
* This example demonstrates use of DPCPP API oneapi::mkl::sparse::spmv
* using unified shared memory to perform general sparse matrix-vector
* multiplication on a SYCL device (HOST, CPU, GPU) that is selected
* during runtime.
@@ -33,7 +33,7 @@
*
*
* This example demonstrates only single precision (float) data type for
* gemv matrix data
* spmv matrix data
*
*
*******************************************************************************/
@@ -78,7 +78,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) {
}
catch (sycl::exception const &e) {
std::cout << "Caught asynchronous SYCL "
"exception during sparse::gemv:\n"
"exception during sparse::spmv:\n"
<< e.what() << std::endl;
}
}
@@ -128,7 +128,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) {
//

oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans;
std::cout << "\n\t\tsparse::gemv parameters:\n";
oneapi::mkl::sparse::spmv_alg alg = oneapi::mkl::sparse::spmv_alg::default_alg;
oneapi::mkl::sparse::matrix_view A_view;

std::cout << "\n\t\tsparse::spmv parameters:\n";
std::cout << "\t\t\ttransA = "
<< (transA == oneapi::mkl::transpose::nontrans
? "nontrans"
@@ -137,22 +140,49 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) {
std::cout << "\t\t\tnrows = " << nrows << std::endl;
std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl;

// create and initialize handle for a Sparse Matrix in CSR format
oneapi::mkl::sparse::matrix_handle_t handle = nullptr;

oneapi::mkl::sparse::init_matrix_handle(main_queue, &handle);

auto ev_set = oneapi::mkl::sparse::set_csr_data(main_queue, handle, nrows, nrows, nnz,
oneapi::mkl::index_base::zero, ia, ja, a);

auto ev_opt = oneapi::mkl::sparse::optimize_gemv(main_queue, transA, handle, { ev_set });

auto ev_gemv =
oneapi::mkl::sparse::gemv(main_queue, transA, alpha, handle, x, beta, y, { ev_opt });

auto ev_release = oneapi::mkl::sparse::release_matrix_handle(main_queue, &handle, { ev_gemv });

ev_release.wait_and_throw();
// Create and initialize handle for a Sparse Matrix in CSR format
oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr;
oneapi::mkl::sparse::init_csr_matrix(main_queue, &A_handle, nrows, nrows, nnz,
oneapi::mkl::index_base::zero, ia, ja, a);

// Create and initialize dense vector handles
oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr;
oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr;
oneapi::mkl::sparse::init_dense_vector(main_queue, &x_handle, static_cast<intType>(sizevec), x);
oneapi::mkl::sparse::init_dense_vector(main_queue, &y_handle, static_cast<intType>(sizevec), y);

// Create operation descriptor
oneapi::mkl::sparse::spmv_descr_t descr = nullptr;
oneapi::mkl::sparse::init_spmv_descr(main_queue, &descr);

// Allocate external workspace
std::size_t workspace_size = 0;
oneapi::mkl::sparse::spmv_buffer_size(main_queue, transA, &alpha, A_view, A_handle, x_handle,
&beta, y_handle, alg, descr, workspace_size);
void *workspace = sycl::malloc_device(workspace_size, main_queue);

// Optimize spmv
auto ev_opt =
oneapi::mkl::sparse::spmv_optimize(main_queue, transA, &alpha, A_view, A_handle, x_handle,
&beta, y_handle, alg, descr, workspace);

// Run spmv
auto ev_spmv = oneapi::mkl::sparse::spmv(main_queue, transA, &alpha, A_view, A_handle, x_handle,
&beta, y_handle, alg, descr, { ev_opt });

// Release handles and descriptor
std::vector<sycl::event> release_events;
release_events.push_back(
oneapi::mkl::sparse::release_dense_vector(main_queue, x_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_dense_vector(main_queue, y_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_sparse_matrix(main_queue, A_handle, { ev_spmv }));
release_events.push_back(
oneapi::mkl::sparse::release_spmv_descr(main_queue, descr, { ev_spmv }));
for (auto event : release_events) {
event.wait_and_throw();
}

//
// Post Processing
@@ -180,7 +210,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) {
good &= check_result(res[row], z[row], nrows, row);
}

std::cout << "\n\t\t sparse::gemv example " << (good ? "passed" : "failed") << "\n\tFinished"
std::cout << "\n\t\t sparse::spmv example " << (good ? "passed" : "failed") << "\n\tFinished"
<< std::endl;

free_vec(fp_ptr_vec, main_queue);
@@ -210,7 +240,7 @@ void print_example_banner() {
std::cout << "# and alpha, beta are floating point type precision scalars." << std::endl;
std::cout << "# " << std::endl;
std::cout << "# Using apis:" << std::endl;
std::cout << "# sparse::gemv" << std::endl;
std::cout << "# sparse::spmv" << std::endl;
std::cout << "# " << std::endl;
std::cout << "# Using single precision (float) data type" << std::endl;
std::cout << "# " << std::endl;
@@ -234,28 +264,28 @@ int main(int /*argc*/, char ** /*argv*/) {
sycl::device dev = sycl::device();

if (dev.is_gpu()) {
std::cout << "Running Sparse BLAS GEMV USM example on GPU device." << std::endl;
std::cout << "Running Sparse BLAS SPMV USM example on GPU device." << std::endl;
std::cout << "Device name is: " << dev.get_info<sycl::info::device::name>()
<< std::endl;
}
else {
std::cout << "Running Sparse BLAS GEMV USM example on CPU device." << std::endl;
std::cout << "Running Sparse BLAS SPMV USM example on CPU device." << std::endl;
std::cout << "Device name is: " << dev.get_info<sycl::info::device::name>()
<< std::endl;
}
std::cout << "Running with single precision real data type:" << std::endl;

run_sparse_matrix_vector_multiply_example<float, std::int32_t>(dev);
std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl;
std::cout << "Sparse BLAS SPMV USM example ran OK." << std::endl;
}
catch (sycl::exception const &e) {
std::cerr << "Caught synchronous SYCL exception during Sparse GEMV:" << std::endl;
std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl;
std::cerr << "\t" << e.what() << std::endl;
std::cerr << "\tSYCL error code: " << e.code().value() << std::endl;
return 1;
}
catch (std::exception const &e) {
std::cerr << "Caught std::exception during Sparse GEMV:" << std::endl;
std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl;
std::cerr << "\t" << e.what() << std::endl;
return 1;
}