From 317c4998b642dc141f7ea41440475fa8ac0cc381 Mon Sep 17 00:00:00 2001 From: Liam Adams Date: Wed, 13 Nov 2024 22:08:27 +0000 Subject: [PATCH 01/11] Extract dummyShouldNotBeCalled function and hic namespace macro. --- hic/src/CMakeLists.txt | 2 ++ .../hic/hic_dummy/dummyShouldNotBeCalled.h | 21 +++++++++++++++++++ hic/src/hic/hic_dummy/hic_dummy_runtime.h | 7 +------ hic/src/hic/hic_namespace_macro.h | 19 +++++++++++++++++ hic/src/hic/hic_runtime.h | 9 +------- 5 files changed, 44 insertions(+), 14 deletions(-) create mode 100644 hic/src/hic/hic_dummy/dummyShouldNotBeCalled.h create mode 100644 hic/src/hic/hic_namespace_macro.h diff --git a/hic/src/CMakeLists.txt b/hic/src/CMakeLists.txt index 5048de357..82f94ee4c 100644 --- a/hic/src/CMakeLists.txt +++ b/hic/src/CMakeLists.txt @@ -30,6 +30,8 @@ ecbuild_add_library( TARGET hic install( FILES ${PROJECT_BINARY_DIR}/src/hic/hic_config.h DESTINATION include/hic ) install( FILES hic/hic.h DESTINATION include/hic ) install( FILES hic/hic_runtime.h DESTINATION include/hic ) +install( FILES hic/hic_namespace_macro.h DESTINATION include/hic ) +install( FILES hic/hic_dummy/dummyShouldNotBeCalled.h DESTINATION include/hic/hic_dummy ) install( FILES hic/hic_dummy/hic_dummy_runtime.h DESTINATION include/hic/hic_dummy ) if( HAVE_CUDA ) diff --git a/hic/src/hic/hic_dummy/dummyShouldNotBeCalled.h b/hic/src/hic/hic_dummy/dummyShouldNotBeCalled.h new file mode 100644 index 000000000..53fe40ad1 --- /dev/null +++ b/hic/src/hic/hic_dummy/dummyShouldNotBeCalled.h @@ -0,0 +1,21 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#pragma once + +#include +#include + +namespace { + +[[noreturn]] void dummyShouldNotBeCalled(const char* symbol) { + throw std::runtime_error(std::string(symbol)+" is using the dummy backend and should not be called"); +} + +} \ No newline at end of file diff --git a/hic/src/hic/hic_dummy/hic_dummy_runtime.h b/hic/src/hic/hic_dummy/hic_dummy_runtime.h index 1560b790c..910e0378c 100644 --- a/hic/src/hic/hic_dummy/hic_dummy_runtime.h +++ b/hic/src/hic/hic_dummy/hic_dummy_runtime.h @@ -8,8 +8,7 @@ * nor does it submit to any jurisdiction. */ -#include -#include +#include "hic/hic_dummy/dummyShouldNotBeCalled.h" #define DUMMY_SHOULD_NOT_BE_CALLED(SYMBOL) dummyShouldNotBeCalled( #SYMBOL ) #define DUMMY_FUNCTION(SYMBOL) \ @@ -23,10 +22,6 @@ namespace { -[[noreturn]] void dummyShouldNotBeCalled(const char* symbol) { - throw std::runtime_error(std::string(symbol)+" is using the dummy backend and should not be called"); -} - using dummyError_t = int; using dummyEvent_t = void*; using dummyStream_t = void*; diff --git a/hic/src/hic/hic_namespace_macro.h b/hic/src/hic/hic_namespace_macro.h new file mode 100644 index 000000000..af602b3a6 --- /dev/null +++ b/hic/src/hic/hic_namespace_macro.h @@ -0,0 +1,19 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#pragma once + +#if !defined(HIC_NAMESPACE) + #define HIC_NAMESPACE + #define HIC_NAMESPACE_BEGIN + #define HIC_NAMESPACE_END +#else + #define HIC_NAMESPACE_BEGIN namespace HIC_NAMESPACE { + #define HIC_NAMESPACE_END } +#endif \ No newline at end of file diff --git a/hic/src/hic/hic_runtime.h b/hic/src/hic/hic_runtime.h index 25af23cb0..1e4553193 100644 --- a/hic/src/hic/hic_runtime.h +++ b/hic/src/hic/hic_runtime.h @@ -9,14 +9,7 @@ */ #pragma once -#if !defined(HIC_NAMESPACE) - #define HIC_NAMESPACE - #define HIC_NAMESPACE_BEGIN - #define HIC_NAMESPACE_END -#else - #define HIC_NAMESPACE_BEGIN namespace HIC_NAMESPACE { - #define HIC_NAMESPACE_END } -#endif +#include "hic/hic_namespace_macro.h" #if HIC_BACKEND_CUDA #define HIC_BACKEND cuda From ccccf70aa204eb1809e162c9203d4deaed139016 Mon Sep 17 00:00:00 2001 From: Liam Adams Date: Wed, 13 Nov 2024 22:09:33 +0000 Subject: [PATCH 02/11] Find and link to hipsparse and cusparse. --- hic/CMakeLists.txt | 1 + hic/src/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+) diff --git a/hic/CMakeLists.txt b/hic/CMakeLists.txt index 904f7a077..a2f54ebb9 100644 --- a/hic/CMakeLists.txt +++ b/hic/CMakeLists.txt @@ -35,6 +35,7 @@ if( HAVE_CUDA ) find_package(CUDAToolkit REQUIRED) elseif( HAVE_HIP ) find_package(hip CONFIG REQUIRED) + find_package(hipsparse CONFIG REQUIRED) endif() add_subdirectory( src ) diff --git a/hic/src/CMakeLists.txt b/hic/src/CMakeLists.txt index 82f94ee4c..41a25c882 100644 --- a/hic/src/CMakeLists.txt +++ b/hic/src/CMakeLists.txt @@ -36,6 +36,8 @@ install( FILES hic/hic_dummy/hic_dummy_runtime.h DESTINATION include/hi if( HAVE_CUDA ) target_link_libraries( hic INTERFACE CUDA::cudart ) + target_link_libraries( hic INTERFACE CUDA::cusparse ) elseif( HAVE_HIP ) target_link_libraries( hic INTERFACE hip::host ) + target_link_libraries( hic INTERFACE roc::hipsparse ) endif() From 47c246ae2fc7d902f28a5f26443698b4b2d26d1b Mon Sep 17 00:00:00 2001 From: Liam Adams Date: Wed, 13 Nov 2024 22:10:31 +0000 Subject: [PATCH 03/11] Add hicsparse wrapper to parts of hipsparse and cusparse. --- hic/src/CMakeLists.txt | 3 + hic/src/hic/hic_dummy/hicsparse_dummy.h | 78 ++++++ hic/src/hic/hic_library_types.h | 43 ++++ hic/src/hic/hicsparse.h | 135 ++++++++++ hic/tests/CMakeLists.txt | 5 + hic/tests/test_hicsparse.cc | 327 ++++++++++++++++++++++++ 6 files changed, 591 insertions(+) create mode 100644 hic/src/hic/hic_dummy/hicsparse_dummy.h create mode 100644 hic/src/hic/hic_library_types.h create mode 100644 hic/src/hic/hicsparse.h create mode 100644 hic/tests/test_hicsparse.cc diff --git a/hic/src/CMakeLists.txt b/hic/src/CMakeLists.txt index 41a25c882..42803534b 100644 --- a/hic/src/CMakeLists.txt +++ b/hic/src/CMakeLists.txt @@ -31,8 +31,11 @@ install( FILES ${PROJECT_BINARY_DIR}/src/hic/hic_config.h DESTINATION include/hi install( FILES hic/hic.h DESTINATION include/hic ) install( FILES hic/hic_runtime.h DESTINATION include/hic ) install( FILES hic/hic_namespace_macro.h DESTINATION include/hic ) +install( FILES hic/hic_library_types.h DESTINATION include/hic ) +install( FILES hic/hicsparse.h DESTINATION include/hic ) install( FILES hic/hic_dummy/dummyShouldNotBeCalled.h DESTINATION include/hic/hic_dummy ) install( FILES hic/hic_dummy/hic_dummy_runtime.h DESTINATION include/hic/hic_dummy ) +install( FILES hic/hic_dummy/hicsparse_dummy.h DESTINATION include/hic/hic_dummy ) if( HAVE_CUDA ) target_link_libraries( hic INTERFACE CUDA::cudart ) diff --git a/hic/src/hic/hic_dummy/hicsparse_dummy.h b/hic/src/hic/hic_dummy/hicsparse_dummy.h new file mode 100644 index 000000000..c9256d888 --- /dev/null +++ b/hic/src/hic/hic_dummy/hicsparse_dummy.h @@ -0,0 +1,78 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "hic/hic_dummy/dummyShouldNotBeCalled.h" + +#define DUMMY_SHOULD_NOT_BE_CALLED(SYMBOL) dummyShouldNotBeCalled( #SYMBOL ) +#define DUMMY_FUNCTION(SYMBOL) \ + template inline \ + dummysparseStatus_t dummy##SYMBOL(Args&&... args) { \ + DUMMY_SHOULD_NOT_BE_CALLED( hic##SYMBOL ); \ + return dummysparseStatus_t{0}; \ + } +#define DUMMY_VALUE(SYMBOL) \ + constexpr int dummy##SYMBOL = 0; + +namespace { + +using dummysparseHandle_t = int; +using dummysparseStatus_t = int; +using dummysparseIndexType_t = int; +using dummysparseOrder_t = int; +using dummysparseConstDnVecDescr_t = void*; +using dummysparseDnVecDescr_t = void*; +using dummysparseConstDnMatDescr_t = void*; +using dummysparseDnMatDescr_t = void*; +using dummysparseConstSpVecDescr_t = void*; +using dummysparseSpVecDescr_t = void*; +using dummysparseConstSpMatDescr_t = void*; +using dummysparseSpMatDescr_t = void*; +using dummysparseSpMVAlg_t = int; +using dummysparseSpMMAlg_t = int; + +DUMMY_FUNCTION(sparseGetErrorString) +DUMMY_FUNCTION(sparseCreate) +DUMMY_FUNCTION(sparseDestroy) +DUMMY_FUNCTION(sparseCreateConstDnVec) +DUMMY_FUNCTION(sparseCreateDnVec) +DUMMY_FUNCTION(sparseDestroyDnVec) +DUMMY_FUNCTION(sparseCreateConstDnMat) +DUMMY_FUNCTION(sparseCreateDnMat) +DUMMY_FUNCTION(sparseDestroyDnMat) +DUMMY_FUNCTION(sparseCreateConstSpVec) +DUMMY_FUNCTION(sparseCreateSpVec) +DUMMY_FUNCTION(sparseDestroySpVec) +DUMMY_FUNCTION(sparseCreateConstCsr) +DUMMY_FUNCTION(sparseDestroySpMat) +DUMMY_FUNCTION(sparseSpMV_bufferSize) +DUMMY_FUNCTION(sparseSpMV_preprocess) +DUMMY_FUNCTION(sparseSpMV) +DUMMY_FUNCTION(sparseSpMM_bufferSize) +DUMMY_FUNCTION(sparseSpMM_preprocess) +DUMMY_FUNCTION(sparseSpMM) + +DUMMY_VALUE(SPARSE_STATUS_SUCCESS) +DUMMY_VALUE(SPARSE_ORDER_COL) +DUMMY_VALUE(SPARSE_ORDER_ROW) +DUMMY_VALUE(SPARSE_INDEX_32I) +DUMMY_VALUE(SPARSE_INDEX_64I) +DUMMY_VALUE(SPARSE_INDEX_BASE_ZERO) +DUMMY_VALUE(SPARSE_INDEX_BASE_ONE) +DUMMY_VALUE(SPARSE_SPMV_ALG_DEFAULT) +DUMMY_VALUE(SPARSE_SPMM_ALG_DEFAULT) +DUMMY_VALUE(SPARSE_OPERATION_NON_TRANSPOSE) +DUMMY_VALUE(SPARSE_OPERATION_TRANSPOSE) +DUMMY_VALUE(SPARSE_OPERATION_CONJUGATE_TRANSPOSE) + +} + +#undef DUMMY_FUNCTION +#undef DUMMY_VALUE +#undef DUMMY_SHOULD_NOT_BE_CALLED \ No newline at end of file diff --git a/hic/src/hic/hic_library_types.h b/hic/src/hic/hic_library_types.h new file mode 100644 index 000000000..c9c612b5a --- /dev/null +++ b/hic/src/hic/hic_library_types.h @@ -0,0 +1,43 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#pragma once + +#include "hic/hic_namespace_macro.h" + +#if HIC_BACKEND_CUDA + #include +#elif HIC_BACKEND_HIP + #include +#elif HIC_BACKEND_DUMMY + // intentionally blank +#else + #error Unsupported hic backend. Please define HIC_BACKEND_CUDA or HIC_BACKEND_HIP or HIC_BACKEND_DUMMY +#endif + +//------------------------------------------------ +HIC_NAMESPACE_BEGIN +//------------------------------------------------ + +#if HIC_BACKEND_CUDA + constexpr decltype(CUDA_R_32F) HIC_R_32F = CUDA_R_32F; + constexpr decltype(CUDA_R_64F) HIC_R_64F = CUDA_R_64F; +#elif HIC_BACKEND_HIP + constexpr decltype(HIP_R_32F) HIC_R_32F = HIP_R_32F; + constexpr decltype(HIP_R_64F) HIC_R_64F = HIP_R_64F; +#elif HIC_BACKEND_DUMMY + constexpr int HIC_R_32F = 0; + constexpr int HIC_R_64F = 0; +#else + #error Unsupported hic backend. Please define HIC_BACKEND_CUDA or HIC_BACKEND_HIP or HIC_BACKEND_DUMMY +#endif + +//------------------------------------------------ +HIC_NAMESPACE_END +//------------------------------------------------ \ No newline at end of file diff --git a/hic/src/hic/hicsparse.h b/hic/src/hic/hicsparse.h new file mode 100644 index 000000000..cde17727d --- /dev/null +++ b/hic/src/hic/hicsparse.h @@ -0,0 +1,135 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ +#pragma once + +#include "hic/hic_namespace_macro.h" +#include "hic/hic_library_types.h" + +#if HIC_BACKEND_CUDA + #define HICSPARSE_BACKEND_PREFIX cu + #define HICSPARSE_BACKEND_PREFIX_CAPS CU + #include +#elif HIC_BACKEND_HIP + #define HICSPARSE_BACKEND_PREFIX hip + #define HICSPARSE_BACKEND_PREFIX_CAPS HIP + #include +#elif HIC_BACKEND_DUMMY + #define HICSPARSE_BACKEND_PREFIX dummy + #define HICSPARSE_BACKEND_PREFIX_CAPS dummy + #include "hic/hic_dummy/hicsparse_dummy.h" +#else + #error Unsupported hic backend. Please define HIC_BACKEND_CUDA or HIC_BACKEND_HIP or HIC_BACKEND_DUMMY +#endif + +#define HIC_PREFIX hic +#define HIC_PREFIX_CAPS HIC +#define HIC_CONCAT_(A, B) A ## B +#define HIC_CONCAT(A, B) HIC_CONCAT_(A, B) +#define HIC_SYMBOL(API) HIC_CONCAT(HIC_PREFIX, API) +#define HIC_SYMBOL_CAPS(API) HIC_CONCAT(HIC_PREFIX_CAPS, API) + +#define HIC_TYPE(TYPE) \ + using HIC_SYMBOL(TYPE) = HIC_CONCAT(HICSPARSE_BACKEND_PREFIX, TYPE); + +#define HIC_FUNCTION(FUNCTION) \ + template inline \ + auto HIC_SYMBOL(FUNCTION)(Args&&... args) -> decltype(HIC_CONCAT(HICSPARSE_BACKEND_PREFIX, FUNCTION)(std::forward(args)...)) { \ + return HIC_CONCAT(HICSPARSE_BACKEND_PREFIX, FUNCTION)(std::forward(args)...); \ + } + +#define HIC_VALUE(VALUE) \ + constexpr decltype(HIC_CONCAT(HICSPARSE_BACKEND_PREFIX_CAPS, VALUE)) HIC_SYMBOL_CAPS(VALUE) = HIC_CONCAT(HICSPARSE_BACKEND_PREFIX_CAPS, VALUE); + +//------------------------------------------------ +HIC_NAMESPACE_BEGIN +//------------------------------------------------ + +HIC_TYPE(sparseHandle_t) +HIC_TYPE(sparseStatus_t) +HIC_TYPE(sparseIndexType_t) +HIC_TYPE(sparseOrder_t) +HIC_TYPE(sparseConstDnVecDescr_t) +HIC_TYPE(sparseDnVecDescr_t) +HIC_TYPE(sparseConstDnMatDescr_t) +HIC_TYPE(sparseDnMatDescr_t) +HIC_TYPE(sparseConstSpVecDescr_t) +HIC_TYPE(sparseSpVecDescr_t) +HIC_TYPE(sparseConstSpMatDescr_t) +HIC_TYPE(sparseSpMatDescr_t) +HIC_TYPE(sparseSpMVAlg_t) +HIC_TYPE(sparseSpMMAlg_t) + +HIC_FUNCTION(sparseGetErrorString) +HIC_FUNCTION(sparseCreate) +HIC_FUNCTION(sparseDestroy) +HIC_FUNCTION(sparseCreateConstDnVec) +HIC_FUNCTION(sparseCreateDnVec) +HIC_FUNCTION(sparseDestroyDnVec) +HIC_FUNCTION(sparseCreateConstDnMat) +HIC_FUNCTION(sparseCreateDnMat) +HIC_FUNCTION(sparseDestroyDnMat) +HIC_FUNCTION(sparseCreateConstSpVec) +HIC_FUNCTION(sparseCreateSpVec) +HIC_FUNCTION(sparseDestroySpVec) +HIC_FUNCTION(sparseCreateConstCsr) +HIC_FUNCTION(sparseDestroySpMat) +HIC_FUNCTION(sparseSpMV_bufferSize) +HIC_FUNCTION(sparseSpMV_preprocess) +HIC_FUNCTION(sparseSpMV) +HIC_FUNCTION(sparseSpMM_bufferSize) +HIC_FUNCTION(sparseSpMM_preprocess) +HIC_FUNCTION(sparseSpMM) + +HIC_VALUE(SPARSE_STATUS_SUCCESS) +HIC_VALUE(SPARSE_ORDER_COL) +HIC_VALUE(SPARSE_ORDER_ROW) +HIC_VALUE(SPARSE_INDEX_32I) +HIC_VALUE(SPARSE_INDEX_64I) +HIC_VALUE(SPARSE_INDEX_BASE_ZERO) +HIC_VALUE(SPARSE_INDEX_BASE_ONE) +HIC_VALUE(SPARSE_SPMV_ALG_DEFAULT) +HIC_VALUE(SPARSE_SPMM_ALG_DEFAULT) +HIC_VALUE(SPARSE_OPERATION_NON_TRANSPOSE) +HIC_VALUE(SPARSE_OPERATION_TRANSPOSE) +HIC_VALUE(SPARSE_OPERATION_CONJUGATE_TRANSPOSE) + +#if HIC_BACKEND_DUMMY +#define HICSPARSE_CALL(val) +#else +#define HICSPARSE_CALL(val) hicsparse_assert((val), #val, __FILE__, __LINE__) +#endif + +inline void hicsparse_assert(hicsparseStatus_t status, const char* const func, const char* const file, const int line) { + // Ignore errors when HIP/CUDA runtime is unloaded or deinitialized. + // This happens when calling HIP/CUDA after main has ended, e.g. in teardown of static variables calling `hicFree` + // --> ignore hicErrorDeinitialized (a.k.a. cudaErrorCudartUnloading / hipErrorDeinitialized) + if (status != HICSPARSE_STATUS_SUCCESS) { + std::ostringstream msg; + msg << "HIC Runtime Error [code="< +#include +#include +#include +#include + +#include "hic/hic.h" +#include "hic/hicsparse.h" + +// ----------------------------------------------------------------------------- + +int test_hicsparseCreate() { + std::cout << "--- " << __FUNCTION__ << std::endl; + hicsparseHandle_t handle; + HICSPARSE_CALL( hicsparseCreate(&handle) ); + HICSPARSE_CALL( hicsparseDestroy(handle) ); + std::cout << "--- " << __FUNCTION__ << " SUCCEEDED " << std::endl; + return 0; // success +} + +// ----------------------------------------------------------------------------- + +int test_hicsparseSpMV() { + std::cout << "--- " << __FUNCTION__ << std::endl; + + // Create a sparse matrix + const int rows = 3; + const int cols = 3; + const int nnz = 3; + double values[nnz] = {1.0, 2.0, 3.0}; + int row_offsets[rows+1] = {0, 1, 2, 3}; + int column_indices[nnz] = {0, 1, 2}; + + // Put the sparse matrix onto the device + double* dvalues; + int* drow_offsets; + int* dcolumn_indices; + HIC_CALL( hicMalloc((void**)&dvalues, nnz * sizeof(double)) ); + HIC_CALL( hicMalloc((void**)&drow_offsets, (rows+1) * sizeof(int)) ); + HIC_CALL( hicMalloc((void**)&dcolumn_indices, nnz * sizeof(int)) ); + HIC_CALL( hicMemcpy(dvalues, values, nnz * sizeof(double), hicMemcpyHostToDevice) ); + HIC_CALL( hicMemcpy(drow_offsets, row_offsets, (rows+1) * sizeof(int), hicMemcpyHostToDevice) ); + HIC_CALL( hicMemcpy(dcolumn_indices, column_indices, nnz * sizeof(int), hicMemcpyHostToDevice) ); + + // Create a dense vector + const int N = 3; + double x[N] = {1.0, 2.0, 3.0}; + double y[N] = {0.0, 0.0, 0.0}; + + // Put the dense vector onto the device + double* dx; + double* dy; + HIC_CALL( hicMalloc((void**)&dx, N * sizeof(double)) ); + HIC_CALL( hicMalloc((void**)&dy, N * sizeof(double)) ); + HIC_CALL( hicMemcpy(dx, x, N * sizeof(double), hicMemcpyHostToDevice) ); + HIC_CALL( hicMemcpy(dy, y, N * sizeof(double), hicMemcpyHostToDevice) ); + + // Create sparse library handle + hicsparseHandle_t handle; + HICSPARSE_CALL( hicsparseCreate(&handle) ); + + // Create a sparse matrix descriptor + hicsparseConstSpMatDescr_t matA; + HICSPARSE_CALL( hicsparseCreateConstCsr( + &matA, + rows, cols, nnz, + drow_offsets, + dcolumn_indices, + dvalues, + HICSPARSE_INDEX_32I, + HICSPARSE_INDEX_32I, + HICSPARSE_INDEX_BASE_ZERO, + HIC_R_64F) ); + + // Create dense matrix descriptors + hicsparseConstDnVecDescr_t vecX; + HICSPARSE_CALL( hicsparseCreateConstDnVec( + &vecX, + N, + dx, + HIC_R_64F) ); + + hicsparseDnVecDescr_t vecY; + HICSPARSE_CALL( hicsparseCreateDnVec( + &vecY, + N, + dy, + HIC_R_64F) ); + + // Set parameters + const double alpha = 1.0; + const double beta = 0.0; + + // Determine buffer size + size_t bufferSize = 0; + HICSPARSE_CALL( hicsparseSpMV_bufferSize( + handle, + HICSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + matA, + vecX, + &beta, + vecY, + HIC_R_64F, + HICSPARSE_SPMV_ALG_DEFAULT, + &bufferSize) ); + + // Allocate buffer + char* buffer; + HIC_CALL( hicMalloc(&buffer, bufferSize) ); + + // Perform SpMV + HICSPARSE_CALL( hicsparseSpMV( + handle, + HICSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + matA, + vecX, + &beta, + vecY, + HIC_R_64F, + HICSPARSE_SPMV_ALG_DEFAULT, + buffer) ); + + // Copy result back to host + HIC_CALL( hicMemcpy(y, dy, N * sizeof(double), hicMemcpyDeviceToHost) ); + + // Check result + const double expected_y[N] = {1.0, 4.0, 9.0}; + for( int i = 0; i < N; ++i ) { + if( y[i] != expected_y[i] ) { + throw std::runtime_error("Error: y[" + std::to_string(i) + "] = " + std::to_string(y[i]) + " != " + std::to_string(expected_y[i])); + } + } + + // Clean up + HIC_CALL( hicFree(dy) ); + HIC_CALL( hicFree(dx) ); + HIC_CALL( hicFree(dcolumn_indices) ); + HIC_CALL( hicFree(drow_offsets) ); + HIC_CALL( hicFree(dvalues) ); + HICSPARSE_CALL( hicsparseDestroyDnVec(vecY) ); + HICSPARSE_CALL( hicsparseDestroyDnVec(vecX) ); + HICSPARSE_CALL( hicsparseDestroySpMat(matA) ); + HICSPARSE_CALL( hicsparseDestroy(handle) ); + + std::cout << "--- " << __FUNCTION__ << " SUCCEEDED " << std::endl; + return 0; // success +} + +// ----------------------------------------------------------------------------- + +int test_hicsparseSpMM() { + std::cout << "--- " << __FUNCTION__ << std::endl; + + // Create a sparse matrix + const int rows = 3; + const int cols = 3; + const int nnz = 3; + double values[nnz] = {1.0, 2.0, 3.0}; + int row_offsets[rows + 1] = {0, 1, 2, 3}; + int column_indices[nnz] = {0, 1, 2}; + + // Put the sparse matrix onto the device + double* dvalues; + int* drow_offsets; + int* dcolumn_indices; + HIC_CALL(hicMalloc((void**)&dvalues, nnz * sizeof(double))); + HIC_CALL(hicMalloc((void**)&drow_offsets, (rows + 1) * sizeof(int))); + HIC_CALL(hicMalloc((void**)&dcolumn_indices, nnz * sizeof(int))); + HIC_CALL(hicMemcpy(dvalues, values, nnz * sizeof(double), hicMemcpyHostToDevice)); + HIC_CALL(hicMemcpy(drow_offsets, row_offsets, (rows + 1) * sizeof(int), hicMemcpyHostToDevice)); + HIC_CALL(hicMemcpy(dcolumn_indices, column_indices, nnz * sizeof(int), hicMemcpyHostToDevice)); + + // Create dense matrices + const int B_rows = 3; + const int B_cols = 3; + double B[B_rows * B_cols] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; + double C[rows * B_cols] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + + // Put the dense matrices onto the device + double* dB; + double* dC; + HIC_CALL(hicMalloc((void**)&dB, B_rows * B_cols * sizeof(double))); + HIC_CALL(hicMalloc((void**)&dC, rows * B_cols * sizeof(double))); + HIC_CALL(hicMemcpy(dB, B, B_rows * B_cols * sizeof(double), hicMemcpyHostToDevice)); + HIC_CALL(hicMemcpy(dC, C, rows * B_cols * sizeof(double), hicMemcpyHostToDevice)); + + // Create sparse library handle + hicsparseHandle_t handle; + HICSPARSE_CALL(hicsparseCreate(&handle)); + + // Create a sparse matrix descriptor + hicsparseConstSpMatDescr_t matA; + HICSPARSE_CALL(hicsparseCreateConstCsr( + &matA, + rows, cols, nnz, + drow_offsets, + dcolumn_indices, + dvalues, + HICSPARSE_INDEX_32I, + HICSPARSE_INDEX_32I, + HICSPARSE_INDEX_BASE_ZERO, + HIC_R_64F)); + + // Create dense matrix descriptors + hicsparseConstDnMatDescr_t matB; + HICSPARSE_CALL(hicsparseCreateConstDnMat( + &matB, + B_rows, + B_cols, + B_cols, + dB, + HIC_R_64F, + HICSPARSE_ORDER_ROW)); + + hicsparseDnMatDescr_t matC; + HICSPARSE_CALL(hicsparseCreateDnMat( + &matC, + rows, + B_cols, + B_cols, + dC, + HIC_R_64F, + HICSPARSE_ORDER_ROW)); + + // Set parameters + const double alpha = 1.0; + const double beta = 0.0; + + // Determine buffer size + size_t bufferSize = 0; + HICSPARSE_CALL(hicsparseSpMM_bufferSize( + handle, + HICSPARSE_OPERATION_NON_TRANSPOSE, + HICSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + matA, + matB, + &beta, + matC, + HIC_R_64F, + HICSPARSE_SPMM_ALG_DEFAULT, + &bufferSize)); + + // Allocate buffer + char* buffer; + HIC_CALL(hicMalloc(&buffer, bufferSize)); + + // Perform SpMM + HICSPARSE_CALL(hicsparseSpMM( + handle, + HICSPARSE_OPERATION_NON_TRANSPOSE, + HICSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + matA, + matB, + &beta, + matC, + HIC_R_64F, + HICSPARSE_SPMM_ALG_DEFAULT, + buffer)); + + // Copy result back to host + HIC_CALL(hicMemcpy(C, dC, rows * B_cols * sizeof(double), hicMemcpyDeviceToHost)); + + // Check result + const double expected_C[rows * B_cols] = {1.0, 2.0, 3.0, 8.0, 10.0, 12.0, 21.0, 24.0, 27.0}; + for (int i = 0; i < rows * B_cols; ++i) { + if (C[i] != expected_C[i]) { + throw std::runtime_error("Error: C[" + std::to_string(i) + "] = " + std::to_string(C[i]) + " != " + std::to_string(expected_C[i])); + } + } + + // Clean up + HIC_CALL(hicFree(dC)); + HIC_CALL(hicFree(dB)); + HIC_CALL(hicFree(dcolumn_indices)); + HIC_CALL(hicFree(drow_offsets)); + HIC_CALL(hicFree(dvalues)); + HICSPARSE_CALL(hicsparseDestroyDnMat(matC)); + HICSPARSE_CALL(hicsparseDestroyDnMat(matB)); + HICSPARSE_CALL(hicsparseDestroySpMat(matA)); + HICSPARSE_CALL(hicsparseDestroy(handle)); + + std::cout << "--- " << __FUNCTION__ << " SUCCEEDED " << std::endl; + return 0; // success +} + +// ----------------------------------------------------------------------------- + +std::vector> tests = { + test_hicsparseCreate, + test_hicsparseSpMV, + test_hicsparseSpMM, +}; + +int main(int argc, char* argv[]) { + int num_devices = 0; + hicGetDeviceCount(&num_devices); + if( num_devices == 0 ) { + std::ignore = hicGetLastError(); + std::cout << "TEST IGNORED, hicGetDeviceCount -> 0" << std::endl; + return 0; + } + std::cout << "hicGetDeviceCount -> " << num_devices << std::endl; + int error = 0; + for( auto& test: tests) { + try { + error += test(); + } + catch( std::exception& e ) { + error += 1; + std::cout << e.what() << std::endl; + } + } + return error; +} From 17aa0e4f1d3dd58a2d97325b519908b4ac9854ae Mon Sep 17 00:00:00 2001 From: Liam Adams Date: Thu, 14 Nov 2024 17:38:02 +0000 Subject: [PATCH 04/11] Add SparseMatrix class into Atlas with host-device memory management. --- src/atlas/CMakeLists.txt | 2 + src/atlas/linalg/SparseMatrix.cc | 86 +++++++++++++++ src/atlas/linalg/SparseMatrix.h | 174 +++++++++++++++++++++++++++++++ 3 files changed, 262 insertions(+) create mode 100644 src/atlas/linalg/SparseMatrix.cc create mode 100644 src/atlas/linalg/SparseMatrix.h diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 60b93b9a1..2028dbd10 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -703,6 +703,8 @@ linalg/Indexing.h linalg/Introspection.h linalg/View.h linalg/sparse.h +linalg/SparseMatrix.h +linalg/SparseMatrix.cc linalg/sparse/Backend.h linalg/sparse/Backend.cc linalg/sparse/SparseMatrixMultiply.h diff --git a/src/atlas/linalg/SparseMatrix.cc b/src/atlas/linalg/SparseMatrix.cc new file mode 100644 index 000000000..643ca464c --- /dev/null +++ b/src/atlas/linalg/SparseMatrix.cc @@ -0,0 +1,86 @@ + +#include "atlas/linalg/SparseMatrix.h" + +#include +#include +#include + +#include "atlas/array/helpers/ArrayCopier.h" + +#include "eckit/exception/Exceptions.h" + +namespace atlas { +namespace linalg { + +//---------------------------------------------------------------------------------------------------------------------- + +SparseMatrix::SparseMatrix() : host_matrix_() {} + + +SparseMatrix::SparseMatrix(Size nrows, Size ncols, const std::vector& triplets) : + host_matrix_(nrows, ncols, triplets) { + resetDeviceStorage(); +} + + +SparseMatrix::SparseMatrix(const SparseMatrix& other) : host_matrix_(other.host_matrix_) { + if (!other.empty()) { // in case we copy an other that was constructed empty + resetDeviceStorage(); + } +} + + +SparseMatrix& SparseMatrix::operator=(const SparseMatrix& other) { + SparseMatrix copy(other); + swap(copy); + return *this; +} + + +void SparseMatrix::swap(SparseMatrix& other) { + host_matrix_.swap(other.host_matrix_); + outer_.swap(other.outer_); + inner_.swap(other.inner_); + data_.swap(other.data_); +} + + +size_t SparseMatrix::footprint() const { + return host_matrix_.footprint() + + outer_->footprint() + + inner_->footprint() + + data_->footprint(); +} + + +SparseMatrix& SparseMatrix::prune(Scalar val) { + host_matrix_.prune(val); + resetDeviceStorage(); + setDeviceNeedsUpdate(true); + return *this; +} + + +SparseMatrix& SparseMatrix::transpose() { + host_matrix_.transpose(); + resetDeviceStorage(); + setDeviceNeedsUpdate(true); + return *this; +} + +SparseMatrix& SparseMatrix::setIdentity(Size nrows, Size ncols) { + host_matrix_.setIdentity(nrows, ncols); + resetDeviceStorage(); + setDeviceNeedsUpdate(true); + return *this; +} + +void SparseMatrix::resetDeviceStorage() { + outer_.reset(atlas::array::Array::wrap(const_cast(outer()), atlas::array::make_shape(rows() + 1))); + inner_.reset(atlas::array::Array::wrap(const_cast(inner()), atlas::array::make_shape(nonZeros()))); + data_.reset(atlas::array::Array::wrap(const_cast(data()), atlas::array::make_shape(nonZeros()))); +} + +//---------------------------------------------------------------------------------------------------------------------- +} // namespace linalg +} // namespace atlas \ No newline at end of file diff --git a/src/atlas/linalg/SparseMatrix.h b/src/atlas/linalg/SparseMatrix.h new file mode 100644 index 000000000..4f12efca9 --- /dev/null +++ b/src/atlas/linalg/SparseMatrix.h @@ -0,0 +1,174 @@ +#pragma once + +#include +#include + +#include "atlas/array.h" + +#include "eckit/linalg/SparseMatrix.h" +#include "eckit/linalg/Triplet.h" +#include "eckit/linalg/types.h" + +namespace atlas { +namespace linalg { + +//---------------------------------------------------------------------------------------------------------------------- + +/// Sparse matrix in CRS (compressed row storage) format +class SparseMatrix { +public: + using Scalar = eckit::linalg::Scalar; + using Index = eckit::linalg::Index; + using Size = eckit::linalg::Size; + using iterator = eckit::linalg::SparseMatrix::iterator; + using const_iterator = eckit::linalg::SparseMatrix::const_iterator; + +public: + // -- Constructors + + /// Default constructor, empty matrix + SparseMatrix(); + + /// Constructor from triplets + SparseMatrix(Size nrows, Size ncols, const std::vector&); + + /// Copy constructor + SparseMatrix(const SparseMatrix&); + + /// Assignment operator (allocates and copies data) + SparseMatrix& operator=(const SparseMatrix&); + +public: + void swap(SparseMatrix&); + + /// @returns number of rows + Size rows() const { return host_matrix_.rows(); } + + /// @returns number of columns + Size cols() const { return host_matrix_.cols(); } + + /// @returns number of non-zeros + Size nonZeros() const { return host_matrix_.nonZeros(); } + + /// @returns true if this matrix does not contain non-zero entries + bool empty() const { return host_matrix_.empty(); } + + /// @returns footprint of the matrix in memory + size_t footprint() const; + + /// Prune entries, in-place, with exactly the given value + SparseMatrix& prune(Scalar = 0); + + /// Transpose matrix in-place + SparseMatrix& transpose(); + + SparseMatrix& setIdentity(Size nrows, Size ncols); + +public: + void updateDevice() const { + outer_->updateDevice(); + inner_->updateDevice(); + data_->updateDevice(); + } + + void updateHost() const { + outer_->updateHost(); + inner_->updateHost(); + data_->updateHost(); + } + + bool hostNeedsUpdate() const { + return outer_->hostNeedsUpdate() || + inner_->hostNeedsUpdate() || + data_->hostNeedsUpdate(); + } + + bool deviceNeedsUpdate() const { + return outer_->deviceNeedsUpdate() || + inner_->deviceNeedsUpdate() || + data_->deviceNeedsUpdate(); + } + + void setHostNeedsUpdate(bool v) const { + outer_->setHostNeedsUpdate(v); + inner_->setHostNeedsUpdate(v); + data_->setHostNeedsUpdate(v); + } + + void setDeviceNeedsUpdate(bool v) const { + outer_->setDeviceNeedsUpdate(v); + inner_->setDeviceNeedsUpdate(v); + data_->setDeviceNeedsUpdate(v); + } + + bool deviceAllocated() const { + return outer_->deviceAllocated() && + inner_->deviceAllocated() && + data_->deviceAllocated(); + } + + void allocateDevice() { + outer_->allocateDevice(); + inner_->allocateDevice(); + data_->allocateDevice(); + } + + void deallocateDevice() { + outer_->deallocateDevice(); + inner_->deallocateDevice(); + data_->deallocateDevice(); + } + + const eckit::linalg::SparseMatrix& host_matrix() const { return host_matrix_; } + + // eckit::linalg::SparseMatrix& host_matrix() { return host_matrix_; } + + const Scalar* data() const { return host_matrix_.data(); } + + const Index* outer() const { return host_matrix_.outer(); } + + const Index* inner() const { return host_matrix_.inner(); } + + const Scalar* host_data() const { return host_matrix_.data(); } + + const Index* host_outer() const { return host_matrix_.outer(); } + + const Index* host_inner() const { return host_matrix_.inner(); } + + const Scalar* device_data() const { return data_->device_data(); } + + const Index* device_outer() const { return outer_->device_data(); } + + const Index* device_inner() const { return inner_->device_data(); } + +public: // iterators + + /// const iterators to begin/end of row + const_iterator begin(Size row) const { return host_matrix_.begin(row); } + const_iterator end(Size row) const { return host_matrix_.end(row); } + + /// const iterators to begin/end of matrix + const_iterator begin() const { return host_matrix_.begin(); } + const_iterator end() const { return host_matrix_.end(); } + + /// iterators to begin/end of row + iterator begin(Size row) { return host_matrix_.begin(row); } + iterator end(Size row) { return host_matrix_.end(row); } + + /// const iterators to begin/end of matrix + iterator begin() { return host_matrix_.begin(); } + iterator end() { return host_matrix_.end(); } + +private: + void resetDeviceStorage(); + +private: + eckit::linalg::SparseMatrix host_matrix_; + std::unique_ptr outer_; + std::unique_ptr inner_; + std::unique_ptr data_; +}; + +//---------------------------------------------------------------------------------------------------------------------- +} // namespace linalg +} // namespace atlas \ No newline at end of file From 082faaa52b22a8e8ce46e5ccc04d10a2cadb3fc8 Mon Sep 17 00:00:00 2001 From: Liam Adams Date: Thu, 14 Nov 2024 22:10:37 +0000 Subject: [PATCH 05/11] Use atlas::linalg::SparseMatrix class instead of eckit::linalg::SparseMatrix. --- src/atlas/interpolation/Cache.h | 5 +- src/atlas/interpolation/method/Method.h | 4 +- .../interpolation/method/binning/Binning.cc | 4 +- .../method/knn/GridBoxMaximum.cc | 2 +- ...nservativeSphericalPolygonInterpolation.cc | 4 +- src/atlas/interpolation/nonlinear/Missing.cc | 6 +- src/atlas/interpolation/nonlinear/NonLinear.h | 8 +- .../linalg/sparse/SparseMatrixMultiply.h | 4 +- .../SparseMatrixMultiply_EckitLinalg.cc | 4 +- ...est_interpolation_finite_element_cached.cc | 4 +- src/tests/linalg/test_linalg_sparse.cc | 113 ++++++++++++++++++ 11 files changed, 135 insertions(+), 23 deletions(-) diff --git a/src/atlas/interpolation/Cache.h b/src/atlas/interpolation/Cache.h index 2e70f827c..ad193049a 100644 --- a/src/atlas/interpolation/Cache.h +++ b/src/atlas/interpolation/Cache.h @@ -17,8 +17,7 @@ #include "eckit/filesystem/PathName.h" #include "eckit/io/Buffer.h" -#include "eckit/linalg/SparseMatrix.h" - +#include "atlas/linalg/SparseMatrix.h" #include "atlas/runtime/Exception.h" #include "atlas/util/KDTree.h" @@ -82,7 +81,7 @@ class Cache { class MatrixCacheEntry : public InterpolationCacheEntry { public: - using Matrix = eckit::linalg::SparseMatrix; + using Matrix = atlas::linalg::SparseMatrix; ~MatrixCacheEntry() override; MatrixCacheEntry(const Matrix* matrix, const std::string& uid = ""): matrix_{matrix}, uid_(uid) { ATLAS_ASSERT(matrix_ != nullptr); diff --git a/src/atlas/interpolation/method/Method.h b/src/atlas/interpolation/method/Method.h index 7e52d5bf3..5b37ca25c 100644 --- a/src/atlas/interpolation/method/Method.h +++ b/src/atlas/interpolation/method/Method.h @@ -16,10 +16,10 @@ #include "atlas/interpolation/Cache.h" #include "atlas/interpolation/NonLinear.h" +#include "atlas/linalg/SparseMatrix.h" #include "atlas/util/Metadata.h" #include "atlas/util/Object.h" #include "eckit/config/Configuration.h" -#include "eckit/linalg/SparseMatrix.h" namespace atlas { class Field; @@ -87,7 +87,7 @@ class Method : public util::Object { using Triplet = eckit::linalg::Triplet; using Triplets = std::vector; - using Matrix = eckit::linalg::SparseMatrix; + using Matrix = atlas::linalg::SparseMatrix; static void normalise(Triplets& triplets); diff --git a/src/atlas/interpolation/method/binning/Binning.cc b/src/atlas/interpolation/method/binning/Binning.cc index f5a6b0743..902072f4f 100644 --- a/src/atlas/interpolation/method/binning/Binning.cc +++ b/src/atlas/interpolation/method/binning/Binning.cc @@ -12,12 +12,12 @@ #include "atlas/interpolation/Interpolation.h" #include "atlas/interpolation/method/binning/Binning.h" #include "atlas/interpolation/method/MethodFactory.h" +#include "atlas/linalg/SparseMatrix.h" #include "atlas/mesh.h" #include "atlas/mesh/actions/GetCubedSphereNodalArea.h" #include "atlas/runtime/Trace.h" #include "eckit/config/LocalConfiguration.h" -#include "eckit/linalg/SparseMatrix.h" #include "eckit/linalg/Triplet.h" #include "eckit/mpi/Comm.h" @@ -58,7 +58,7 @@ void Binning::do_setup(const FunctionSpace& source, using Index = eckit::linalg::Index; using Triplet = eckit::linalg::Triplet; - using SMatrix = eckit::linalg::SparseMatrix; + using SMatrix = atlas::linalg::SparseMatrix; source_ = source; target_ = target; diff --git a/src/atlas/interpolation/method/knn/GridBoxMaximum.cc b/src/atlas/interpolation/method/knn/GridBoxMaximum.cc index 8ca86672a..fb47ffb97 100644 --- a/src/atlas/interpolation/method/knn/GridBoxMaximum.cc +++ b/src/atlas/interpolation/method/knn/GridBoxMaximum.cc @@ -61,7 +61,7 @@ void GridBoxMaximum::do_execute(const Field& source, Field& target, Metadata&) c if (!matrixFree_) { const Matrix& m = matrix(); - Matrix::const_iterator k(m); + Matrix::const_iterator k(m.host_matrix()); for (decltype(m.rows()) i = 0, j = 0; i < m.rows(); ++i) { double max = std::numeric_limits::lowest(); diff --git a/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.cc b/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.cc index d1ea5addf..334b65422 100644 --- a/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.cc +++ b/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.cc @@ -1248,7 +1248,7 @@ void ConservativeSphericalPolygonInterpolation::intersect_polygons(const CSPolyg } } -eckit::linalg::SparseMatrix ConservativeSphericalPolygonInterpolation::compute_1st_order_matrix() { +atlas::linalg::SparseMatrix ConservativeSphericalPolygonInterpolation::compute_1st_order_matrix() { ATLAS_TRACE("ConservativeMethod::setup: build cons-1 interpolant matrix"); ATLAS_ASSERT(not matrix_free_); Triplets triplets; @@ -1358,7 +1358,7 @@ eckit::linalg::SparseMatrix ConservativeSphericalPolygonInterpolation::compute_1 return Matrix(n_tpoints_, n_spoints_, triplets); } -eckit::linalg::SparseMatrix ConservativeSphericalPolygonInterpolation::compute_2nd_order_matrix() { +atlas::linalg::SparseMatrix ConservativeSphericalPolygonInterpolation::compute_2nd_order_matrix() { ATLAS_TRACE("ConservativeMethod::setup: build cons-2 interpolant matrix"); ATLAS_ASSERT(not matrix_free_); const auto& src_points_ = data_->src_points_; diff --git a/src/atlas/interpolation/nonlinear/Missing.cc b/src/atlas/interpolation/nonlinear/Missing.cc index 1258f9fdc..f222397f5 100644 --- a/src/atlas/interpolation/nonlinear/Missing.cc +++ b/src/atlas/interpolation/nonlinear/Missing.cc @@ -81,7 +81,7 @@ bool MissingIfAllMissing::executeT(NonLinear::Matrix& W, const Field& field) con bool zeros = false; Size i = 0; - Matrix::iterator it(W); + Matrix::iterator it = W.begin(); for (Size r = 0; r < W.rows(); ++r) { const Matrix::iterator end = W.end(r); @@ -161,7 +161,7 @@ bool MissingIfAnyMissing::executeT(NonLinear::Matrix& W, const Field& field) con bool zeros = false; Size i = 0; - Matrix::iterator it(W); + Matrix::iterator it = W.begin(); for (Size r = 0; r < W.rows(); ++r) { const Matrix::iterator end = W.end(r); @@ -228,7 +228,7 @@ bool MissingIfHeaviestMissing::executeT(NonLinear::Matrix& W, const Field& field bool zeros = false; Size i = 0; - Matrix::iterator it(W); + Matrix::iterator it = W.begin(); for (Size r = 0; r < W.rows(); ++r) { const Matrix::iterator end = W.end(r); diff --git a/src/atlas/interpolation/nonlinear/NonLinear.h b/src/atlas/interpolation/nonlinear/NonLinear.h index dce24441e..7ce865434 100644 --- a/src/atlas/interpolation/nonlinear/NonLinear.h +++ b/src/atlas/interpolation/nonlinear/NonLinear.h @@ -16,10 +16,10 @@ #include #include "eckit/config/Parametrisation.h" -#include "eckit/linalg/SparseMatrix.h" #include "atlas/array.h" #include "atlas/field/Field.h" +#include "atlas/linalg/SparseMatrix.h" #include "atlas/runtime/Exception.h" #include "atlas/util/Factory.h" #include "atlas/util/ObjectHandle.h" @@ -37,9 +37,9 @@ namespace nonlinear { class NonLinear : public util::Object { public: using Config = eckit::Parametrisation; - using Matrix = eckit::linalg::SparseMatrix; - using Scalar = eckit::linalg::Scalar; - using Size = eckit::linalg::Size; + using Matrix = atlas::linalg::SparseMatrix; + using Scalar = atlas::linalg::SparseMatrix::Scalar; + using Size = atlas::linalg::SparseMatrix::Size; /** * @brief ctor diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply.h b/src/atlas/linalg/sparse/SparseMatrixMultiply.h index d1776975b..10a8ad76c 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply.h +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply.h @@ -11,9 +11,9 @@ #pragma once #include "eckit/config/Configuration.h" -#include "eckit/linalg/SparseMatrix.h" #include "atlas/linalg/Indexing.h" +#include "atlas/linalg/SparseMatrix.h" #include "atlas/linalg/View.h" #include "atlas/linalg/sparse/Backend.h" #include "atlas/runtime/Exception.h" @@ -22,7 +22,7 @@ namespace atlas { namespace linalg { -using SparseMatrix = eckit::linalg::SparseMatrix; +using SparseMatrix = atlas::linalg::SparseMatrix; using Configuration = eckit::Configuration; template diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.cc b/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.cc index 72d55c1a9..b9bb1f9b4 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.cc +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.cc @@ -70,7 +70,7 @@ void SparseMatrixMultiply::apply( @@ -81,7 +81,7 @@ void SparseMatrixMultiply= W.rows()); eckit::linalg::Matrix m_src(src.data(), src.shape(1), src.shape(0)); eckit::linalg::Matrix m_tgt(tgt.data(), tgt.shape(1), tgt.shape(0)); - eckit_linalg_backend(config).spmm(W, m_src, m_tgt); + eckit_linalg_backend(config).spmm(W.host_matrix(), m_src, m_tgt); } void SparseMatrixMultiply::apply( diff --git a/src/tests/interpolation/test_interpolation_finite_element_cached.cc b/src/tests/interpolation/test_interpolation_finite_element_cached.cc index 99d9803c6..971ffd90d 100644 --- a/src/tests/interpolation/test_interpolation_finite_element_cached.cc +++ b/src/tests/interpolation/test_interpolation_finite_element_cached.cc @@ -106,7 +106,7 @@ CASE("extract cache, copy it, and move it for use") { set_field(field_source, grid_source, func); - eckit::linalg::SparseMatrix matrix = get_or_create_cache(grid_source, grid_target).matrix(); + atlas::linalg::SparseMatrix matrix = get_or_create_cache(grid_source, grid_target).matrix(); EXPECT(not matrix.empty()); @@ -133,7 +133,7 @@ CASE("extract cache, copy it, and pass non-owning pointer") { set_field(field_source, grid_source, func); - eckit::linalg::SparseMatrix matrix = get_or_create_cache(grid_source, grid_target).matrix(); + atlas::linalg::SparseMatrix matrix = get_or_create_cache(grid_source, grid_target).matrix(); EXPECT(not matrix.empty()); diff --git a/src/tests/linalg/test_linalg_sparse.cc b/src/tests/linalg/test_linalg_sparse.cc index ee1b10302..aff540c57 100644 --- a/src/tests/linalg/test_linalg_sparse.cc +++ b/src/tests/linalg/test_linalg_sparse.cc @@ -104,6 +104,23 @@ struct ArrayVector { array::ArrayView view_; }; +bool operator==(const SparseMatrix& A, const SparseMatrix& B) { + if (A.rows() != B.rows() || A.cols() != B.cols() || A.nonZeros() != B.nonZeros()) { + return false; + } + const auto A_data_view = eckit::testing::make_view(A.data(), A.nonZeros()); + const auto A_outer_view = eckit::testing::make_view(A.outer(), A.rows()+1); + const auto A_inner_view = eckit::testing::make_view(A.inner(), A.nonZeros()); + const auto B_data_view = eckit::testing::make_view(B.data(), B.nonZeros()); + const auto B_outer_view = eckit::testing::make_view(B.outer(), B.rows()+1); + const auto B_inner_view = eckit::testing::make_view(B.inner(), B.nonZeros()); + if (A_data_view != B_data_view || + A_outer_view != B_outer_view || + A_inner_view != B_inner_view) { + return false; + } + return true; +} //---------------------------------------------------------------------------------------------------------------------- @@ -199,6 +216,102 @@ CASE("test backend functionalities") { //---------------------------------------------------------------------------------------------------------------------- +CASE("SparseMatrix default constructor") { + SparseMatrix A; + EXPECT_EQ(A.rows(), 0); + EXPECT_EQ(A.cols(), 0); + EXPECT_EQ(A.nonZeros(), 0); +} + +CASE("SparseMatrix copy constructor") { + SparseMatrix A{3, 3, {{0, 0, 2.}, {0, 2, -3.}, {1, 1, 2.}, {2, 2, 2.}}}; + SparseMatrix B{A}; + EXPECT(A == B); +} + +CASE("SparseMatrix assignment constructor") { + SparseMatrix A{3, 3, {{0, 0, 2.}, {0, 2, -3.}, {1, 1, 2.}, {2, 2, 2.}}}; + auto B = A; + EXPECT(A == B); +} + +CASE("SparseMatrix assignment") { + SparseMatrix A{3, 3, {{0, 0, 2.}, {0, 2, -3.}, {1, 1, 2.}, {2, 2, 2.}}}; + SparseMatrix B; + B = A; + EXPECT(A == B); +} + +CASE("SparseMatrix triplet constructor") { + SparseMatrix A{3, 3, {{0, 0, 2.}, {0, 2, -3.}, {1, 1, 2.}, {2, 2, 2.}}}; + const auto A_data_view = eckit::testing::make_view(A.data(), A.nonZeros()); + const auto A_outer_view = eckit::testing::make_view(A.outer(), A.rows()+1); + const auto A_inner_view = eckit::testing::make_view(A.inner(), A.nonZeros()); + + EXPECT_EQ(A.rows(), 3); + EXPECT_EQ(A.cols(), 3); + EXPECT_EQ(A.nonZeros(), 4); + + const std::vector test_data{2., -3., 2., 2.}; + const std::vector test_outer{0, 2, 3, 4}; + const std::vector test_inner{0, 2, 1, 2}; + const auto test_data_view = eckit::testing::make_view(test_data.data(), test_data.size()); + const auto test_outer_view = eckit::testing::make_view(test_outer.data(), test_outer.size()); + const auto test_inner_view = eckit::testing::make_view(test_inner.data(), test_inner.size()); + + EXPECT(A_data_view == test_data_view); + EXPECT(A_outer_view == test_outer_view); + EXPECT(A_inner_view == test_inner_view); +} + +CASE("SparseMatrix triplet constructor 2") { + SparseMatrix A{3, 3, {{0, 0, 2.}, {0, 2, 0.}, {1, 1, 2.}, {2, 2, 2.}}}; + const auto A_data_view = eckit::testing::make_view(A.data(), A.nonZeros()); + const auto A_outer_view = eckit::testing::make_view(A.outer(), A.rows()+1); + const auto A_inner_view = eckit::testing::make_view(A.inner(), A.nonZeros()); + + EXPECT_EQ(A.rows(), 3); + EXPECT_EQ(A.cols(), 3); + EXPECT_EQ(A.nonZeros(), 3); + + const std::vector test_data{2., 2., 2.}; + const std::vector test_outer{0, 1, 2, 3}; + const std::vector test_inner{0, 1, 2}; + const auto test_data_view = eckit::testing::make_view(test_data.data(), test_data.size()); + const auto test_outer_view = eckit::testing::make_view(test_outer.data(), test_outer.size()); + const auto test_inner_view = eckit::testing::make_view(test_inner.data(), test_inner.size()); + + EXPECT(A_data_view == test_data_view); + EXPECT(A_outer_view == test_outer_view); + EXPECT(A_inner_view == test_inner_view); +} + +CASE("SparseMatrix swap") { + SparseMatrix A_test{3, 3, {{0, 0, 2.}, {0, 2, 0.}, {1, 1, 2.}, {2, 2, 2.}}}; + SparseMatrix A{A_test}; + SparseMatrix B_test{1, 1, {{0, 0, 7.}}}; + SparseMatrix B{B_test}; + A.swap(B); + EXPECT(A == B_test); + EXPECT(B == A_test); +} + +CASE("SparseMatrix transpose") { + SparseMatrix A{3, 3, {{0, 0, 2.}, {0, 2, -3.}, {1, 1, 2.}, {2, 2, 2.}}}; + SparseMatrix AT{3, 3, {{0, 0, 2.}, {1, 1, 2.}, {2, 0, -3.}, {2, 2, 2.}}}; + A.transpose(); + EXPECT(A == AT); +} + +CASE("SparseMatrix prune") { + SparseMatrix A{3, 3, {{0, 0, 2.}, {0, 2, 0}, {1, 1, 2.}, {2, 2, 2.}}}; + SparseMatrix A_pruned{3, 3, {{0, 0, 2.}, {1, 1, 2.}, {2, 2, 2.}}}; + A.prune(); + EXPECT(A == A_pruned); +} + +//---------------------------------------------------------------------------------------------------------------------- + CASE("sparse_matrix vector multiply (spmv)") { // "square" matrix // A = 2 . -3 From a4a423e5315299a79a17a78cb5b77ef60257e6e8 Mon Sep 17 00:00:00 2001 From: Liam Adams Date: Thu, 14 Nov 2024 23:17:37 +0000 Subject: [PATCH 06/11] Add hicSparse backend to sparse matrix multiply. --- src/atlas/CMakeLists.txt | 2 + src/atlas/linalg/sparse/Backend.h | 5 + .../linalg/sparse/SparseMatrixMultiply.h | 1 + .../linalg/sparse/SparseMatrixMultiply.tcc | 3 + .../sparse/SparseMatrixMultiply_HicSparse.cc | 294 ++++++++++++++++++ .../sparse/SparseMatrixMultiply_HicSparse.h | 45 +++ src/tests/linalg/CMakeLists.txt | 8 + src/tests/linalg/test_linalg_sparse.cc | 10 +- src/tests/linalg/test_linalg_sparse_gpu.cc | 242 ++++++++++++++ 9 files changed, 609 insertions(+), 1 deletion(-) create mode 100644 src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc create mode 100644 src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.h create mode 100644 src/tests/linalg/test_linalg_sparse_gpu.cc diff --git a/src/atlas/CMakeLists.txt b/src/atlas/CMakeLists.txt index 2028dbd10..c28715e0a 100644 --- a/src/atlas/CMakeLists.txt +++ b/src/atlas/CMakeLists.txt @@ -713,6 +713,8 @@ linalg/sparse/SparseMatrixMultiply_EckitLinalg.h linalg/sparse/SparseMatrixMultiply_EckitLinalg.cc linalg/sparse/SparseMatrixMultiply_OpenMP.h linalg/sparse/SparseMatrixMultiply_OpenMP.cc +linalg/sparse/SparseMatrixMultiply_HicSparse.h +linalg/sparse/SparseMatrixMultiply_HicSparse.cc linalg/dense.h linalg/dense/Backend.h linalg/dense/Backend.cc diff --git a/src/atlas/linalg/sparse/Backend.h b/src/atlas/linalg/sparse/Backend.h index fea22178f..698b327a8 100644 --- a/src/atlas/linalg/sparse/Backend.h +++ b/src/atlas/linalg/sparse/Backend.h @@ -43,6 +43,11 @@ struct eckit_linalg : Backend { static std::string type() { return "eckit_linalg"; } eckit_linalg(): Backend(type()) {} }; + +struct hicsparse : Backend { + static std::string type() { return "hicsparse"; } + hicsparse(): Backend(type()) {} +}; } // namespace backend diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply.h b/src/atlas/linalg/sparse/SparseMatrixMultiply.h index 10a8ad76c..e756bf3fa 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply.h +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply.h @@ -78,3 +78,4 @@ struct SparseMatrixMultiply { #include "SparseMatrixMultiply.tcc" #include "SparseMatrixMultiply_EckitLinalg.h" #include "SparseMatrixMultiply_OpenMP.h" +#include "SparseMatrixMultiply_HicSparse.h" diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply.tcc b/src/atlas/linalg/sparse/SparseMatrixMultiply.tcc index b4e1ed9cd..108393189 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply.tcc +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply.tcc @@ -87,6 +87,9 @@ void sparse_matrix_multiply( const Matrix& matrix, const SourceView& src, Target #endif sparse::dispatch_sparse_matrix_multiply( matrix, src, tgt, indexing, util::Config("backend",type) ); } + else if ( type == sparse::backend::hicsparse::type() ) { + sparse::dispatch_sparse_matrix_multiply( matrix, src, tgt, indexing, config ); + } else { throw_NotImplemented( "sparse_matrix_multiply cannot be performed with unsupported backend [" + type + "]", Here() ); diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc b/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc new file mode 100644 index 000000000..62fb7d1c2 --- /dev/null +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc @@ -0,0 +1,294 @@ +/* + * (C) Copyright 2024 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.h" + +#include "atlas/parallel/omp/omp.h" +#include "atlas/runtime/Exception.h" + +#include "hic/hic.h" +#include "hic/hic_library_types.h" +#include "hic/hicsparse.h" + +namespace { + +template +constexpr hicsparseIndexType_t getHicsparseIndexType() { + using base_type = std::remove_const_t; + if constexpr (std::is_same_v) { + return HICSPARSE_INDEX_32I; + } else { + static_assert(std::is_same_v, "Unsupported index type"); + return HICSPARSE_INDEX_64I; + } +} + +template +constexpr auto getHicsparseValueType() { + using base_type = std::remove_const_t; + if constexpr (std::is_same_v) { + return HIC_R_32F; + } else { + static_assert(std::is_same_v, "Unsupported value type");\ + return HIC_R_64F; + } +} + +template +hicsparseOrder_t getHicsparseOrder(const atlas::linalg::View& v) { + constexpr int row_idx = (IndexLayout == atlas::linalg::Indexing::layout_left) ? 0 : 1; + constexpr int col_idx = (IndexLayout == atlas::linalg::Indexing::layout_left) ? 1 : 0; + + if (v.stride(row_idx) == 1) { + return HICSPARSE_ORDER_COL; + } else if (v.stride(col_idx) == 1) { + return HICSPARSE_ORDER_ROW; + } else { + atlas::throw_Exception("Unsupported dense matrix memory order", Here()); + return HICSPARSE_ORDER_COL; + } +} + +template +int64_t getLeadingDimension(const atlas::linalg::View& v) { + if (v.stride(0) == 1) { + return v.stride(1); + } else if (v.stride(1) == 1) { + return v.stride(0); + } else { + atlas::throw_Exception("Unsupported dense matrix memory order", Here()); + return 0; + } +} + +} + +namespace atlas { +namespace linalg { +namespace sparse { + +template +void hsSpMV(const SparseMatrix& W, const View& src, TargetValue beta, View& tgt) { + // Assume that src and tgt are device views + + ATLAS_ASSERT(src.shape(0) >= W.cols()); + ATLAS_ASSERT(tgt.shape(0) >= W.rows()); + + // Check if W is on the device and if not, copy it to the device + if (W.deviceNeedsUpdate()) { + W.updateDevice(); + } + + // Create sparse library handle + // todo: use singleton class for storing hicSparse library handle. + hicsparseHandle_t handle; + HICSPARSE_CALL(hicsparseCreate(&handle)); + + // Create a sparse matrix descriptor + hicsparseConstSpMatDescr_t matA; + HICSPARSE_CALL(hicsparseCreateConstCsr( + &matA, + W.rows(), W.cols(), W.nonZeros(), + W.device_outer(), // row_offsets + W.device_inner(), // column_indices + W.device_data(), // values + getHicsparseIndexType(), + getHicsparseIndexType(), + HICSPARSE_INDEX_BASE_ZERO, + getHicsparseValueType())); + + // Create dense matrix descriptors + hicsparseConstDnVecDescr_t vecX; + HICSPARSE_CALL(hicsparseCreateConstDnVec( + &vecX, + static_cast(W.cols()), + src.data(), + getHicsparseValueType::value_type>())); + + hicsparseDnVecDescr_t vecY; + HICSPARSE_CALL(hicsparseCreateDnVec( + &vecY, + W.rows(), + tgt.data(), + getHicsparseValueType::value_type>())); + + using ComputeType = typename View::value_type; + constexpr auto compute_type = getHicsparseValueType(); + + ComputeType alpha = 1; + + // Determine buffer size + size_t bufferSize = 0; + HICSPARSE_CALL(hicsparseSpMV_bufferSize( + handle, + HICSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + matA, + vecX, + &beta, + vecY, + compute_type, + HICSPARSE_SPMV_ALG_DEFAULT, + &bufferSize)); + + // Allocate buffer + char* buffer; + HIC_CALL(hicMalloc(&buffer, bufferSize)); + + // Perform SpMV + HICSPARSE_CALL(hicsparseSpMV( + handle, + HICSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + matA, + vecX, + &beta, + vecY, + compute_type, + HICSPARSE_SPMV_ALG_DEFAULT, + buffer)); + + HIC_CALL(hicFree(buffer)); + HICSPARSE_CALL(hicsparseDestroyDnVec(vecX)); + HICSPARSE_CALL(hicsparseDestroyDnVec(vecY)); + HICSPARSE_CALL(hicsparseDestroySpMat(matA)); + HICSPARSE_CALL(hicsparseDestroy(handle)); + + HIC_CALL(hicDeviceSynchronize()); +} + + +template +void hsSpMM(const SparseMatrix& W, const View& src, TargetValue beta, View& tgt) { + // Assume that src and tgt are device views + + constexpr int row_idx = (IndexLayout == Indexing::layout_left) ? 0 : 1; + constexpr int col_idx = (IndexLayout == Indexing::layout_left) ? 1 : 0; + + ATLAS_ASSERT(src.shape(row_idx) >= W.cols()); + ATLAS_ASSERT(tgt.shape(row_idx) >= W.rows()); + ATLAS_ASSERT(src.shape(col_idx) == tgt.shape(col_idx)); + + // Check if W is on the device and if not, copy it to the device + if (W.deviceNeedsUpdate()) { + W.updateDevice(); + } + + // Create sparse library handle + // todo: use singleton class for storing hicSparse library handle. + hicsparseHandle_t handle; + HICSPARSE_CALL(hicsparseCreate(&handle)); + + // Create a sparse matrix descriptor + hicsparseConstSpMatDescr_t matA; + HICSPARSE_CALL(hicsparseCreateConstCsr( + &matA, + W.rows(), W.cols(), W.nonZeros(), + W.device_outer(), // row_offsets + W.device_inner(), // column_indices + W.device_data(), // values + getHicsparseIndexType(), + getHicsparseIndexType(), + HICSPARSE_INDEX_BASE_ZERO, + getHicsparseValueType())); + + // Create dense matrix descriptors + hicsparseConstDnMatDescr_t matB; + HICSPARSE_CALL(hicsparseCreateConstDnMat( + &matB, + W.cols(), src.shape(col_idx), + getLeadingDimension(src), + src.data(), + getHicsparseValueType::value_type>(), + getHicsparseOrder(src))); + + hicsparseDnMatDescr_t matC; + HICSPARSE_CALL(hicsparseCreateDnMat( + &matC, + W.rows(), tgt.shape(col_idx), + getLeadingDimension(tgt), + tgt.data(), + getHicsparseValueType::value_type>(), + getHicsparseOrder(tgt))); + + using ComputeType = typename View::value_type; + constexpr auto compute_type = getHicsparseValueType(); + + ComputeType alpha = 1; + + // Determine buffer size + size_t bufferSize = 0; + HICSPARSE_CALL(hicsparseSpMM_bufferSize( + handle, + HICSPARSE_OPERATION_NON_TRANSPOSE, + HICSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + matA, + matB, + &beta, + matC, + compute_type, + HICSPARSE_SPMM_ALG_DEFAULT, + &bufferSize)); + + // Allocate buffer + char* buffer; + HIC_CALL(hicMalloc(&buffer, bufferSize)); + + // Perform SpMM + HICSPARSE_CALL(hicsparseSpMM( + handle, + HICSPARSE_OPERATION_NON_TRANSPOSE, + HICSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + matA, + matB, + &beta, + matC, + compute_type, + HICSPARSE_SPMM_ALG_DEFAULT, + buffer)); + + HIC_CALL(hicFree(buffer)); + HICSPARSE_CALL(hicsparseDestroyDnMat(matC)); + HICSPARSE_CALL(hicsparseDestroyDnMat(matB)); + HICSPARSE_CALL(hicsparseDestroySpMat(matA)); + HICSPARSE_CALL(hicsparseDestroy(handle)); + + HIC_CALL(hicDeviceSynchronize()); +} + +void SparseMatrixMultiply::apply( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + double beta = 0; + hsSpMV(W, src, beta, tgt); +} + +void SparseMatrixMultiply::apply( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + double beta = 0; + hsSpMM(W, src, beta, tgt); +} + +void SparseMatrixMultiply::apply( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + double beta = 0; + hsSpMV(W, src, beta, tgt); +} + +void SparseMatrixMultiply::apply( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + double beta = 0; + hsSpMM(W, src, beta, tgt); +} + +} // namespace sparse +} // namespace linalg +} // namespace atlas \ No newline at end of file diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.h b/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.h new file mode 100644 index 000000000..91f6d4929 --- /dev/null +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.h @@ -0,0 +1,45 @@ +/* + * (C) Copyright 2024 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#pragma once + +#include "atlas/linalg/sparse/SparseMatrixMultiply.h" + +namespace atlas { +namespace linalg { +namespace sparse { + +template <> +struct SparseMatrixMultiply { + static void apply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); +}; + +template <> +struct SparseMatrixMultiply { + static void apply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); +}; + +template <> +struct SparseMatrixMultiply { + static void apply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); +}; + +template <> +struct SparseMatrixMultiply { + static void apply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); +}; + +} // namespace sparse +} // namespace linalg +} // namespace atlas \ No newline at end of file diff --git a/src/tests/linalg/CMakeLists.txt b/src/tests/linalg/CMakeLists.txt index d060dcc8a..e98fa0811 100644 --- a/src/tests/linalg/CMakeLists.txt +++ b/src/tests/linalg/CMakeLists.txt @@ -14,6 +14,14 @@ ecbuild_add_test( TARGET atlas_test_linalg_sparse ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) +ecbuild_add_test( TARGET atlas_test_linalg_sparse_gpu + SOURCES test_linalg_sparse_gpu.cc + LIBS atlas + ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} + CONDITION atlas_HAVE_CUDA OR atlas_HAVE_HIP +) + + ecbuild_add_test( TARGET atlas_test_linalg_dense SOURCES test_linalg_dense.cc LIBS atlas diff --git a/src/tests/linalg/test_linalg_sparse.cc b/src/tests/linalg/test_linalg_sparse.cc index aff540c57..9897bfb13 100644 --- a/src/tests/linalg/test_linalg_sparse.cc +++ b/src/tests/linalg/test_linalg_sparse.cc @@ -30,6 +30,7 @@ namespace test { // strings to be used in the tests static std::string eckit_linalg = sparse::backend::eckit_linalg::type(); static std::string openmp = sparse::backend::openmp::type(); +static std::string hicsparse = sparse::backend::hicsparse::type(); //---------------------------------------------------------------------------------------------------------------------- @@ -193,6 +194,7 @@ CASE("test backend functionalities") { sparse::current_backend(eckit_linalg); EXPECT_EQ(sparse::current_backend().type(), "eckit_linalg"); EXPECT_EQ(sparse::current_backend().getString("backend", "undefined"), "undefined"); + sparse::current_backend().set("backend", "default"); EXPECT_EQ(sparse::current_backend().getString("backend"), "default"); @@ -200,15 +202,21 @@ CASE("test backend functionalities") { EXPECT_EQ(sparse::current_backend().getString("backend", "undefined"), "undefined"); EXPECT_EQ(sparse::default_backend(eckit_linalg).getString("backend"), "default"); + sparse::current_backend(hicsparse); + EXPECT_EQ(sparse::current_backend().type(), "hicsparse"); + EXPECT_EQ(sparse::current_backend().getString("backend", "undefined"), "undefined"); + sparse::default_backend(eckit_linalg).set("backend", "generic"); EXPECT_EQ(sparse::default_backend(eckit_linalg).getString("backend"), "generic"); const sparse::Backend backend_default = sparse::Backend(); const sparse::Backend backend_openmp = sparse::backend::openmp(); const sparse::Backend backend_eckit_linalg = sparse::backend::eckit_linalg(); - EXPECT_EQ(backend_default.type(), openmp); + const sparse::Backend backend_hicsparse = sparse::backend::hicsparse(); + EXPECT_EQ(backend_default.type(), hicsparse); EXPECT_EQ(backend_openmp.type(), openmp); EXPECT_EQ(backend_eckit_linalg.type(), eckit_linalg); + EXPECT_EQ(backend_hicsparse.type(), hicsparse); EXPECT_EQ(std::string(backend_openmp), openmp); EXPECT_EQ(std::string(backend_eckit_linalg), eckit_linalg); diff --git a/src/tests/linalg/test_linalg_sparse_gpu.cc b/src/tests/linalg/test_linalg_sparse_gpu.cc new file mode 100644 index 000000000..63b172569 --- /dev/null +++ b/src/tests/linalg/test_linalg_sparse_gpu.cc @@ -0,0 +1,242 @@ +/* + * (C) Copyright 2024- ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include +#include + +#include "eckit/linalg/Matrix.h" +#include "eckit/linalg/Vector.h" + +#include "atlas/array.h" +#include "atlas/linalg/sparse.h" + +#include "tests/AtlasTestEnvironment.h" + + +using namespace atlas::linalg; + +namespace atlas { +namespace test { + +//---------------------------------------------------------------------------------------------------------------------- + +// strings to be used in the tests +static std::string hicsparse = sparse::backend::hicsparse::type(); + +//---------------------------------------------------------------------------------------------------------------------- + +// Only reason to define these derived classes is for nicer constructors and convenience in the tests + +class Vector : public eckit::linalg::Vector { +public: + using Scalar = eckit::linalg::Scalar; + using eckit::linalg::Vector::Vector; + Vector(const std::initializer_list& v): eckit::linalg::Vector::Vector(v.size()) { + size_t i = 0; + for (auto& s : v) { + operator[](i++) = s; + } + } +}; + +class Matrix : public eckit::linalg::Matrix { +public: + using Scalar = eckit::linalg::Scalar; + using eckit::linalg::Matrix::Matrix; + + Matrix(const std::initializer_list>& m): + eckit::linalg::Matrix::Matrix(m.size(), m.size() ? m.begin()->size() : 0) { + size_t r = 0; + for (auto& row : m) { + for (size_t c = 0; c < cols(); ++c) { + operator()(r, c) = row[c]; + } + ++r; + } + } +}; + +// 2D array constructable from eckit::linalg::Matrix +// Indexing/memorylayout and data type can be customized for testing +template +struct ArrayMatrix { + array::ArrayView view() { + array.syncHostDevice(); + return array::make_view(array); + } + array::ArrayView device_view() { + array.syncHostDevice(); + return array::make_device_view(array); + } + void setHostNeedsUpdate(bool b) { + array.setHostNeedsUpdate(b); + } + ArrayMatrix(const eckit::linalg::Matrix& m): ArrayMatrix(m.rows(), m.cols()) { + auto view_ = array::make_view(array); + for (int r = 0; r < m.rows(); ++r) { + for (int c = 0; c < m.cols(); ++c) { + auto& v = layout_left ? view_(r, c) : view_(c, r); + v = m(r, c); + } + } + } + ArrayMatrix(int r, int c): array(make_shape(r, c)) {} + +private: + static constexpr bool layout_left = (indexing == Indexing::layout_left); + static array::ArrayShape make_shape(int rows, int cols) { + return layout_left ? array::make_shape(rows, cols) : array::make_shape(cols, rows); + } + array::ArrayT array; +}; + +// 1D array constructable from eckit::linalg::Vector +template +struct ArrayVector { + array::ArrayView view() { + array.syncHostDevice(); + return array::make_view(array); + } + array::ArrayView const_view() { + array.syncHostDevice(); + return array::make_view(array); + } + array::ArrayView device_view() { + array.syncHostDevice(); + return array::make_device_view(array); + } + void setHostNeedsUpdate(bool b) { + array.setHostNeedsUpdate(b); + } + ArrayVector(const eckit::linalg::Vector& v): ArrayVector(v.size()) { + auto view_ = array::make_view(array); + for (int n = 0; n < v.size(); ++n) { + view_[n] = v[n]; + } + } + ArrayVector(int size): array(size) {} + +private: + array::ArrayT array; +}; + +//---------------------------------------------------------------------------------------------------------------------- + +template +void expect_equal(T* v, T* r, size_t s) { + EXPECT(is_approximately_equal(eckit::testing::make_view(v, s), eckit::testing::make_view(r, s), T(1.e-5))); +} + +template +void expect_equal(const T1& v, const T2& r) { + expect_equal(v.data(), r.data(), std::min(v.size(), r.size())); +} + +//---------------------------------------------------------------------------------------------------------------------- + +CASE("sparse-matrix vector multiply (spmv) [backend=hicsparse]") { + // "square" matrix + // A = 2 . -3 + // . 2 . + // . . 2 + // x = 1 2 3 + // y = 1 2 3 + + sparse::current_backend(hicsparse); + + SparseMatrix A{3, 3, {{0, 0, 2.}, {0, 2, -3.}, {1, 1, 2.}, {2, 2, 2.}}}; + + SECTION("View of atlas::Array [backend=hicsparse]") { + ArrayVector x(Vector{1., 2., 3.}); + ArrayVector y(3); + const auto x_device_view = x.device_view(); + auto y_device_view = y.device_view(); + sparse_matrix_multiply(A, x_device_view, y_device_view); + y.setHostNeedsUpdate(true); + auto y_view = y.view(); + expect_equal(y.view(), Vector{-7., 4., 6.}); + // sparse_matrix_multiply of sparse matrix and vector of non-matching sizes should fail + { + ArrayVector x2(2); + auto x2_device_view = x2.device_view(); + EXPECT_THROWS_AS(sparse_matrix_multiply(A, x2_device_view, y_device_view), eckit::AssertionFailed); + } + } +} + +CASE("sparse-matrix matrix multiply (spmm) [backend=hicsparse]") { + // "square" + // A = 2 . -3 + // . 2 . + // . . 2 + // x = 1 2 3 + // y = 1 2 3 + sparse::current_backend(hicsparse); + + SparseMatrix A{3, 3, {{0, 0, 2.}, {0, 2, -3.}, {1, 1, 2.}, {2, 2, 2.}}}; + Matrix m{{1., 2.}, {3., 4.}, {5., 6.}}; + Matrix c_exp{{-13., -14.}, {6., 8.}, {10., 12.}}; + + SECTION("View of atlas::Array PointsRight [backend=hicsparse]") { + ArrayMatrix ma(m); + ArrayMatrix c(3, 2); + auto ma_device_view = ma.device_view(); + auto c_device_view = c.device_view(); + sparse_matrix_multiply(A, ma_device_view, c_device_view, Indexing::layout_right); + c.setHostNeedsUpdate(true); + auto c_view = c.view(); + expect_equal(c_view, ArrayMatrix(c_exp).view()); + } + + SECTION("sparse_matrix_multiply [backend=hicsparse]") { + auto backend = sparse::backend::hicsparse(); + ArrayMatrix ma(m); + ArrayMatrix c(3, 2); + auto ma_device_view = ma.device_view(); + auto c_device_view = c.device_view(); + sparse_matrix_multiply(A, ma_device_view, c_device_view, backend); + c.setHostNeedsUpdate(true); + auto c_view = c.view(); + expect_equal(c_view, ArrayMatrix(c_exp).view()); + } + + SECTION("SparseMatrixMultiply [backend=hicsparse] 1") { + auto spmm = SparseMatrixMultiply{sparse::backend::hicsparse()}; + ArrayMatrix ma(m); + ArrayMatrix c(3, 2); + auto ma_device_view = ma.device_view(); + auto c_device_view = c.device_view(); + spmm(A, ma_device_view, c_device_view); + c.setHostNeedsUpdate(true); + auto c_view = c.view(); + expect_equal(c_view, ArrayMatrix(c_exp).view()); + } + + SECTION("SparseMatrixMultiply [backend=hicsparse] 2") { + auto spmm = SparseMatrixMultiply{hicsparse}; + ArrayMatrix ma(m); + ArrayMatrix c(3, 2); + auto ma_device_view = ma.device_view(); + auto c_device_view = c.device_view(); + spmm(A, ma_device_view, c_device_view); + c.setHostNeedsUpdate(true); + auto c_view = c.view(); + expect_equal(c_view, ArrayMatrix(c_exp).view()); + } +} + +//---------------------------------------------------------------------------------------------------------------------- + +} // namespace test +} // namespace atlas + +int main(int argc, char** argv) { + return atlas::test::run(argc, argv); +} From a20766e6a20778e2b907607653ef7c385b6bd85b Mon Sep 17 00:00:00 2001 From: Liam Adams Date: Fri, 15 Nov 2024 21:13:17 +0000 Subject: [PATCH 07/11] Add multiply_add function for sparse matrix linear algebra. --- .../linalg/sparse/SparseMatrixMultiply.h | 43 +++++- .../linalg/sparse/SparseMatrixMultiply.tcc | 86 +++++++++++- .../SparseMatrixMultiply_EckitLinalg.cc | 47 ++++++- .../sparse/SparseMatrixMultiply_EckitLinalg.h | 12 +- .../sparse/SparseMatrixMultiply_HicSparse.cc | 32 ++++- .../sparse/SparseMatrixMultiply_HicSparse.h | 16 ++- .../sparse/SparseMatrixMultiply_OpenMP.cc | 127 +++++++++++++----- .../sparse/SparseMatrixMultiply_OpenMP.h | 24 +++- src/tests/linalg/test_linalg_sparse.cc | 27 ++++ src/tests/linalg/test_linalg_sparse_gpu.cc | 29 ++++ 10 files changed, 384 insertions(+), 59 deletions(-) diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply.h b/src/atlas/linalg/sparse/SparseMatrixMultiply.h index e756bf3fa..2087b19e7 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply.h +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply.h @@ -38,6 +38,19 @@ template void sparse_matrix_multiply(const Matrix& matrix, const SourceView& src, TargetView& tgt, Indexing, const Configuration& config); +template +void sparse_matrix_multiply_add(const Matrix& matrix, const SourceView& src, TargetView& tgt); + +template +void sparse_matrix_multiply_add(const Matrix& matrix, const SourceView& src, TargetView& tgt, const Configuration& config); + +template +void sparse_matrix_multiply_add(const Matrix& matrix, const SourceView& src, TargetView& tgt, Indexing); + +template +void sparse_matrix_multiply_add(const Matrix& matrix, const SourceView& src, TargetView& tgt, Indexing, + const Configuration& config); + class SparseMatrixMultiply { public: SparseMatrixMultiply() = default; @@ -46,14 +59,34 @@ class SparseMatrixMultiply { template void operator()(const Matrix& matrix, const SourceView& src, TargetView& tgt) const { - sparse_matrix_multiply(matrix, src, tgt, backend()); + multiply(matrix, src, tgt); } template void operator()(const Matrix& matrix, const SourceView& src, TargetView& tgt, Indexing indexing) const { + multiply(matrix, src, tgt, indexing); + } + + template + void multiply(const Matrix& matrix, const SourceView& src, TargetView& tgt) const { + sparse_matrix_multiply(matrix, src, tgt, backend()); + } + + template + void multiply(const Matrix& matrix, const SourceView& src, TargetView& tgt, Indexing indexing) const { sparse_matrix_multiply(matrix, src, tgt, indexing, backend()); } + template + void multiply_add(const Matrix& matrix, const SourceView& src, TargetView& tgt) const { + sparse_matrix_multiply_add(matrix, src, tgt, backend()); + } + + template + void multiply_add(const Matrix& matrix, const SourceView& src, TargetView& tgt, Indexing indexing) const { + sparse_matrix_multiply_add(matrix, src, tgt, indexing, backend()); + } + const sparse::Backend& backend() const { return backend_; } private: @@ -65,8 +98,12 @@ namespace sparse { // Template class which needs (full or partial) specialization for concrete template parameters template struct SparseMatrixMultiply { - static void apply(const SparseMatrix&, const View&, View&, - const Configuration&) { + static void multiply(const SparseMatrix&, const View&, View&, + const Configuration&) { + throw_NotImplemented("SparseMatrixMultiply needs a template specialization with the implementation", Here()); + } + static void multiply_add(const SparseMatrix&, const View&, View&, + const Configuration&) { throw_NotImplemented("SparseMatrixMultiply needs a template specialization with the implementation", Here()); } }; diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply.tcc b/src/atlas/linalg/sparse/SparseMatrixMultiply.tcc index 108393189..2b5797c33 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply.tcc +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply.tcc @@ -33,14 +33,25 @@ namespace { template struct SparseMatrixMultiplyHelper { template - static void apply( const SparseMatrix& W, const SourceView& src, TargetView& tgt, + static void multiply( const SparseMatrix& W, const SourceView& src, TargetView& tgt, const eckit::Configuration& config ) { using SourceValue = const typename std::remove_const::type; using TargetValue = typename std::remove_const::type; constexpr int src_rank = introspection::rank(); constexpr int tgt_rank = introspection::rank(); static_assert( src_rank == tgt_rank, "src and tgt need same rank" ); - SparseMatrixMultiply::apply( W, src, tgt, config ); + SparseMatrixMultiply::multiply( W, src, tgt, config ); + } + + template + static void multiply_add( const SparseMatrix& W, const SourceView& src, TargetView& tgt, + const eckit::Configuration& config ) { + using SourceValue = const typename std::remove_const::type; + using TargetValue = typename std::remove_const::type; + constexpr int src_rank = introspection::rank(); + constexpr int tgt_rank = introspection::rank(); + static_assert( src_rank == tgt_rank, "src and tgt need same rank" ); + SparseMatrixMultiply::multiply_add( W, src, tgt, config ); } }; @@ -53,14 +64,38 @@ void dispatch_sparse_matrix_multiply( const Matrix& matrix, const SourceView& sr if ( introspection::layout_right( src ) || introspection::layout_right( tgt ) ) { ATLAS_ASSERT( introspection::layout_right( src ) && introspection::layout_right( tgt ) ); // Override layout with known layout given by introspection - SparseMatrixMultiplyHelper::apply( matrix, src_v, tgt_v, config ); + SparseMatrixMultiplyHelper::multiply( matrix, src_v, tgt_v, config ); + } + else { + if( indexing == Indexing::layout_left ) { + SparseMatrixMultiplyHelper::multiply( matrix, src_v, tgt_v, config ); + } + else if( indexing == Indexing::layout_right ) { + SparseMatrixMultiplyHelper::multiply( matrix, src_v, tgt_v, config ); + } + else { + throw_NotImplemented( "indexing not implemented", Here() ); + } + } +} + +template +void dispatch_sparse_matrix_multiply_add( const Matrix& matrix, const SourceView& src, TargetView& tgt, Indexing indexing, + const eckit::Configuration& config ) { + auto src_v = make_view( src ); + auto tgt_v = make_view( tgt ); + + if ( introspection::layout_right( src ) || introspection::layout_right( tgt ) ) { + ATLAS_ASSERT( introspection::layout_right( src ) && introspection::layout_right( tgt ) ); + // Override layout with known layout given by introspection + SparseMatrixMultiplyHelper::multiply_add( matrix, src_v, tgt_v, config ); } else { if( indexing == Indexing::layout_left ) { - SparseMatrixMultiplyHelper::apply( matrix, src_v, tgt_v, config ); + SparseMatrixMultiplyHelper::multiply_add( matrix, src_v, tgt_v, config ); } else if( indexing == Indexing::layout_right ) { - SparseMatrixMultiplyHelper::apply( matrix, src_v, tgt_v, config ); + SparseMatrixMultiplyHelper::multiply_add( matrix, src_v, tgt_v, config ); } else { throw_NotImplemented( "indexing not implemented", Here() ); @@ -111,6 +146,47 @@ void sparse_matrix_multiply( const Matrix& matrix, const SourceView& src, Target sparse_matrix_multiply( matrix, src, tgt, Indexing::layout_left ); } +template +void sparse_matrix_multiply_add( const Matrix& matrix, const SourceView& src, TargetView& tgt, Indexing indexing, + const eckit::Configuration& config ) { + std::string type = config.getString( "type", sparse::current_backend() ); + if ( type == sparse::backend::openmp::type() ) { + sparse::dispatch_sparse_matrix_multiply_add( matrix, src, tgt, indexing, config ); + } + else if ( type == sparse::backend::eckit_linalg::type() ) { + sparse::dispatch_sparse_matrix_multiply_add( matrix, src, tgt, indexing, config ); + } +#if ATLAS_ECKIT_HAVE_ECKIT_585 + else if( eckit::linalg::LinearAlgebraSparse::hasBackend(type) ) { +#else + else if( eckit::linalg::LinearAlgebra::hasBackend(type) ) { +#endif + sparse::dispatch_sparse_matrix_multiply_add( matrix, src, tgt, indexing, util::Config("backend",type) ); + } + else if ( type == sparse::backend::hicsparse::type() ) { + sparse::dispatch_sparse_matrix_multiply_add( matrix, src, tgt, indexing, config ); + } + else { + throw_NotImplemented( "sparse_matrix_multiply_add cannot be performed with unsupported backend [" + type + "]", + Here() ); + } +} + +template +void sparse_matrix_multiply_add( const Matrix& matrix, const SourceView& src, TargetView& tgt, const eckit::Configuration& config ) { + sparse_matrix_multiply_add( matrix, src, tgt, Indexing::layout_left, config ); +} + +template +void sparse_matrix_multiply_add( const Matrix& matrix, const SourceView& src, TargetView& tgt, Indexing indexing ) { + sparse_matrix_multiply_add( matrix, src, tgt, indexing, sparse::Backend() ); +} + +template +void sparse_matrix_multiply_add( const Matrix& matrix, const SourceView& src, TargetView& tgt ) { + sparse_matrix_multiply_add( matrix, src, tgt, Indexing::layout_left ); +} + } // namespace linalg } // namespace atlas diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.cc b/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.cc index b9bb1f9b4..1f5953392 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.cc +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.cc @@ -20,6 +20,7 @@ #include "SparseMatrixMultiply_EckitLinalg.h" +#include "atlas/array.h" #include "atlas/library/config.h" #if ATLAS_ECKIT_HAVE_ECKIT_585 @@ -64,7 +65,7 @@ const eckit::linalg::LinearAlgebra& eckit_linalg_backend(const Configuration& co } // namespace -void SparseMatrixMultiply::apply( +void SparseMatrixMultiply::multiply( const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { ATLAS_ASSERT(src.contiguous()); ATLAS_ASSERT(tgt.contiguous()); @@ -73,7 +74,7 @@ void SparseMatrixMultiply::apply( +void SparseMatrixMultiply::multiply( const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { ATLAS_ASSERT(src.contiguous()); ATLAS_ASSERT(tgt.contiguous()); @@ -84,9 +85,47 @@ void SparseMatrixMultiply::apply( +void SparseMatrixMultiply::multiply( const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { - SparseMatrixMultiply::apply(W, src, tgt, + SparseMatrixMultiply::multiply(W, src, tgt, + config); +} + +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { + + array::ArrayT tmp(src.shape(0)); + auto v_tmp_tmp = array::make_view(tmp); + v_tmp_tmp.assign(0.); + auto v_tmp = make_view(v_tmp_tmp); + + SparseMatrixMultiply::multiply(W, src, v_tmp, config); + + for (idx_t t = 0; t < tmp.shape(0); ++t) { + tgt(t) += v_tmp(t); + } +} + +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { + + array::ArrayT tmp(src.shape(0), src.shape(1)); + auto v_tmp_tmp = array::make_view(tmp); + v_tmp_tmp.assign(0.); + auto v_tmp = make_view(v_tmp_tmp); + + SparseMatrixMultiply::multiply(W, src, v_tmp, config); + + for (idx_t t = 0; t < tmp.shape(0); ++t) { + for (idx_t k = 0; k < tmp.shape(1); ++k) { + tgt(t, k) += v_tmp(t, k); + } + } +} + +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { + SparseMatrixMultiply::multiply_add(W, src, tgt, config); } diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.h b/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.h index af99d57df..6915a10f3 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.h +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply_EckitLinalg.h @@ -28,20 +28,26 @@ namespace sparse { template <> struct SparseMatrixMultiply { - static void apply(const SparseMatrix&, const View& src, View& tgt, + static void multiply(const SparseMatrix&, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix&, const View& src, View& tgt, const Configuration&); }; template <> struct SparseMatrixMultiply { - static void apply(const SparseMatrix&, const View& src, View& tgt, + static void multiply(const SparseMatrix&, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix&, const View& src, View& tgt, const Configuration&); }; template <> struct SparseMatrixMultiply { - static void apply(const SparseMatrix&, const View& src, View& tgt, + static void multiply(const SparseMatrix&, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix&, const View& src, View& tgt, const Configuration&); }; diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc b/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc index 62fb7d1c2..69f03743d 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc @@ -265,30 +265,54 @@ void hsSpMM(const SparseMatrix& W, const View& src, TargetValue HIC_CALL(hicDeviceSynchronize()); } -void SparseMatrixMultiply::apply( +void SparseMatrixMultiply::multiply( const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { double beta = 0; hsSpMV(W, src, beta, tgt); } -void SparseMatrixMultiply::apply( +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + double beta = 1; + hsSpMV(W, src, beta, tgt); +} + +void SparseMatrixMultiply::multiply( const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { double beta = 0; hsSpMM(W, src, beta, tgt); } -void SparseMatrixMultiply::apply( +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + double beta = 1; + hsSpMM(W, src, beta, tgt); +} + +void SparseMatrixMultiply::multiply( const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { double beta = 0; hsSpMV(W, src, beta, tgt); } -void SparseMatrixMultiply::apply( +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + double beta = 1; + hsSpMV(W, src, beta, tgt); +} + +void SparseMatrixMultiply::multiply( const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { double beta = 0; hsSpMM(W, src, beta, tgt); } +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + double beta = 1; + hsSpMM(W, src, beta, tgt); +} + } // namespace sparse } // namespace linalg } // namespace atlas \ No newline at end of file diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.h b/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.h index 91f6d4929..616a8d617 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.h +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.h @@ -18,25 +18,33 @@ namespace sparse { template <> struct SparseMatrixMultiply { - static void apply(const SparseMatrix& W, const View& src, View& tgt, + static void multiply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix& W, const View& src, View& tgt, const Configuration&); }; template <> struct SparseMatrixMultiply { - static void apply(const SparseMatrix& W, const View& src, View& tgt, + static void multiply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix& W, const View& src, View& tgt, const Configuration&); }; template <> struct SparseMatrixMultiply { - static void apply(const SparseMatrix& W, const View& src, View& tgt, + static void multiply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix& W, const View& src, View& tgt, const Configuration&); }; template <> struct SparseMatrixMultiply { - static void apply(const SparseMatrix& W, const View& src, View& tgt, + static void multiply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix& W, const View& src, View& tgt, const Configuration&); }; diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply_OpenMP.cc b/src/atlas/linalg/sparse/SparseMatrixMultiply_OpenMP.cc index 7a610b892..2cee471d3 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply_OpenMP.cc +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply_OpenMP.cc @@ -17,9 +17,8 @@ namespace atlas { namespace linalg { namespace sparse { -template -void SparseMatrixMultiply::apply( - const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { +template +void spmv_layout_left(const SparseMatrix& W, const View& src, View& tgt) { using Value = TargetValue; const auto outer = W.outer(); const auto index = W.inner(); @@ -28,9 +27,11 @@ void SparseMatrixMultiply= W.cols()); ATLAS_ASSERT(tgt.shape(0) >= W.rows()); - + atlas_omp_parallel_for(idx_t r = 0; r < rows; ++r) { - tgt[r] = 0.; + if constexpr (SetZero) { + tgt[r] = 0.; + } for (idx_t c = outer[r]; c < outer[r + 1]; ++c) { idx_t n = index[c]; Value w = static_cast(weight[c]); @@ -39,10 +40,20 @@ void SparseMatrixMultiply +void SparseMatrixMultiply::multiply( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + spmv_layout_left(W, src, tgt); +} template -void SparseMatrixMultiply::apply( - const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + spmv_layout_left(W, src, tgt); +} + +template +void spmm_layout_left(const SparseMatrix& W, const View& src, View& tgt) { using Value = TargetValue; const auto outer = W.outer(); const auto index = W.inner(); @@ -54,8 +65,10 @@ void SparseMatrixMultiply= W.rows()); atlas_omp_parallel_for(idx_t r = 0; r < rows; ++r) { - for (idx_t k = 0; k < Nk; ++k) { - tgt(r, k) = 0.; + if constexpr (SetZero) { + for (idx_t k = 0; k < Nk; ++k) { + tgt(r, k) = 0.; + } } for (idx_t c = outer[r]; c < outer[r + 1]; ++c) { idx_t n = index[c]; @@ -68,14 +81,24 @@ void SparseMatrixMultiply -void SparseMatrixMultiply::apply( - const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { +void SparseMatrixMultiply::multiply( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + spmm_layout_left(W, src, tgt); +} + +template +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + spmm_layout_left(W, src, tgt); +} + +template +void spmt_layout_left(const SparseMatrix& W, const View& src, View& tgt) { if (src.contiguous() && tgt.contiguous()) { // We can take a more optimized route by reducing rank auto src_v = View(src.data(), array::make_shape(src.shape(0), src.stride(0))); auto tgt_v = View(tgt.data(), array::make_shape(tgt.shape(0), tgt.stride(0))); - SparseMatrixMultiply::apply(W, src_v, - tgt_v, config); + spmm_layout_left(W, src_v, tgt_v); return; } using Value = TargetValue; @@ -87,9 +110,11 @@ void SparseMatrixMultiply -void SparseMatrixMultiply::apply( +void SparseMatrixMultiply::multiply( + const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { + spmt_layout_left(W, src, tgt); +} + +template +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { + spmt_layout_left(W, src, tgt); +} + +template +void SparseMatrixMultiply::multiply( const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { - return SparseMatrixMultiply::apply(W, src, tgt, - config); + return SparseMatrixMultiply::multiply(W, src, tgt, config); } template -void SparseMatrixMultiply::apply( - const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { + return SparseMatrixMultiply::multiply_add(W, src, tgt, config); +} + +template +void spmm_layout_right(const SparseMatrix& W, const View& src, View& tgt) { using Value = TargetValue; const auto outer = W.outer(); const auto index = W.inner(); @@ -125,8 +166,10 @@ void SparseMatrixMultiply= W.rows()); atlas_omp_parallel_for(idx_t r = 0; r < rows; ++r) { - for (idx_t k = 0; k < Nk; ++k) { - tgt(k, r) = 0.; + if constexpr (SetZero) { + for (idx_t k = 0; k < Nk; ++k) { + tgt(k, r) = 0.; + } } for (idx_t c = outer[r]; c < outer[r + 1]; ++c) { idx_t n = index[c]; @@ -139,14 +182,24 @@ void SparseMatrixMultiply -void SparseMatrixMultiply::apply( - const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { +void SparseMatrixMultiply::multiply( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + spmm_layout_right(W, src, tgt); +} + +template +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration&) { + spmm_layout_right(W, src, tgt); +} + +template +void spmt_layout_right(const SparseMatrix& W, const View& src, View& tgt) { if (src.contiguous() && tgt.contiguous()) { // We can take a more optimized route by reducing rank auto src_v = View(src.data(), array::make_shape(src.shape(0), src.stride(0))); auto tgt_v = View(tgt.data(), array::make_shape(tgt.shape(0), tgt.stride(0))); - SparseMatrixMultiply::apply( - W, src_v, tgt_v, config); + spmm_layout_right(W, src_v, tgt_v); return; } using Value = TargetValue; @@ -158,9 +211,11 @@ void SparseMatrixMultiply +void SparseMatrixMultiply::multiply( + const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { + spmt_layout_right(W, src, tgt); +} + +template +void SparseMatrixMultiply::multiply_add( + const SparseMatrix& W, const View& src, View& tgt, const Configuration& config) { + spmt_layout_right(W, src, tgt); +} + #define EXPLICIT_TEMPLATE_INSTANTIATION(TYPE) \ template struct SparseMatrixMultiply; \ template struct SparseMatrixMultiply; \ diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply_OpenMP.h b/src/atlas/linalg/sparse/SparseMatrixMultiply_OpenMP.h index f6cd0a17a..a3bdcea44 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply_OpenMP.h +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply_OpenMP.h @@ -19,37 +19,49 @@ namespace sparse { template struct SparseMatrixMultiply { - static void apply(const SparseMatrix& W, const View& src, View& tgt, + static void multiply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix& W, const View& src, View& tgt, const Configuration&); }; template struct SparseMatrixMultiply { - static void apply(const SparseMatrix& W, const View& src, View& tgt, + static void multiply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix& W, const View& src, View& tgt, const Configuration&); }; template struct SparseMatrixMultiply { - static void apply(const SparseMatrix& W, const View& src, View& tgt, + static void multiply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix& W, const View& src, View& tgt, const Configuration&); }; template struct SparseMatrixMultiply { - static void apply(const SparseMatrix& W, const View& src, View& tgt, + static void multiply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix& W, const View& src, View& tgt, const Configuration&); }; template struct SparseMatrixMultiply { - static void apply(const SparseMatrix& W, const View& src, View& tgt, + static void multiply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix& W, const View& src, View& tgt, const Configuration&); }; template struct SparseMatrixMultiply { - static void apply(const SparseMatrix& W, const View& src, View& tgt, + static void multiply(const SparseMatrix& W, const View& src, View& tgt, + const Configuration&); + static void multiply_add(const SparseMatrix& W, const View& src, View& tgt, const Configuration&); }; diff --git a/src/tests/linalg/test_linalg_sparse.cc b/src/tests/linalg/test_linalg_sparse.cc index 9897bfb13..138df76d0 100644 --- a/src/tests/linalg/test_linalg_sparse.cc +++ b/src/tests/linalg/test_linalg_sparse.cc @@ -379,6 +379,25 @@ CASE("sparse_matrix vector multiply (spmv)") { EXPECT_THROWS_AS(sparse_matrix_multiply(A, x2.view(), y.view()), eckit::AssertionFailed); } } + + SECTION("View of atlas::Array [backend=" + backend + "]") { + ArrayVector x(Vector{1., 2., 3.}); + ArrayVector y(Vector{4., 5., 6.}); + sparse_matrix_multiply_add(A, x.view(), y.view()); + expect_equal(y.view(), Vector{-3., 9., 12.}); + // sparse_matrix_multiply of sparse matrix and vector of non-matching sizes should fail + { + ArrayVector x2(2); + EXPECT_THROWS_AS(sparse_matrix_multiply_add(A, x2.view(), y.view()), eckit::AssertionFailed); + } + } + + SECTION("sparse_matrix_multiply_add [backend=" + backend + "]") { + ArrayVector x(Vector{1., 2., 3.}); + ArrayVector y(Vector{1., 2., 3.}); + sparse_matrix_multiply_add(A, x.view(), y.view()); + expect_equal(y.view(), Vector{-6., 6., 9.}); + } } } @@ -419,6 +438,14 @@ CASE("sparse_matrix matrix multiply (spmm)") { sparse_matrix_multiply(A, ma.view(), c.view(), Indexing::layout_right); expect_equal(c.view(), c_exp); } + + SECTION("sparse_matrix_multiply_add [backend=" + sparse::current_backend().type() + "]") { + ArrayMatrix x(m); + ArrayMatrix y(m); + Matrix y_exp{{-12., -12.}, {9., 12.}, {15., 18.}}; + sparse_matrix_multiply_add(A, x.view(), y.view(), Indexing::layout_right); + expect_equal(y.view(), y_exp); + } } SECTION("sparse_matrix_multiply [backend=openmp]") { diff --git a/src/tests/linalg/test_linalg_sparse_gpu.cc b/src/tests/linalg/test_linalg_sparse_gpu.cc index 63b172569..619308c81 100644 --- a/src/tests/linalg/test_linalg_sparse_gpu.cc +++ b/src/tests/linalg/test_linalg_sparse_gpu.cc @@ -169,6 +169,23 @@ CASE("sparse-matrix vector multiply (spmv) [backend=hicsparse]") { EXPECT_THROWS_AS(sparse_matrix_multiply(A, x2_device_view, y_device_view), eckit::AssertionFailed); } } + + SECTION("sparse_matrix_multiply_add [backend=hicsparse]") { + ArrayVector x(Vector{1., 2., 3.}); + ArrayVector y(Vector{4., 5., 6.}); + auto x_device_view = x.device_view(); + auto y_device_view = y.device_view(); + sparse_matrix_multiply_add(A, x_device_view, y_device_view); + y.setHostNeedsUpdate(true); + auto y_view = y.view(); + expect_equal(y.view(), Vector{-3., 9., 12.}); + // sparse_matrix_multiply of sparse matrix and vector of non-matching sizes should fail + { + ArrayVector x2(2); + auto x2_device_view = x2.device_view(); + EXPECT_THROWS_AS(sparse_matrix_multiply_add(A, x2_device_view, y_device_view), eckit::AssertionFailed); + } + } } CASE("sparse-matrix matrix multiply (spmm) [backend=hicsparse]") { @@ -230,6 +247,18 @@ CASE("sparse-matrix matrix multiply (spmm) [backend=hicsparse]") { auto c_view = c.view(); expect_equal(c_view, ArrayMatrix(c_exp).view()); } + + SECTION("sparse_matrix_multiply_add [backend=hicsparse]") { + ArrayMatrix x(m); + ArrayMatrix y(m); + Matrix y_exp{{-12., -12.}, {9., 12.}, {15., 18.}}; + auto x_device_view = x.device_view(); + auto y_device_view = y.device_view(); + sparse_matrix_multiply_add(A, x_device_view, y_device_view, sparse::backend::hicsparse()); + y.setHostNeedsUpdate(true); + auto y_view = y.view(); + expect_equal(y_view, ArrayMatrix(y_exp).view()); + } } //---------------------------------------------------------------------------------------------------------------------- From 8ed5e97579a2f5265e85f78cad749e4660d1ca1d Mon Sep 17 00:00:00 2001 From: Liam Adams Date: Fri, 15 Nov 2024 23:28:52 +0000 Subject: [PATCH 08/11] Update interpolation to work with hicsparse sparse matrix multiply backend. --- src/atlas/interpolation/method/Method.cc | 180 +++++++++++----- ...nservativeSphericalPolygonInterpolation.cc | 4 + src/tests/interpolation/CMakeLists.txt | 7 + .../test_interpolation_structured2D_gpu.cc | 196 ++++++++++++++++++ 4 files changed, 335 insertions(+), 52 deletions(-) create mode 100644 src/tests/interpolation/test_interpolation_structured2D_gpu.cc diff --git a/src/atlas/interpolation/method/Method.cc b/src/atlas/interpolation/method/Method.cc index 3a2349a1d..ce0211b13 100644 --- a/src/atlas/interpolation/method/Method.cc +++ b/src/atlas/interpolation/method/Method.cc @@ -95,14 +95,78 @@ void set_missing_values(Field& tgt, const std::vector& missing) { } } +enum class MemorySpace { Host, Device }; + +MemorySpace getSparseBackendMemorySpace(const sparse::Backend& backend) { + if (backend.type() == "eckit_linalg") { + return MemorySpace::Host; + } else if (backend.type() == "openmp") { + return MemorySpace::Host; + } else if (backend.type() == "hicsparse") { + return MemorySpace::Device; + } else { + ATLAS_NOTIMPLEMENTED; + } +} + +void fetch(const atlas::Field& f, MemorySpace memorySpace) { + if(memorySpace == MemorySpace::Host) { + if (f.hostNeedsUpdate()) { + f.updateHost(); + } + } + else { + ATLAS_ASSERT(memorySpace == MemorySpace::Device); + if (f.deviceNeedsUpdate()) { + f.updateDevice(); + } + } +} + +void setOtherMemorySpaceNeedsUpdate(const atlas::Field& f, MemorySpace memorySpace) { + if(memorySpace == MemorySpace::Host) { + f.setDeviceNeedsUpdate(true); + } + else { + ATLAS_ASSERT(memorySpace == MemorySpace::Device); + f.setHostNeedsUpdate(true); + } +} + +template +atlas::array::ArrayView make_device_view_fetched(atlas::Field& f) { + fetch(f, MemorySpace::Device); + return atlas::array::make_device_view(f); +} + +template +atlas::array::ArrayView make_device_view_fetched(const atlas::Field& f) { + fetch(f, MemorySpace::Device); + return atlas::array::make_device_view(f); +} + +template +atlas::array::ArrayView make_host_view_fetched(atlas::Field& f) { + fetch(f, MemorySpace::Host); + return atlas::array::make_host_view(f); +} + +template +atlas::array::ArrayView make_host_view_fetched(const atlas::Field& f) { + fetch(f, MemorySpace::Host); + return atlas::array::make_host_view(f); +} + } // anonymous namespace template void Method::interpolate_field_rank1(const Field& src, Field& tgt, const Matrix& W) const { auto backend = std::is_same::value ? sparse::backend::openmp() : sparse::Backend{linalg_backend_}; - auto src_v = array::make_view(src); - auto tgt_v = array::make_view(tgt); + const auto memorySpace = getSparseBackendMemorySpace(backend); + + auto src_v = (memorySpace == MemorySpace::Host) ? make_host_view_fetched(src) : make_device_view_fetched(src); + auto tgt_v = (memorySpace == MemorySpace::Host) ? make_host_view_fetched(tgt) : make_device_view_fetched(tgt); if (nonLinear_(src)) { Matrix W_nl(W); // copy (a big penalty -- copy-on-write would definitely be better) @@ -112,17 +176,25 @@ void Method::interpolate_field_rank1(const Field& src, Field& tgt, const Matrix& else { sparse_matrix_multiply(W, src_v, tgt_v, backend); } + + setOtherMemorySpaceNeedsUpdate(tgt, memorySpace); } template void Method::interpolate_field_rank2(const Field& src, Field& tgt, const Matrix& W) const { - sparse::Backend backend{linalg_backend_}; - auto src_v = array::make_view(src); - auto tgt_v = array::make_view(tgt); + auto backend = std::is_same::value ? sparse::backend::openmp() : sparse::Backend{linalg_backend_}; + + // To match previous logic of only using OpenMP (probably because eckit_linalg backend doesn't support "layout_left" indexing) + if (backend.type() == "eckit_linalg") { + backend = sparse::backend::openmp(); + } if (nonLinear_(src)) { // We cannot apply the same matrix to full columns as e.g. missing values could be present in only certain parts. + + auto src_v = array::make_view(src); + auto tgt_v = array::make_view(tgt); // Allocate temporary rank-1 fields corresponding to one horizontal level auto src_slice = Field("s", array::make_datatype(), {src.shape(0)}); @@ -144,13 +216,19 @@ void Method::interpolate_field_rank2(const Field& src, Field& tgt, const Matrix& interpolate_field_rank1(src_slice, tgt_slice, W); // Copy rank-1 field to this level in the rank-2 field + fetch(tgt_slice, MemorySpace::Host); for (idx_t i = 0; i < tgt.shape(0); ++i) { tgt_v(i, lev) = tgt_slice_v(i); } + setOtherMemorySpaceNeedsUpdate(tgt, MemorySpace::Host); } } else { - sparse_matrix_multiply(W, src_v, tgt_v, sparse::backend::openmp()); + const auto memorySpace = getSparseBackendMemorySpace(backend); + auto src_dv = (memorySpace == MemorySpace::Host) ? make_host_view_fetched(src) : make_device_view_fetched(src); + auto tgt_dv = (memorySpace == MemorySpace::Host) ? make_host_view_fetched(tgt) : make_device_view_fetched(tgt); + sparse_matrix_multiply(W, src_dv, tgt_dv, backend); + setOtherMemorySpaceNeedsUpdate(tgt, memorySpace); } } @@ -158,75 +236,57 @@ void Method::interpolate_field_rank2(const Field& src, Field& tgt, const Matrix& template void Method::interpolate_field_rank3(const Field& src, Field& tgt, const Matrix& W) const { sparse::Backend backend{linalg_backend_}; - auto src_v = array::make_view(src); - auto tgt_v = array::make_view(tgt); + auto src_v = make_host_view_fetched(src); + auto tgt_v = make_host_view_fetched(tgt); if (not W.empty() && nonLinear_(src)) { ATLAS_ASSERT(false, "nonLinear interpolation not supported for rank-3 fields."); } sparse_matrix_multiply(W, src_v, tgt_v, sparse::backend::openmp()); + setOtherMemorySpaceNeedsUpdate(tgt, MemorySpace::Host); } template void Method::adjoint_interpolate_field_rank1(Field& src, const Field& tgt, const Matrix& W) const { - array::ArrayT tmp(src.shape()); + auto backend = std::is_same::value ? sparse::backend::openmp() : sparse::Backend{linalg_backend_}; + const auto memorySpace = getSparseBackendMemorySpace(backend); - auto tmp_v = array::make_view(tmp); - auto src_v = array::make_view(src); - auto tgt_v = array::make_view(tgt); + auto src_v = (memorySpace == MemorySpace::Host) ? make_host_view_fetched(src) : make_device_view_fetched(src); + auto tgt_v = (memorySpace == MemorySpace::Host) ? make_host_view_fetched(tgt) : make_device_view_fetched(tgt); - tmp_v.assign(0.); + sparse_matrix_multiply_add(W, tgt_v, src_v, backend); - if (std::is_same::value) { - sparse_matrix_multiply(W, tgt_v, tmp_v, sparse::backend::openmp()); - } - else { - sparse_matrix_multiply(W, tgt_v, tmp_v, sparse::Backend{linalg_backend_}); - } - - - for (idx_t t = 0; t < tmp.shape(0); ++t) { - src_v(t) += tmp_v(t); - } + setOtherMemorySpaceNeedsUpdate(src, memorySpace); } template void Method::adjoint_interpolate_field_rank2(Field& src, const Field& tgt, const Matrix& W) const { - array::ArrayT tmp(src.shape()); + auto backend = std::is_same::value ? sparse::backend::openmp() : sparse::Backend{linalg_backend_}; + + // To match previous logic of only using OpenMP (probably because eckit_linalg backend doesn't support "layout_left" indexing) + if (backend.type() == "eckit_linalg") { + backend = sparse::backend::openmp(); + } - auto tmp_v = array::make_view(tmp); - auto src_v = array::make_view(src); - auto tgt_v = array::make_view(tgt); + const auto memorySpace = getSparseBackendMemorySpace(backend); - tmp_v.assign(0.); + auto src_v = (memorySpace == MemorySpace::Host) ? make_host_view_fetched(src) : make_device_view_fetched(src); + auto tgt_v = (memorySpace == MemorySpace::Host) ? make_host_view_fetched(tgt) : make_device_view_fetched(tgt); - sparse_matrix_multiply(W, tgt_v, tmp_v, sparse::backend::openmp()); + sparse_matrix_multiply_add(W, tgt_v, src_v, backend); - for (idx_t t = 0; t < tmp.shape(0); ++t) { - for (idx_t k = 0; k < tmp.shape(1); ++k) { - src_v(t, k) += tmp_v(t, k); - } - } + setOtherMemorySpaceNeedsUpdate(src, memorySpace); } template void Method::adjoint_interpolate_field_rank3(Field& src, const Field& tgt, const Matrix& W) const { - array::ArrayT tmp(src.shape()); - - auto tmp_v = array::make_view(tmp); - auto src_v = array::make_view(src); - auto tgt_v = array::make_view(tgt); + sparse::Backend backend{linalg_backend_}; - tmp_v.assign(0.); + auto src_v = make_host_view_fetched(src); + auto tgt_v = make_host_view_fetched(tgt); - sparse_matrix_multiply(W, tgt_v, tmp_v, sparse::backend::openmp()); + sparse_matrix_multiply_add(W, tgt_v, src_v, sparse::backend::openmp()); - for (idx_t t = 0; t < tmp.shape(0); ++t) { - for (idx_t j = 0; j < tmp.shape(1); ++j) { - for (idx_t k = 0; k < tmp.shape(2); ++k) { - src_v(t, j, k) += tmp_v(t, j, k); - } - } - } + setOtherMemorySpaceNeedsUpdate(src, MemorySpace::Host); } void Method::check_compatibility(const Field& src, const Field& tgt, const Matrix& W) const { @@ -381,8 +441,13 @@ void Method::do_execute(const FieldSet& fieldsSource, FieldSet& fieldsTarget, Me void Method::do_execute(const Field& src, Field& tgt, Metadata&) const { ATLAS_TRACE("atlas::interpolation::method::Method::do_execute()"); + // todo: dispatch to gpu-aware mpi if available + if (src.hostNeedsUpdate()) { + src.updateHost(); + } haloExchange(src); - + src.setDeviceNeedsUpdate(true); + if( matrix_ ) { // (matrix == nullptr) when a partition is empty if (src.datatype().kind() == array::DataType::KIND_REAL64) { interpolate_field(src, tgt, *matrix_); @@ -411,7 +476,13 @@ void Method::do_execute(const Field& src, Field& tgt, Metadata&) const { } // set missing values - set_missing_values(tgt, missing_); + if (not missing_.empty()) { + if (tgt.hostNeedsUpdate()) { + tgt.updateHost(); + } + set_missing_values(tgt, missing_); + tgt.setDeviceNeedsUpdate(true); + } tgt.set_dirty(); } @@ -454,7 +525,12 @@ void Method::do_execute_adjoint(Field& src, const Field& tgt, Metadata&) const { src.set_dirty(); + // todo: dispatch to gpu-aware mpi if available + if (src.hostNeedsUpdate()) { + src.updateHost(); + } adjointHaloExchange(src); + src.setDeviceNeedsUpdate(true); } @@ -501,4 +577,4 @@ interpolation::Cache Method::createCache() const { } // namespace interpolation -} // namespace atlas +} // namespace atlas \ No newline at end of file diff --git a/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.cc b/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.cc index 334b65422..de06a04c3 100644 --- a/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.cc +++ b/src/atlas/interpolation/method/unstructured/ConservativeSphericalPolygonInterpolation.cc @@ -1683,7 +1683,11 @@ void ConservativeSphericalPolygonInterpolation::do_execute(const Field& src_fiel stopwatch.stop(); { ATLAS_TRACE("halo exchange target"); + if (tgt_field.hostNeedsUpdate()) { + tgt_field.updateHost(); + } tgt_field.haloExchange(); + tgt_field.setDeviceNeedsUpdate(true); } auto remap_stat = remap_stat_; diff --git a/src/tests/interpolation/CMakeLists.txt b/src/tests/interpolation/CMakeLists.txt index a5ac0e86a..f6be0ae85 100644 --- a/src/tests/interpolation/CMakeLists.txt +++ b/src/tests/interpolation/CMakeLists.txt @@ -104,6 +104,13 @@ ecbuild_add_test( TARGET atlas_test_interpolation_biquasicubic ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) +ecbuild_add_test( TARGET atlas_test_interpolation_bicubic_gpu + SOURCES test_interpolation_structured2D_gpu.cc + LIBS atlas + CONDITION atlas_HAVE_CUDA OR atlas_HAVE_HIP + ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} +) + ecbuild_add_test( TARGET atlas_test_interpolation_structured2D_to_unstructured SOURCES test_interpolation_structured2D_to_unstructured.cc LIBS atlas diff --git a/src/tests/interpolation/test_interpolation_structured2D_gpu.cc b/src/tests/interpolation/test_interpolation_structured2D_gpu.cc new file mode 100644 index 000000000..1e42dc152 --- /dev/null +++ b/src/tests/interpolation/test_interpolation_structured2D_gpu.cc @@ -0,0 +1,196 @@ +/* + * (C) Copyright 2024 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "atlas/array.h" +#include "atlas/field/Field.h" +#include "atlas/field/FieldSet.h" +#include "atlas/functionspace/NodeColumns.h" +#include "atlas/functionspace/StructuredColumns.h" +#include "atlas/grid/Grid.h" +#include "atlas/grid/Iterator.h" +#include "atlas/interpolation.h" +#include "atlas/mesh/Mesh.h" +#include "atlas/meshgenerator.h" +#include "atlas/output/Gmsh.h" +#include "atlas/util/CoordinateEnums.h" +#include "atlas/util/function/VortexRollup.h" + +#include "tests/AtlasTestEnvironment.h" + +using atlas::functionspace::NodeColumns; +using atlas::functionspace::StructuredColumns; +using atlas::util::Config; + +namespace atlas { +namespace test { + +//----------------------------------------------------------------------------- + +static Config scheme() { + Config scheme; + scheme.set("type", "structured-cubic2D"); + scheme.set("halo", 2); + scheme.set("name", "cubic"); + scheme.set("verbose",eckit::Resource("--verbose",false)); + scheme.set("sparse_matrix_multiply", "hicsparse"); + return scheme; +} + +std::string input_gridname(const std::string& default_grid) { + return eckit::Resource("--input-grid", default_grid); +} + +std::string output_gridname(const std::string& default_grid) { + return eckit::Resource("--output-grid", default_grid); +} + +CASE("which scheme?") { + Log::info() << scheme().getString("type") << std::endl; +} + +template +struct AdjointTolerance { + static const Value value; +}; +template <> +const double AdjointTolerance::value = 2.e-14; +template <> +const float AdjointTolerance::value = 2.e-5; + + +void test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend() { + Grid input_grid(input_gridname("O32")); + Grid output_grid(output_gridname("O64")); + + // Cubic interpolation requires a StructuredColumns functionspace with 2 halos + StructuredColumns input_fs(input_grid, scheme() | option::levels(3)); + + MeshGenerator meshgen("structured"); + Mesh output_mesh = meshgen.generate(output_grid); + FunctionSpace output_fs = NodeColumns{output_mesh, option::levels(3)}; + + auto lonlat = array::make_view(input_fs.xy()); + + FieldSet fields_source; + FieldSet fields_target; + for (idx_t f = 0; f < 3; ++f) { + auto field_source = fields_source.add(input_fs.createField(option::name("field " + std::to_string(f)))); + fields_target.add(output_fs.createField(option::name("field " + std::to_string(f)))); + + auto source = array::make_view(field_source); + for (idx_t n = 0; n < input_fs.size(); ++n) { + for (idx_t k = 0; k < 3; ++k) { + source(n, k) = util::function::vortex_rollup(lonlat(n, LON), lonlat(n, LAT), 0.5 + double(k) / 2); + } + } + } + + ATLAS_TRACE_SCOPE("with matrix") { + Interpolation interpolation(scheme(), input_fs, output_fs); + interpolation.execute(fields_source, fields_target); + for (auto& field : fields_target) { + field.updateHost(); + } + + ATLAS_TRACE_SCOPE("output") { + output::Gmsh gmsh(scheme().getString("name") + "-multilevel-fieldset-output-with-matrix-" + + array::make_datatype().str() + ".msh", + Config("coordinates", "xy")); + gmsh.write(output_mesh); + output_fs.haloExchange(fields_target); + gmsh.write(fields_target); + } + } + + ATLAS_TRACE_SCOPE("with matrix adjoint") { + Interpolation interpolation(scheme() | Config("adjoint", true), input_fs, output_fs); + + std::vector AxAx(fields_source.field_names().size(), 0.); + std::vector xAtAx(fields_source.field_names().size(), 0.); + + FieldSet fields_source_reference; + for (atlas::Field& field : fields_source) { + Field temp_field(field.name(), field.datatype().kind(), field.shape()); + temp_field.set_levels(field.levels()); + + auto fieldInView = array::make_view(field); + auto fieldOutView = array::make_view(temp_field); + + for (atlas::idx_t jn = 0; jn < temp_field.shape(0); ++jn) { + for (atlas::idx_t jl = 0; jl < temp_field.levels(); ++jl) { + fieldOutView(jn, jl) = fieldInView(jn, jl); + } + } + fields_source_reference.add(temp_field); + } + + interpolation.execute(fields_source, fields_target); + for (auto& field : fields_target) { + field.updateHost(); + } + + std::size_t fIndx(0); + auto source_names = fields_source.field_names(); + for (const std::string& s : fields_target.field_names()) { + auto target = array::make_view(fields_target[s]); + auto source = array::make_view(fields_source[source_names[fIndx]]); + + for (idx_t n = 0; n < input_fs.size(); ++n) { + for (idx_t k = 0; k < 3; ++k) { + AxAx[fIndx] += source(n, k) * source(n, k); + } + } + + for (idx_t n = 0; n < output_fs.size(); ++n) { + for (idx_t k = 0; k < 3; ++k) { + AxAx[fIndx] += target(n, k) * target(n, k); + } + } + fIndx += 1; + } + + interpolation.execute_adjoint(fields_source, fields_target); + for (auto& field : fields_source) { + field.updateHost(); + } + + fIndx = 0; + for (const std::string& s : fields_source.field_names()) { + auto source_reference = array::make_view(fields_source_reference[s]); + auto source = array::make_view(fields_source[s]); + + for (idx_t n = 0; n < input_fs.size(); ++n) { + for (idx_t k = 0; k < 3; ++k) { + xAtAx[fIndx] += source(n, k) * source_reference(n, k); + } + } + fIndx += 1; + } + + for (std::size_t t = 0; t < AxAx.size(); ++t) { + Log::debug() << " Adjoint test t = " << t << " (Ax).(Ax) = " << AxAx[t] << " x.(AtAx) = " << xAtAx[t] + << " std::abs( 1.0 - xAtAx[t]/AxAx[t] ) " << std::abs(1.0 - xAtAx[t] / AxAx[t]) + << " AdjointTolerance::value " << AdjointTolerance::value << std::endl; + + EXPECT(std::abs(1.0 - xAtAx[t] / AxAx[t]) < AdjointTolerance::value); + } + } +} + +CASE("test_interpolation_structured using fs API for fieldset with hicsparse backend") { + test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend(); +} + +} // namespace test +} // namespace atlas + +int main(int argc, char** argv) { + return atlas::test::run(argc, argv); +} From 487ff07d8fcebcc705c4c69eada0258502a49cb4 Mon Sep 17 00:00:00 2001 From: Liam Adams Date: Fri, 15 Nov 2024 23:31:37 +0000 Subject: [PATCH 09/11] Cache hicSparse handle to avoid library re-initialization. --- .../sparse/SparseMatrixMultiply_HicSparse.cc | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc b/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc index 69f03743d..46b832b0c 100644 --- a/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc +++ b/src/atlas/linalg/sparse/SparseMatrixMultiply_HicSparse.cc @@ -19,6 +19,20 @@ namespace { +class HicSparseHandleRAIIWrapper { +public: + HicSparseHandleRAIIWrapper() { hicsparseCreate(&handle_); }; + ~HicSparseHandleRAIIWrapper() { hicsparseDestroy(handle_); } + hicsparseHandle_t value() { return handle_; } +private: + hicsparseHandle_t handle_; +}; + +hicsparseHandle_t getDefaultHicSparseHandle() { + static auto handle = HicSparseHandleRAIIWrapper(); + return handle.value(); +} + template constexpr hicsparseIndexType_t getHicsparseIndexType() { using base_type = std::remove_const_t; @@ -86,10 +100,7 @@ void hsSpMV(const SparseMatrix& W, const View& src, TargetValue W.updateDevice(); } - // Create sparse library handle - // todo: use singleton class for storing hicSparse library handle. - hicsparseHandle_t handle; - HICSPARSE_CALL(hicsparseCreate(&handle)); + auto handle = getDefaultHicSparseHandle(); // Create a sparse matrix descriptor hicsparseConstSpMatDescr_t matA; @@ -159,7 +170,6 @@ void hsSpMV(const SparseMatrix& W, const View& src, TargetValue HICSPARSE_CALL(hicsparseDestroyDnVec(vecX)); HICSPARSE_CALL(hicsparseDestroyDnVec(vecY)); HICSPARSE_CALL(hicsparseDestroySpMat(matA)); - HICSPARSE_CALL(hicsparseDestroy(handle)); HIC_CALL(hicDeviceSynchronize()); } @@ -181,10 +191,7 @@ void hsSpMM(const SparseMatrix& W, const View& src, TargetValue W.updateDevice(); } - // Create sparse library handle - // todo: use singleton class for storing hicSparse library handle. - hicsparseHandle_t handle; - HICSPARSE_CALL(hicsparseCreate(&handle)); + auto handle = getDefaultHicSparseHandle(); // Create a sparse matrix descriptor hicsparseConstSpMatDescr_t matA; @@ -260,7 +267,6 @@ void hsSpMM(const SparseMatrix& W, const View& src, TargetValue HICSPARSE_CALL(hicsparseDestroyDnMat(matC)); HICSPARSE_CALL(hicsparseDestroyDnMat(matB)); HICSPARSE_CALL(hicsparseDestroySpMat(matA)); - HICSPARSE_CALL(hicsparseDestroy(handle)); HIC_CALL(hicDeviceSynchronize()); } From a79d8eb332d8b7993b415d117de4754ee0fe46f6 Mon Sep 17 00:00:00 2001 From: Liam Adams Date: Thu, 21 Nov 2024 16:59:47 +0000 Subject: [PATCH 10/11] Enable on-device halo exchange in interpolation. --- src/atlas/interpolation/method/Method.cc | 70 ++++++--- src/atlas/interpolation/method/Method.h | 8 +- src/tests/interpolation/CMakeLists.txt | 8 + ...nterpolation_structured2D_to_points_gpu.cc | 145 ++++++++++++++++++ 4 files changed, 209 insertions(+), 22 deletions(-) create mode 100644 src/tests/interpolation/test_interpolation_structured2D_to_points_gpu.cc diff --git a/src/atlas/interpolation/method/Method.cc b/src/atlas/interpolation/method/Method.cc index ce0211b13..c56dd571b 100644 --- a/src/atlas/interpolation/method/Method.cc +++ b/src/atlas/interpolation/method/Method.cc @@ -441,12 +441,29 @@ void Method::do_execute(const FieldSet& fieldsSource, FieldSet& fieldsTarget, Me void Method::do_execute(const Field& src, Field& tgt, Metadata&) const { ATLAS_TRACE("atlas::interpolation::method::Method::do_execute()"); - // todo: dispatch to gpu-aware mpi if available - if (src.hostNeedsUpdate()) { - src.updateHost(); + if (src.hostNeedsUpdate() && src.deviceNeedsUpdate()) { + throw_AssertionFailed("Inconsistent memory state flags - we will not be able to " + "determine which memory space to perform the halo exchange on", + Here()); + } + + sparse::Backend backend{linalg_backend_}; + const auto memorySpace = getSparseBackendMemorySpace(backend); + + bool on_device = false; + if (!src.hostNeedsUpdate() && !src.deviceNeedsUpdate()) { + on_device = memorySpace == MemorySpace::Device; + } else { + on_device = !src.deviceNeedsUpdate() ? true : false; + } + + haloExchange(src, on_device); + + if (on_device) { + src.setHostNeedsUpdate(true); + } else { + src.setDeviceNeedsUpdate(true); } - haloExchange(src); - src.setDeviceNeedsUpdate(true); if( matrix_ ) { // (matrix == nullptr) when a partition is empty if (src.datatype().kind() == array::DataType::KIND_REAL64) { @@ -501,6 +518,15 @@ void Method::do_execute_adjoint(FieldSet& fieldsSource, const FieldSet& fieldsTa void Method::do_execute_adjoint(Field& src, const Field& tgt, Metadata&) const { ATLAS_TRACE("atlas::interpolation::method::Method::do_execute_adjoint()"); + if (src.hostNeedsUpdate() && src.deviceNeedsUpdate()) { + throw_AssertionFailed("Inconsistent memory state flags - we will not be able to " + "determine which memory space to perform the adjoint halo exchange on", + Here()); + } + + sparse::Backend backend{linalg_backend_}; + const auto memorySpace = getSparseBackendMemorySpace(backend); + if (nonLinear_(src)) { throw_NotImplemented("Adjoint interpolation only works for interpolation schemes that are linear", Here()); } @@ -525,12 +551,20 @@ void Method::do_execute_adjoint(Field& src, const Field& tgt, Metadata&) const { src.set_dirty(); - // todo: dispatch to gpu-aware mpi if available - if (src.hostNeedsUpdate()) { - src.updateHost(); + bool on_device = false; + if (!src.hostNeedsUpdate() && !src.deviceNeedsUpdate()) { + on_device = memorySpace == MemorySpace::Device; + } else { + on_device = !src.deviceNeedsUpdate() ? true : false; + } + + adjointHaloExchange(src, on_device); + + if (on_device) { + src.setHostNeedsUpdate(true); + } else { + src.setDeviceNeedsUpdate(true); } - adjointHaloExchange(src); - src.setDeviceNeedsUpdate(true); } @@ -549,25 +583,25 @@ void Method::normalise(Triplets& triplets) { } } -void Method::haloExchange(const FieldSet& fields) const { +void Method::haloExchange(const FieldSet& fields, bool on_device) const { for (auto& field : fields) { - haloExchange(field); + haloExchange(field, on_device); } } -void Method::haloExchange(const Field& field) const { +void Method::haloExchange(const Field& field, bool on_device) const { if (field.dirty() && allow_halo_exchange_) { - source().haloExchange(field); + source().haloExchange(field, on_device); } } -void Method::adjointHaloExchange(const FieldSet& fields) const { +void Method::adjointHaloExchange(const FieldSet& fields, bool on_device) const { for (auto& field : fields) { - adjointHaloExchange(field); + adjointHaloExchange(field, on_device); } } -void Method::adjointHaloExchange(const Field& field) const { +void Method::adjointHaloExchange(const Field& field, bool on_device) const { if (field.dirty() && allow_halo_exchange_) { - source().adjointHaloExchange(field); + source().adjointHaloExchange(field, on_device); } } diff --git a/src/atlas/interpolation/method/Method.h b/src/atlas/interpolation/method/Method.h index 5b37ca25c..5a210e3ce 100644 --- a/src/atlas/interpolation/method/Method.h +++ b/src/atlas/interpolation/method/Method.h @@ -91,11 +91,11 @@ class Method : public util::Object { static void normalise(Triplets& triplets); - void haloExchange(const FieldSet&) const; - void haloExchange(const Field&) const; + void haloExchange(const FieldSet&, bool on_device = false) const; + void haloExchange(const Field&, bool on_device = false) const; - void adjointHaloExchange(const FieldSet&) const; - void adjointHaloExchange(const Field&) const; + void adjointHaloExchange(const FieldSet&, bool on_device = false) const; + void adjointHaloExchange(const Field&, bool on_device = false) const; // NOTE : Matrix-free or non-linear interpolation operators do not have matrices, so do not expose here friend class atlas::test::Access; diff --git a/src/tests/interpolation/CMakeLists.txt b/src/tests/interpolation/CMakeLists.txt index f6be0ae85..8429513b7 100644 --- a/src/tests/interpolation/CMakeLists.txt +++ b/src/tests/interpolation/CMakeLists.txt @@ -127,6 +127,14 @@ ecbuild_add_test( TARGET atlas_test_interpolation_structured2D_to_points ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} ) +ecbuild_add_test( TARGET atlas_test_interpolation_structured2D_to_points_gpu + SOURCES test_interpolation_structured2D_to_points_gpu.cc + LIBS atlas + MPI 4 + CONDITION eckit_HAVE_MPI AND atlas_HAVE_GPU + ENVIRONMENT ${ATLAS_TEST_ENVIRONMENT} +) + ecbuild_add_test( TARGET atlas_test_interpolation_cubedsphere SOURCES test_interpolation_cubedsphere.cc LIBS atlas diff --git a/src/tests/interpolation/test_interpolation_structured2D_to_points_gpu.cc b/src/tests/interpolation/test_interpolation_structured2D_to_points_gpu.cc new file mode 100644 index 000000000..0ab56b4c8 --- /dev/null +++ b/src/tests/interpolation/test_interpolation_structured2D_to_points_gpu.cc @@ -0,0 +1,145 @@ +/* + * (C) Copyright 2013 ECMWF. + * + * This software is licensed under the terms of the Apache Licence Version 2.0 + * which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + * In applying this licence, ECMWF does not waive the privileges and immunities + * granted to it by virtue of its status as an intergovernmental organisation + * nor does it submit to any jurisdiction. + */ + +#include "atlas/array.h" +#include "atlas/field/Field.h" +#include "atlas/field/FieldSet.h" +#include "atlas/functionspace/PointCloud.h" +#include "atlas/functionspace/StructuredColumns.h" +#include "atlas/grid/Grid.h" +#include "atlas/grid/Iterator.h" +#include "atlas/interpolation.h" +#include "atlas/mesh/Mesh.h" +#include "atlas/meshgenerator.h" +#include "atlas/output/Gmsh.h" +#include "atlas/util/CoordinateEnums.h" +#include "atlas/util/function/VortexRollup.h" +#include "atlas/util/PolygonXY.h" + +#include "tests/AtlasTestEnvironment.h" + +using atlas::functionspace::PointCloud; +using atlas::functionspace::StructuredColumns; +using atlas::util::Config; + +namespace atlas { +namespace test { + +//----------------------------------------------------------------------------- + +std::string input_gridname(const std::string& default_grid) { + return eckit::Resource("--input-grid", default_grid); +} + +FunctionSpace output_functionspace( const FunctionSpace& input_functionspace, bool expect_fail ) { + + std::vector all_points { + {360., 90.}, + {360., 0.}, + {360., -90.}, + {0.,0.}, + }; + + // Only keep points that match the input partitioning. + // Note that with following implementation it could be that some points + // are present in two partitions, but it is not a problem for this test purpose. + std::vector points; + auto polygon = util::PolygonXY{input_functionspace.polygon()}; + for (const auto& p : all_points ) { + if (polygon.contains(p)) { + points.emplace_back(p); + } + } + if( expect_fail && mpi::rank() == mpi::size() - 1 ) { + points.emplace_back(720,0.); + } + return PointCloud(points); +} + + +FieldSet create_source_fields(StructuredColumns& fs, idx_t nb_fields, idx_t nb_levels) { + using Value = double; + FieldSet fields_source; + auto lonlat = array::make_view(fs.xy()); + for (idx_t f = 0; f < nb_fields; ++f) { + auto field_source = fields_source.add(fs.createField()); + auto source = array::make_view(field_source); + for (idx_t n = 0; n < fs.size(); ++n) { + for (idx_t k = 0; k < nb_levels; ++k) { + source(n, k) = util::function::vortex_rollup(lonlat(n, LON), lonlat(n, LAT), 0.5 + double(k) / 2); + } + }; + field_source.updateDevice(); + } + return fields_source; +} +FieldSet create_target_fields(FunctionSpace& fs, idx_t nb_fields, idx_t nb_levels) { + using Value = double; + FieldSet fields_target; + for (idx_t f = 0; f < nb_fields; ++f) { + auto field_target = fields_target.add(fs.createField(option::levels(nb_levels))); + field_target.updateDevice(); + } + return fields_target; +} + +void do_test( std::string type, int input_halo, bool matrix_free, bool expect_failure ) { + idx_t nb_fields = 2; + idx_t nb_levels = 3; + + Grid input_grid(input_gridname("O32")); + StructuredColumns input_fs(input_grid, option::levels(nb_levels) | + option::halo(input_halo)); + + FunctionSpace output_fs = output_functionspace(input_fs, expect_failure); + + Interpolation interpolation(option::type(type) | + util::Config("matrix_free",matrix_free) | + util::Config("sparse_matrix_multiply", "hicsparse") | + util::Config("verbose",eckit::Resource("--verbose",false)), + input_fs, output_fs); + + FieldSet fields_source = create_source_fields(input_fs, nb_fields, nb_levels); + FieldSet fields_target = create_target_fields(output_fs, nb_fields, nb_levels); + + interpolation.execute(fields_source, fields_target); +} + +CASE("test structured-bilinear, halo 2, with matrix") { + EXPECT_NO_THROW( do_test("structured-bilinear",2,false,false) ); +} + +CASE("test structured-bilinear, halo 2, with matrix, expected failure") { + EXPECT_THROWS_AS( do_test("structured-bilinear",2,false,true), eckit::Exception ); +} + +CASE("test structured-bilinear, halo 2, without matrix, expected failure") { + EXPECT_THROWS_AS( do_test("structured-bilinear",2,false,true), eckit::Exception ); +} + +CASE("test structured-bilinear, halo 1, with matrix, expected failure") { + EXPECT_THROWS_AS( do_test("structured-bilinear",1,false,false), eckit::Exception ); +} + +CASE("test structured-bicubic, halo 3, with matrix") { + EXPECT_NO_THROW( do_test("structured-bicubic",3,false,false) ); +} + +CASE("test structured-bicubic, halo 2, with matrix") { + EXPECT_THROWS_AS( do_test("structured-bicubic",2,false,false), eckit::Exception ); +} + + +} // namespace test +} // namespace atlas + +int main(int argc, char** argv) { + return atlas::test::run(argc, argv); +} From 9a33983263fb388262d50e700d31c81f3bf739de Mon Sep 17 00:00:00 2001 From: Liam Adams Date: Thu, 21 Nov 2024 19:51:29 +0000 Subject: [PATCH 11/11] Update add interpolation offload test to exercise passing in updated device memory. --- .../test_interpolation_structured2D_gpu.cc | 40 ++++++++++++++++--- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/src/tests/interpolation/test_interpolation_structured2D_gpu.cc b/src/tests/interpolation/test_interpolation_structured2D_gpu.cc index 1e42dc152..4892a336d 100644 --- a/src/tests/interpolation/test_interpolation_structured2D_gpu.cc +++ b/src/tests/interpolation/test_interpolation_structured2D_gpu.cc @@ -65,7 +65,7 @@ template <> const float AdjointTolerance::value = 2.e-5; -void test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend() { +void test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend(const bool start_with_data_on_device) { Grid input_grid(input_gridname("O32")); Grid output_grid(output_gridname("O64")); @@ -83,7 +83,7 @@ void test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend for (idx_t f = 0; f < 3; ++f) { auto field_source = fields_source.add(input_fs.createField(option::name("field " + std::to_string(f)))); fields_target.add(output_fs.createField(option::name("field " + std::to_string(f)))); - + auto source = array::make_view(field_source); for (idx_t n = 0; n < input_fs.size(); ++n) { for (idx_t k = 0; k < 3; ++k) { @@ -105,6 +105,9 @@ void test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend Config("coordinates", "xy")); gmsh.write(output_mesh); output_fs.haloExchange(fields_target); + for (auto& field : fields_target) { + field.setDeviceNeedsUpdate(true); + } gmsh.write(fields_target); } } @@ -116,7 +119,7 @@ void test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend std::vector xAtAx(fields_source.field_names().size(), 0.); FieldSet fields_source_reference; - for (atlas::Field& field : fields_source) { + for (const atlas::Field& field : fields_source) { Field temp_field(field.name(), field.datatype().kind(), field.shape()); temp_field.set_levels(field.levels()); @@ -131,6 +134,16 @@ void test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend fields_source_reference.add(temp_field); } + + if (start_with_data_on_device) { + for (auto& field : fields_source) { + field.updateDevice(); + } + for (auto& field : fields_target) { + field.updateDevice(); + } + } + interpolation.execute(fields_source, fields_target); for (auto& field : fields_target) { field.updateHost(); @@ -156,6 +169,19 @@ void test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend fIndx += 1; } + if (!start_with_data_on_device) { + for (auto& field : fields_source) { + field.syncHostDevice(); + field.deallocateDevice(); + field.setDeviceNeedsUpdate(true); + } + for (auto& field : fields_target) { + field.syncHostDevice(); + field.deallocateDevice(); + field.setDeviceNeedsUpdate(true); + } + } + interpolation.execute_adjoint(fields_source, fields_target); for (auto& field : fields_source) { field.updateHost(); @@ -184,8 +210,12 @@ void test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend } } -CASE("test_interpolation_structured using fs API for fieldset with hicsparse backend") { - test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend(); +CASE("test_interpolation_structured using fs API for fieldset with hicsparse backend (start with data on host)") { + test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend(false); +} + +CASE("test_interpolation_structured using fs API for fieldset with hicsparse backend (start with data on device)") { + test_interpolation_structured_using_fs_API_for_fieldset_w_hicsparse_backend(true); } } // namespace test