From 4d2c9765b88d0e748b1b4589533d43429661484b Mon Sep 17 00:00:00 2001 From: Hugh Bird Date: Thu, 8 Aug 2024 10:09:17 +0100 Subject: [PATCH 1/2] Add generic device; Initial support in portBLAS * oneMKL Interfaces currently only supports known targets: Intel CPU/GPU, AMD GPU, Nvidia GPU * This PR: * Enables a new generic target * Enables the generic target to use the portBLAS backend * Adds documentation --- CMakeLists.txt | 8 +++++ README.md | 11 ++++-- docs/building_the_project_with_dpcpp.rst | 36 ++++++++++++++++++++ include/oneapi/mkl/detail/backends_table.hpp | 8 ++++- include/oneapi/mkl/detail/get_device_id.hpp | 8 +++++ src/CMakeLists.txt | 1 + src/blas/backends/portblas/CMakeLists.txt | 9 ++++- src/config.hpp.in | 2 ++ 8 files changed, 79 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 79af06f6a..bdbc26587 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,9 @@ option(ENABLE_CUFFT_BACKEND "Enable the cuFFT backend for the DFT interface" OFF option(ENABLE_ROCFFT_BACKEND "Enable the rocFFT backend for the DFT interface" OFF) option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interface. Cannot be used with other DFT backends." OFF) +# Generic devices +option(ENABLE_GENERIC_DEVICE "Enable generic devices. Requires the portBLAS backend." OFF) + set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler") set(HIP_TARGETS "" CACHE STRING "Target HIP architectures") @@ -123,6 +126,11 @@ if (ENABLE_PORTFFT_BACKEND message(FATAL_ERROR "ENABLE_PORTFFT_BACKEND cannot be enabled at the same time as other DFT backends.") endif() +if(ENABLE_GENERIC_DEVICE + AND NOT ENABLE_PORTBLAS_BACKEND) + message(FATAL_ERROR "ENABLE_GENERIC_DEVICE requires that the portBLAS backend is enabled.") +endif() + # Define required CXX compilers before project if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++") if(WIN32) diff --git a/README.md b/README.md index e74e3b5ed..37133efc4 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ oneMKL is part of the [UXL Foundation](http://www.uxlfoundation.org). portBLAS - x86 CPU, Intel GPU, NVIDIA GPU, AMD GPU + x86 CPU, Intel GPU, NVIDIA GPU, AMD GPU, Other SYCL devices (unsupported) portFFT @@ -172,7 +172,7 @@ Supported compilers include: - BLAS + BLAS x86 CPU Intel(R) oneMKL Intel DPC++
AdaptiveCpp @@ -221,6 +221,12 @@ Supported compilers include: Open DPC++ Dynamic, Static + + Other SYCL devices (unsupported) + portBLAS + Intel DPC++
Open DPC++ + Dynamic, Static + LAPACK x86 CPU @@ -405,6 +411,7 @@ Supported compilers include: - Intel(R) Data Center GPU Max Series - NVIDIA(R) A100 (Linux* only) - AMD(R) GPUs see [here](https://github.com/RadeonOpenCompute/ROCm#hardware-and-software-support) tested on AMD Vega 20 (gfx906) + - Other SYCL devices can be used, but are not supported --- ### Supported Operating Systems diff --git a/docs/building_the_project_with_dpcpp.rst b/docs/building_the_project_with_dpcpp.rst index 365028237..34688a84e 100644 --- a/docs/building_the_project_with_dpcpp.rst +++ b/docs/building_the_project_with_dpcpp.rst @@ -128,6 +128,9 @@ The most important supported build options are: * - ENABLE_PORTFFT_BACKEND - True, False - False + * - ENABLE_GENERIC_DEVICE + - True, False + - False * - BUILD_FUNCTIONAL_TESTS - True, False - True @@ -225,6 +228,23 @@ A few often-used architectures are listed below: For a host with ROCm installed, the device architecture can be retrieved via the ``rocminfo`` tool. The architecture will be displayed in the ``Name:`` row. +.. _build_for_other_SYCL_devices: + +Building for other SYCL devices +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +SYCL enables portable heterogeneous computing on a wide range of accelerators. +Consequently, it is possible to use oneMKL Interfaces with accelerators not +anticipated by the oneMKL Interfaces team. This can be enabled using the +``-DENABLE_GENERIC_DEVICE=ON`` option. However, this is not a supported +configuration. + +For generic SYCL devices, only the portBLAS backend is enabled. The user must +set the appropriate ``-fsycl-targets`` for their device, and also any +``PORTBLAS_TUNING_TARGET`` required for performance. See +`Building for portBLAS`_. Extensive testing is strongly advised for these +unsupported configurations. + .. _build_for_portlibs_dpcpp: Pure SYCL backends: portBLAS and portFFT @@ -408,6 +428,22 @@ set, the backend libraries to enable the use of BLAS, LAPACK and RNG with MKLGPU and MKLCPU would also be enabled. The build of examples is disabled. Since functional testing was not disabled, tests would be built. +Build oneMKL for the BLAS domain on a generic SYCL device: + +.. code-block:: bash + + cmake $ONEMKL_DIR \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_C_COMPILER=clang \ + -DENABLE_MKLCPU_BACKEND=False \ + -DENABLE_MKLGPU_BACKEND=False \ + -DENABLE_PORTBLAS_BACKEND=True \ + -DENABLE_GENERIC_DEVICE=True + +Note that this is not a supported configuration. This builds oneMKL Interfaces +with the portBLAS backend only, for a generic SYCL device supported by the +Open DPC++ project. + .. _project_cleanup: Project Cleanup diff --git a/include/oneapi/mkl/detail/backends_table.hpp b/include/oneapi/mkl/detail/backends_table.hpp index 8e68674cc..8594de13d 100644 --- a/include/oneapi/mkl/detail/backends_table.hpp +++ b/include/oneapi/mkl/detail/backends_table.hpp @@ -40,7 +40,7 @@ namespace oneapi { namespace mkl { -enum class device : uint16_t { x86cpu, intelgpu, nvidiagpu, amdgpu }; +enum class device : uint16_t { x86cpu, intelgpu, nvidiagpu, amdgpu, generic_device }; enum class domain : uint16_t { blas, dft, lapack, rng, sparse_blas }; static std::map>> libraries = { @@ -82,6 +82,12 @@ static std::map>> libraries = #endif #ifdef ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU LIB_NAME("blas_portblas"), +#endif + } }, + { device::generic_device, + { +#ifdef ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE + LIB_NAME("blas_portblas"), #endif } } } }, diff --git a/include/oneapi/mkl/detail/get_device_id.hpp b/include/oneapi/mkl/detail/get_device_id.hpp index 88b235754..91b460110 100644 --- a/include/oneapi/mkl/detail/get_device_id.hpp +++ b/include/oneapi/mkl/detail/get_device_id.hpp @@ -59,11 +59,19 @@ inline oneapi::mkl::device get_device_id(sycl::queue &queue) { else if (vendor_id == AMD_ID) device_id = device::amdgpu; else { +#ifdef ENABLE_GENERIC_DEVICE + device_id = device::generic_device; +#else throw unsupported_device("", "", queue.get_device()); +#endif // ENABLE_GENERIC_DEVICE } } else { +#ifdef ENABLE_GENERIC_DEVICE + device_id = device::generic_device; +#else throw unsupported_device("", "", queue.get_device()); +#endif // ENABLE_GENERIC_DEVICE } return device_id; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0b632c1bd..918e0dc2b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -29,6 +29,7 @@ set(ENABLE_PORTBLAS_BACKEND_INTEL_CPU OFF CACHE INTERNAL "") set(ENABLE_PORTBLAS_BACKEND_INTEL_GPU OFF CACHE INTERNAL "") set(ENABLE_PORTBLAS_BACKEND_AMD_GPU OFF CACHE INTERNAL "") set(ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU OFF CACHE INTERNAL "") +set(ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE OFF CACHE INTERNAL "") # store path to CMAKE_CURRENT_BINARY_DIR to use it later (makes FetchContent_Declare workable) set(ONEMKL_GENERATED_INCLUDE_PATH ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/src/blas/backends/portblas/CMakeLists.txt b/src/blas/backends/portblas/CMakeLists.txt index 03fddbb38..abb8586fe 100644 --- a/src/blas/backends/portblas/CMakeLists.txt +++ b/src/blas/backends/portblas/CMakeLists.txt @@ -46,7 +46,14 @@ if(NUM_TARGETS EQUAL 0) list(LENGTH SYCL_TARGETS NUM_TARGETS) endif() -if(PORTBLAS_TUNING_TARGET) +if(ENABLE_GENERIC_DEVICE) + set(ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE "ON" CACHE INTERAL "") + if(NOT PORTBLAS_TUNING_TARGET) + # If a generic device is specified, set the tuning target to default for best compatibility. + message(STATUS "Setting DEFAULT portBLAS tuning target for generic device.") + set(PORTBLAS_TUNING_TARGET "DEFAULT") + endif() +elseif (PORTBLAS_TUNING_TARGET) # Allow the user to manually enable a specific device type # for tuned portBLAS configurations and sets sycl-target. if(PORTBLAS_TUNING_TARGET STREQUAL "INTEL_CPU") diff --git a/src/config.hpp.in b/src/config.hpp.in index 5698abf9b..e68b3b10e 100644 --- a/src/config.hpp.in +++ b/src/config.hpp.in @@ -32,12 +32,14 @@ #cmakedefine ENABLE_PORTBLAS_BACKEND_INTEL_CPU #cmakedefine ENABLE_PORTBLAS_BACKEND_INTEL_GPU #cmakedefine ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU +#cmakedefine ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE #cmakedefine ENABLE_PORTFFT_BACKEND #cmakedefine ENABLE_ROCBLAS_BACKEND #cmakedefine ENABLE_ROCFFT_BACKEND #cmakedefine ENABLE_ROCRAND_BACKEND #cmakedefine ENABLE_ROCSOLVER_BACKEND #cmakedefine BUILD_SHARED_LIBS +#cmakedefine ENABLE_GENERIC_DEVICE #cmakedefine REF_BLAS_LIBNAME "@REF_BLAS_LIBNAME@" #cmakedefine REF_CBLAS_LIBNAME "@REF_CBLAS_LIBNAME@" From 2dc654e0f23e301634a4ebc057977bbda6a4de06 Mon Sep 17 00:00:00 2001 From: Hugh Bird Date: Wed, 14 Aug 2024 16:22:14 +0100 Subject: [PATCH 2/2] CMake typo; Add -fno-sycl-instrument-device-code --- src/blas/backends/portblas/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/blas/backends/portblas/CMakeLists.txt b/src/blas/backends/portblas/CMakeLists.txt index abb8586fe..3612de5dd 100644 --- a/src/blas/backends/portblas/CMakeLists.txt +++ b/src/blas/backends/portblas/CMakeLists.txt @@ -47,7 +47,8 @@ if(NUM_TARGETS EQUAL 0) endif() if(ENABLE_GENERIC_DEVICE) - set(ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE "ON" CACHE INTERAL "") + set(ENABLE_PORTBLAS_BACKEND_GENERIC_DEVICE "ON" CACHE INTERNAL "") + target_compile_options(ONEMKL::SYCL::SYCL INTERFACE -fno-sycl-instrument-device-code) if(NOT PORTBLAS_TUNING_TARGET) # If a generic device is specified, set the tuning target to default for best compatibility. message(STATUS "Setting DEFAULT portBLAS tuning target for generic device.")