diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 03bca891..b8ce2db2 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -1,113 +1,783 @@ name: Presubmit + on: [push, pull_request] jobs: format: name: Code formatting - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest + defaults: + run: + shell: bash steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: + # repository: ${{ github.repository }} (default) fetch-depth: 0 - name: Install clang-format - run: sudo apt-get install clang-format clang-format-9 + run: sudo apt-get install clang-format - name: Check format - run: ./scripts/check-format.sh - build: + run: $GITHUB_WORKSPACE/scripts/check-format.sh + origin/`if [[ "${{ github.event_name }}" == "push" ]]; then echo "main"; else echo "${{ github.base_ref }}"; fi` + --binary clang-format + + linux: + runs-on: ubuntu-latest needs: format - name: Build ${{ matrix.os }} ${{ matrix.compiler }} ${{ matrix.deps }} - runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash strategy: matrix: - os: [ubuntu-20.04, macos-latest] - compiler: [gcc, clang] - deps: [os, fetch, vcpkg] + CMAKE: [3.26.4] + COMPILER: + - C_NAME: gcc + CXX_NAME: g++ + VER: 11 + EXCLUSIVE_C_FLAGS: -Wno-maybe-uninitialized + - C_NAME: gcc + CXX_NAME: g++ + VER: 13 + EXCLUSIVE_C_FLAGS: -Wno-maybe-uninitialized + - C_NAME: clang + CXX_NAME: clang++ + VER: 14 + EXCLUSIVE_C_FLAGS: "" + - C_NAME: clang + CXX_NAME: clang++ + VER: 16 + EXCLUSIVE_C_FLAGS: "" + DEPS: [system, vcpkg, fetch] + BIN: [64] + STD: + - C: 11 # Utils C library uses C11 functions (e.g. timespec_get) + CXX: 14 # Utils C++ library uses C14 types (e.g. integer_sequence) + - C: 17 + CXX: 17 + CONF: + - GEN: Unix Makefiles + CONFIG: Debug + - GEN: Unix Makefiles + CONFIG: Release + - GEN: Ninja Multi-Config + CONFIG: Release + IMAGE: + - khronosgroup/docker-images:opencl-sdk-intelcpu-ubuntu-22.04.20230717 + include: + - CMAKE: system + COMPILER: + C_NAME: gcc + CXX_NAME: g++ + VER: 9 + # A warning in libstb-dev + EXCLUSIVE_C_FLAGS: "-Wno-type-limits" + EXCLUSIVE_CXX_FLAGS: "-Wno-type-limits" + DEPS: system + BIN: 64 + STD: + C: 11 + CXX: 14 + CONF: + GEN: Unix Makefiles + CONFIG: Debug + IMAGE: khronosgroup/docker-images:opencl-sdk-intelcpu-ubuntu-20.04.20230717 + - CMAKE: system + COMPILER: + C_NAME: gcc + CXX_NAME: g++ + VER: 9 + # A warning in libstb-dev + EXCLUSIVE_C_FLAGS: "-Wno-type-limits" + EXCLUSIVE_CXX_FLAGS: "-Wno-type-limits" + DEPS: system + BIN: 64 + STD: + C: 11 + CXX: 14 + CONF: + GEN: Unix Makefiles + CONFIG: Release + IMAGE: khronosgroup/docker-images:opencl-sdk-intelcpu-ubuntu-20.04.20230717 + - CMAKE: system + COMPILER: + C_NAME: gcc + CXX_NAME: g++ + VER: 9 + EXCLUSIVE_C_FLAGS: "" + DEPS: vcpkg + BIN: 32 + STD: + C: 11 + CXX: 14 + CONF: + GEN: Unix Makefiles + CONFIG: Debug + IMAGE: khronosgroup/docker-images:opencl-sdk-intelcpu-ubuntu-20.04.20230717 + - CMAKE: system + COMPILER: + C_NAME: gcc + CXX_NAME: g++ + VER: 9 + EXCLUSIVE_C_FLAGS: "" + DEPS: vcpkg + BIN: 32 + STD: + C: 11 + CXX: 14 + CONF: + GEN: Unix Makefiles + CONFIG: Release + IMAGE: khronosgroup/docker-images:opencl-sdk-intelcpu-ubuntu-20.04.20230717 + container: ${{matrix.IMAGE}} + env: + CMAKE_EXE: /opt/Kitware/CMake/${{ matrix.CMAKE }}/bin/cmake + CPACK_EXE: /opt/Kitware/CMake/${{ matrix.CMAKE }}/bin/cpack + CTEST_EXE: /opt/Kitware/CMake/${{ matrix.CMAKE }}/bin/ctest + DEB_INSTALLATION_PATH: /usr + CC: ${{matrix.COMPILER.C_NAME}}-${{matrix.COMPILER.VER}} + CXX: ${{matrix.COMPILER.CXX_NAME}}-${{matrix.COMPILER.VER}} steps: - - uses: actions/checkout@v2 - with: + - name: Set up vcpkg triplet + if: matrix.DEPS == 'vcpkg' + run: if [[ "${{ matrix.BIN }}" == "64" ]]; then + echo "VCPKG_TRIPLET=x64-linux" >> $GITHUB_ENV; + else + echo "VCPKG_TRIPLET=x86-linux" >> $GITHUB_ENV; + fi + + - name: Install system CMake + if: matrix.CMAKE == 'system' + run: apt-get update -qq && apt-get install -y cmake && + echo "CMAKE_EXE=cmake" >> "$GITHUB_ENV" && + echo "CTEST_EXE=ctest" >> "$GITHUB_ENV" && + echo "CPACK_EXE=cpack" >> "$GITHUB_ENV" + + - name: Install dependencies (system) + if: matrix.DEPS == 'system' + run: apt-get update -qq && apt-get install -y libfreetype-dev libsfml-dev libglm-dev libglew-dev libtclap-dev libstb-dev + + - name: Cache dependencies (vcpkg) + if: matrix.DEPS == 'vcpkg' + id: vcpkg-install + uses: actions/cache@v4 + with: + path: /opt/Microsoft/vcpkg + key: vcpkg-linux-${{matrix.BIN}} + + - name: Install dependencies (vcpkg) + if: matrix.DEPS == 'vcpkg' && steps.vcpkg-install.outputs.cache-hit != 'true' + run: | + cd /opt/Microsoft/vcpkg + git pull + ./bootstrap-vcpkg.sh + ./vcpkg --triplet=$VCPKG_TRIPLET install tclap stb + # It is not possible to cross-compile the OpenGL samples on Ubuntu + # because system dev dependencies are not available for i386 + if [[ "${{ matrix.BIN }}" == "64" ]]; then + ./vcpkg --triplet=$VCPKG_TRIPLET install sfml glm glew; + fi + + - name: Set up compiler flags + run: | + # Excluding missing-field-initializers error because it comes from the Std dependency + # Excluding maybe-uninitialized error because cannot workaround the compiler issuing this error + # Not using -pedantic: error: ISO C forbids braced-groups within expressions + echo "CFLAGS=-Wall -Wextra -Werror -m${{matrix.BIN}} -Wno-missing-field-initializers ${{ matrix.COMPILER.EXCLUSIVE_C_FLAGS }}" >> $GITHUB_ENV; + # Excluding missing-field-initializers error because it comes from the Std dependency + echo "CXXFLAGS=-Wall -Wextra -pedantic -Werror -m${{matrix.BIN}} -Wno-missing-field-initializers ${{ matrix.COMPILER.EXCLUSIVE_CXX_FLAGS }}" >> $GITHUB_ENV; + + - name: Checkout OpenCL-SDK + uses: actions/checkout@v4 + with: fetch-depth: 0 submodules: recursive - - name: Build - run: | - if [[ "${{ matrix.os }}" == "ubuntu-20.04" ]]; then - sudo apt update - sudo apt install -y libidn11 libx11-dev libxrandr-dev libxcursor-dev libxi-dev mesa-common-dev libgl1-mesa-dev libglu1-mesa-dev libudev-dev \ - `if [[ "${{matrix.deps}}" == "os" ]]; then echo libtclap-dev libglm-dev libglew-dev libsfml-dev libstb-dev; fi;` - if [[ "${{matrix.deps}}" == "vcpkg" ]]; then - git clone https://github.com/Microsoft/vcpkg.git - ./vcpkg/bootstrap-vcpkg.sh - ./vcpkg/vcpkg install sfml tclap glm glew stb - TOOLCHAIN_ARG="-D CMAKE_TOOLCHAIN_FILE=./vcpkg/scripts/buildsystems/vcpkg.cmake" - else - TOOLCHAIN_ARG="" - fi - if [[ "${{matrix.deps}}" == "fetch" ]]; then - sudo apt remove -y libtclap-dev libglm-dev libglew-dev libsfml-dev libstb-dev libfreetype6-dev - fi - elif [[ "${{ matrix.os }}" == "macos-latest" ]]; then - brew install tclap glm glew sfml mesa-glu - git clone https://github.com/Microsoft/vcpkg.git - ./vcpkg/bootstrap-vcpkg.sh - ./vcpkg/vcpkg install stb - TOOLCHAIN_ARG="-D CMAKE_TOOLCHAIN_FILE=./vcpkg/scripts/buildsystems/vcpkg.cmake" - fi - if [[ "${{ matrix.compiler }}" == "gcc" ]]; then - CC=gcc - CXX=g++ - elif [[ "${{ matrix.compiler }}" == "clang" ]]; then - CC=clang - CXX=clang++ - fi - cmake \ - -D CMAKE_C_COMPILER=$CC \ - -D CMAKE_CXX_COMPILER=$CXX \ - $TOOLCHAIN_ARG \ - -S . -B build - cmake \ - --build ./build \ - --verbose \ - --parallel `numproc` - buildwin: - needs: format - name: Build Windows ${{ matrix.deps }} + + - name: Install samples dependencies + run: | + if [[ "${{ matrix.BIN }}" == "64" ]]; then + apt-get update -qq; + apt-get install -y libvulkan-dev; + else + dpkg --add-architecture i386; + apt-get update -qq; + apt-get install -y libvulkan-dev:i386; + fi + + - name: Configure, package & install OpenCL-Headers + run: $CMAKE_EXE + -G "${{matrix.CONF.GEN}}" + `if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; then echo "-D CMAKE_BUILD_TYPE=${{matrix.CONF.CONFIG}}"; fi` + -D BUILD_TESTING=OFF + -D CPACK_PACKAGING_INSTALL_PREFIX=$DEB_INSTALLATION_PATH + -S $GITHUB_WORKSPACE/external/OpenCL-Headers + -B $GITHUB_WORKSPACE/external/OpenCL-Headers/build && + $CPACK_EXE + --config "$GITHUB_WORKSPACE/external/OpenCL-Headers/build/CPackConfig.cmake" + -G DEB + -C ${{matrix.CONF.CONFIG}} + -B "$GITHUB_WORKSPACE/external/OpenCL-Headers/package-deb" && + dpkg -i $GITHUB_WORKSPACE/external/OpenCL-Headers/package-deb/*.deb + + - name: Configure, package & install OpenCL-ICD-Loader + run: $CMAKE_EXE + -G "${{matrix.CONF.GEN}}" + `if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; then echo "-D CMAKE_BUILD_TYPE=${{matrix.CONF.CONFIG}}"; fi` + -D BUILD_TESTING=OFF + -D CPACK_PACKAGING_INSTALL_PREFIX=$DEB_INSTALLATION_PATH + -S $GITHUB_WORKSPACE/external/OpenCL-ICD-Loader + -B $GITHUB_WORKSPACE/external/OpenCL-ICD-Loader/build && + if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; + then + $CMAKE_EXE --build $GITHUB_WORKSPACE/external/OpenCL-ICD-Loader/build --parallel `nproc`; + else + $CMAKE_EXE --build $GITHUB_WORKSPACE/external/OpenCL-ICD-Loader/build --config Debug --parallel `nproc`; + $CMAKE_EXE --build $GITHUB_WORKSPACE/external/OpenCL-ICD-Loader/build --config Release --parallel `nproc`; + fi && + $CPACK_EXE + --config "$GITHUB_WORKSPACE/external/OpenCL-ICD-Loader/build/CPackConfig.cmake" + -G DEB + -C ${{matrix.CONF.CONFIG}} + -B "$GITHUB_WORKSPACE/external/OpenCL-ICD-Loader/package-deb" && + dpkg -i $GITHUB_WORKSPACE/external/OpenCL-ICD-Loader/package-deb/*.deb + + - name: Configure, package & install OpenCL-CLHPP + run: $CMAKE_EXE + -G "${{matrix.CONF.GEN}}" + `if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; then echo "-D CMAKE_BUILD_TYPE=${{matrix.CONF.CONFIG}}"; fi` + -D BUILD_TESTING=OFF + -D BUILD_EXAMPLES=OFF + -D BUILD_DOCS=OFF + -D CPACK_PACKAGING_INSTALL_PREFIX=$DEB_INSTALLATION_PATH + -S $GITHUB_WORKSPACE/external/OpenCL-CLHPP + -B $GITHUB_WORKSPACE/external/OpenCL-CLHPP/build && + $CPACK_EXE + --config "$GITHUB_WORKSPACE/external/OpenCL-CLHPP/build/CPackConfig.cmake" + -G DEB + -C ${{matrix.CONF.CONFIG}} + -B "$GITHUB_WORKSPACE/external/OpenCL-CLHPP/package-deb" && + dpkg -i $GITHUB_WORKSPACE/external/OpenCL-CLHPP/package-deb/*.deb + + - name: Configure + run: $CMAKE_EXE + -G "${{matrix.CONF.GEN}}" + `if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; then echo "-D CMAKE_BUILD_TYPE=${{matrix.CONF.CONFIG}}"; fi` + `if [[ "${{matrix.DEPS}}" == "vcpkg" ]]; then echo "-D CMAKE_TOOLCHAIN_FILE=/opt/Microsoft/vcpkg/scripts/buildsystems/vcpkg.cmake"; fi;` + `if [[ "${{matrix.DEPS}}" == "vcpkg" ]]; then echo "-D VCPKG_TARGET_TRIPLET=$VCPKG_TRIPLET"; fi;` + -D BUILD_DOCS=ON + -D BUILD_TESTING=ON + -D BUILD_EXAMPLES=ON + -D OPENCL_SDK_BUILD_SAMPLES=ON + `if [[ "${{matrix.BIN}}" == "32" ]]; then echo "-D OPENCL_SDK_BUILD_OPENGL_SAMPLES=OFF"; fi;` + -D OPENCL_ICD_LOADER_BUILD_TESTING=ON + -D CMAKE_C_STANDARD=${{matrix.STD.C}} + -D CMAKE_CXX_STANDARD=${{matrix.STD.CXX}} + -D CMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/install + -D CPACK_PACKAGING_INSTALL_PREFIX=$DEB_INSTALLATION_PATH + -S $GITHUB_WORKSPACE + -B $GITHUB_WORKSPACE/build + + - name: Build + run: if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; + then + $CMAKE_EXE --build $GITHUB_WORKSPACE/build --parallel `nproc`; + else + $CMAKE_EXE --build $GITHUB_WORKSPACE/build --config Debug; + $CMAKE_EXE --build $GITHUB_WORKSPACE/build --config Release; + fi; + + - name: Test + if: matrix.BIN != 32 + working-directory: ${{runner.workspace}}/OpenCL-SDK/build + run: if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; + then + $CTEST_EXE --output-on-failure --no-tests=error -C ${{matrix.CONF.CONFIG}} --parallel `nproc`; + else + $CTEST_EXE --output-on-failure --no-tests=error -C Debug --parallel `nproc`; + $CTEST_EXE --output-on-failure --no-tests=error -C Release --parallel `nproc`; + fi; + + - name: Package DEB + run: $CPACK_EXE + --config "$GITHUB_WORKSPACE/build/CPackConfig.cmake" + -G DEB + -C ${{matrix.CONF.CONFIG}} + -B "$GITHUB_WORKSPACE/package-deb" + + - name: Consume (DEB) + run: dpkg -i $GITHUB_WORKSPACE/package-deb/*.deb && + $CMAKE_EXE + -G "${{matrix.CONF.GEN}}" + `if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; then echo "-D CMAKE_BUILD_TYPE=${{matrix.CONF.CONFIG}}"; fi` + -D CMAKE_C_STANDARD=${{matrix.STD.C}} + -D CMAKE_CXX_STANDARD=${{matrix.STD.CXX}} + -S $GITHUB_WORKSPACE/test/cmake/pkgconfig/useutil + -B $GITHUB_WORKSPACE/build_package && + if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; + then + $CMAKE_EXE --build $GITHUB_WORKSPACE/build_package --parallel `nproc`; + else + $CMAKE_EXE --build $GITHUB_WORKSPACE/build_package --config Debug; + $CMAKE_EXE --build $GITHUB_WORKSPACE/build_package --config Release; + fi + + - name: Run consume test (DEB) + if: matrix.BIN != 32 + working-directory: ${{runner.workspace}}/OpenCL-SDK/build_package + run: if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; + then + $CTEST_EXE -C ${{matrix.CONF.CONFIG}} --no-tests=error --output-on-failure --parallel `nproc`; + else + $CTEST_EXE -C Debug --output-on-failure --no-tests=error --parallel `nproc`; + $CTEST_EXE -C Release --output-on-failure --no-tests=error --parallel `nproc`; + fi + + - name: Run clinfo (DEB) + run: clinfo + + - name: Uninstall (DEB) + run: apt-get remove -y "khronos-opencl-loader*" opencl-c-headers opencl-clhpp-headers opencl-sdk clinfo + + - name: Test install + run: $CMAKE_EXE + --build $GITHUB_WORKSPACE/build + --target install + --config ${{matrix.CONF.CONFIG}} + --parallel `nproc` + + - name: Consume (install) + run: $CMAKE_EXE + -G "${{matrix.CONF.GEN}}" + `if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; then echo "-D CMAKE_BUILD_TYPE=${{matrix.CONF.CONFIG}}"; fi` + -D CMAKE_PREFIX_PATH=$GITHUB_WORKSPACE/install + -D CMAKE_C_STANDARD=${{matrix.STD.C}} + -D CMAKE_CXX_STANDARD=${{matrix.STD.CXX}} + -S $GITHUB_WORKSPACE/test/cmake/pkgconfig/useutil + -B $GITHUB_WORKSPACE/build_install && + if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; + then + $CMAKE_EXE --build $GITHUB_WORKSPACE/build_install --parallel `nproc`; + else + $CMAKE_EXE --build $GITHUB_WORKSPACE/build_install --config Debug; + $CMAKE_EXE --build $GITHUB_WORKSPACE/build_install --config Release; + fi + + - name: Run consume test (install) + if: matrix.BIN != 32 + working-directory: ${{runner.workspace}}/OpenCL-SDK/build_install + run: if [[ "${{matrix.CONF.GEN}}" == "Unix Makefiles" ]]; + then + $CTEST_EXE -C ${{matrix.CONF.CONFIG}} --output-on-failure --no-tests=error --parallel `nproc`; + else + $CTEST_EXE -C Debug --output-on-failure --no-tests=error --parallel `nproc`; + $CTEST_EXE -C Release --output-on-failure --no-tests=error --parallel `nproc`; + fi + + windows: runs-on: windows-latest + needs: format + defaults: + run: + shell: pwsh strategy: matrix: - deps: [fetch, vcpkg] + VER: [v142, v143, clangcl] + GEN: [Visual Studio 17 2022, Ninja Multi-Config] + DEPS: [vcpkg, fetch] + BIN: [x64] + STD: + - C: 11 + CXX: 14 + - C: 17 + CXX: 17 + exclude: + - VER: clangcl + GEN: Ninja Multi-Config + include: + - VER: v142 + GEN: Visual Studio 17 2022 + BIN: x86 + DEPS: fetch + STD: + C: 11 + CXX: 14 + env: + NINJA_URL: https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-win.zip + NINJA_ROOT: C:\Tools\Ninja + VS_ROOT: 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' + UseMultiToolTask: true # Better parallel MSBuild execution + EnforceProcessCountAcrossBuilds: 'true' # -=- + MultiProcMaxCount: '3' # -=- + WindowsSDKVersion: '10.0.22621.0' + INTEL_OCL_URL: https://github.com/intel/llvm/releases/download/2023-WW27/win-oclcpuexp-2023.16.6.0.28_rel.zip + INTEL_TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.10.0/oneapi-tbb-2021.10.0-win.zip + IMAGE_INTEL_PREFIX: C:\Tools\Intel + VULKAN_SDK_URL: https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe + VULKAN_SDK: C:/VulkanSDK/1.3.261.1 + steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - submodules: recursive - - name: Build - shell: pwsh - run: | - if ('${{ matrix.deps }}' -eq 'vcpkg') { - Get-ChildItem Env:\ - & ${env:VCPKG_INSTALLATION_ROOT}\vcpkg.exe --triplet=x64-windows install sfml tclap glm glew stb - $TOOLCHAIN_ARG="-D CMAKE_TOOLCHAIN_FILE=${env:VCPKG_INSTALLATION_ROOT}\scripts\buildsystems\vcpkg.cmake" - } else { - $TOOLCHAIN_ARG='' - } - cmake ` - $TOOLCHAIN_ARG ` - -S . -B build - cmake ` - --build ./build ` + - name: Cache Ninja install + if: matrix.GEN == 'Ninja Multi-Config' + id: ninja-install + uses: actions/cache@v4 + with: + path: | + C:\Tools\Ninja + key: ${{runner.os}}-ninja-${{env.NINJA_URL}} + + - name: Install Ninja + if: matrix.GEN == 'Ninja Multi-Config' && steps.ninja-install.outputs.cache-hit != 'true' + run: | + Invoke-WebRequest ${env:NINJA_URL} -OutFile ~\Downloads\ninja-win.zip + Expand-Archive ~\Downloads\ninja-win.zip -DestinationPath ${env:NINJA_ROOT}\ + Remove-Item ~\Downloads\* + + - name: Install OpenCL runtime + if: matrix.BIN != 'x86' + run: | + $INTEL_OCL_ARCHIVE_NAME = Split-Path ${env:INTEL_OCL_URL} -Leaf; ` + Invoke-WebRequest ${env:INTEL_OCL_URL} -OutFile ${env:TEMP}\$INTEL_OCL_ARCHIVE_NAME; ` + Expand-Archive ${env:TEMP}\$INTEL_OCL_ARCHIVE_NAME -DestinationPath ${env:IMAGE_INTEL_PREFIX}\oclcpuexp; ` + Remove-Item ${env:TEMP}\$INTEL_OCL_ARCHIVE_NAME; ` + $INTEL_TBB_ARCHIVE_NAME = Split-Path ${env:INTEL_TBB_URL} -Leaf; ` + Invoke-WebRequest ${env:INTEL_TBB_URL} -OutFile ${env:TEMP}\$INTEL_TBB_ARCHIVE_NAME; ` + Expand-Archive ${env:TEMP}\$INTEL_TBB_ARCHIVE_NAME -DestinationPath ${env:IMAGE_INTEL_PREFIX}; ` + Get-ChildItem ${env:IMAGE_INTEL_PREFIX}\oneapi-tbb* | Rename-Item -NewName oneapi-tbb; ` + Remove-Item ${env:TEMP}\$INTEL_TBB_ARCHIVE_NAME; ` + New-Item -Type Directory ${env:IMAGE_INTEL_PREFIX}\oclcpuexp\tbb | Out-Null; ` + Get-ChildItem ${env:IMAGE_INTEL_PREFIX}\oneapi-tbb\redist\intel64\vc14\*.dll | ForEach-Object { New-Item -Type SymbolicLink -Path ${env:IMAGE_INTEL_PREFIX}\oclcpuexp\tbb -Name $_.Name -Value $_.FullName | Out-Null; }; ` + New-Item -Type Directory HKLM:\SOFTWARE\Khronos\OpenCL -Force | Out-Null; ` + New-Item -Type File HKLM:\SOFTWARE\Khronos\OpenCL\Vendors | Out-Null; ` + Set-ItemProperty -Path HKLM:\SOFTWARE\Khronos\OpenCL\Vendors -Name ${env:IMAGE_INTEL_PREFIX}\oclcpuexp\intelocl64.dll -Type DWord -Value 0; + + - name: Install samples dependencies + run: | + Invoke-WebRequest ${env:VULKAN_SDK_URL} -OutFile vulkan-sdk-installer.exe + .\vulkan-sdk-installer.exe --accept-licenses --default-answer --confirm-command install com.lunarg.vulkan.32bit + Remove-Item vulkan-sdk-installer.exe + if ('${{ matrix.BIN }}' -eq 'x64') + { + echo "Vulkan_LIB_DIR=$env:VULKAN_SDK/Lib" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + } else { + echo "Vulkan_LIB_DIR=$env:VULKAN_SDK/Lib32" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + } + + - name: Cache dependencies (vcpkg) + if: matrix.DEPS == 'vcpkg' + id: vcpkg-install + uses: actions/cache@v4 + with: + path: | + C:\vcpkg + key: x64-windows-sfml-tclap-glm-glew-stb + + - name: Install dependencies (vcpkg) + if: matrix.DEPS == 'vcpkg' && steps.vcpkg-install.outputs.cache-hit != 'true' + run: C:\vcpkg\vcpkg.exe --triplet=x64-windows install sfml tclap glm glew stb + + - name: Set up compiler flags + run: | + echo "CFLAGS=/W4 /WX" >> $GITHUB_ENV + echo "CXXFLAGS=/W4 /WX" >> $GITHUB_ENV + + - name: Checkout OpenCL-SDK + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + + - name: Configure (MSBuild) + if: matrix.GEN == 'Visual Studio 17 2022' + run: | + if ('${{ matrix.DEPS }}' -eq 'vcpkg') { + $TOOLCHAIN_ARG="-D CMAKE_TOOLCHAIN_FILE=${env:VCPKG_INSTALLATION_ROOT}\scripts\buildsystems\vcpkg.cmake" + } else { + $TOOLCHAIN_ARG='' + } + $BIN = if('${{matrix.BIN}}' -eq 'x86') {'Win32'} else {'x64'} + & cmake ` + $TOOLCHAIN_ARG ` + -G "${{matrix.GEN}}" ` + -A $BIN ` + -T ${{matrix.VER}} ` + -D BUILD_DOCS=ON ` + -D BUILD_TESTING=ON ` + -D OPENCL_SDK_BUILD_SAMPLES=ON ` + -D CMAKE_C_STANDARD=${{matrix.STD.C}} ` + -D CMAKE_CXX_STANDARD=${{matrix.STD.CXX}} ` + -D CMAKE_INSTALL_PREFIX=${env:GITHUB_WORKSPACE}\install ` + -D Vulkan_INCLUDE_DIR=${env:VULKAN_SDK}/Include ` + -D Vulkan_LIBRARY=${env:Vulkan_LIB_DIR}/vulkan-1.lib ` + -S ${env:GITHUB_WORKSPACE} ` + -B ${env:GITHUB_WORKSPACE}\build + if ($LASTEXITCODE -ne 0) { throw "Configuring OpenCL-SDK failed." } + + - name: Configure (Ninja Multi-Config) + if: matrix.GEN == 'Ninja Multi-Config' + run: | + if ('${{ matrix.DEPS }}' -eq 'vcpkg') { + $TOOLCHAIN_ARG="-D CMAKE_TOOLCHAIN_FILE=${env:VCPKG_INSTALLATION_ROOT}\scripts\buildsystems\vcpkg.cmake" + } else { + $TOOLCHAIN_ARG='' + } + $VER = switch ('${{matrix.VER}}') { ` + 'v142' {'14.2'} ` + 'v143' {'14.4'} } + Import-Module "${env:VS_ROOT}\Common7\Tools\Microsoft.VisualStudio.DevShell.dll" + Enter-VsDevShell -VsInstallPath ${env:VS_ROOT} -SkipAutomaticLocation -DevCmdArguments "-host_arch=x64 -arch=${{matrix.BIN}} -vcvars_ver=$VER" + & cmake ` + $TOOLCHAIN_ARG ` + -G "${{matrix.GEN}}" ` + -D CMAKE_MAKE_PROGRAM="${env:NINJA_ROOT}\ninja.exe" ` + -D BUILD_DOCS=ON ` + -D BUILD_TESTING=ON ` + -D OPENCL_SDK_BUILD_SAMPLES=ON ` + -D CMAKE_C_STANDARD=${{matrix.STD.C}} ` + -D CMAKE_CXX_STANDARD=${{matrix.STD.CXX}} ` + -D CMAKE_EXE_LINKER_FLAGS=/INCREMENTAL ` + -D CMAKE_INSTALL_PREFIX=${env:GITHUB_WORKSPACE}\install ` + -D Vulkan_INCLUDE_DIR=${env:VULKAN_SDK}/Include ` + -D Vulkan_LIBRARY=${env:Vulkan_LIB_DIR}/vulkan-1.lib ` + -S ${env:GITHUB_WORKSPACE} ` + -B ${env:GITHUB_WORKSPACE}\build + if ($LASTEXITCODE -ne 0) { throw "Configuring OpenCL-SDK failed." } + + - name: Build (MSBuild) + if: matrix.GEN == 'Visual Studio 17 2022' + run: | + foreach ($Config in 'Release','Debug') { ` + & cmake ` + --build "${env:GITHUB_WORKSPACE}\build" ` + --config $Config ` -- ` /verbosity:minimal ` /maxCpuCount ` /noLogo + if ($LASTEXITCODE -ne 0) { throw "Building OpenCL-SDK in $Config failed." } + } + + - name: Build (Ninja Multi-Config) + if: matrix.GEN == 'Ninja Multi-Config' + run: | + $VER = switch ('${{matrix.VER}}') { ` + 'v142' {'14.2'} ` + 'v143' {'14.4'} } + Import-Module "${env:VS_ROOT}\Common7\Tools\Microsoft.VisualStudio.DevShell.dll" + Enter-VsDevShell -VsInstallPath ${env:VS_ROOT} -SkipAutomaticLocation -DevCmdArguments "-host_arch=x64 -arch=${{matrix.BIN}} -vcvars_ver=$VER" + foreach ($Config in 'Release','Debug') { ` + & cmake ` + --build "${env:GITHUB_WORKSPACE}\build" ` + --config $Config + if ($LASTEXITCODE -ne 0) { throw "Building OpenCL-SDK in $Config failed." } + } + + - name: Test + if: matrix.BIN != 'x86' + working-directory: ${{runner.workspace}}/OpenCL-SDK/build + run: | + $EXCLUDE_REGEX = 'externalmemory.*' + foreach ($Config in 'Release','Debug') { + & ctest ` + --build-config ${Config} ` + --output-on-failure ` + --no-tests=error ` + --parallel ${env:NUMBER_OF_PROCESSORS} ` + --exclude-regex "$EXCLUDE_REGEX" + if ($LASTEXITCODE -ne 0) { throw "Running OpenCL-SDK tests in $Config failed." } + } + + - name: Install + run: | + & cmake ` + --install "${env:GITHUB_WORKSPACE}\build" ` + --prefix "${env:GITHUB_WORKSPACE}\install" ` + --config Release + if ($LASTEXITCODE -ne 0) { throw "Installing OpenCL-SDK failed." } + + - name: "Consume (MSBuild SDK): Configure/Build/Test" + if: matrix.GEN == 'Visual Studio 17 2022' + run: | + $BIN = if('${{matrix.BIN}}' -eq 'x86') {'Win32'} else {'x64'} + & cmake ` + -G '${{matrix.GEN}}' ` + -A $BIN ` + -T ${{matrix.VER}} ` + -D CMAKE_EXE_LINKER_FLAGS=/INCREMENTAL ` + -D CMAKE_PREFIX_PATH="${env:GITHUB_WORKSPACE}\install" ` + -D CMAKE_C_STANDARD=${{matrix.STD.C}} ` + -D CMAKE_CXX_STANDARD=${{matrix.STD.CXX}} ` + -S ${env:GITHUB_WORKSPACE}\test\cmake\pkgconfig\useutil ` + -B ${env:GITHUB_WORKSPACE}\downstream\pkgconfig\useutil + if ($LASTEXITCODE -ne 0) { throw "Configuring OpenCL-SDK consume test failed." } + foreach ($Config in 'Release') { + & cmake ` + --build "${env:GITHUB_WORKSPACE}\downstream\pkgconfig\useutil" ` + --config ${Config} + if ($LASTEXITCODE -ne 0) { throw "Building OpenCL-SDK consume test in $Config failed." } + } + + - name: "Consume (Ninja-Multi-Config SDK): Configure/Build/Test" + if: matrix.GEN == 'Ninja Multi-Config' + run: | + $VER = switch ('${{matrix.VER}}') { ` + 'v142' {'14.2'} ` + 'v143' {'14.4'} } + Import-Module "${env:VS_ROOT}\Common7\Tools\Microsoft.VisualStudio.DevShell.dll" + Enter-VsDevShell -VsInstallPath ${env:VS_ROOT} -SkipAutomaticLocation -DevCmdArguments "-host_arch=x64 -arch=${{matrix.BIN}} -vcvars_ver=${VER}" + & cmake ` + -G '${{matrix.GEN}}' ` + -D CMAKE_MAKE_PROGRAM="${env:NINJA_ROOT}\ninja.exe" ` + -D CMAKE_EXE_LINKER_FLAGS=/INCREMENTAL ` + -D CMAKE_PREFIX_PATH="${env:GITHUB_WORKSPACE}\external\OpenCL-Headers\install;${env:GITHUB_WORKSPACE}\install" ` + -D CMAKE_C_STANDARD=${{matrix.STD.C}} ` + -D CMAKE_CXX_STANDARD=${{matrix.STD.CXX}} ` + -S ${env:GITHUB_WORKSPACE}\test\cmake\pkgconfig\useutil ` + -B ${env:GITHUB_WORKSPACE}\downstream\pkgconfig\useutil + if ($LASTEXITCODE -ne 0) { throw "Configuring OpenCL-SDK consume test failed." } + foreach ($Config in 'Release') { + & cmake ` + --build "${env:GITHUB_WORKSPACE}\downstream\pkgconfig\useutil" ` + --config ${Config} + if ($LASTEXITCODE -ne 0) { throw "Building OpenCL-SDK consume test in $Config failed." } + } + + - name: Consume test + if: matrix.BIN != 'x86' + working-directory: ${{runner.workspace}}/OpenCL-SDK/downstream/pkgconfig/useutil + run: | + foreach ($Config in 'Release') { + & ctest ` + --build-config ${Config} ` + --output-on-failure ` + --no-tests=error ` + --parallel ${env:NUMBER_OF_PROCESSORS} + if ($LASTEXITCODE -ne 0) { throw "Running OpenCL-SDK consume test in $Config failed." } + } + + macos: + runs-on: macos-latest + needs: format + defaults: + run: + shell: bash + strategy: + matrix: + GEN: + - Xcode + - Ninja Multi-Config + DEPS: + - system + - vcpkg + - fetch + STD: + - C: 11 + CXX: 14 + - C: 17 + CXX: 17 + env: + CC: /usr/bin/clang + CXX: /usr/bin/clang++ + steps: + - name: Checkout OpenCL-SDK + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + + - name: Create Build Environment + run: | + # Install Ninja only if it's the selected generator and it's not available. + if [[ "${{matrix.GEN}}" == "Ninja Multi-Config" && ! `which ninja` ]]; then brew install ninja; fi && + # We need to provide an OpenCL driver for Intel CPU on mac + brew install pocl + echo "OCL_ICD_VENDORS=/opt/homebrew/Cellar/pocl/6.0/etc/OpenCL/vendors" >> $GITHUB_ENV + cmake --version + + - name: Install samples dependencies + run: | + brew install vulkan-loader + + - name: Install dependencies (Homebrew) + if: matrix.DEPS == 'system' + run: brew install tclap glm glew sfml mesa-glu + + - name: Install dependencies (vcpkg) + if: matrix.DEPS == 'vcpkg' + run: | + git clone https://github.com/Microsoft/vcpkg.git vcpkg + ./vcpkg/bootstrap-vcpkg.sh + ./vcpkg/vcpkg install tclap glm glew sfml stb + + - name: Set up compiler flags + run: | + # Not using -pedantic: error: ISO C forbids braced-groups within expressions + # The flags + # * -Wno-missing-field-initializers + # * -Wno-conditional-uninitialized + # have been added because of Std compilation errors + echo "CFLAGS=-Wall -Wextra -Werror -Wno-missing-field-initializers -Wno-conditional-uninitialized" >> $GITHUB_ENV; + # The flags + # * -Wno-deprecated-declarations + # * -Wno-missing-field-initializers + # have been added because of Std compilation errors + echo "CXXFLAGS=-Wall -Wextra -pedantic -Wno-format -Werror -Wno-missing-field-initializers -Wno-deprecated-declarations" >> $GITHUB_ENV; + + - name: Configure + run: cmake + -G "${{matrix.GEN}}" + `if [[ "${{matrix.DEPS}}" == "vcpkg" ]]; then echo "-D CMAKE_TOOLCHAIN_FILE=./vcpkg/scripts/buildsystems/vcpkg.cmake"; fi` + -D BUILD_DOCS=ON + -D BUILD_TESTING=ON + -D BUILD_EXAMPLES=ON + -D OPENCL_SDK_BUILD_SAMPLES=ON + -D CMAKE_C_STANDARD=${{matrix.STD.C}} + -D CMAKE_CXX_STANDARD=${{matrix.STD.CXX}} + -D CMAKE_INSTALL_PREFIX=$GITHUB_WORKSPACE/install + -D CMAKE_OSX_ARCHITECTURES=arm64 + -S $GITHUB_WORKSPACE + -B $GITHUB_WORKSPACE/build + + - name: Build + run: | + cmake --build $GITHUB_WORKSPACE/build --config Debug --parallel `sysctl -n hw.logicalcpu` + cmake --build $GITHUB_WORKSPACE/build --config Release --parallel `sysctl -n hw.logicalcpu` + + - name: Test + working-directory: ${{runner.workspace}}/OpenCL-SDK/build + run: | + EXCLUDE_REGEX="(multidevice|externalmemory).*" + ctest -C Debug --output-on-failure --no-tests=error --parallel `sysctl -n hw.logicalcpu` --exclude-regex "$EXCLUDE_REGEX" + ctest -C Release --output-on-failure --no-tests=error --parallel `sysctl -n hw.logicalcpu` --exclude-regex "$EXCLUDE_REGEX" + + - name: Install + run: | + cmake --build $GITHUB_WORKSPACE/build --config Release --target install + + - name: Consume (install) + run: cmake + -G "${{matrix.GEN}}" + -D CMAKE_C_STANDARD=${{matrix.STD.C}} + -D CMAKE_CXX_STANDARD=${{matrix.STD.CXX}} + -D CMAKE_PREFIX_PATH="$GITHUB_WORKSPACE/install" + -S $GITHUB_WORKSPACE/test/cmake/pkgconfig/useutil + -B $GITHUB_WORKSPACE/build_install && + cmake --build $GITHUB_WORKSPACE/build_install --config Debug --parallel `sysctl -n hw.logicalcpu` && + cmake --build $GITHUB_WORKSPACE/build_install --config Release --parallel `sysctl -n hw.logicalcpu` && + cd $GITHUB_WORKSPACE/build_install && + ctest -C Debug --output-on-failure --no-tests=error --parallel `sysctl -n hw.logicalcpu` && + ctest -C Release --output-on-failure --no-tests=error --parallel `sysctl -n hw.logicalcpu` python: name: Exercise Python examples on ${{matrix.os}} + defaults: + run: + shell: bash strategy: matrix: #os: [ubuntu-latest, macos-latest] os: [ubuntu-latest] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Environment setup run: | MINIFORGE_INSTALL_DIR=.miniforge3 @@ -138,13 +808,16 @@ jobs: checkruby: name: Check Ruby Samples ${{ matrix.os }} + defaults: + run: + shell: bash runs-on: ${{ matrix.os }} strategy: matrix: #os: [ubuntu-latest, macos-latest] os: [ubuntu-latest, macos-13] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: recursive @@ -161,3 +834,44 @@ jobs: - name: Run Ruby Samples run: rake test working-directory: ruby + + android: + runs-on: ubuntu-latest + needs: format + defaults: + run: + shell: bash + strategy: + matrix: + ABI: + - arm64-v8a + - x86_64 + API_LEVEL: + - android-29 + - android-33 + CONFIG: + - Debug + - Release + env: + CFLAGS: -Wall -Wextra -pedantic -Werror -Wno-error=gnu-statement-expression + CXXFLAGS: -Wall -Wextra -pedantic -Werror -Wno-error=missing-field-initializers + steps: + - name: Checkout OpenCL-SDK + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + + - name: Configure + run: cmake + -G "Unix Makefiles" + -D CMAKE_BUILD_TYPE=${{matrix.CONFIG}} + -D CMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake + -D ANDROID_ABI=${{matrix.ABI}} + -D ANDROID_PLATFORM=${{matrix.API_LEVEL}} + -D OPENCL_SDK_BUILD_OPENGL_SAMPLES=OFF + -S $GITHUB_WORKSPACE + -B $GITHUB_WORKSPACE/build + + - name: Build + run: cmake --build $GITHUB_WORKSPACE/build --parallel `nproc` diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f92c392d..023e3778 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,32 +8,26 @@ name: Create Release jobs: windows-binary: runs-on: windows-2022 + defaults: + run: + shell: pwsh strategy: matrix: VER: [v143] GEN: [Visual Studio 17 2022] BIN: [x64, x86] - + env: + UseMultiToolTask: true + EnforceProcessCountAcrossBuilds: true + MultiProcMaxCount: 3 steps: - name: Checkout OpenCL-SDK - uses: actions/checkout@v2 - - - name: Initialize git submodules - shell: pwsh - run: | - & git submodule init - & git submodule update - - - name: Create Build Environment - shell: pwsh - run: | - # Parallelize MSBuild across projects - [Environment]::SetEnvironmentVariable('UseMultiToolTask', 'true', [EnvironmentVariableTarget]::User) - [Environment]::SetEnvironmentVariable('EnforceProcessCountAcrossBuilds', 'true', [EnvironmentVariableTarget]::User) - [Environment]::SetEnvironmentVariable('MultiProcMaxCount', "$env:NUMBER_OF_PROCESSORS", [EnvironmentVariableTarget]::User) + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive - name: Configure - shell: pwsh run: | $Bin = if('${{matrix.BIN}}' -eq 'x86') {'Win32'} else {'x64'} & cmake ` @@ -42,15 +36,16 @@ jobs: -T ${{matrix.VER}} ` -S "${env:GITHUB_WORKSPACE}" ` -B "${env:GITHUB_WORKSPACE}\build" ` + -D CMAKE_INSTALL_PREFIX="${env:GITHUB_WORKSPACE}\install" ` -D BUILD_DOCS=OFF ` -D BUILD_TESTING=OFF ` -D BUILD_EXAMPLES=OFF ` -D OPENCL_SDK_BUILD_SAMPLES=OFF ` -D OPENCL_ICD_LOADER_BUILD_TESTING=OFF ` -D CMAKE_POLICY_DEFAULT_CMP0096=NEW + if ($LASTEXITCODE -ne 0) { throw "Configuring OpenCL-SDK failed." } - name: Check version number - shell: pwsh run: | if ( -not (` Get-Content ${env:GITHUB_WORKSPACE}\build\CMakeCache.txt | ` @@ -59,7 +54,6 @@ jobs: { throw 'CMake project version mismatches Git tag name (without leading "v")'} - name: Build - shell: pwsh run: | foreach ($Config in "Debug","Release") { ` & cmake ` @@ -67,11 +61,11 @@ jobs: --config $Config ` -- ` /verbosity:minimal ` - /noLogo ` + /noLogo + if ($LASTEXITCODE -ne 0) { throw "Building OpenCL-SDK in $Config failed." } } - + - name: Package Binary - shell: pwsh run: | & cpack ` --config "${env:GITHUB_WORKSPACE}\build\CPackConfig.cmake" ` @@ -79,10 +73,10 @@ jobs: -C 'Debug;Release' ` -D CPACK_PACKAGE_FILE_NAME='OpenCL-SDK-${{github.ref_name}}-Win-${{matrix.BIN}}' ` -B "${env:GITHUB_WORKSPACE}\package" + if ($LASTEXITCODE -ne 0) { throw "Packaging OpenCL-SDK failed." } - name: Upload Package uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') with: draft: true files: | @@ -91,22 +85,24 @@ jobs: source: name: Source Release (${{ matrix.os }}) runs-on: ${{ matrix.os }} + defaults: + run: + shell: pwsh strategy: matrix: OS: [ubuntu-20.04, windows-2022] + env: + DEB_INSTALLATION_PATH: /usr + steps: - name: Checkout OpenCL-SDK - uses: actions/checkout@v2 - - - name: Initialize git submodules - shell: pwsh - run: | - & git submodule init - & git submodule update + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive - name: Configure - shell: pwsh run: | & cmake ` -S "${env:GITHUB_WORKSPACE}" ` @@ -116,10 +112,11 @@ jobs: -D BUILD_EXAMPLES=OFF ` -D OPENCL_SDK_BUILD_SAMPLES=OFF ` -D OPENCL_ICD_LOADER_BUILD_TESTING=OFF ` - -D CMAKE_POLICY_DEFAULT_CMP0096=NEW + -D CMAKE_POLICY_DEFAULT_CMP0096=NEW ` + -D CPACK_PACKAGING_INSTALL_PREFIX=$DEB_INSTALLATION_PATH + if ($LASTEXITCODE -ne 0) { throw "Configuring OpenCL-SDK failed." } - name: Check version number - shell: pwsh run: | if ( -not (` Get-Content ${env:GITHUB_WORKSPACE}/build/CMakeCache.txt | ` @@ -127,8 +124,17 @@ jobs: )) ` { throw 'CMake project version mismatches Git tag name (without leading "v")'} + - name: Package DEB + if: ${{ contains(matrix.OS, 'ubuntu') }} + run: | + cpack ` + --config "${env:GITHUB_WORKSPACE}/build/CPackSourceConfig.cmake" ` + -G DEB ` + -C Release ` + "${env:GITHUB_WORKSPACE}/package-deb" + if ($LASTEXITCODE -ne 0) { throw "Packaging OpenCL-SDK deb failed." } + - name: Package Source - shell: pwsh run: | $Generator = if('${{matrix.OS}}' -match 'windows') {'ZIP'} else {'TGZ'} & cpack ` @@ -138,11 +144,96 @@ jobs: -D CPACK_SOURCE_IGNORE_FILES="/\\.git/;/\\.gitignore;/\\.gitmodules;/\\.gitlab/;/\\.github/;/\\.reuse/;/\\.appveyor.yml;/build/;/install/;/package/" ` -D CPACK_PACKAGE_FILE_NAME='OpenCL-SDK-${{github.ref_name}}-Source' ` -B "${env:GITHUB_WORKSPACE}/package" + if ($LASTEXITCODE -ne 0) { throw "Packaging OpenCL-SDK source failed." } - name: Release Source uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') with: draft: true files: | - package/OpenCL-SDK-${{github.ref_name}}-Source.* \ No newline at end of file + package/OpenCL-SDK-${{github.ref_name}}-Source.* + + ppa: + runs-on: ubuntu-latest + defaults: + run: + shell: bash + env: + OPENCL_HEADERS_REPOSITORY: mfep/OpenCL-Headers + OPENCL_ICD_LOADER_REPOSITORY: mfep/OpenCL-ICD-Loader + OPENCL_CLHPP_REPOSITORY: mfep/OpenCL-CLHPP + distroseries: jammy + steps: + - name: Add PPA + run: sudo add-apt-repository -y ppa:${{ vars.PPA }} + + - name: Install prerequisites + run: sudo apt-get update -qq && sudo apt-get install -y + cmake + devscripts + debhelper-compat=13 + opencl-c-headers + opencl-clhpp-headers + khronos-opencl-loader-libopencl1 + khronos-opencl-loader-opencl-dev + clinfo + + - name: Import GPG signing key + run: echo "${{ secrets.DEB_SIGNING_KEY }}" | gpg --import + + - name: Download and extract source code (OpenCL-SDK) + run: | + wget -O $GITHUB_WORKSPACE/source.orig.tar.gz https://github.com/$GITHUB_REPOSITORY/archive/refs/tags/$GITHUB_REF_NAME.tar.gz + tar -xvf $GITHUB_WORKSPACE/source.orig.tar.gz -C $GITHUB_WORKSPACE + mv $GITHUB_WORKSPACE/OpenCL-SDK-* $GITHUB_WORKSPACE/OpenCL-SDK + + - name: Download and extract source code (OpenCL-Headers) + run: | + wget -O $GITHUB_WORKSPACE/opencl-headers.tar.gz https://github.com/$OPENCL_HEADERS_REPOSITORY/archive/refs/tags/$GITHUB_REF_NAME.tar.gz + tar -xvf $GITHUB_WORKSPACE/opencl-headers.tar.gz -C $GITHUB_WORKSPACE/OpenCL-SDK/external + rm -rf $GITHUB_WORKSPACE/OpenCL-SDK/external/OpenCL-Headers + mv $GITHUB_WORKSPACE/OpenCL-SDK/external/OpenCL-Headers-* $GITHUB_WORKSPACE/OpenCL-SDK/external/OpenCL-Headers + + - name: Download and extract source code (OpenCL-ICD-Loader) + run: | + wget -O $GITHUB_WORKSPACE/opencl-icd-loader.tar.gz https://github.com/$OPENCL_ICD_LOADER_REPOSITORY/archive/refs/tags/$GITHUB_REF_NAME.tar.gz + tar -xvf $GITHUB_WORKSPACE/opencl-icd-loader.tar.gz -C $GITHUB_WORKSPACE/OpenCL-SDK/external + rm -rf $GITHUB_WORKSPACE/OpenCL-SDK/external/OpenCL-ICD-Loader + mv $GITHUB_WORKSPACE/OpenCL-SDK/external/OpenCL-ICD-Loader-* $GITHUB_WORKSPACE/OpenCL-SDK/external/OpenCL-ICD-Loader + + - name: Download and extract source code (OpenCL-CLHPP) + run: | + wget -O $GITHUB_WORKSPACE/opencl-clhpp.tar.gz https://github.com/$OPENCL_CLHPP_REPOSITORY/archive/refs/tags/$GITHUB_REF_NAME.tar.gz + tar -xvf $GITHUB_WORKSPACE/opencl-clhpp.tar.gz -C $GITHUB_WORKSPACE/OpenCL-SDK/external + rm -rf $GITHUB_WORKSPACE/OpenCL-SDK/external/OpenCL-CLHPP + mv $GITHUB_WORKSPACE/OpenCL-SDK/external/OpenCL-CLHPP-* $GITHUB_WORKSPACE/OpenCL-SDK/external/OpenCL-CLHPP + + - name: Configure project out-of-tree + run: cmake + -S $GITHUB_WORKSPACE/OpenCL-SDK* + -B $GITHUB_WORKSPACE/../build + -D CMAKE_BUILD_TYPE=Release + -D CMAKE_INSTALL_PREFIX=/usr + -D BUILD_TESTING=OFF + -D OPENCL_SDK_BUILD_SAMPLES=OFF + -D LATEST_RELEASE_VERSION=$GITHUB_REF_NAME + -D CPACK_DEBIAN_PACKAGE_MAINTAINER="${{ vars.DEB_MAINTAINER }}" + -D DEBIAN_VERSION_SUFFIX=${{ vars.DEB_VERSION_SUFFIX }} + + - name: Generate packaging scripts + run: cmake + -D CMAKE_CACHE_PATH=$GITHUB_WORKSPACE/../build/CMakeCache.txt + -D ORIG_ARCHIVE=$GITHUB_WORKSPACE/source.orig.tar.gz + -D LATEST_RELEASE_VERSION=$GITHUB_REF_NAME + -D DEBIAN_DISTROSERIES=${{ env.distroseries }} + -D DEBIAN_PACKAGE_MAINTAINER="${{ vars.DEB_MAINTAINER }}" + -D DEBIAN_VERSION_SUFFIX=${{ vars.DEB_VERSION_SUFFIX }} + -P $GITHUB_WORKSPACE/OpenCL-SDK*/cmake/DebSourcePkg.cmake + + - name: Build source package + run: | + cd $GITHUB_WORKSPACE/OpenCL-SDK*/ + debuild -S -sa + + - name: Push source package to the PPA + run: dput ppa:${{ vars.PPA }} $GITHUB_WORKSPACE/*source.changes diff --git a/.gitignore b/.gitignore index adacb9e4..2cbf6017 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,15 @@ -build/ -install/ +# Build dir +[Bb]uild/ + +# Install dir +[Ii]nstall/ + +# Package dirs +[Pp]ackage/ +[Pp]ackage-deb/ + +# Cmake +CMakeFiles/ # vim *~ diff --git a/.gitmodules b/.gitmodules index 940e8ead..08f7f035 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,9 @@ [submodule "loader"] path = external/OpenCL-ICD-Loader - url = https://github.com/KhronosGroup/OpenCL-ICD-Loader.git + url = https://github.com/StreamHPC/OpenCL-ICD-Loader.git [submodule "headers"] path = external/OpenCL-Headers url = https://github.com/KhronosGroup/OpenCL-Headers.git [submodule "headers-cpp"] path = external/OpenCL-CLHPP - url = https://github.com/KhronosGroup/OpenCL-CLHPP.git + url = https://github.com/StreamHPC/OpenCL-CLHPP.git diff --git a/CMakeLists.txt b/CMakeLists.txt index cdaa68bf..7264d983 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.16) set(CMAKE_CXX_STANDARD 14) @@ -26,8 +26,14 @@ include(CMakeDependentOption) option(OPENCL_SDK_BUILD_UTILITY_LIBRARIES "Build utility libraries" ON) cmake_dependent_option(OPENCL_SDK_BUILD_SAMPLES "Build sample code" ON OPENCL_SDK_BUILD_UTILITY_LIBRARIES OFF) cmake_dependent_option(OPENCL_SDK_BUILD_OPENGL_SAMPLES "Build OpenCL-OpenGL interop sample code" ON OPENCL_SDK_BUILD_SAMPLES OFF) +cmake_dependent_option(OPENCL_SDK_BUILD_VULKAN_SAMPLES "Build OpenCL-Vulkan interop sample code" ON OPENCL_SDK_BUILD_SAMPLES OFF) cmake_dependent_option(OPENCL_SDK_TEST_SAMPLES "Add CTest to samples (where applicable)" ON OPENCL_SDK_BUILD_SAMPLES OFF) +option(OPENCL_SDK_BUILD_CLINFO "Build clinfo utility" ON) +if (("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") AND ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) + string(APPEND CMAKE_CXX_FLAGS " -stdlib=libstdc++") +endif() + include(CTest) if (NOT CMAKE_BUILD_TYPE) @@ -48,6 +54,7 @@ list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules ) include(Dependencies) +include(Package) if(OPENCL_SDK_BUILD_UTILITY_LIBRARIES) add_subdirectory(lib) endif() @@ -71,6 +78,7 @@ set(config_package_location ${CMAKE_INSTALL_DATADIR}/cmake/OpenCL) install( FILES ${PROJECT_BINARY_DIR}/OpenCL/OpenCLConfig.cmake DESTINATION ${config_package_location} + COMPONENT binary ) unset(CMAKE_SIZEOF_VOID_P) @@ -83,6 +91,7 @@ write_basic_package_version_file( install( FILES ${CMAKE_CURRENT_BINARY_DIR}/OpenCL/OpenCLConfigVersion.cmake DESTINATION ${config_package_location} + COMPONENT binary ) include(CPack) diff --git a/cmake/DebSourcePkg.cmake b/cmake/DebSourcePkg.cmake new file mode 100644 index 00000000..405785bc --- /dev/null +++ b/cmake/DebSourcePkg.cmake @@ -0,0 +1,110 @@ +# This script produces the changelog, control and rules file in the debian +# directory. These files are needed to build a Debian source package from the repository. +# Run this in CMake script mode, e.g. +# $ cd OpenCL-SDK +# $ cmake -S . -B ../build -D BUILD_TESTING=OFF +# $ cmake +# -DCMAKE_CACHE_PATH=../build/CMakeCache.txt +# -DCPACK_DEBIAN_PACKAGE_MAINTAINER="Example Name " +# -DDEBIAN_DISTROSERIES=jammy +# -DORIG_ARCHIVE=../OpenCL-SDK.tar.gz +# -DLATEST_RELEASE_VERSION=v2023.08.29 +# -P cmake/DebSourcePkg.cmake +# $ debuild -S -sa + +cmake_minimum_required(VERSION 3.21) # file(COPY_FILE) is added in CMake 3.21 + +set(DEB_SOURCE_PKG_NAME "khronos-opencl-sdk") + +if(NOT EXISTS "${CMAKE_CACHE_PATH}") + message(FATAL_ERROR "CMAKE_CACHE_PATH is not set or does not exist") +endif() +if(NOT DEFINED DEBIAN_PACKAGE_MAINTAINER) + message(FATAL_ERROR "DEBIAN_PACKAGE_MAINTAINER is not set") +endif() +if(NOT DEFINED DEBIAN_DISTROSERIES) + message(FATAL_ERROR "DEBIAN_DISTROSERIES is not set") +endif() +if(NOT DEFINED ORIG_ARCHIVE) + message(WARNING "ORIG_ARCHIVE is not set") +elseif(NOT EXISTS "${ORIG_ARCHIVE}") + message(FATAL_ERROR "ORIG_ARCHIVE is defined, but the file does not exist at \"${ORIG_ARCHIVE}\"") +endif() +if(NOT DEFINED LATEST_RELEASE_VERSION) + message(WARNING "LATEST_RELEASE_VERSION is not set") +endif() +if(NOT DEFINED DEBIAN_VERSION_SUFFIX) + message(WARNING "DEBIAN_VERSION_SUFFIX is not set") +endif() + +# Extracting the project version from the main CMakeLists.txt via regex +file(READ "${CMAKE_CACHE_PATH}" CMAKE_CACHE) +string(REGEX MATCH "CMAKE_PROJECT_VERSION[^=]*=([^\n]*)" REGEX_MATCH "${CMAKE_CACHE}") +if(NOT REGEX_MATCH) + message(FATAL_ERROR "Could not extract project version from CMakeLists.txt") +endif() +set(PROJECT_VERSION "${CMAKE_MATCH_1}") + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}") +# Package.cmake contains all details for packaging +include(PackageSetup) + +# Append a space after every newline in the description. This format is required +# in the control file. +string(REPLACE "\n" "\n " CPACK_DEBIAN_DESCRIPTION "${CPACK_DEBIAN_DESCRIPTION}") + +set(DEB_SOURCE_PKG_DIR "${CMAKE_CURRENT_LIST_DIR}/../debian") +# Write debian/control +file(WRITE "${DEB_SOURCE_PKG_DIR}/control" +"Source: ${DEB_SOURCE_PKG_NAME} +Section: ${CPACK_DEBIAN_BINARY_PACKAGE_SECTION} +Priority: optional +Maintainer: ${DEBIAN_PACKAGE_MAINTAINER} +Build-Depends: cmake, debhelper-compat (=13), ${CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS} +Rules-Requires-Root: no +Homepage: ${CPACK_DEBIAN_PACKAGE_HOMEPAGE} +Standards-Version: 4.6.2 + +Package: ${CPACK_DEBIAN_BINARY_PACKAGE_NAME} +Architecture: any +Multi-Arch: same +Depends: ${CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS} +Description: ${CPACK_DEBIAN_DESCRIPTION} +" +) +# Write debian/changelog +string(TIMESTAMP CURRENT_TIMESTAMP "%a, %d %b %Y %H:%M:%S +0000" UTC) +file(WRITE "${DEB_SOURCE_PKG_DIR}/changelog" +"${DEB_SOURCE_PKG_NAME} (${PACKAGE_VERSION_REVISION}) ${DEBIAN_DISTROSERIES}; urgency=medium + + * Released version ${PACKAGE_VERSION_REVISION} + + -- ${DEBIAN_PACKAGE_MAINTAINER} ${CURRENT_TIMESTAMP} +") +# Write debian/rules +file(WRITE "${DEB_SOURCE_PKG_DIR}/rules" +"#!/usr/bin/make -f +%: +\tdh $@ + +override_dh_auto_configure: +\tdh_auto_configure -- -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DOPENCL_SDK_BUILD_SAMPLES=OFF -DOPENCL_SDK_BUILD_CLINFO=OFF + +override_dh_auto_install: + +override_dh_install: +\tcmake --install obj-* --component binary --prefix ./debian/${CPACK_DEBIAN_BINARY_PACKAGE_NAME}/usr +") + +if(DEFINED ORIG_ARCHIVE) + # Copy the passed orig.tar.gz file. The target filename is deduced from the version number, as expected by debuild + cmake_path(IS_ABSOLUTE ORIG_ARCHIVE IS_ORIG_ARCHIVE_ABSOLUTE) + if (NOT IS_ORIG_ARCHIVE_ABSOLUTE) + message(FATAL_ERROR "ORIG_ARCHIVE must be an absolute path (passed: \"${ORIG_ARCHIVE}\")") + endif() + cmake_path(GET ORIG_ARCHIVE EXTENSION ORIG_ARCHIVE_EXT) + cmake_path(GET ORIG_ARCHIVE PARENT_PATH ORIG_ARCHIVE_PARENT) + set(TARGET_PATH "${ORIG_ARCHIVE_PARENT}/${DEB_SOURCE_PKG_NAME}_${CPACK_DEBIAN_PACKAGE_VERSION}${ORIG_ARCHIVE_EXT}") + message(STATUS "Copying \"${ORIG_ARCHIVE}\" to \"${TARGET_PATH}\"") + file(COPY_FILE "${ORIG_ARCHIVE}" "${TARGET_PATH}") +endif() diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index f9176312..06145cb8 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -5,6 +5,41 @@ if(OPENCL_SDK_BUILD_UTILITY_LIBRARIES) endforeach() endif() +# Save global flags and strip diagnostics locally +set(USER_C_FLAGS ${CMAKE_C_FLAGS}) +set(USER_CXX_FLAGS ${CMAKE_CXX_FLAGS}) +if(DEFINED BUILD_SHARED_LIBS) + set(USER_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) +endif() +set(USER_ROCM_WARN_TOOLCHAIN_VAR ${ROCM_WARN_TOOLCHAIN_VAR}) + +set(ROCM_WARN_TOOLCHAIN_VAR OFF CACHE BOOL "") +# Turn off C warnings and errors for all warnings in dependencies +separate_arguments(C_FLAGS_LIST NATIVE_COMMAND ${CMAKE_C_FLAGS}) +list(REMOVE_ITEM C_FLAGS_LIST /WX -Werror -Werror=pendantic -pedantic-errors) +if(MSVC) + list(FILTER C_FLAGS_LIST EXCLUDE REGEX "/[Ww]([0-4]?)(all)?") # Remove MSVC warning flags + list(APPEND C_FLAGS_LIST /w) +else() + list(FILTER C_FLAGS_LIST EXCLUDE REGEX "-W(all|extra|everything)") # Remove GCC/LLVM flags + list(APPEND C_FLAGS_LIST -w) +endif() +list(JOIN C_FLAGS_LIST " " CMAKE_C_FLAGS) +# Turn off C++ warnings and errors for all warnings in dependencies +separate_arguments(CXX_FLAGS_LIST NATIVE_COMMAND ${CMAKE_CXX_FLAGS}) +list(REMOVE_ITEM CXX_FLAGS_LIST /WX -Werror -Werror=pendantic -pedantic-errors) +if(MSVC) + list(FILTER CXX_FLAGS_LIST EXCLUDE REGEX "/[Ww]([0-4]?)(all)?") # Remove MSVC warning flags + list(APPEND CXX_FLAGS_LIST /w) +else() + list(FILTER CXX_FLAGS_LIST EXCLUDE REGEX "-W(all|extra|everything)") # Remove GCC/LLVM flags + list(APPEND CXX_FLAGS_LIST -w) +endif() +list(JOIN CXX_FLAGS_LIST " " CMAKE_CXX_FLAGS) +# Don't build client dependencies as shared +set(BUILD_SHARED_LIBS OFF CACHE BOOL "Global flag to cause add_library() to create shared libraries if on." FORCE) + +# Fetch dependencies if(OPENCL_SDK_BUILD_SAMPLES) foreach(DEP IN ITEMS cargs TCLAP Stb) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Dependencies/${DEP}") @@ -17,4 +52,22 @@ if(OPENCL_SDK_BUILD_SAMPLES) include(${DEP}) endforeach() endif(OPENCL_SDK_BUILD_OPENGL_SAMPLES) -endif(OPENCL_SDK_BUILD_SAMPLES) \ No newline at end of file + + if(OPENCL_SDK_BUILD_VULKAN_SAMPLES) + list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Dependencies/Vulkan") + include(Vulkan) + endif(OPENCL_SDK_BUILD_VULKAN_SAMPLES) +endif(OPENCL_SDK_BUILD_SAMPLES) + +if(OPENCL_SDK_BUILD_CLINFO) + include("${CMAKE_CURRENT_LIST_DIR}/Dependencies/clinfo/clinfo.cmake") +endif() + +# Restore user global state +set(CMAKE_C_FLAGS ${USER_C_FLAGS}) +set(CMAKE_CXX_FLAGS ${USER_CXX_FLAGS}) +if(DEFINED USER_BUILD_SHARED_LIBS) + set(BUILD_SHARED_LIBS ${USER_BUILD_SHARED_LIBS}) +else() + unset(BUILD_SHARED_LIBS CACHE ) +endif() diff --git a/cmake/Dependencies/SFML/CMakeLists.txt b/cmake/Dependencies/SFML/CMakeLists.txt index 88440e50..bd8c9581 100644 --- a/cmake/Dependencies/SFML/CMakeLists.txt +++ b/cmake/Dependencies/SFML/CMakeLists.txt @@ -49,10 +49,6 @@ set(SRC ${SRCROOT}/Vertex.cpp ${INCROOT}/Vertex.hpp ) -if(NOT SFML_OPENGL_ES) - list(APPEND SRC ${SRCROOT}/GLLoader.cpp) - list(APPEND SRC ${SRCROOT}/GLLoader.hpp) -endif() source_group("" FILES ${SRC}) # drawables sources @@ -97,7 +93,10 @@ sfml_add_library(sfml-graphics target_link_libraries(sfml-graphics PUBLIC sfml-window) # stb_image sources -target_include_directories(sfml-graphics PRIVATE "${PROJECT_SOURCE_DIR}/extlibs/headers/stb_image") +target_include_directories(sfml-graphics SYSTEM PRIVATE "${PROJECT_SOURCE_DIR}/extlibs/headers/stb_image") + +# glad sources +target_include_directories(sfml-graphics SYSTEM PRIVATE "${PROJECT_SOURCE_DIR}/extlibs/headers/glad/include") # let CMake know about our additional graphics libraries paths if(SFML_OS_WINDOWS) @@ -112,26 +111,16 @@ elseif(SFML_OS_ANDROID) endif() # find external libraries -if(SFML_OPENGL_ES) - if(SFML_OS_LINUX) - sfml_find_package(EGL INCLUDE "EGL_INCLUDE_DIR" LINK "EGL_LIBRARY") - sfml_find_package(GLES INCLUDE "GLES_INCLUDE_DIR" LINK "GLES_LIBRARY") - target_link_libraries(sfml-graphics PRIVATE EGL GLES) - elseif(SFML_OS_IOS) - target_link_libraries(sfml-graphics PRIVATE "-framework OpenGLES") - endif() -else() - # Target OpenGL already defined for Window component so no sfml_find_package() here - target_link_libraries(sfml-graphics PRIVATE OpenGL) - - if(SFML_OS_LINUX) - # Target X11 already defined for Window component so no sfml_find_package() here - target_link_libraries(sfml-graphics PRIVATE X11) - endif() +if(SFML_OS_ANDROID) + target_link_libraries(sfml-graphics PRIVATE z) +elseif(SFML_OS_IOS) + target_link_libraries(sfml-graphics PRIVATE z bz2) endif() -if(SFML_OS_ANDROID) - target_link_libraries(sfml-graphics PRIVATE z EGL GLESv1_CM) +# starting from Visual Studio 2015, inline versions of some C functions are used; for compatibility link this library +# see https://docs.microsoft.com/en-us/cpp/porting/overview-of-potential-upgrade-issues-visual-cpp?view=msvc-160#libraries +if((SFML_COMPILER_MSVC AND SFML_MSVC_VERSION GREATER_EQUAL 14) OR (SFML_COMPILER_CLANG AND SFML_OS_WINDOWS AND NOT MINGW)) + target_link_libraries(sfml-graphics PRIVATE legacy_stdio_definitions.lib) endif() target_link_libraries(sfml-graphics diff --git a/cmake/Dependencies/SFML/SFML.cmake b/cmake/Dependencies/SFML/SFML.cmake index 56f6deee..22025ac5 100644 --- a/cmake/Dependencies/SFML/SFML.cmake +++ b/cmake/Dependencies/SFML/SFML.cmake @@ -24,7 +24,7 @@ if(NOT (SFML_FOUND OR TARGET SFML::Graphics)) FetchContent_Declare( sfml-external GIT_REPOSITORY https://github.com/SFML/SFML.git - GIT_TAG 2.5.1 # 2f11710abc5aa478503a7ff3f9e654bd2078ebab + GIT_TAG 2.6.1 # 69ea0cd863aed1d4092b970b676924a716ff718b PATCH_COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_LIST_DIR}/CMakeLists.txt" "${CMAKE_CURRENT_BINARY_DIR}/_deps/sfml-external-src/src/SFML/Graphics/CMakeLists.txt" ) FetchContent_MakeAvailable(sfml-external) @@ -36,4 +36,20 @@ if(NOT (SFML_FOUND OR TARGET SFML::Graphics)) INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" FOLDER "Dependencies" ) + if((CMAKE_C_COMPILER_ID MATCHES "GNU") OR (CMAKE_C_COMPILER_ID MATCHES "Clang")) + target_compile_options (sfml-window + PRIVATE + -Wno-implicit-fallthrough + -Wno-sign-compare + -Wno-unused-parameter) + target_compile_options (sfml-graphics + PRIVATE + -Wno-implicit-fallthrough + -Wno-unused-but-set-variable + -Wno-unused-parameter) + target_compile_options (sfml-system + PRIVATE + -Wno-implicit-fallthrough + -Wno-maybe-uninitialized) + endif() endif() \ No newline at end of file diff --git a/cmake/Dependencies/Vulkan/Vulkan.cmake b/cmake/Dependencies/Vulkan/Vulkan.cmake new file mode 100644 index 00000000..ffdcdbd0 --- /dev/null +++ b/cmake/Dependencies/Vulkan/Vulkan.cmake @@ -0,0 +1 @@ +find_package(Vulkan REQUIRED) diff --git a/cmake/Dependencies/clinfo/CMakeLists.txt b/cmake/Dependencies/clinfo/CMakeLists.txt new file mode 100644 index 00000000..bb3541e4 --- /dev/null +++ b/cmake/Dependencies/clinfo/CMakeLists.txt @@ -0,0 +1,38 @@ +include(GNUInstallDirs) + +add_executable(clinfo src/clinfo.c) +target_include_directories(clinfo PRIVATE src) +target_link_libraries(clinfo PRIVATE OpenCL::OpenCL) +target_compile_definitions(clinfo PRIVATE + CL_TARGET_OPENCL_VERSION=300 + CL_NO_NON_ICD_DISPATCH_EXTENSION_PROTOTYPES +) +add_test(NAME clinfo COMMAND $) + +install( + TARGETS clinfo + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + COMPONENT clinfo +) + +# Generating and installing the documentation +find_program(gzip_program gzip) +if (EXISTS "${gzip_program}") + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/clinfo.1.gz" + COMMAND gzip -c "${CMAKE_CURRENT_SOURCE_DIR}/man1/clinfo.1" > "${CMAKE_CURRENT_BINARY_DIR}/clinfo.1.gz" + MAIN_DEPENDENCY "${CMAKE_CURRENT_SOURCE_DIR}/man1/clinfo.1" + ) + add_custom_target( + clinfo_manpage + ALL + DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/clinfo.1.gz" + ) + install( + FILES "${CMAKE_CURRENT_BINARY_DIR}/clinfo.1.gz" + DESTINATION "${CMAKE_INSTALL_DATADIR}/man/man1" + COMPONENT clinfo + ) +else() + message(WARNING "Could not find gzip. Skipping the generation of documentation for clinfo") +endif() diff --git a/cmake/Dependencies/clinfo/clinfo.cmake b/cmake/Dependencies/clinfo/clinfo.cmake new file mode 100644 index 00000000..4ea210eb --- /dev/null +++ b/cmake/Dependencies/clinfo/clinfo.cmake @@ -0,0 +1,20 @@ +if(NOT DEPENDENCIES_FORCE_DOWNLOAD) + find_package(clinfo QUIET) +endif() + +if(NOT clinfo_FOUND) + if(DEPENDENCIES_FORCE_DOWNLOAD) + message(STATUS "DEPENDENCIES_FORCE_DOWNLOAD is ON. Fetching clinfo.") + else() + message(STATUS "Fetching clinfo.") + endif() + + include(FetchContent) + FetchContent_Declare( + clinfo + GIT_REPOSITORY https://github.com/Oblomov/clinfo.git + GIT_TAG 3.0.23.01.25 + PATCH_COMMAND ${CMAKE_COMMAND} -P "${CMAKE_CURRENT_LIST_DIR}/patch.cmake" + ) + FetchContent_MakeAvailable(clinfo) +endif() diff --git a/cmake/Dependencies/clinfo/ext.h b/cmake/Dependencies/clinfo/ext.h new file mode 100644 index 00000000..10337b23 --- /dev/null +++ b/cmake/Dependencies/clinfo/ext.h @@ -0,0 +1,115 @@ +/* Include OpenCL header, and define OpenCL extensions, since what is and is not + * available in the official headers is very system-dependent */ + +#ifndef EXT_H +#define EXT_H + +/* Khronos now provides unified headers for all OpenCL versions, and + * it should be included after defining a target OpenCL version + * (otherwise, the maximum version will simply be used, but a message + * will be printed). + * + * TODO: until 3.0 gets finalized, we only target 2.2 because the 3.0 + * defines etc are still changing, so users may have an older version + * of the 3.0 headers lying around, which may prevent clinfo from being + * compilable. + */ +#ifndef CL_TARGET_OPENCL_VERSION +#define CL_TARGET_OPENCL_VERSION 220 +#endif + +/* We will use the deprecated clGetExtensionFunctionAddress, + * so let the headers know that we don't care about it being deprecated. + * The standard CL_USE_DEPRECATED_OPENCL_1_1_APIS define apparently + * doesn't work for macOS, so we'll just tell the compiler to not + * warn about deprecated functions. + * A more correct solution would be to suppress the warning only around the + * clGetExtensionFunctionAddress call, but honestly I just cleaned up that + * piece of code. And I'm actually wondering if it even makes sense to + * build that part of the code on macOS: does anybody actually use + * ocl-icd as OpenCL dispatcher on macOS? + */ + +#ifdef __APPLE__ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#include +#else +#define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#include +#include +#endif + +/* cl_amd_object_metadata */ +#define CL_PLATFORM_MAX_KEYS_AMD 0x403C + +/* cl_khr_terminate_context */ +#define CL_DEVICE_TERMINATE_CAPABILITY_KHR_1x 0x200F +#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031 + +/* cl_nv_device_attribute_query */ +#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 +#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 +#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 +#define CL_DEVICE_WARP_SIZE_NV 0x4003 +#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 +#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 +#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 +#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007 +#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 +#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 +#define CL_DEVICE_PCI_DOMAIN_ID_NV 0x400A + +/* cl_ext_atomic_counters_{32,64} */ +#define CL_DEVICE_MAX_ATOMIC_COUNTERS_EXT 0x4032 + +/* cl_amd_device_attribute_query */ +#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 +#define CL_DEVICE_TOPOLOGY_AMD 0x4037 +#define CL_DEVICE_BOARD_NAME_AMD 0x4038 +#define CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 0x4039 +#define CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD 0x4040 +#define CL_DEVICE_SIMD_WIDTH_AMD 0x4041 +#define CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD 0x4042 +#define CL_DEVICE_WAVEFRONT_WIDTH_AMD 0x4043 +#define CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD 0x4044 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD 0x4045 +#define CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD 0x4046 +#define CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD 0x4047 +#define CL_DEVICE_LOCAL_MEM_BANKS_AMD 0x4048 +#define CL_DEVICE_THREAD_TRACE_SUPPORTED_AMD 0x4049 +#define CL_DEVICE_GFXIP_MAJOR_AMD 0x404A +#define CL_DEVICE_GFXIP_MINOR_AMD 0x404B +#define CL_DEVICE_AVAILABLE_ASYNC_QUEUES_AMD 0x404C +/* These two are undocumented */ +#define CL_DEVICE_MAX_REAL_TIME_COMPUTE_QUEUES_AMD 0x404D +#define CL_DEVICE_MAX_REAL_TIME_COMPUTE_UNITS_AMD 0x404E +/* These were added in v4 of the extension, but have values lower than + * than the older ones, and spanning around the cl_ext_atomic_counters_* + * define + */ +#define CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_AMD 0x4030 +#define CL_DEVICE_MAX_WORK_GROUP_SIZE_AMD 0x4031 +#define CL_DEVICE_PREFERRED_CONSTANT_BUFFER_SIZE_AMD 0x4033 +#define CL_DEVICE_PCIE_ID_AMD 0x4034 + +#ifndef CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD +#define CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD 1 + +typedef union +{ + struct { cl_uint type; cl_uint data[5]; } raw; + struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie; +} cl_device_topology_amd; +#endif + +/* cl_amd_offline_devices */ +#define CL_CONTEXT_OFFLINE_DEVICES_AMD 0x403F + +/* cl_amd_copy_buffer_p2p */ +#define CL_DEVICE_NUM_P2P_DEVICES_AMD 0x4088 +#define CL_DEVICE_P2P_DEVICES_AMD 0x4089 + +/* cl_altera_device_temperature */ +#define CL_DEVICE_CORE_TEMPERATURE_ALTERA 0x40F3 + +#endif diff --git a/cmake/Dependencies/clinfo/patch.cmake b/cmake/Dependencies/clinfo/patch.cmake new file mode 100644 index 00000000..d1956167 --- /dev/null +++ b/cmake/Dependencies/clinfo/patch.cmake @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.16) + +execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_LIST_DIR}/CMakeLists.txt" "${CMAKE_CURRENT_BINARY_DIR}/CMakeLists.txt" RESULT_VARIABLE RESULT_VAR) +if (NOT "${RESULT_VAR}" EQUAL "0") + message(FATAL_ERROR "Could not copy file with CMake") +endif() + +execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_LIST_DIR}/ext.h" "${CMAKE_CURRENT_BINARY_DIR}/src/ext.h" RESULT_VARIABLE RESULT_VAR) +if (NOT "${RESULT_VAR}" EQUAL "0") + message(FATAL_ERROR "Could not copy file with CMake") +endif() diff --git a/cmake/Modules/FindStb.cmake b/cmake/Modules/FindStb.cmake index bafa70a8..e8673a2e 100644 --- a/cmake/Modules/FindStb.cmake +++ b/cmake/Modules/FindStb.cmake @@ -16,7 +16,8 @@ find_path (Stb_INCLUDE_PATH stb_image.h PATH_SUFFIXES include include/stb - ) + NO_CMAKE_FIND_ROOT_PATH +) # handle the QUIETLY and REQUIRED arguments and set Stb_FOUND to # TRUE if all listed variables are TRUE diff --git a/cmake/Modules/FindTCLAP.cmake b/cmake/Modules/FindTCLAP.cmake index 2e55bd14..c2e05c74 100644 --- a/cmake/Modules/FindTCLAP.cmake +++ b/cmake/Modules/FindTCLAP.cmake @@ -14,6 +14,7 @@ find_path (TCLAP_INCLUDE_PATH tclap/CmdLine.h PATHS ${CMAKE_SOURCE_DIR}/include ${CMAKE_INSTALL_PREFIX}/include + NO_CMAKE_FIND_ROOT_PATH ) # handle the QUIETLY and REQUIRED arguments and set TCLAP_FOUND to diff --git a/cmake/Package.cmake b/cmake/Package.cmake new file mode 100644 index 00000000..d2bb20c1 --- /dev/null +++ b/cmake/Package.cmake @@ -0,0 +1,4 @@ +if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") + include("${CMAKE_CURRENT_LIST_DIR}/PackageSetup.cmake") + set(CPACK_DEBIAN_PACKAGE_DEBUG ON) +endif() diff --git a/cmake/PackageSetup.cmake b/cmake/PackageSetup.cmake new file mode 100644 index 00000000..90a5b47e --- /dev/null +++ b/cmake/PackageSetup.cmake @@ -0,0 +1,65 @@ +set(CPACK_PACKAGE_VENDOR "khronos") + +set(CPACK_DEBIAN_DESCRIPTION "Khronos OpenCL Software Development Kit") + +set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") + +set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md") + +if(NOT CPACK_PACKAGING_INSTALL_PREFIX) + set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") +endif() + +# DEB packaging configuration +set(CPACK_DEB_COMPONENT_INSTALL ON) +set(CPACK_COMPONENTS_ALL binary) + +set(CPACK_DEBIAN_PACKAGE_MAINTAINER ${CPACK_PACKAGE_VENDOR}) + +set(CPACK_DEBIAN_PACKAGE_HOMEPAGE + "https://github.com/KhronosGroup/OpenCL-SDK") + +# Version number [epoch:]upstream_version[-debian_revision] +set(CPACK_DEBIAN_PACKAGE_VERSION "${PROJECT_VERSION}") # upstream_version +set(CPACK_DEBIAN_PACKAGE_RELEASE "1") # debian_revision (because this is a + # non-native pkg) +set(PACKAGE_VERSION_REVISION "${CPACK_DEBIAN_PACKAGE_VERSION}-${CPACK_DEBIAN_PACKAGE_RELEASE}${DEBIAN_VERSION_SUFFIX}") + +# Get architecture +execute_process(COMMAND dpkg "--print-architecture" OUTPUT_VARIABLE CPACK_DEBIAN_PACKAGE_ARCHITECTURE) +string(STRIP "${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}" CPACK_DEBIAN_PACKAGE_ARCHITECTURE) + +## Package runtime component +set(CPACK_DEBIAN_PACKAGE_NAME "opencl-sdk") + +set(CPACK_DEBIAN_BINARY_PACKAGE_NAME "${CPACK_DEBIAN_PACKAGE_NAME}") + +# Package file name in deb format: +# _-_.deb +set(CPACK_DEBIAN_BINARY_FILE_NAME "${CPACK_DEBIAN_BINARY_PACKAGE_NAME}_${PACKAGE_VERSION_REVISION}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}.deb") + +# Replacements +# ToDo +# set(CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS "opencl-c-headers (>= 3.0~${PROJECT_VERSION}), opencl-clhpp-headers (>= 3.0~${PROJECT_VERSION}), khronos-opencl-loader-libopencl1 (>= 3.0~${PROJECT_VERSION}), khronos-opencl-loader-opencl-dev (>= 3.0~${PROJECT_VERSION})") +set(CPACK_DEBIAN_BINARY_PACKAGE_DEPENDS "opencl-c-headers, opencl-clhpp-headers, khronos-opencl-loader-libopencl1, khronos-opencl-loader-opencl-dev, clinfo") +set(CPACK_DEBIAN_BINARY_PACKAGE_SECTION "libdevel") + +# Package clinfo, if enabled +if(OPENCL_SDK_BUILD_CLINFO) + list(APPEND CPACK_COMPONENTS_ALL "clinfo") + set(CPACK_DEBIAN_CLINFO_PACKAGE_NAME "clinfo") + set(CPACK_DEBIAN_CLINFO_FILE_NAME "clinfo_${PACKAGE_VERSION_REVISION}_${CPACK_DEBIAN_PACKAGE_ARCHITECTURE}.deb") + set(CPACK_DEBIAN_CLINFO_DESCRIPTION +"Query OpenCL system information +OpenCL (Open Computing Language) is a multivendor open standard for +general-purpose parallel programming of heterogeneous systems that include +CPUs, GPUs and other processors. +. +This package contains a tool that queries the capabilities of the available +OpenCL drivers.") + set(CPACK_DEBIAN_CLINFO_PACKAGE_DEPENDS "libc6 (>= 2.14), khronos-opencl-loader-libopencl1 (>= 3.0~${CPACK_DEBIAN_PACKAGE_VERSION}) | libopencl1") + set(CPACK_DEBIAN_CLINFO_PACKAGE_CONFLICTS "amd-clinfo, clinfo, fglrx-updates-core") + set(CPACK_DEBIAN_CLINFO_PACKAGE_REPLACES "amd-clinfo, clinfo, fglrx-updates-core") + set(CPACK_DEBIAN_CLINFO_PACKAGE_PROVIDES "clinfo") + set(CPACK_DEBIAN_CLINFO_PACKAGE_SECTION "admin") +endif() diff --git a/docs/RELEASE.md b/docs/RELEASE.md index aa76ec2d..228ba058 100644 --- a/docs/RELEASE.md +++ b/docs/RELEASE.md @@ -44,6 +44,27 @@ git push origin vYYYY.MM.DD > > Note 2: Compatibility between packages is guaranteed manually. CI for each project fetches newest `main` and not using the same tag. Pushing tags in dep order is important to guarantee that when CI runs on pushing tags in these repos, tests are run using the correct versions of their deps. +### Uploading source packages to the Launchpad PPA + +As part of the release workflow for the sub-projects, a Debian source package is created and uploaded to the prescribed PPA. The details of the packaging must be set up in the GitHub projects of each sub-project separately. The following values must be set up prior to launching the release workflow: + +|Setting type |Setting name |Example value |Note | +|------------ |------------ |------------- |---- | +|Action variable|`DEB_MAINTAINER` |`Test User ` | | +|Action variable|`DEB_VERSION_SUFFIX`|`ppa0` | | +|Action variable|`PPA` |`KhronosGroup/OpenCL` |Has to be created on [launchpad.net](https://launchpad.net) beforehand| +|Action secret |`DEB_SIGNING_KEY` |`BEGIN PGP PRIVATE KEY BLOCK` ...|Output of `gpg --armor --export-secret-keys ` | + + +Be aware, that the automatic process of publishing of the binary Debian packages on the PPA can take hours. Moreover, since the projects depend on each other, the person creating the releases **must trigger the release workflow once the binary packages from the prerequisites are live**. The source package dependencies are the following: + +|Project |Dependencies| +|------- |------------| +|[OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers) |-| +|[OpenCL-ICD-Loader](https://github.com/KhronosGroup/OpenCL-ICD-Loader)|[OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers)| +|[OpenCL-CLHPP](https://github.com/KhronosGroup/OpenCL-CLHPP) |[OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers)| +|[OpenCL-SDK](https://github.com/KhronosGroup/OpenCL-SDK) |[OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers), [OpenCL-ICD-Loader](https://github.com/KhronosGroup/OpenCL-ICD-Loader), [OpenCL-CLHPP](https://github.com/KhronosGroup/OpenCL-CLHPP)| + ## Update submodule hashes Submodules may have moved to a different commit hash due to the previous step. The SDK wants to pick up all those changes (if it hasn't already been done). If `git status` shows, changes, push the changes. @@ -67,6 +88,14 @@ git commit -a -m "Update project version" ## Tag SDK +The automatic release pipeline that is triggered on git tags, generates a Debian source package from the SDK code and uploads it to Launchpad. The packaging details and credentials have to be set as [described before](#uploading-source-packages-to-the-launchpad-ppa). + +While the SDK repository consumes the [OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers), [OpenCL-ICD-Loader](https://github.com/KhronosGroup/OpenCL-ICD-Loader) and [OpenCL-CLHPP](https://github.com/KhronosGroup/OpenCL-CLHPP) as git submodules, the same relation between the generated source packages is defined as a package dependency. Therefore, all 3 dependencies must be available in the Launchpad PPA to be able to build the SDK source package. The maintainer must make sure that the dependencies have already become available in the prescribed PPA before attempting to push a release tag for the OpenCL SDK. + +All Debian packages are version locked, which means that the **OpenCL SDK Debian package depends on exactly the same versions of the Headers, CLHPP and ICD-Loader as itself**. The source of this version information is the main `project` statement in the CMake scripts, therefore the maintainer has to make sure that all repositories have updated the version number, as [described above](#update-project-version). + +When all dependencies are published to the PPA, the SDK can be released by the following commands: + ``` git tag vYYYY.MM.DD git push vYYYY.MM.DD diff --git a/external/OpenCL-CLHPP b/external/OpenCL-CLHPP index 0bdbbfe5..1c92cd31 160000 --- a/external/OpenCL-CLHPP +++ b/external/OpenCL-CLHPP @@ -1 +1 @@ -Subproject commit 0bdbbfe5ecda42cff50c96cc5e33527f42fcbd45 +Subproject commit 1c92cd31171b95ed01201cdb3451a555a6c43a68 diff --git a/external/OpenCL-Headers b/external/OpenCL-Headers index 8275634c..c860bb55 160000 --- a/external/OpenCL-Headers +++ b/external/OpenCL-Headers @@ -1 +1 @@ -Subproject commit 8275634cf9ec31b6484c2e6be756237cb583999d +Subproject commit c860bb551eeef9a47d56286a70cea903db3d6ed2 diff --git a/external/OpenCL-ICD-Loader b/external/OpenCL-ICD-Loader index 861b68b2..a845947e 160000 --- a/external/OpenCL-ICD-Loader +++ b/external/OpenCL-ICD-Loader @@ -1 +1 @@ -Subproject commit 861b68b290e76d08e7241608479c16431f529945 +Subproject commit a845947effa4352fa010d813aa211c7be0b29865 diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index e605c8ff..dfb336db 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -66,14 +66,17 @@ foreach(UTIL_LIB_NAME IN ITEMS Utils UtilsCpp) ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT binary ) install( DIRECTORY include/CL/Utils DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/CL + COMPONENT binary ) install( FILES "${CMAKE_CURRENT_BINARY_DIR}/OpenCL${UTIL_LIB_NAME}_Export.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/CL/Utils + COMPONENT binary ) export( EXPORT OpenCL${UTIL_LIB_NAME}Targets @@ -91,10 +94,12 @@ foreach(UTIL_LIB_NAME IN ITEMS Utils UtilsCpp) FILE OpenCL${UTIL_LIB_NAME}Targets.cmake NAMESPACE OpenCL:: DESTINATION ${config_package_location} + COMPONENT binary ) install( FILES ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATADIR}/cmake/OpenCL${UTIL_LIB_NAME}/OpenCL${UTIL_LIB_NAME}Config.cmake DESTINATION ${config_package_location} + COMPONENT binary ) unset(CMAKE_SIZEOF_VOID_P) @@ -107,6 +112,14 @@ foreach(UTIL_LIB_NAME IN ITEMS Utils UtilsCpp) install( FILES ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATADIR}/cmake/OpenCL${UTIL_LIB_NAME}/OpenCL${UTIL_LIB_NAME}ConfigVersion.cmake DESTINATION ${config_package_location} + COMPONENT binary + ) + + # For packaging the utils libraries + install( + TARGETS ${UTIL_LIB_TARGET} + DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT binary ) endforeach() diff --git a/lib/include/CL/SDK/CLI.h b/lib/include/CL/SDK/CLI.h index f85470cf..66356cbb 100644 --- a/lib/include/CL/SDK/CLI.h +++ b/lib/include/CL/SDK/CLI.h @@ -10,8 +10,6 @@ // cargs includes #include -typedef struct cag_option cag_option; - SDK_EXPORT cag_option *add_CLI_options(cag_option *opts, size_t *const num_opts, cag_option *add_opts, size_t add_num_opts); diff --git a/lib/include/CL/Utils/Event.h b/lib/include/CL/Utils/Event.h index f144e215..0cd45ca0 100644 --- a/lib/include/CL/Utils/Event.h +++ b/lib/include/CL/Utils/Event.h @@ -10,4 +10,4 @@ UTILS_EXPORT cl_ulong cl_util_get_event_duration(const cl_event event, const cl_profiling_info start, const cl_profiling_info end, - cl_int* const error); \ No newline at end of file + cl_int* const error); diff --git a/lib/src/Extensions/CMakeLists.txt b/lib/src/Extensions/CMakeLists.txt index 4bb34d6c..e05d4e74 100644 --- a/lib/src/Extensions/CMakeLists.txt +++ b/lib/src/Extensions/CMakeLists.txt @@ -4,7 +4,7 @@ # Note: cmake 3.7 is needed to use OpenCL::OpenCL. # Older versions may work by explicitly specifying OpenCL_INCLUDE_DIRS and OpenCL_LIBRARIES. -cmake_minimum_required(VERSION 3.7 FATAL_ERROR) +cmake_minimum_required(VERSION 3.16 FATAL_ERROR) set_property(GLOBAL PROPERTY USE_FOLDERS ON) set(CMAKE_CXX_STANDARD 11) @@ -120,6 +120,7 @@ if (OPENCL_EXTENSION_LOADER_INSTALL) RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT binary ) export(EXPORT OpenCLExtensionLoaderTargets @@ -130,6 +131,7 @@ if (OPENCL_EXTENSION_LOADER_INSTALL) FILE OpenCLExtensionLoaderTargets.cmake NAMESPACE OpenCL:: DESTINATION ${OPENCL_EXTENSION_LOADER_CONFIG_PATH} + COMPONENT binary ) file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/OpenCLExtensionLoader/OpenCLExtensionLoaderConfig.cmake @@ -137,6 +139,7 @@ if (OPENCL_EXTENSION_LOADER_INSTALL) ) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/OpenCLExtensionLoader/OpenCLExtensionLoaderConfig.cmake DESTINATION ${OPENCL_EXTENSION_LOADER_CONFIG_PATH} + COMPONENT binary ) write_basic_package_version_file( @@ -146,6 +149,14 @@ if (OPENCL_EXTENSION_LOADER_INSTALL) ) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/OpenCLExtensionLoader/OpenCLExtensionLoaderConfigVersion.cmake DESTINATION ${OPENCL_EXTENSION_LOADER_CONFIG_PATH} + COMPONENT binary + ) + + # For packaging the extensions library + install( + TARGETS OpenCLExt + DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT binary ) endif() diff --git a/lib/src/Extensions/scripts/openclext.cpp.mako b/lib/src/Extensions/scripts/openclext.cpp.mako index af887c31..0cf5af02 100644 --- a/lib/src/Extensions/scripts/openclext.cpp.mako +++ b/lib/src/Extensions/scripts/openclext.cpp.mako @@ -181,11 +181,6 @@ def getCParameterStrings(params): #include -static inline cl_platform_id _get_platform(cl_platform_id platform) -{ - return platform; -} - static inline cl_platform_id _get_platform(cl_device_id device) { if (device == nullptr) return nullptr; diff --git a/lib/src/Extensions/src/openclext.cpp b/lib/src/Extensions/src/openclext.cpp index 384e91ed..c4f3afd5 100644 --- a/lib/src/Extensions/src/openclext.cpp +++ b/lib/src/Extensions/src/openclext.cpp @@ -64,11 +64,6 @@ #include -static inline cl_platform_id _get_platform(cl_platform_id platform) -{ - return platform; -} - static inline cl_platform_id _get_platform(cl_device_id device) { if (device == nullptr) return nullptr; diff --git a/lib/src/SDK/CLI.c b/lib/src/SDK/CLI.c index ac110370..4dc7c6a1 100644 --- a/lib/src/SDK/CLI.c +++ b/lib/src/SDK/CLI.c @@ -108,8 +108,8 @@ SDK_EXPORT ParseState parse_SingleDeviceOptions( switch (identifier) { - case 'p': IF_ERR(dev_opts->triplet.plat_index = strtoul(value, NULL, 0)) - case 'd': IF_ERR(dev_opts->triplet.dev_index = strtoul(value, NULL, 0)) + case 'p': IF_ERR(dev_opts->triplet.plat_index = (cl_uint)strtoul(value, NULL, 0)) + case 'd': IF_ERR(dev_opts->triplet.dev_index = (cl_uint)strtoul(value, NULL, 0)) case 't': IF_ERR(dev_opts->triplet.dev_type = get_dev_type(value)) } return NotParsed; diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 0834853a..b3a92a4b 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -33,11 +33,12 @@ CHECK_LIBRARY_EXISTS(m sin "" HAVE_LIB_M) # KERNELS ... # optional, specifies kernel files for the sample # INCLUDES ... # optional, specifies additional include directories for the sample # LIBS ... # optional, specifies additional libraries for the sample +# DEFINITIONS # optional, specifies additional compile definitions for the sample # ) macro(add_sample) set(options TEST) set(one_value_args TARGET VERSION CATEGORY) - set(multi_value_args SOURCES KERNELS SHADERS INCLUDES LIBS) + set(multi_value_args SOURCES KERNELS SHADERS INCLUDES LIBS DEFINITIONS) cmake_parse_arguments(OPENCL_SAMPLE "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN} @@ -75,6 +76,7 @@ macro(add_sample) CL_HPP_MINIMUM_OPENCL_VERSION=${OPENCL_SAMPLE_VERSION} CL_HPP_ENABLE_EXCEPTIONS $<$:_CRT_SECURE_NO_WARNINGS> # TODO: remove + ${OPENCL_SAMPLE_DEFINITIONS} ) set_target_properties(${OPENCL_SAMPLE_TARGET} @@ -130,17 +132,18 @@ macro(add_sample) ) foreach(CONFIG ${OPENCL_SAMPLE_CONFIGS}) - install(TARGETS ${OPENCL_SAMPLE_TARGET} CONFIGURATIONS ${CONFIG} DESTINATION ${CMAKE_INSTALL_BINDIR}) - install(FILES ${OPENCL_SAMPLE_KERNELS} CONFIGURATIONS ${CONFIG} DESTINATION ${CMAKE_INSTALL_BINDIR}) - install(FILES ${OPENCL_SAMPLE_SHADERS} CONFIGURATIONS ${CONFIG} DESTINATION ${CMAKE_INSTALL_BINDIR}) + install(TARGETS ${OPENCL_SAMPLE_TARGET} CONFIGURATIONS ${CONFIG} DESTINATION ${CMAKE_INSTALL_BINDIR}/${CONFIG}) + install(FILES ${OPENCL_SAMPLE_KERNELS} CONFIGURATIONS ${CONFIG} DESTINATION ${CMAKE_INSTALL_BINDIR}/${CONFIG}) + install(FILES ${OPENCL_SAMPLE_SHADERS} CONFIGURATIONS ${CONFIG} DESTINATION ${CMAKE_INSTALL_BINDIR}/${CONFIG}) + if(OPENCL_SDK_TEST_SAMPLES AND OPENCL_SAMPLE_TEST) + add_test( + NAME "${OPENCL_SAMPLE_TARGET}_${CONFIG}" + COMMAND ${OPENCL_SAMPLE_TARGET} + CONFIGURATIONS ${CONFIG} + WORKING_DIRECTORY "$" + ) + endif() endforeach() - if(OPENCL_SDK_TEST_SAMPLES AND OPENCL_SAMPLE_TEST) - add_test( - NAME ${OPENCL_SAMPLE_TARGET} - COMMAND ${OPENCL_SAMPLE_TARGET} - WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} - ) - endif() endmacro() diff --git a/samples/core/binaries/main.c b/samples/core/binaries/main.c index b9d5f5bb..c1a09abc 100644 --- a/samples/core/binaries/main.c +++ b/samples/core/binaries/main.c @@ -187,7 +187,7 @@ int main(int argc, char *argv[]) if (!diag_opts.quiet) cl_util_print_device_info(device); /// Try to read binary - program = cl_util_read_binaries(context, &device, 1, "Collatz", &error); + program = cl_util_read_binaries(context, &device, 1, "Collatz-c", &error); if (error != CL_SUCCESS) { // if binary not present, compile and save @@ -198,7 +198,7 @@ int main(int argc, char *argv[]) OCLERROR_PAR(kernel = cl_util_read_exe_relative_text_file( "Collatz.cl", &program_size, &error), error, cont); - printf("OpenCL file red... "); + printf("OpenCL file read... "); OCLERROR_PAR(program = clCreateProgramWithSource(context, 1, (const char **)&kernel, @@ -207,16 +207,16 @@ int main(int argc, char *argv[]) OCLERROR_RET(cl_util_build_program(program, device, options), error, prgs); - OCLERROR_RET(cl_util_write_binaries(program, "Collatz"), error, prgs); + OCLERROR_RET(cl_util_write_binaries(program, "Collatz-c"), error, prgs); printf("Binary file written.\n\n"); prgs: - OCLERROR_RET(clReleaseProgram(program), end_error, que); + OCLERROR_RET(clReleaseProgram(program), end_error, cont); ker: free(kernel); OCLERROR_PAR(program = cl_util_read_binaries(context, &device, 1, - "Collatz", &error), + "Collatz-c", &error), error, cont); } diff --git a/samples/core/binaries/main.cpp b/samples/core/binaries/main.cpp index 035cfab9..32776c7d 100644 --- a/samples/core/binaries/main.cpp +++ b/samples/core/binaries/main.cpp @@ -93,7 +93,8 @@ int main(int argc, char* argv[]) try { /// Try to read binary - binaries = cl::util::read_binary_files(devices, "Collatz", &error); + binaries = + cl::util::read_binary_files(devices, "Collatz-cpp", &error); } catch (cl::util::Error& e) { // if binary not present, compile and save @@ -106,7 +107,7 @@ int main(int argc, char* argv[]) program.build(devices.at(0)); binaries = program.getInfo(&error); - cl::util::write_binaries(binaries, devices, "Collatz"); + cl::util::write_binaries(binaries, devices, "Collatz-cpp"); } // if the binary is already present - calculate @@ -208,7 +209,7 @@ int main(int argc, char* argv[]) std::exit(e.err()); } catch (cl::Error& e) { - std::cerr << "OpenCL rutnime error: " << e.what() << std::endl; + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; std::exit(e.err()); } catch (std::exception& e) { diff --git a/samples/core/blur/blur.cpp b/samples/core/blur/blur.cpp index 06f6dd52..e94bdbdf 100644 --- a/samples/core/blur/blur.cpp +++ b/samples/core/blur/blur.cpp @@ -19,10 +19,15 @@ #include #include -// STL includes -#include +// standard includes #include +#include #include +#include +#include +#include + +// C header includes #include // TCLAP includes @@ -40,7 +45,7 @@ template <> auto cl::sdk::parse() std::make_shared>( "i", "in", "Input image file", false, "", "name"), std::make_shared>( - "o", "out", "Output image file", false, "out.png", "name"), + "o", "out", "Output image file", false, "blurcpp_out.png", "name"), std::make_shared>("s", "size", "Size of blur kernel", false, (float)1.0, "positive float"), @@ -439,14 +444,18 @@ void BlurCppExample::read_input_image() /// If file not provided in command line, create a default one. if (blur_opts.in.empty()) { - std::string fname("andrew_svk_7oJ4D_ewB7c_unsplash.png"); + const int random_val = std::random_device{}(); + std::stringstream fname; + fname << "andrew_svk_7oJ4D_ewB7c_unsplash_" << std::hex << random_val + << ".png"; - std::cout << "No file given, use standard image " << fname << std::endl; + std::cout << "No file given, use standard image " << fname.str() + << std::endl; const char* fcont = (const char*)andrew_svk_7oJ4D_ewB7c_unsplash_png; const size_t fsize = andrew_svk_7oJ4D_ewB7c_unsplash_png_size; - std::fstream f(fname, std::ios::out | std::ios::binary); + std::fstream f(fname.str(), std::ios::out | std::ios::binary); if (!f.is_open()) { throw std::runtime_error{ std::string{ @@ -456,7 +465,7 @@ void BlurCppExample::read_input_image() f.write(fcont, fsize); f.close(); - blur_opts.in = fname; + blur_opts.in = fname.str(); } input_image = cl::sdk::read_image(blur_opts.in.c_str(), nullptr); @@ -472,12 +481,17 @@ void BlurCppExample::prepare_output_image() output_image.height = input_image.height; output_image.pixel_size = input_image.pixel_size; output_image.pixels.clear(); - output_image.pixels.reserve(sizeof(unsigned char) * output_image.width - * output_image.height - * output_image.pixel_size); + output_image.pixels.resize(output_image.width * output_image.height + * output_image.pixel_size); +} + +bool opencl_version_contains(const cl::string& dev_version, + const cl::string& version_fragment) +{ + return dev_version.find(version_fragment) != cl::string::npos; } -std::tuple BlurCppExample::query_capabilities() +std::tuple BlurCppExample::query_capabilities() { // 1) query image support if (!device.getInfo()) @@ -499,8 +513,28 @@ std::tuple BlurCppExample::query_capabilities() bool use_subgroup_exchange_relative = cl::util::supports_extension( device, "cl_khr_subgroup_shuffle_relative"); + // 5) Query OpenCL version to compile for. + // If no -cl-std option is specified then the highest 1.x version + // supported by each device is used to compile the program. Therefore, + // it's only necessary to add the -cl-std option for 2.0 and 3.0 OpenCL + // versions. + const std::string dev_version = device.getInfo(); + cl::string compiler_options; + constexpr int max_major_version = 3; + for (auto i = 2; i <= max_major_version; ++i) + { + std::string version_str = std::to_string(i) + "."; // "i." + std::string compiler_opt_str = + "-cl-std=CL" + std::to_string(i) + ".0 "; // -cl-std=CLi.0 + + compiler_options += + cl::string{ opencl_version_contains(dev_version, version_str) + ? compiler_opt_str + : "" }; + } + return std::make_tuple(use_local_mem, use_subgroup_exchange, - use_subgroup_exchange_relative); + use_subgroup_exchange_relative, compiler_options); } void BlurCppExample::create_image_buffers() @@ -696,10 +730,10 @@ cl::ImageFormat BlurCppExample::set_image_format() std::cout << "Converting picture into supported format... "; const size_t pixels = input_image.width * input_image.height; - const size_t new_size = sizeof(unsigned char) * pixels * 4; + const size_t new_size = pixels * 4; - input_image.pixels.reserve(new_size); - output_image.pixels.reserve(new_size); + input_image.pixels.resize(new_size); + output_image.pixels.resize(new_size); // change picture const size_t pixel_size = input_image.pixel_size; diff --git a/samples/core/blur/blur.hpp b/samples/core/blur/blur.hpp index 160dbed9..b6ff3d27 100644 --- a/samples/core/blur/blur.hpp +++ b/samples/core/blur/blur.hpp @@ -7,11 +7,12 @@ // STL includes #include +#include class BlurCppExample { public: BlurCppExample(int argc, char* argv[]) - : gauss_kernel(nullptr), origin({ 0, 0 }) + : origin({ 0, 0 }), gauss_kernel(nullptr) { parse_command_line(argc, argv); } @@ -39,7 +40,7 @@ class BlurCppExample { void prepare_output_image(); // Query device and runtime capabilities - std::tuple query_capabilities(); + std::tuple query_capabilities(); void create_image_buffers(); diff --git a/samples/core/blur/main.c b/samples/core/blur/main.c index d29a3a41..c6fe07b8 100644 --- a/samples/core/blur/main.c +++ b/samples/core/blur/main.c @@ -920,9 +920,16 @@ cl_int dual_pass_subgroup_exchange_kernel_blur(state *const s, cl_int size, return error; } +cl_int opencl_version_contains(const char *dev_version, + const char *version_fragment) +{ + char *found_version = strstr(dev_version, version_fragment); + return (found_version != NULL); +} int main(int argc, char *argv[]) { + srand((unsigned int)time(NULL)); cl_int error = CL_SUCCESS, end_error = CL_SUCCESS; state s; cl_platform_id platform; @@ -934,7 +941,7 @@ int main(int argc, char *argv[]) .triplet = { 0, 0, CL_DEVICE_TYPE_ALL } }; struct options_Blur blur_opts = { - .size = 1, .op = "box", .in = NULL, .out = "out.png" + .size = 1, .op = "box", .in = NULL, .out = "blur_out.png" }; OCLERROR_RET(parse_options(argc, argv, &diag_opts, &dev_opts, &blur_opts), @@ -969,9 +976,11 @@ int main(int argc, char *argv[]) if (!diag_opts.quiet) cl_util_print_device_info(s.device); /// Read input image and prepare output image - const char fname[] = "andrew_svk_7oJ4D_ewB7c_unsplash.png"; + char fname[FILENAME_MAX]; + memset(fname, 0, FILENAME_MAX); if (!blur_opts.in) { + sprintf(fname, "andrew_svk_7oJ4D_ewB7c_unsplash_%x.png", rand()); printf("No file given, use standard image %s\n", fname); const unsigned char *fcont = andrew_svk_7oJ4D_ewB7c_unsplash_png; const size_t fsize = andrew_svk_7oJ4D_ewB7c_unsplash_png_size; @@ -1048,6 +1057,25 @@ int main(int argc, char *argv[]) free(name); } + // 5) Query OpenCL version to compile for. + // If no -cl-std option is specified then the highest 1.x version + // supported by each device is used to compile the program. Therefore, + // it's only necessary to add the -cl-std option for 2.0 and 3.0 OpenCL + // versions. + char dev_version[64]; + OCLERROR_RET(clGetDeviceInfo(s.device, CL_DEVICE_VERSION, + sizeof(dev_version), &dev_version, NULL), + error, end); + char compiler_options[1024] = ""; + if (opencl_version_contains(dev_version, "3.")) + { + strcat(compiler_options, "-cl-std=CL3.0 "); + } + else if (opencl_version_contains(dev_version, "2.")) + { + strcat(compiler_options, "-cl-std=CL2.0 "); + } + /// Create image buffers const cl_image_desc desc = { .image_type = CL_MEM_OBJECT_IMAGE2D, .image_width = s.input_image.width, @@ -1118,8 +1146,8 @@ int main(int argc, char *argv[]) printf("Dual-pass subgroup relative exchange blur\n"); kernel_op[0] = '\0'; + strcat(kernel_op, compiler_options); strcat(kernel_op, "-D USE_SUBGROUP_EXCHANGE_RELATIVE "); - OCLERROR_RET(dual_pass_subgroup_exchange_box_blur( &s, (cl_int)blur_opts.size), error, prg); @@ -1129,6 +1157,7 @@ int main(int argc, char *argv[]) printf("Dual-pass subgroup exchange blur\n"); kernel_op[0] = '\0'; + strcat(kernel_op, compiler_options); strcat(kernel_op, "-D USE_SUBGROUP_EXCHANGE "); OCLERROR_RET(dual_pass_subgroup_exchange_box_blur( @@ -1172,6 +1201,7 @@ int main(int argc, char *argv[]) printf("Dual-pass subgroup relative exchange Gaussian blur\n"); kernel_op[0] = '\0'; + strcat(kernel_op, compiler_options); strcat(kernel_op, "-D USE_SUBGROUP_EXCHANGE_RELATIVE "); OCLERROR_RET(dual_pass_subgroup_exchange_kernel_blur(&s, gauss_size, @@ -1183,6 +1213,7 @@ int main(int argc, char *argv[]) printf("Dual-pass subgroup exchange Gaussian blur\n"); kernel_op[0] = '\0'; + strcat(kernel_op, compiler_options); strcat(kernel_op, "-D USE_SUBGROUP_EXCHANGE "); OCLERROR_RET(dual_pass_subgroup_exchange_kernel_blur(&s, gauss_size, diff --git a/samples/core/blur/main.cpp b/samples/core/blur/main.cpp index 1face850..2ded6008 100644 --- a/samples/core/blur/main.cpp +++ b/samples/core/blur/main.cpp @@ -46,8 +46,10 @@ int main(int argc, char* argv[]) // Query device and runtime capabilities bool use_local_mem, use_subgroup_exchange, use_subgroup_exchange_relative; + std::string compiler_options; std::tie(use_local_mem, use_subgroup_exchange, - use_subgroup_exchange_relative) = blur.query_capabilities(); + use_subgroup_exchange_relative, compiler_options) = + blur.query_capabilities(); // Create image buffers used for operation. In this example input, // output and temporary image buffers are used. Temporary buffer is used @@ -59,7 +61,7 @@ int main(int argc, char* argv[]) // Create kernel and build program for selected device and blur.cl file // without any options. If this function fails, ensure that the blur.cl // file is available in place of execution. - blur.build_program(""); + blur.build_program(compiler_options); // The box blur operation will be performed if you pass "-b box" or // don't select any option. @@ -89,8 +91,8 @@ int main(int argc, char* argv[]) { std::cout << "Dual-pass subgroup relative exchange blur" << std::endl; - - blur.build_program("-D USE_SUBGROUP_EXCHANGE_RELATIVE "); + blur.build_program(compiler_options + + "-D USE_SUBGROUP_EXCHANGE_RELATIVE "); blur.dual_pass_subgroup_exchange_box_blur(); } @@ -99,14 +101,14 @@ int main(int argc, char* argv[]) if (use_subgroup_exchange) { std::cout << "Dual-pass subgroup exchange blur" << std::endl; - - blur.build_program("-D USE_SUBGROUP_EXCHANGE "); + blur.build_program(compiler_options + + "-D USE_SUBGROUP_EXCHANGE "); blur.dual_pass_subgroup_exchange_box_blur(); } } // Box blur // Build default program with no kernel arguments. - blur.build_program(""); + blur.build_program(compiler_options); // The gauss blur operation is performed when the "-b gauss" option or // no option is passed. The following examples use a manually created @@ -137,8 +139,8 @@ int main(int argc, char* argv[]) std::cout << "Dual-pass subgroup relative exchange Gaussian blur" << std::endl; - - blur.build_program("-D USE_SUBGROUP_EXCHANGE_RELATIVE "); + blur.build_program(compiler_options + + "-D USE_SUBGROUP_EXCHANGE_RELATIVE "); blur.dual_pass_subgroup_exchange_kernel_blur(); } @@ -148,8 +150,8 @@ int main(int argc, char* argv[]) { std::cout << "Dual-pass subgroup exchange Gaussian blur" << std::endl; - - blur.build_program("-D USE_SUBGROUP_EXCHANGE "); + blur.build_program(compiler_options + + "-D USE_SUBGROUP_EXCHANGE "); blur.dual_pass_subgroup_exchange_kernel_blur(); } } // Gaussian blur diff --git a/samples/core/copybuffer/main.cpp b/samples/core/copybuffer/main.cpp index 5429afa6..9895d44c 100644 --- a/samples/core/copybuffer/main.cpp +++ b/samples/core/copybuffer/main.cpp @@ -102,7 +102,8 @@ int main(int argc, char** argv) ++i; if (i < argc) { - deviceIndex = strtoul(argv[i], NULL, 10); + deviceIndex = + static_cast(strtoul(argv[i], NULL, 10)); } } else if (!strcmp(argv[i], "-p")) @@ -110,7 +111,8 @@ int main(int argc, char** argv) ++i; if (i < argc) { - platformIndex = strtoul(argv[i], NULL, 10); + platformIndex = + static_cast(strtoul(argv[i], NULL, 10)); } } else diff --git a/samples/core/copybufferkernel/main.cpp b/samples/core/copybufferkernel/main.cpp index 1b14cfc2..20e31dad 100644 --- a/samples/core/copybufferkernel/main.cpp +++ b/samples/core/copybufferkernel/main.cpp @@ -113,7 +113,8 @@ int main(int argc, char** argv) ++i; if (i < argc) { - deviceIndex = strtoul(argv[i], NULL, 10); + deviceIndex = + static_cast(strtoul(argv[i], NULL, 10)); } } else if (!strcmp(argv[i], "-p")) @@ -121,7 +122,8 @@ int main(int argc, char** argv) ++i; if (i < argc) { - platformIndex = strtoul(argv[i], NULL, 10); + platformIndex = + static_cast(strtoul(argv[i], NULL, 10)); } } else diff --git a/samples/core/enumopencl/main.c b/samples/core/enumopencl/main.c index 697c2e2d..c2676c26 100644 --- a/samples/core/enumopencl/main.c +++ b/samples/core/enumopencl/main.c @@ -195,7 +195,7 @@ static cl_int PrintDeviceInfoSummary(cl_device_id* devices, cl_uint numDevices) return errorCode; } -int main(int argc, char** argv) +int main(void) { cl_uint numPlatforms = 0; clGetPlatformIDs(0, NULL, &numPlatforms); diff --git a/samples/core/reduce/main.c b/samples/core/reduce/main.c index a9c8b1fa..b4acbd31 100644 --- a/samples/core/reduce/main.c +++ b/samples/core/reduce/main.c @@ -232,13 +232,13 @@ cl_int accumulate(cl_int *arr, size_t len, cl_int zero_elem, cl_ulong new_size(const cl_ulong actual, const cl_ulong factor) { return actual / factor + (actual % factor == 0 ? 0 : 1); -}; +} // NOTE: because one work-group produces one output // new_size == number_of_work_groups size_t global(const size_t actual, const cl_ulong factor, const size_t wgs) { return new_size(actual, factor) * wgs; -}; +} // Random number generator state pcg32_random_t rng; diff --git a/samples/core/reduce/main.cpp b/samples/core/reduce/main.cpp index 053a2286..088ca469 100644 --- a/samples/core/reduce/main.cpp +++ b/samples/core/reduce/main.cpp @@ -306,7 +306,7 @@ int main(int argc, char* argv[]) std::exit(e.err()); } catch (cl::Error& e) { - std::cerr << "OpenCL rutnime error: " << e.what() << std::endl; + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; std::exit(e.err()); } catch (std::exception& e) { diff --git a/samples/core/saxpy/main.cpp b/samples/core/saxpy/main.cpp index c9f9b188..1e09a4be 100644 --- a/samples/core/saxpy/main.cpp +++ b/samples/core/saxpy/main.cpp @@ -155,7 +155,7 @@ int main(int argc, char* argv[]) std::exit(e.err()); } catch (cl::Error& e) { - std::cerr << "OpenCL rutnime error: " << e.what() << std::endl; + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; std::exit(e.err()); } catch (std::exception& e) { diff --git a/samples/extensions/khr/CMakeLists.txt b/samples/extensions/khr/CMakeLists.txt index 82d4426f..efc6d747 100644 --- a/samples/extensions/khr/CMakeLists.txt +++ b/samples/extensions/khr/CMakeLists.txt @@ -17,3 +17,6 @@ if(OPENCL_SDK_BUILD_OPENGL_SAMPLES) add_subdirectory(conway) add_subdirectory(nbody) endif() +if(OPENCL_SDK_BUILD_VULKAN_SAMPLES) + add_subdirectory(externalmemory) +endif() diff --git a/samples/extensions/khr/conway/main.cpp b/samples/extensions/khr/conway/main.cpp index 8a3f3c96..f3c5cb15 100644 --- a/samples/extensions/khr/conway/main.cpp +++ b/samples/extensions/khr/conway/main.cpp @@ -46,17 +46,13 @@ class Conway : public cl::sdk::InteropWindow { explicit Conway(int width, int height, bool fullscreen, cl_uint platform_id = 0, cl_uint device_id = 0, cl_bitfield device_type = CL_DEVICE_TYPE_DEFAULT) - : InteropWindow{ sf::VideoMode(width, height), - "Conway's Game of Life", - fullscreen ? sf::Style::Fullscreen - : sf::Style::Default, - sf::ContextSettings{ - 0, 0, 0, // Depth, Stencil, AA - 3, 3, // OpenGL version - sf::ContextSettings::Attribute::Core }, - platform_id, - device_id, - device_type }, + : InteropWindow( + sf::VideoMode(width, height), "Conway's Game of Life", + fullscreen ? sf::Style::Fullscreen : sf::Style::Default, + sf::ContextSettings{ 0, 0, 0, // Depth, Stencil, AA + 3, 3, // OpenGL version + sf::ContextSettings::Attribute::Core }, + platform_id, device_id, device_type), animating(true) {} @@ -306,7 +302,7 @@ void Conway::updateScene() conway( cl::EnqueueArgs{ queue, cl::NDRange{ getSize().x, getSize().y } }, cl_images.front, cl_images.back, - cl_float2{ 1.f / getSize().x, 1.f / getSize().y }); + cl_float2{ { 1.f / getSize().x, 1.f / getSize().y } }); queue.enqueueReleaseGLObjects(&interop_resources, nullptr, &release); @@ -391,7 +387,7 @@ int main(int argc, char* argv[]) std::exit(e.err()); } catch (cl::Error& e) { - std::cerr << "OpenCL rutnime error: " << e.what() << std::endl; + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; std::exit(e.err()); } catch (std::exception& e) { diff --git a/samples/extensions/khr/externalmemory/CMakeLists.txt b/samples/extensions/khr/externalmemory/CMakeLists.txt new file mode 100644 index 00000000..78ea8a8c --- /dev/null +++ b/samples/extensions/khr/externalmemory/CMakeLists.txt @@ -0,0 +1,31 @@ +# Copyright (c) 2021 The Khronos Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_sample( + TEST + TARGET externalmemory + VERSION 300 + SOURCES main.c + KERNELS external_saxpy.cl + LIBS Vulkan::Vulkan + DEFINITIONS $<$:VK_USE_PLATFORM_WIN32_KHR>) + +add_sample( + TEST + TARGET externalmemorycpp + VERSION 300 + SOURCES main.cpp + KERNELS external_saxpy.cl + LIBS Vulkan::Vulkan + DEFINITIONS $<$:VK_USE_PLATFORM_WIN32_KHR>) diff --git a/samples/extensions/khr/externalmemory/README.md b/samples/extensions/khr/externalmemory/README.md new file mode 100644 index 00000000..78ca63f6 --- /dev/null +++ b/samples/extensions/khr/externalmemory/README.md @@ -0,0 +1,257 @@ +# External Memory Sample + +## Sample purpose +External devices resources can be shared across GPU APIs. This can specially come in handy when developing graphical applications, as usually we have specialized APIs for graphics (like OpenGL or the lower-level-API Vulkan) that are used for rendering and the more general APIs (like OpenCL, SYCL, etc). This sample showcases an OpenCL program that interacts with the Vulkan API by sharing buffers. For one that actually does rendering, the [open_cl_interop](https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/extensions/open_cl_interop) sample should be consulted. + +## Key APIs and Concepts +### Kernel logic +The kernel used in this sample is a saxpy, i.e. performs the vector operation $a*x+y$ where $x$ and $y$ are the input vectors and $a$ is a scalar. This simple kernel was chosen because the main purpose of the example is to showcase the buffer sharing between the OpenCL and Vulkan APIs, rather than showing off some complex kernel implementation. + +### Create Vulkan instance with the necessary extensions enabled +The Vulkan function `vkCreateInstance` creates a new Vulkan instance (object gathering the application's state), which later can be used to query the physical devices available on the system for our program. When calling to this function, a `VkInstanceCreateInfo` object must be passed in order to tell the Vulkan API some characteristics of the application. In this sample, one of the main pieces of information passed to the named function is a list of Vulkan instance extensions to be enabled: +- `VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME` for exporting non-Vulkan handles from Vulkan buffers. +- `VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME` for also being able to query the properties of physical devices (needed for obtaining the devices' UUIDs). + +### Find an OpenCL device Vulkan-compatible +In the context of a given OpenCL program, for a device to be compatible with the Vulkan API there are three main requirements: +- It has to be recognized by Vulkan as a physical device, that is, Vulkan must report the existence of a physical device with the same UUID than the selected OpenCL device's. In Vulkan, with `vkGetPhysicalDeviceProperties2` we can get the properties of a physical device, among which is included the `deviceUUID` attribute storing the UUID of the corresponding device. For OpenCL, we can query the device's UUID by calling `clGetDeviceInfo` (or the C++ wrapper `cl::Device::getInfo<>()`) with the `CL_DEVICE_UUID_KHR` value as `cl_device_info` parameter. + - Beware the query of the UUID in OpenCL/Vulkan cannot be done without the device supporting the `cl_khr_device_uuid`/`VK_KHR_get_physical_device_properties2`. +- It must support the Vulkan device extensions needed for the program at hand. In this occasion, we need the Vulkan device to support exporting non-Vulkan handles from Vulkan memory objects (e.g. buffers). The `vkEnumerateDeviceExtensionProperties` function is used for querying the Vulkan device extensions supported by a given physical device. +- It also needs to support the Khronos extension `cl_khr_external_memory_opaque_fd` for Linux systems or `cl_khr_external_memory_win32` for Windows. With the C API, The function `clGetDeviceInfo` called with the parameter `CL_DEVICE_EXTENSIONS` provides information about whether this extension is supported by the OpenCL device. The C++ API (Utils library) provides the function `cl::util::supports_extension`, with which this check can be done easier. + + _Note: The `cl_khr_external_memory` extension requires OpenCL 3.0, which we make sure to check that is indeed supported on the device before compiling the OpenCL kernel._ + +Once a suitable Vulkan physical device (and its correspondent OpenCL device) has been found, we can create a Vulkan device object from it with `vkCreateDevice`. We must set the `ppEnabledExtensionNames` attribute of the `VkDeviceCreateInfo` passed to the said function with the names of the required Vulkan device extensions (that we already checked the device supports) in order for them to be enabled on the device. + +### Create Vulkan buffers for external sharing +When creating the Vulkan buffer objects for our application, we must make explicit that those buffers are going to be shared with an external API. The way of doing this can be summarized into the following steps: +- Before starting to allocate Vulkan memory objects, we need to ensure that the external memory handle type needed for importing Vulkan memory objects is supported by the device, both in OpenCL and in Vulkan APIs. +The mapping between Vulkan and OpenCL handle types is as follows: + + | Vulkan external memory handle type | OpenCL external memory handle type | + | ------------------------------------------------------------------ | ------------------------------------------------------------- | + | `VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR` | `CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR` | + | `VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR` | `CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR` | + | `VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR` | `CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_BIT_KHR` | + + The first row contains the handle types used for Linux, while for Windows platforms the handle types used are either the ones from the second or third row. + + To check whether the OpenCL device supports the memory handle we use `clGetDeviceInfo` with the `CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR` value as `cl_device_info` parameter in order to get a list of supported external memory handle types. + + For Vulkan, we can request a `VkExternalBufferProperties` object containing this information by calling to `vkGetPhysicalDeviceExternalBufferProperties`. + +- We now create our Vulkan buffer objects. We first initialize a `VkExternalMemoryBufferCreateInfo` structure with the necessary information for the buffers bounded to the exported memory. It is **mandatory** when creating a Vulkan buffer that will be bound to exported/imported memory to pass a **non-null** value for the **`handleTypes`** field of this info structure. A pointer to this object is then added as the `pNext` field of a `VkBufferCreateInfo` structure, which contains the information for creating Vulkan buffers (that are not necessarily bounded to external memory). We finally create our buffers by calling `vkCreateBuffer`. + +- The next step is to allocate device memory. This is done with the function `vkAllocateMemory`, which needs a `VkMemoryAllocateInfo` parameter. The key information to set up when allocating external memory is the `pNext` field, pointing to a `VkExportMemoryAllocateInfo` structure which `handleTypes` field specifies the handle types that may be exported. + +- After allocating the device memory, it is only left to bind it to the buffer objects with `vkBindBufferMemory` and to map the latter into the application address space with `vkMapMemory`. If the buffer objects are to be mapped in their entirety, we can use `VK_WHOLE_SIZE` as the `size` parameter of `vkMapMemory`. After mapping the buffer objects we obtain host-accessible pointers to the beginning of the mapped ranges and we can just copy the contents of the host arrays to those ranges. + +### Initialize OpenCL buffers from external API +The key point when initializing OpenCL buffers from external memory is that we need a file descriptor associated to this external memory in order to access it from the OpenCL API. In the Vulkan API we can get such file descriptor by making use of the function `vkGetMemoryFdKHR` provided by the `VK_KHR_external_memory_fd` extension. + +Being provided by an extension, we need to obtain a function pointer to it by calling to `vkGetDeviceProcAddr`. We can then call `vkGetMemoryFdKHR` with a `VkMemoryGetFdInfoKHR` parameter containing the information about the memory range for which we want to obtain a file descriptor: + - `memory` field containing the pointer to the said range + - `handleType` field with the same Vulkan external memory handle type used in the `VkExportMemoryAllocateInfo` structure when memory was allocated. + +Once we have the file descriptor, we can initialize an array of `cl_mem_properties` with the following entries: +- The OpenCL external memory handle type to use. +- The file descriptor previously obtained for the Vulkan memory range. +- A list of devices to which these properties apply. This list must start with an entry containing the macro `CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR`, followed by as many entries as devices in the list containing the corresponding `cl_device_id` objects. The list must end with an entry containing the macro `CL_DEVICE_HANDLE_LIST_END_KHR`. +- A $0$ indicating the end of the array. + +_Note: With the C++ API we can obtain the `cl_device_id` object from a `cl::Device device` wrapper by using the `()` operator._ + +This array of properties is then passed to `clCreateBufferWithProperties` (or to the C++ constructor of `cl::Buffer`). When creating OpenCL buffer objects from external memory there are a couple of restrictions in the parameters allowed for `clCreateBufferWithProperties`/`cl::Buffer::Buffer()`, namely: +- The `flags` parameter used to specify usage information for the buffer must not include `CL_MEM_USE_HOST_PTR`, `CL_MEM_ALLOC_HOST_PTR`, or `CL_MEM_COPY_HOST_PTR`. +- The `host_ptr` argument must be null. + +From this point on the OpenCL API functions are called as usual. + +## Application flow +### Overview +1. Parse user options. +2. Initialize Vulkan instance. +3. Find an OpenCL Vulkan-compatible device. +4. Create a Vulkan device object from the physical device selected enabling the required extensions on it. +5. Check that the OpenCL device supports the necessary Khronos extensions. +6. Create Vulkan's buffer objects for sharing them with an external API. +7. Query the requirements for memory to be exportable. Allocate memory, bind buffers to memory and map the former to the Vulkan address space. Copy input from host to Vulkan memory objects. +8. Query the file descriptors correspondent to Vulkan's memory ranges mapped and initialize OpenCL buffers from them. +9. Enqueue kernel call to saxpy. +10. Fetch and validate result. +11. Free resources. + +## Used API surface +### C +```c +CL_BLOCKING +CL_CONTEXT_PLATFORM +CL_DEVICE_EXTENSIONS +CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR +CL_DEVICE_HANDLE_LIST_KHR +CL_DEVICE_HANDLE_LIST_END_KHR +CL_DEVICE_NAME +CL_DEVICE_PLATFORM +CL_DEVICE_TYPE_ALL +CL_HPP_TARGET_OPENCL_VERSION +CL_INVALID_ARG_VALUE +CL_INVALID_VALUE +CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR +CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR +CL_KERNEL_WORK_GROUP_SIZE +CL_KHR_EXTERNAL_MEMORY_OPAQUE_FD_EXTENSION_NAME +CL_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME +CL_MEM_READ_ONLY +CL_MEM_READ_WRITE +CL_PLATFORM_VENDOR +CL_PROFILING_COMMAND_END +CL_PROFILING_COMMAND_START +CL_QUEUE_PROFILING_ENABLE +CL_QUEUE_PROPERTIES +CL_SUCCESS +CL_UUID_SIZE_KHR +cl_command_queue +cl_command_queue_properties +cl_context +cl_context_properties +cl_device_id +cl_event +cl_float +cl_int +cl_kernel +cl_external_memory_handle_type_khr +cl_khr_external_memory_opaque_fd +cl_khr_external_memory_win32 +cl_mem +cl_mem_properties +cl_platform_id +cl_program +cl_sdk_fill_with_random_ints_range(pcg32_random_t*, cl_int*, size_t, cl_int, cl_int) +cl_sdk_options_Diagnostic +cl_sdk_options_SingleDevice +cl_uint +cl_uchar +cl_ulong +cl_util_build_program(cl_program, cl_device_id, char*) +cl_util_get_device(cl_uint, cl_uint, cl_device_type, cl_int*) +cl_util_get_event_duration(cl_event, cl_profiling_info, cl_profiling_info, cl_int*) +cl_util_print_device_info*(cl_device_id) +cl_util_print_error(cl_int) +cl_util_read_text_file(char*const, size_t*const, cl_int*) +clCreateBufferWithProperties(cl_context, cl_mem_properties*, cl_mem_flags, size_t, void*, cl_int*) +clCreateCommandQueueWithProperties(cl_context, cl_device_id, cl_queue_properties*, cl_int*) -> OpenCL >= 2.0 +clCreateContext(cl_context_properties*, cl_uint, cl_device_id*, void *(char*, void*,size_t, void*), void*, cl_int*) +clCreateKernel(cl_program, char*, cl_int*) +clGetKernelWorkGroupInfo(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*) +clCreateProgramWithSource(cl_context, cl_uint, char**, size_t*, cl_int*) +clEnqueueNDRangeKernel(cl_command_queue, cl_kernel, cl_uint, size_t*, size_t*, size_t*, cl_uint, cl_event*, cl_event*) +clEnqueueReadBuffer(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, cl_event*, cl_event*) +clGetDeviceIDs(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*) +clGetDeviceInfo(cl_device_id, cl_device_info, size_t, void*, size_t*) +clGetPlatformIDs(cl_uint, cl_platform_id*, cl_uint*) +clReleaseCommandQueue(cl_command_queue) +clReleaseContext(cl_context) +clReleaseKernel(cl_kernel) +clReleaseMemObject(cl_mem) +clReleaseProgram(cl_program) +clSetKernelArg(cl_kernel, cl_uint, size_t, void *) +clWaitForEvents(cl_uint, cl_event*) +``` + +### C++ +```c++ +cl::Buffer::Buffer(const Context&, const vector&, IteratorType, IteratorType, bool, bool=false, cl_int*=NULL) +cl::BuildError +cl::CommandQueue::CommandQueue(const cl::Context&, const Device&,cl::QueueProperties, cl_int*=NULL) +cl::Context +cl::Device::Device() +cl::EnqueueArgs::EnqueueArgs(cl::CommandQueue&, cl::NDRange, cl::NDRange) +cl::Error +cl::Event +cl::KernelFunctor::KernelFunctor(const Program&, const string, cl_int*=NULL) +cl::NDRange::NDRange(size_t, size_t) +cl::Platform::Platform() +cl::Platform::Platform(cl::Platform) +cl::Platform::get(vector*) +cl::Program::Program(cl::Program) +cl::WaitForEvents(const vector&) +cl::copy(const CommandQueue&, const cl::Buffer&, IteratorType, IteratorType) +cl::sdk::comprehend() +cl::sdk::fill_with_random() +cl::sdk::get_context(cl_uint, cl_uint, cl_device_type, cl_int*) +cl::sdk::parse() +cl::sdk::parse_cli() +cl::sdk::options::Diagnostic +cl::sdk::options::SingleDevice +cl::string::string(cl::string) +cl::util::Error +cl::util::get_duration(cl::Event&) +cl::util::supports_extension(const cl::Device&, const cl::string&) +``` + +### Vulkan +```c +PFN_vkCreateDevice(VkPhysicalDevice, const VkDeviceCreateInfo*, const VkAllocationCallbacks*, VkDevice*) +VK_BUFFER_USAGE_TRANSFER_DST_BIT +VK_BUFFER_USAGE_TRANSFER_SRC_BIT +VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR +VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR +VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME +VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME +VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME +VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME +VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME +VK_MAKE_VERSION +VK_MEMORY_PROPERTY_HOST_COHERENT_BIT +VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT +VK_SHARING_MODE_EXCLUSIVE +VK_STRUCTURE_TYPE_APPLICATION_INFO +VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO +VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO +VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO +VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO +VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO +VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO +VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR +VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR +VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR +VK_SUCCESS +VK_WHOLE_SIZE +VkApplicationInfo +VkBuffer +VkBufferCreateInfo +VkDevice +VkDeviceMemory +VkDeviceQueueCreateInfo +VkExportMemoryAllocateInfo +VkExtensionProperties +VkExternalMemoryBufferCreateInfo +VkExternalMemoryHandleTypeFlagBits +VkInstance +VkInstanceCreateInfo +VkMemoryAllocateInfo +VkMemoryGetFdInfoKHR +VkMemoryPropertyFlags +VkMemoryRequirements +VkPhysicalDevice +VkPhysicalDeviceIDPropertiesKHR +VkPhysicalDeviceMemoryProperties +VkPhysicalDeviceProperties2KHR +VkPhysicalDeviceProperties +VkResult +vkAllocateMemory(VkDevice, const VkMemoryAllocateInfo*, const VkAllocationCallbacks*, VkDeviceMemory*) +vkBindBufferMemory(VkDevice, VkBuffer, VkDeviceMemory, VkDeviceSize) +vkCreateBuffer(VkDevice, const VkBufferCreateInfo*, const VkAllocationCallbacks*, VkBuffer*) +vkCreateInstance(const VkInstanceCreateInfo*, const VkAllocationCallbacks*, VkInstance*) +vkDestroyBuffer(VkDevice, VkBuffer, const VkAllocationCallbacks*) +vkEnumerateDeviceExtensionProperties(VkPhysicalDevice, const char*, uint32_t*, VkExtensionProperties*) +vkEnumeratePhysicalDevices(VkInstance, uint32_t*, VkPhysicalDevice*) +vkFreeMemory(VkDevice, VkDeviceMemory, const VkAllocationCallbacks*) +vkGetBufferMemoryRequirements(VkDevice, VkBuffer, VkMemoryRequirements*) +vkGetDeviceProcAddr(VkDevice, const char*) +vkGetMemoryFdKHR(VkDevice, const VkMemoryGetFdInfoKHR*, int*) +vkGetPhysicalDeviceMemoryProperties(VkPhysicalDevice, VkPhysicalDeviceMemoryProperties*) +vkGetPhysicalDeviceProperties2(VkPhysicalDevice, VkPhysicalDeviceProperties2) +vkMapMemory(VkDevice, VkDeviceMemory, VkDeviceSize, VkDeviceSize, VkMemoryMapFlags, void**) +vkUnmapMemory(VkDevice, VkDeviceMemory) +``` diff --git a/samples/extensions/khr/externalmemory/external_saxpy.cl b/samples/extensions/khr/externalmemory/external_saxpy.cl new file mode 100644 index 00000000..c5780872 --- /dev/null +++ b/samples/extensions/khr/externalmemory/external_saxpy.cl @@ -0,0 +1,8 @@ +__kernel void saxpy(float a, + __global float* x, + __global float* y) +{ + int gid = get_global_id(0); + + y[gid] = fma(a, x[gid], y[gid]); +} diff --git a/samples/extensions/khr/externalmemory/main.c b/samples/extensions/khr/externalmemory/main.c new file mode 100644 index 00000000..d0d3bd28 --- /dev/null +++ b/samples/extensions/khr/externalmemory/main.c @@ -0,0 +1,713 @@ +/* + * Copyright (c) 2023 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// OpenCL SDK includes. +#include +#include +#include +#include + +// OpenCL Utils includes. +#include +#include +#include + +// Vulkan includes. +#include + +// Vulkan utils includes. +#include "vulkan_utils.h" + +// Standard header includes. +#include +#include +#include +#include + +// Sample-specific option. +struct options_Saxpy +{ + size_t length; +}; + +// Add option to CLI-parsing SDK utility for input length. +cag_option SaxpyOptions[] = { { .identifier = 'l', + .access_letters = "l", + .access_name = "length", + .value_name = "(positive integer)", + .description = "Length of input" } }; + +ParseState parse_SaxpyOptions(const char identifier, + cag_option_context* cag_context, + struct options_Saxpy* opts) +{ + const char* value; + switch (identifier) + { + case 'l': + if (0 != (value = cag_option_get_value(cag_context))) + { + opts->length = strtoul(value, NULL, 0); + return ParsedOK; + } + else + return ParseError; + } + return NotParsed; +} + +cl_int parse_options(int argc, char* argv[], + struct cl_sdk_options_Diagnostic* diag_opts, + struct options_Saxpy* saxpy_opts) +{ + cl_int error = CL_SUCCESS; + struct cag_option *opts = NULL, *tmp = NULL; + size_t n = 0; + + // Prepare options array. + MEM_CHECK(opts = add_CLI_options(opts, &n, DiagnosticOptions, + CAG_ARRAY_SIZE(DiagnosticOptions)), + error, end); + opts = tmp; + MEM_CHECK(tmp = add_CLI_options(opts, &n, SaxpyOptions, + CAG_ARRAY_SIZE(SaxpyOptions)), + error, end); + opts = tmp; + + char identifier; + cag_option_context cag_context; + + // Prepare the context and iterate over all options. + cag_option_prepare(&cag_context, opts, n, argc, argv); + while (cag_option_fetch(&cag_context)) + { + ParseState state = NotParsed; + identifier = cag_option_get(&cag_context); + + PARS_OPTIONS(parse_DiagnosticOptions(identifier, diag_opts), state); + PARS_OPTIONS(parse_SaxpyOptions(identifier, &cag_context, saxpy_opts), + state); + + if (identifier == 'h') + { + printf("Usage: externalmemory [OPTION]...\n"); + printf("Option name and value should be separated by '=' or a " + "space\n"); + printf("Demonstrates OpenCL--Vulkan interop.\n\n"); + cag_option_print(opts, n, stdout); + exit((state == ParseError) ? CL_INVALID_ARG_VALUE : CL_SUCCESS); + } + } +end: + free(opts); + return error; +} + +// Host-side saxpy implementation. +void host_saxpy(const cl_float* x, cl_float* y, const float a, size_t length) +{ + for (size_t i = 0; i < length; ++i) + { + y[i] = fmaf(a, x[i], y[i]); + } +} + +// Vulkan instance extensions required for sharing OpenCL and Vulkan types: +// - VK_KHR_EXTERNAL_MEMORY_CAPABILITIES required for sharing buffers. +// - VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2 required for the previous one +// and for querying the device's UUID. +const char* const required_instance_extensions[] = { + VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, /*VK_KHR_external_memory_capabilities*/ + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME /*VK_KHR_get_physical_device_properties2*/ +}; +const size_t required_instance_extensions_count = + sizeof(required_instance_extensions) / sizeof(const char*); + +// General Vulkan extensions that a device needs to support for exporting +// memory. +const char* required_device_extensions[] = { + VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, /*VK_KHR_external_memory*/ +#ifdef _WIN32 + VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME /*VK_KHR_external_memory_win32*/ +#else + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME /*VK_KHR_external_memory_fd*/ +#endif +}; +const size_t required_device_extensions_count = + sizeof(required_device_extensions) / sizeof(const char*); + +// Khronos extensions that a device needs to support memory sharing with Vulkan. +const char* required_khronos_extensions[] = { +#ifdef _WIN32 + CL_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME /*cl_khr_external_memory_win32*/ +#else + CL_KHR_EXTERNAL_MEMORY_OPAQUE_FD_EXTENSION_NAME /*cl_khr_external_memory_opaque_fd*/ +#endif +}; +const size_t required_khronos_extensions_count = + sizeof(required_khronos_extensions) / sizeof(const char*); + +// Required Vulkan external memory handle. +const VkExternalMemoryHandleTypeFlagBits vk_external_memory_handle_type = +#ifdef _WIN32 + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; +#else + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; +#endif + +// Required OpenCL external memory handle. +const cl_external_memory_handle_type_khr cl_external_memory_handle_type = +#ifdef _WIN32 + CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR; +#else + CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR; +#endif + +// Check if a given OpenCL device supports a particular external memory handle +// type. +bool cl_check_external_memory_handle_type( + const cl_device_id cl_device, + cl_external_memory_handle_type_khr external_memory_handle_type) +{ + cl_external_memory_handle_type_khr* supported_handle_types = NULL; + size_t supported_handle_types_count = 0; + cl_int error = CL_SUCCESS; + + OCLERROR_RET( + clGetDeviceInfo(cl_device, + CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, 0, + NULL, &supported_handle_types_count), + error, err); + supported_handle_types = (cl_external_memory_handle_type_khr*)malloc( + supported_handle_types_count); + + OCLERROR_RET( + clGetDeviceInfo( + cl_device, CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, + supported_handle_types_count, supported_handle_types, NULL), + error, err); + for (size_t i = 0; i < supported_handle_types_count; ++i) + { + if (external_memory_handle_type == supported_handle_types[i]) + { + free(supported_handle_types); + return true; + } + } + free(supported_handle_types); + return false; +err: + fprintf(stderr, + "Error: OpenCL could not query supported external memory handle " + "types\n"); + free(supported_handle_types); + exit(EXIT_FAILURE); +} + +int main(int argc, char* argv[]) +{ + cl_int error = CL_SUCCESS; + cl_int end_error = CL_SUCCESS; + cl_platform_id cl_platform; + cl_device_id cl_device; + VkPhysicalDevice vk_physical_device; + VkDevice vk_device; + cl_context context = NULL; + cl_command_queue queue = NULL; + + cl_program program; + + // Parse command-line options. + struct cl_sdk_options_Diagnostic diag_opts = { .quiet = false, + .verbose = false }; + // Define as default length 1048576 = 4 * 262144 = sizeof(cl_float) * 2^18. + struct options_Saxpy saxpy_opts = { .length = 1048576 }; + + OCLERROR_RET(parse_options(argc, argv, &diag_opts, &saxpy_opts), error, + end); + + // Fill in Vulkan application info. + VkApplicationInfo app_info = { 0 }; + app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + app_info.pApplicationName = "OpenCL-Vulkan interop example"; + app_info.applicationVersion = VK_MAKE_VERSION(3, 0, 0); + app_info.pEngineName = "OpenCL-SDK samples"; + app_info.engineVersion = VK_MAKE_VERSION(3, 0, 0); + app_info.apiVersion = VK_MAKE_VERSION(3, 0, 0); + + // Initialize Vulkan instance info and create Vulkan instance. + VkInstanceCreateInfo instance_create_info = { + VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO + }; + instance_create_info.pApplicationInfo = &app_info; + instance_create_info.enabledExtensionCount = + (uint32_t)required_instance_extensions_count; + instance_create_info.ppEnabledExtensionNames = required_instance_extensions; + + VkInstance instance; + VK_CHECK(vkCreateInstance(&instance_create_info, NULL, &instance)); + + // Find a suitable (Vulkan-compatible) OpenCL device for the sample. + struct device_candidate candidate = find_suitable_device( + instance, required_device_extensions, required_device_extensions_count); + + // OpenCL device object for the selected device. + cl_device = candidate.cl_candidate.device; + + // Vulkan physical device object for the selected device. + vk_physical_device = candidate.vk_candidate; + + // Set up necessary info and create Vulkan device from physical device. + const float default_queue_priority = 1.0f; + VkDeviceQueueCreateInfo queue_create_info = { + VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO + }; + queue_create_info.queueFamilyIndex = 0; + queue_create_info.queueCount = 1; + queue_create_info.pQueuePriorities = &default_queue_priority; + + VkDeviceCreateInfo device_create_info = { + VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO + }; + device_create_info.queueCreateInfoCount = 1; + device_create_info.pQueueCreateInfos = &queue_create_info; + device_create_info.enabledExtensionCount = + (uint32_t)required_device_extensions_count; + device_create_info.ppEnabledExtensionNames = required_device_extensions; + + VK_CHECK(vkCreateDevice(vk_physical_device, &device_create_info, NULL, + &vk_device)); + + if (!diag_opts.quiet) + { + cl_util_print_device_info(cl_device); + } + + // Create OpenCL runtime objects. + OCLERROR_RET(clGetDeviceInfo(cl_device, CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &cl_platform, NULL), + error, cont); + cl_context_properties context_props[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)cl_platform, 0 + }; + OCLERROR_PAR(context = clCreateContext(context_props, 1, &cl_device, NULL, + NULL, &error), + error, end); + + // Check if the device supports the Khronos extensions needed before + // attempting to compile the kernel. + if (diag_opts.verbose) + { + printf("\nChecking Khronos extensions support... "); + fflush(stdout); + } + + if (!check_khronos_extensions(cl_device, required_khronos_extensions, + required_khronos_extensions_count)) + { + fprintf(stdout, + "OpenCL device does not support the required Khronos " + "extensions\n"); + exit(EXIT_SUCCESS); + } + + // Compile kernel. + if (diag_opts.verbose) + { + printf("done.\nCompiling OpenCL kernel... "); + fflush(stdout); + } + const char* kernel_location = "./external_saxpy.cl"; + char *kernel = NULL, *tmp = NULL; + size_t program_size = 0; + OCLERROR_PAR( + kernel = cl_util_read_text_file(kernel_location, &program_size, &error), + error, que); + MEM_CHECK(tmp = (char*)realloc(kernel, program_size), error, ker); + kernel = tmp; + OCLERROR_PAR(program = clCreateProgramWithSource( + context, 1, (const char**)&kernel, &program_size, &error), + error, ker); + + // The Khronos extension showcased requires OpenCL 3.0 version. + char compiler_options[1023] = ""; +#if CL_HPP_TARGET_OPENCL_VERSION >= 300 + strcat(compiler_options, "-cl-std=CL3.0 "); +#else + fprintf(stderr, "\nError: OpenCL version must be at least 3.0\n"); + exit(EXIT_FAILURE); +#endif + + OCLERROR_RET(cl_util_build_program(program, cl_device, compiler_options), + error, prg); + + // Query maximum workgroup size (WGS) supported based on private mem + // (registers) constraints. + size_t wgs; + cl_kernel saxpy; + OCLERROR_PAR(saxpy = clCreateKernel(program, "saxpy", &error), error, prg); + OCLERROR_RET(clGetKernelWorkGroupInfo(saxpy, cl_device, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(size_t), &wgs, NULL), + error, ker); + + // Initialize host-side storage. + const size_t length = saxpy_opts.length; + + // Random number generator. + pcg32_random_t rng; + pcg32_srandom_r(&rng, 11111, 2222); + + // Initialize input and output vectors and constant. + cl_float *arr_x, *arr_y, a; + MEM_CHECK(arr_x = (cl_float*)malloc(sizeof(cl_float) * length), error, sxp); + MEM_CHECK(arr_y = (cl_float*)malloc(sizeof(cl_float) * length), error, + arrx); + if (diag_opts.verbose) + { + printf("done.\nGenerating random scalar and %zd random numbers for " + "saxpy input vector...", + length); + fflush(stdout); + } + cl_sdk_fill_with_random_floats_range(&rng, &a, 1, -100, 100); + cl_sdk_fill_with_random_floats_range(&rng, arr_x, length, -100, 100); + cl_sdk_fill_with_random_floats_range(&rng, arr_y, length, -100, 100); + + // Check if the device supports the required OpenCL handle type. + if (diag_opts.verbose) + { + printf( + "done.\nChecking OpenCL external memory handle type support... "); + fflush(stdout); + } + + if (!cl_check_external_memory_handle_type(cl_device, + cl_external_memory_handle_type)) + { + fprintf(stderr, + "\nError: Unsupported OpenCL external memory handle type\n"); + exit(EXIT_FAILURE); + } + + if (!vk_check_external_memory_handle_type(vk_physical_device, + vk_external_memory_handle_type)) + { + fprintf(stderr, + "\nError: Unsupported Vulkan external memory handle type\n"); + exit(EXIT_FAILURE); + } + + // Initialize Vulkan device-side storage. + if (diag_opts.verbose) + { + printf("done.\nInitializing Vulkan device storage... "); + fflush(stdout); + } + + // Create Vulkan (external) buffers and assign memory to them. + VkExternalMemoryBufferCreateInfo external_memory_buffer_info = { + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO + }; + external_memory_buffer_info.handleTypes = vk_external_memory_handle_type; + + VkBufferCreateInfo buffer_info = { 0 }; + buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_info.pNext = &external_memory_buffer_info; + buffer_info.size = sizeof(cl_float) * length; + buffer_info.usage = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + VkBuffer vk_buf_x, vk_buf_y; + VK_CHECK(vkCreateBuffer(vk_device, &buffer_info, NULL, &vk_buf_x)); + VK_CHECK(vkCreateBuffer(vk_device, &buffer_info, NULL, &vk_buf_y)); + + // Get requirements and necessary information for (exportable) memory. + VkMemoryRequirements mem_requirements_x = { 0 }, mem_requirements_y = { 0 }; + vkGetBufferMemoryRequirements(vk_device, vk_buf_x, &mem_requirements_x); + vkGetBufferMemoryRequirements(vk_device, vk_buf_y, &mem_requirements_y); + + VkExportMemoryAllocateInfo export_memory_alloc_info = { + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO + }; + export_memory_alloc_info.handleTypes = vk_external_memory_handle_type; + + VkMemoryAllocateInfo memory_alloc_info_x = { 0 }; + memory_alloc_info_x.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memory_alloc_info_x.pNext = &export_memory_alloc_info; + memory_alloc_info_x.allocationSize = mem_requirements_x.size; + memory_alloc_info_x.memoryTypeIndex = find_vk_memory_type( + vk_physical_device, mem_requirements_x.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + + VkMemoryAllocateInfo memory_alloc_info_y = { 0 }; + memory_alloc_info_y.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memory_alloc_info_y.pNext = &export_memory_alloc_info; + memory_alloc_info_y.allocationSize = mem_requirements_y.size; + memory_alloc_info_y.memoryTypeIndex = find_vk_memory_type( + vk_physical_device, mem_requirements_y.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + + // Allocate and bind memory. + VkDeviceMemory vk_buf_x_memory, vk_buf_y_memory; + VK_CHECK(vkAllocateMemory(vk_device, &memory_alloc_info_x, NULL, + &vk_buf_x_memory)); + VK_CHECK(vkAllocateMemory(vk_device, &memory_alloc_info_y, NULL, + &vk_buf_y_memory)); + + VK_CHECK(vkBindBufferMemory(vk_device, vk_buf_x, vk_buf_x_memory, 0)); + VK_CHECK(vkBindBufferMemory(vk_device, vk_buf_y, vk_buf_y_memory, 0)); + + // Map memory. + void *vk_arr_x, *vk_arr_y; + VK_CHECK(vkMapMemory(vk_device, vk_buf_x_memory, 0, VK_WHOLE_SIZE, 0, + &vk_arr_x)); + VK_CHECK(vkMapMemory(vk_device, vk_buf_y_memory, 0, VK_WHOLE_SIZE, 0, + &vk_arr_y)); + + memcpy(vk_arr_x, arr_x, sizeof(cl_float) * length); + memcpy(vk_arr_y, arr_y, sizeof(cl_float) * length); + +#ifdef _WIN32 + // Get Vulkan external memory file descriptors for accessing external memory + // with OpenCL. + VkMemoryGetWin32HandleInfoKHR handle_info_x = { 0 }; + handle_info_x.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + handle_info_x.pNext = NULL; + handle_info_x.memory = vk_buf_x_memory; + handle_info_x.handleType = vk_external_memory_handle_type; + HANDLE handle_x; + + VkMemoryGetWin32HandleInfoKHR handle_info_y = { 0 }; + handle_info_y.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + handle_info_y.pNext = NULL; + handle_info_y.memory = vk_buf_y_memory; + handle_info_y.handleType = vk_external_memory_handle_type; + HANDLE handle_y; + + // We need to get the pointer to the + // vkGetMemoryFdKHR/vkGetMemoryWin32HandleKHR function because it's from + // extension VK_KHR_external_memory_fd. This Vulkan function exports a POSIX + // file descriptor/Windows handle referencing the payload of a Vulkan device + // memory object. + PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32Handle; + *(PFN_vkGetMemoryWin32HandleKHR*)&vkGetMemoryWin32Handle = + (PFN_vkGetMemoryWin32HandleKHR)vkGetDeviceProcAddr( + vk_device, "vkGetMemoryWin32HandleKHR"); + VK_CHECK(vkGetMemoryWin32Handle(vk_device, &handle_info_x, &handle_x)); + VK_CHECK(vkGetMemoryWin32Handle(vk_device, &handle_info_y, &handle_y)); +#else + // Get Vulkan external memory file descriptors for accessing external memory + // with OpenCL. + VkMemoryGetFdInfoKHR fd_info_x = { 0 }; + fd_info_x.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + fd_info_x.pNext = NULL; + fd_info_x.memory = vk_buf_x_memory; + fd_info_x.handleType = vk_external_memory_handle_type; + int fd_x; + + VkMemoryGetFdInfoKHR fd_info_y = { 0 }; + fd_info_y.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + fd_info_y.pNext = NULL; + fd_info_y.memory = vk_buf_y_memory; + fd_info_y.handleType = vk_external_memory_handle_type; + int fd_y; + + // We need to get the pointer to the + // vkGetMemoryFdKHR/vkGetMemoryWin32HandleKHR function because it's from + // extension VK_KHR_external_memory_fd. This Vulkan function exports a POSIX + // file descriptor/Windows handle referencing the payload of a Vulkan device + // memory object. + PFN_vkGetMemoryFdKHR vkGetMemoryFd; + *(PFN_vkGetMemoryFdKHR*)&vkGetMemoryFd = + (PFN_vkGetMemoryFdKHR)vkGetDeviceProcAddr(vk_device, + "vkGetMemoryFdKHR"); + VK_CHECK(vkGetMemoryFd(vk_device, &fd_info_x, &fd_x)); + VK_CHECK(vkGetMemoryFd(vk_device, &fd_info_y, &fd_y)); +#endif + + + // Create OpenCL buffers from Vulkan external memory file descriptors. + cl_mem_properties ext_mem_props_x[] = { + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, +#ifdef _WIN32 + (cl_mem_properties)handle_x, +#else + (cl_mem_properties)fd_x, +#endif + (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties)(uintptr_t)cl_device, + CL_MEM_DEVICE_HANDLE_LIST_END_KHR, + 0 + }; + cl_mem_properties ext_mem_props_y[] = { + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, +#ifdef _WIN32 + (cl_mem_properties)handle_y, +#else + (cl_mem_properties)fd_y, +#endif + (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties)(uintptr_t)cl_device, + CL_MEM_DEVICE_HANDLE_LIST_END_KHR, + 0 + }; + cl_mem cl_buf_x, cl_buf_y; + OCLERROR_PAR(cl_buf_x = clCreateBufferWithProperties( + context, ext_mem_props_x, CL_MEM_READ_ONLY, + sizeof(cl_float) * length, NULL, &error), + error, vulkan); + OCLERROR_PAR(cl_buf_y = clCreateBufferWithProperties( + context, ext_mem_props_y, CL_MEM_READ_WRITE, + sizeof(cl_float) * length, NULL, &error), + error, clbufx); + + // Initialize queue for command execution. + cl_command_queue_properties queue_props[] = { CL_QUEUE_PROPERTIES, + CL_QUEUE_PROFILING_ENABLE, + 0 }; + OCLERROR_PAR(queue = clCreateCommandQueueWithProperties( + context, cl_device, queue_props, &error), + error, cont); + + // Set kernel arguments. + OCLERROR_RET(clSetKernelArg(saxpy, 0, sizeof(cl_float), &a), error, clbufy); + OCLERROR_RET(clSetKernelArg(saxpy, 1, sizeof(cl_mem), &cl_buf_x), error, + clbufy); + OCLERROR_RET(clSetKernelArg(saxpy, 2, sizeof(cl_mem), &cl_buf_y), error, + clbufy); + + // Acquire OpenCL memory objects created from Vulkan external memory + // handles. + cl_mem cl_mem_objects[] = { cl_buf_x, cl_buf_y }; + clEnqueueAcquireExternalMemObjectsKHR_fn + clEnqueueAcquireExternalMemObjects = + (clEnqueueAcquireExternalMemObjectsKHR_fn) + clGetExtensionFunctionAddressForPlatform( + cl_platform, "clEnqueueAcquireExternalMemObjectsKHR"); + clEnqueueAcquireExternalMemObjects(queue, 2, cl_mem_objects, 0, NULL, NULL); + + // Launch kernel. + if (diag_opts.verbose) + { + printf("done.\nExecuting on device... "); + fflush(stdout); + } + + cl_event kernel_run; + GET_CURRENT_TIMER(dev_start) + OCLERROR_RET(clEnqueueNDRangeKernel(queue, saxpy, 1, NULL, &length, &wgs, 0, + NULL, &kernel_run), + error, clbufy); + OCLERROR_RET(clWaitForEvents(1, &kernel_run), error, clbufy); + GET_CURRENT_TIMER(dev_end) + + cl_ulong dev_time; + TIMER_DIFFERENCE(dev_time, dev_start, dev_end) + + // Release OpenCL memory objects created from Vulkan external memory + // handles. + clEnqueueReleaseExternalMemObjectsKHR_fn + clEnqueueReleaseExternalMemObjects = + (clEnqueueReleaseExternalMemObjectsKHR_fn) + clGetExtensionFunctionAddressForPlatform( + cl_platform, "clEnqueueReleaseExternalMemObjectsKHR"); + clEnqueueReleaseExternalMemObjects(queue, 2, cl_mem_objects, 0, NULL, NULL); + + // Concurrently calculate reference saxpy. + if (diag_opts.verbose) + { + printf("done.\nExecuting on host... "); + } + + GET_CURRENT_TIMER(host_start) + host_saxpy(arr_x, arr_y, a, length); + GET_CURRENT_TIMER(host_end) + cl_ulong host_time; + TIMER_DIFFERENCE(host_time, host_start, host_end) + + if (diag_opts.verbose) + { + printf("done.\n"); + } + + // Fetch results. + OCLERROR_RET(clEnqueueReadBuffer(queue, cl_buf_y, CL_BLOCKING, 0, + sizeof(cl_float) * length, (void*)arr_x, 0, + NULL, NULL), + error, clbufy); + + // Validate solution. + for (size_t i = 0; i < length; ++i) + if (arr_y[i] != arr_x[i]) + { + printf("Verification failed! %f != %f at index %zu\n", arr_y[i], + arr_x[i], i); + error = CL_INVALID_VALUE; + } + if (error == CL_SUCCESS) + { + printf("Verification passed.\n"); + } + + if (!diag_opts.quiet) + { + printf("Kernel execution time as seen by host: %llu us.\n", + (unsigned long long)(dev_time + 500) / 1000); + + printf("Kernel execution time as measured by device: %llu us.\n", + (unsigned long long)(cl_util_get_event_duration( + kernel_run, CL_PROFILING_COMMAND_START, + CL_PROFILING_COMMAND_END, &error) + + 500) + / 1000); + + printf("Reference execution as seen by host: %llu us.\n", + (unsigned long long)(host_time + 500) / 1000); + } + + // Release resources. +clbufy: + OCLERROR_RET(clReleaseMemObject(cl_buf_y), end_error, clbufx); +clbufx: + OCLERROR_RET(clReleaseMemObject(cl_buf_x), end_error, vulkan); +vulkan: + vkDestroyBuffer(vk_device, vk_buf_y, NULL); + vkDestroyBuffer(vk_device, vk_buf_x, NULL); + vkUnmapMemory(vk_device, vk_buf_y_memory); + vkUnmapMemory(vk_device, vk_buf_x_memory); + vkFreeMemory(vk_device, vk_buf_y_memory, NULL); + vkFreeMemory(vk_device, vk_buf_x_memory, NULL); + free(arr_y); +arrx: + free(arr_x); +sxp: + OCLERROR_RET(clReleaseKernel(saxpy), end_error, prg); +prg: + OCLERROR_RET(clReleaseProgram(program), end_error, ker); +ker: + free(kernel); +que: + OCLERROR_RET(clReleaseCommandQueue(queue), end_error, cont); +cont: + OCLERROR_RET(clReleaseContext(context), end_error, end); +end: + if (error) cl_util_print_error(error); + return error; +} diff --git a/samples/extensions/khr/externalmemory/main.cpp b/samples/extensions/khr/externalmemory/main.cpp new file mode 100644 index 00000000..97b99a29 --- /dev/null +++ b/samples/extensions/khr/externalmemory/main.cpp @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2023 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// OpenCL C++ headers includes. +#include + +// OpenCL SDK includes. +#include +#include +#include +#include + +// OpenCL Utils includes. +#include +#include +#include + +// Vulkan includes. +#include + +// Vulkan utils includes. +#include "vulkan_utils.hpp" + +// Standard header includes. +#include +#include +#include +#include +#include + +// Sample-specific option. +struct SaxpyOptions +{ + size_t length; +}; + +// Add option to CLI-parsing SDK utility for input dimensions. +template <> auto cl::sdk::parse() +{ + return std::make_tuple(std::make_shared>( + "l", "length", "Length of input", false, 1'048'576, + "positive integral")); +} +template <> +SaxpyOptions cl::sdk::comprehend( + std::shared_ptr> length_arg) +{ + return SaxpyOptions{ length_arg->getValue() }; +} + +// Host-side saxpy implementation. +void host_saxpy(std::vector x, std::vector& y, const float a, + size_t length) +{ + for (size_t i = 0; i < length; ++i) + { + y[i] = std::fmaf(a, x[i], y[i]); + } +} + +// Vulkan instance extensions required for sharing OpenCL and Vulkan types: +// - VK_KHR_EXTERNAL_MEMORY_CAPABILITIES required for sharing buffers. +// - VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2 required for the previous one +// and for querying the device's UUID. +const std::vector required_instance_extensions_str = { + VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, /*VK_KHR_external_memory_capabilities*/ + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME /*VK_KHR_get_physical_device_properties2*/ +}; + +// General Vulkan extensions that a device needs to support to run this +// example: +// - VK_KHR_EXTERNAL_MEMORY required for sharing memory. +const std::vector required_device_extensions_str = { + std::string{ + VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME }, /*VK_KHR_external_memory*/ +#ifdef _WIN64 + std::string{ + VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME } /*VK_KHR_external_memory_win32*/ +#else + std::string{ + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME } /*VK_KHR_external_memory_fd*/ +#endif +}; + +// Required Vulkan external memory handle. +const VkExternalMemoryHandleTypeFlagBits vk_external_memory_handle_type = +#ifdef _WIN32 + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR; +#else + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; +#endif + +// Khronos extensions that a device needs to support memory sharing with Vulkan. +const std::vector required_khronos_extensions = { +#ifdef _WIN32 + std::string{ "cl_khr_external_memory_win32" } +#else + std::string{ "cl_khr_external_memory_opaque_fd" } +#endif +}; + +// Required OpenCL external memory handle. +const cl_external_memory_handle_type_khr cl_external_memory_handle_type = +#ifdef _WIN32 + CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR; +#else + CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR; +#endif + +// Check if a given OpenCL device supports a particular external memory handle +// type. +bool cl_check_external_memory_handle_type( + const cl::Device cl_device, + cl_external_memory_handle_type_khr external_memory_handle_type) +{ + std::vector supported_handle_types; + cl_device.getInfo(CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, + &supported_handle_types); + + const auto it = std::find_if( + supported_handle_types.begin(), supported_handle_types.end(), + [&](const cl_external_memory_handle_type_khr& supported_handle_type) { + return external_memory_handle_type == supported_handle_type; + }); + return it != supported_handle_types.end(); +} + +int main(int argc, char* argv[]) +{ + try + { + // Parse command-line options. + auto opts = + cl::sdk::parse_cli( + argc, argv); + const auto& diag_opts = std::get<0>(opts); + const auto& saxpy_opts = std::get<1>(opts); + + // Fill in Vulkan application info. + VkApplicationInfo app_info{}; + app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + app_info.pApplicationName = "OpenCL-Vulkan interop example"; + app_info.applicationVersion = VK_MAKE_VERSION(3, 0, 0); + app_info.pEngineName = "OpenCL-SDK samples"; + app_info.engineVersion = VK_MAKE_VERSION(3, 0, 0); + app_info.apiVersion = VK_MAKE_VERSION(3, 0, 0); + + // Initialize Vulkan instance info and create Vulkan instance. + std::vector required_instance_extensions( + required_instance_extensions_str.size(), nullptr); + std::transform(required_instance_extensions_str.begin(), + required_instance_extensions_str.end(), + required_instance_extensions.begin(), + [&](const std::string& str) { return str.c_str(); }); + VkInstanceCreateInfo instance_create_info{}; + instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + instance_create_info.pApplicationInfo = &app_info; + instance_create_info.enabledExtensionCount = + static_cast(required_instance_extensions.size()); + instance_create_info.ppEnabledExtensionNames = + required_instance_extensions.data(); + + VkInstance instance; + VK_CHECK(vkCreateInstance(&instance_create_info, nullptr, &instance)); + + // Find a suitable (Vulkan-compatible) OpenCL device for the sample. + std::vector required_device_extensions( + required_device_extensions_str.size(), nullptr); + std::transform(required_device_extensions_str.begin(), + required_device_extensions_str.end(), + required_device_extensions.begin(), + [&](const std::string& str) { return str.c_str(); }); + device_candidate candidate = + find_suitable_device(instance, required_device_extensions); + + // OpenCL device and platform objects for the selected device. + cl::Device cl_device = candidate.cl_candidate.device; + const cl::Platform cl_platform{ + cl_device.getInfo() + }; + + // Vulkan physical device object for the selected device. + const VkPhysicalDevice vk_physical_device = candidate.vk_candidate; + + // Set up necessary info and create Vulkan device from physical device. + constexpr float default_queue_priority = 1.0f; + VkDeviceQueueCreateInfo queue_create_info{}; + queue_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queue_create_info.queueFamilyIndex = 0; + queue_create_info.queueCount = 1; + queue_create_info.pQueuePriorities = &default_queue_priority; + + VkDeviceCreateInfo device_create_info{}; + device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + device_create_info.queueCreateInfoCount = 1; + device_create_info.pQueueCreateInfos = &queue_create_info; + device_create_info.enabledExtensionCount = + static_cast(required_device_extensions.size()); + device_create_info.ppEnabledExtensionNames = + required_device_extensions.data(); + + VkDevice vk_device; + VK_CHECK(vkCreateDevice(vk_physical_device, &device_create_info, + nullptr, &vk_device)); + + if (!diag_opts.quiet) + { + std::cout << "Selected platform: " + << cl_platform.getInfo() << "\n" + << "Selected device: " + << cl_device.getInfo() << "\n" + << std::endl; + } + + // Create OpenCL runtime objects. + cl::Context cl_context{ cl_device }; + + // Check if the device supports the Khronos extensions needed before + // attempting to compile the kernel. + if (diag_opts.verbose) + { + std::cout << "Checking Khronos extensions support... "; + std::cout.flush(); + } + + for (const auto& extension : required_khronos_extensions) + { + if (!cl::util::supports_extension(cl_device, extension)) + { + std::cout << "OpenCL device does not support the required " + "Khronos extension " + << extension << std::endl; + exit(EXIT_SUCCESS); + } + } + + // Compile kernel. + if (diag_opts.verbose) + { + std::cout << " done.\nCompiling OpenCL kernel... "; + std::cout.flush(); + } + const char* kernel_location = "./external_saxpy.cl"; + std::ifstream kernel_stream{ kernel_location }; + if (!kernel_stream.is_open()) + throw std::runtime_error{ + std::string{ "Cannot open kernel source: " } + kernel_location + }; + cl::Program cl_program{ + cl_context, + std::string{ std::istreambuf_iterator{ kernel_stream }, + std::istreambuf_iterator{} } + }; + + // The Khronos extension showcased requires OpenCL 3.0 version. + cl::string compiler_options = ""; +#if CL_HPP_TARGET_OPENCL_VERSION >= 300 + compiler_options += cl::string{ "-cl-std=CL3.0 " }; +#else + sdt::cerr << "\nError: OpenCL version must be at least 3.0" + << std::endl; + exit(EXIT_FAILURE); +#endif + + cl_program.build(cl_device, compiler_options.c_str()); + + // Query maximum workgroup size (WGS) supported based on private mem + // (registers) constraints. + auto saxpy = cl::KernelFunctor( + cl_program, "saxpy"); + auto wgs = + saxpy.getKernel().getWorkGroupInfo( + cl_device); + + // Initialize host-side storage. + const auto length = saxpy_opts.length; + + // Random number generator. + auto prng = [engine = std::default_random_engine{}, + dist = std::uniform_real_distribution{ + -1.0, 1.0 }]() mutable { return dist(engine); }; + + // Initialize input and output vectors and constant. + std::vector arr_x(length), arr_y(length); + if (diag_opts.verbose) + { + std::cout << "Generating random scalar and " << length + << " random numbers for saxpy input vector." << std::endl; + } + cl_float a = prng(); + cl::sdk::fill_with_random(prng, arr_x, arr_y); + + // Check if the device supports the required OpenCL handle type. + if (diag_opts.verbose) + { + std::cout << "done.\nChecking OpenCL external memory handle type " + "support... "; + std::cout.flush(); + } + + if (!cl_check_external_memory_handle_type( + cl_device, cl_external_memory_handle_type)) + { + std::cerr + << "\nError: Unsupported OpenCL external memory handle type" + << std::endl; + exit(EXIT_FAILURE); + } + + if (!vk_check_external_memory_handle_type( + vk_physical_device, vk_external_memory_handle_type)) + { + std::cerr + << "\nError: Unsupported Vulkan external memory handle type" + << std::endl; + exit(EXIT_FAILURE); + } + + // Initialize Vulkan device-side storage. + if (diag_opts.verbose) + { + std::cout << "done.\nInitializing Vulkan device storage... "; + std::cout.flush(); + } + + // Create Vulkan (external) buffers and assign memory to them. + VkExternalMemoryBufferCreateInfo external_memory_buffer_info{}; + external_memory_buffer_info.sType = + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO; + external_memory_buffer_info.handleTypes = + vk_external_memory_handle_type; + + VkBufferCreateInfo buffer_info{}; + buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_info.pNext = &external_memory_buffer_info; + buffer_info.size = sizeof(cl_float) * length; + buffer_info.usage = + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + VkBuffer vk_buf_x, vk_buf_y; + VK_CHECK(vkCreateBuffer(vk_device, &buffer_info, nullptr, &vk_buf_x)); + VK_CHECK(vkCreateBuffer(vk_device, &buffer_info, nullptr, &vk_buf_y)); + + // Get requirements and necessary information for (exportable) memory. + VkMemoryRequirements mem_requirements_x{}, mem_requirements_y{}; + vkGetBufferMemoryRequirements(vk_device, vk_buf_x, &mem_requirements_x); + vkGetBufferMemoryRequirements(vk_device, vk_buf_y, &mem_requirements_y); + + VkExportMemoryAllocateInfo export_memory_alloc_info{}; + export_memory_alloc_info.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + export_memory_alloc_info.handleTypes = vk_external_memory_handle_type; + + VkMemoryAllocateInfo memory_alloc_info_x{}; + memory_alloc_info_x.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memory_alloc_info_x.pNext = &export_memory_alloc_info; + memory_alloc_info_x.allocationSize = mem_requirements_x.size; + memory_alloc_info_x.memoryTypeIndex = find_vk_memory_type( + vk_physical_device, mem_requirements_x.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + + VkMemoryAllocateInfo memory_alloc_info_y{}; + memory_alloc_info_y.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memory_alloc_info_y.pNext = &export_memory_alloc_info; + memory_alloc_info_y.allocationSize = mem_requirements_y.size; + memory_alloc_info_y.memoryTypeIndex = find_vk_memory_type( + vk_physical_device, mem_requirements_y.memoryTypeBits, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + + // Allocate and bind memory. + VkDeviceMemory vk_buf_x_memory, vk_buf_y_memory; + VK_CHECK(vkAllocateMemory(vk_device, &memory_alloc_info_x, nullptr, + &vk_buf_x_memory)); + VK_CHECK(vkAllocateMemory(vk_device, &memory_alloc_info_y, nullptr, + &vk_buf_y_memory)); + + VK_CHECK(vkBindBufferMemory(vk_device, vk_buf_x, vk_buf_x_memory, 0)); + VK_CHECK(vkBindBufferMemory(vk_device, vk_buf_y, vk_buf_y_memory, 0)); + + // Map memory. + void *vk_arr_x, *vk_arr_y; + VK_CHECK(vkMapMemory(vk_device, vk_buf_x_memory, 0, VK_WHOLE_SIZE, 0, + &vk_arr_x)); + VK_CHECK(vkMapMemory(vk_device, vk_buf_y_memory, 0, VK_WHOLE_SIZE, 0, + &vk_arr_y)); + + memcpy(vk_arr_x, arr_x.data(), sizeof(cl_float) * length); + memcpy(vk_arr_y, arr_y.data(), sizeof(cl_float) * length); + +#ifdef _WIN32 + // Get Vulkan external memory file descriptors for accessing external + // memory with OpenCL. + VkMemoryGetWin32HandleInfoKHR handle_info_x{}; + handle_info_x.sType = + VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + handle_info_x.pNext = nullptr; + handle_info_x.memory = vk_buf_x_memory; + handle_info_x.handleType = vk_external_memory_handle_type; + HANDLE handle_x; + + VkMemoryGetWin32HandleInfoKHR handle_info_y{}; + handle_info_y.sType = + VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + handle_info_y.pNext = nullptr; + handle_info_y.memory = vk_buf_y_memory; + handle_info_y.handleType = vk_external_memory_handle_type; + HANDLE handle_y; + + // We need to get the pointer to the + // vkGetMemoryFdKHR/vkGetMemoryWin32HandleKHR function because it's from + // the extension VK_KHR_external_memory_fd. This Vulkan function exports + // a POSIX file descriptor/Windows handle referencing the payload of a + // Vulkan device memory object. + PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32Handle; + *(PFN_vkGetMemoryWin32HandleKHR*)&vkGetMemoryWin32Handle = + (PFN_vkGetMemoryWin32HandleKHR)vkGetDeviceProcAddr( + vk_device, "vkGetMemoryWin32HandleKHR"); + VK_CHECK(vkGetMemoryWin32Handle(vk_device, &handle_info_x, &handle_x)); + VK_CHECK(vkGetMemoryWin32Handle(vk_device, &handle_info_y, &handle_y)); +#else + // Get Vulkan external memory file descriptors for accessing external + // memory with OpenCL. + VkMemoryGetFdInfoKHR fd_info_x{}; + fd_info_x.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + fd_info_x.pNext = nullptr; + fd_info_x.memory = vk_buf_x_memory; + fd_info_x.handleType = vk_external_memory_handle_type; + int fd_x; + + VkMemoryGetFdInfoKHR fd_info_y{}; + fd_info_y.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + fd_info_y.pNext = nullptr; + fd_info_y.memory = vk_buf_y_memory; + fd_info_y.handleType = vk_external_memory_handle_type; + int fd_y; + + // We need to get the pointer to the + // vkGetMemoryFdKHR/vkGetMemoryWin32HandleKHR function because it's from + // extension VK_KHR_external_memory_fd. This Vulkan function exports a + // POSIX file descriptor/Windows handle referencing the payload of a + // Vulkan device memory object. + PFN_vkGetMemoryFdKHR vkGetMemoryFd; + *(PFN_vkGetMemoryFdKHR*)&vkGetMemoryFd = + (PFN_vkGetMemoryFdKHR)vkGetDeviceProcAddr(vk_device, + "vkGetMemoryFdKHR"); + VK_CHECK(vkGetMemoryFd(vk_device, &fd_info_x, &fd_x)); + VK_CHECK(vkGetMemoryFd(vk_device, &fd_info_y, &fd_y)); +#endif + + // Create OpenCL buffers from Vulkan external memory file descriptors. + std::vector ext_mem_props_x = { + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, +#ifdef _WIN32 + (cl_mem_properties)handle_x, +#else + (cl_mem_properties)fd_x, +#endif + (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties)cl_device(), + CL_MEM_DEVICE_HANDLE_LIST_END_KHR, + 0 + }; + std::vector ext_mem_props_y = { + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, +#ifdef _WIN32 + (cl_mem_properties)handle_y, +#else + (cl_mem_properties)fd_y, +#endif + (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties)cl_device(), + CL_MEM_DEVICE_HANDLE_LIST_END_KHR, + 0 + }; + + cl::Buffer cl_buf_x{ cl_context, ext_mem_props_x, CL_MEM_READ_ONLY, + sizeof(cl_float) * length }; + cl::Buffer cl_buf_y{ cl_context, ext_mem_props_y, CL_MEM_READ_WRITE, + sizeof(cl_float) * length }; + + // Initialize queue for command execution. + cl_command_queue_properties queue_props[] = { CL_QUEUE_PROFILING_ENABLE, + 0 }; + cl::CommandQueue queue{ cl_context, cl_device, *queue_props }; + + // Acquire OpenCL memory objects created from Vulkan external memory + // handles. + std::vector cl_mem_objects = { cl_buf_x(), cl_buf_y() }; + clEnqueueAcquireExternalMemObjectsKHR_fn + clEnqueueAcquireExternalMemObjects = + (clEnqueueAcquireExternalMemObjectsKHR_fn) + clGetExtensionFunctionAddressForPlatform( + cl_platform(), "clEnqueueAcquireExternalMemObjectsKHR"); + clEnqueueAcquireExternalMemObjects( + queue(), static_cast(cl_mem_objects.size()), + cl_mem_objects.data(), 0, nullptr, nullptr); + + // Launch kernel. + if (diag_opts.verbose) + { + std::cout << "done.\nExecuting on device... "; + std::cout.flush(); + } + + std::vector kernel_run; + auto dev_start = std::chrono::high_resolution_clock::now(); + kernel_run.push_back( + saxpy(cl::EnqueueArgs{ queue, cl::NDRange{ length }, wgs }, a, + cl_buf_x, cl_buf_y)); + cl::WaitForEvents(kernel_run); + auto dev_end = std::chrono::high_resolution_clock::now(); + + // Release OpenCL memory objects created from Vulkan external memory + // handles. + clEnqueueReleaseExternalMemObjectsKHR_fn + clEnqueueReleaseExternalMemObjects = + (clEnqueueReleaseExternalMemObjectsKHR_fn) + clGetExtensionFunctionAddressForPlatform( + cl_platform(), "clEnqueueReleaseExternalMemObjectsKHR"); + clEnqueueReleaseExternalMemObjects( + queue(), static_cast(cl_mem_objects.size()), + cl_mem_objects.data(), 0, nullptr, nullptr); + + // Concurrently calculate reference saxpy. + if (diag_opts.verbose) + { + std::cout << "done.\nExecuting on host... "; + std::cout.flush(); + } + + auto host_start = std::chrono::high_resolution_clock::now(); + host_saxpy(arr_x, arr_y, a, length); + auto host_end = std::chrono::high_resolution_clock::now(); + + if (diag_opts.verbose) + { + std::cout << "done.\n"; + std::cout.flush(); + } + + // Fetch results. + cl::copy(queue, cl_buf_y, arr_x.begin(), arr_x.end()); + + // Validate solution. + if (std::equal(std::begin(arr_x), std::end(arr_x), std::begin(arr_y), + std::end(arr_y))) + std::cout << "Verification passed." << std::endl; + else + throw std::runtime_error{ "Verification failed!" }; + + if (!diag_opts.quiet) + { + std::cout << "Kernel execution time as seen by host: " + << std::chrono::duration_cast( + dev_end - dev_start) + .count() + << " us." << std::endl; + + std::cout << "Kernel execution time as measured by device: "; + std::cout << cl::util::get_duration( + kernel_run[0]) + .count() + << " us." << std::endl; + + std::cout << "Reference execution as seen by host: " + << std::chrono::duration_cast( + host_end - host_start) + .count() + << " us." << std::endl; + } + + // Release resources. + vkDestroyBuffer(vk_device, vk_buf_y, nullptr); + vkDestroyBuffer(vk_device, vk_buf_x, nullptr); + vkUnmapMemory(vk_device, vk_buf_y_memory); + vkUnmapMemory(vk_device, vk_buf_x_memory); + vkFreeMemory(vk_device, vk_buf_y_memory, nullptr); + vkFreeMemory(vk_device, vk_buf_x_memory, nullptr); + + } catch (cl::BuildError& e) + { + std::cerr << "OpenCL build error: " << e.what() << std::endl; + for (auto& build_log : e.getBuildLog()) + { + std::cerr << "\tBuild log for device: " + << build_log.first.getInfo() << "\n" + << std::endl; + std::cerr << build_log.second << "\n" << std::endl; + } + std::exit(e.err()); + } catch (cl::util::Error& e) + { + std::cerr << "OpenCL utils error: " << e.what() << std::endl; + std::exit(e.err()); + } catch (cl::Error& e) + { + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; + std::exit(e.err()); + } catch (std::exception& e) + { + std::cerr << "Error: " << e.what() << std::endl; + std::exit(EXIT_FAILURE); + } + + return 0; +} diff --git a/samples/extensions/khr/externalmemory/vulkan_utils.h b/samples/extensions/khr/externalmemory/vulkan_utils.h new file mode 100644 index 00000000..f2d82554 --- /dev/null +++ b/samples/extensions/khr/externalmemory/vulkan_utils.h @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2023 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _SAMPLES_CORE_EXTERNALMEMORY_UTILS_H +#define _SAMPLES_CORE_EXTERNALMEMORY_UTILS_H + +// OpenCL C headers includes. +#include + +// OpenCL Utils includes. +#include + +// Vulkan includes. +#include + +// Standard header includes. +#include +#include +#include +#include + +// Check if the provided Vulkan error code is \p VK_SUCCESS. If not, prints an +// error message to the standard error output and terminates the program with an +// error code. +#define VK_CHECK(condition) \ + { \ + const VkResult _error = condition; \ + if (_error != VK_SUCCESS) \ + { \ + fprintf(stderr, "A vulkan error encountered: %d at %s: %d\n", \ + _error, __FILE__, __LINE__); \ + exit(EXIT_FAILURE); \ + } \ + } + +// OpenCL device that is suitable for this example. +struct cl_device_candidate +{ + /// The OpenCL device id representing the device. + cl_device_id device; + + /// The Vulkan-compatible device UUID. + cl_uchar uuid[CL_UUID_SIZE_KHR]; +}; + +// OpenCL and Vulkan physical device suitable for the sample. +struct device_candidate +{ + /// The Vulkan physical device handle of the device to be used. + VkPhysicalDevice vk_candidate; + + /// The candidate device's Vulkan device properties. + VkPhysicalDeviceProperties vk_props; + + /// The OpenCL device candidate that this Vulkan device corresponds to. + struct cl_device_candidate cl_candidate; +}; + +// Check if the extensions supported by a Vulkan device includes a given set of +// required extensions. +bool extensions_supported( + const VkExtensionProperties* supported_extensions_properties, + const size_t supported_extensions_count, + const char* const* required_device_extensions, + const size_t required_device_extensions_count) +{ + for (size_t i = 0; i < required_device_extensions_count; ++i) + { + size_t j = 0; + while (strcmp(required_device_extensions[i], + supported_extensions_properties[j].extensionName) + && j < supported_extensions_count) + { + ++j; + } + if (j == supported_extensions_count) + { + return false; + } + } + return true; +} + +// Check if a given Vulkan device supports all the required Vulkan extensions. +bool check_device_extensions(const VkPhysicalDevice vk_device, + const char* const* required_device_extensions, + const size_t required_device_extensions_count) +{ + uint32_t supported_extensions_count; + VK_CHECK(vkEnumerateDeviceExtensionProperties( + vk_device, NULL, &supported_extensions_count, NULL)); + VkExtensionProperties* vk_supported_extensions_properties = + (VkExtensionProperties*)malloc(supported_extensions_count + * sizeof(VkExtensionProperties)); + VK_CHECK(vkEnumerateDeviceExtensionProperties( + vk_device, NULL, &supported_extensions_count, + vk_supported_extensions_properties)); + + bool result = extensions_supported( + vk_supported_extensions_properties, supported_extensions_count, + required_device_extensions, required_device_extensions_count); + + free(vk_supported_extensions_properties); + + return result; +} + +// Check if a given Vulkan physical device is compatible with any of the OpenCL +// devices available. +bool is_vk_device_suitable(const struct cl_device_candidate* cl_candidates, + const size_t cl_candidates_count, + VkPhysicalDevice vk_device, + struct device_candidate* candidate, + const char* const* required_device_extensions, + const size_t required_device_extensions_count) +{ + // Check if the device supports OpenCL by checking if there is any device + // with the same UUID. + { + // Query the Vulkan device UUID using vkGetPhysicalDeviceProperties2. + VkPhysicalDeviceIDPropertiesKHR id_props = { 0 }; + id_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2KHR props2 = { 0 }; + props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + props2.pNext = &id_props; + + vkGetPhysicalDeviceProperties2(vk_device, &props2); + + // Look for an OpenCL device which UUID matches the UUID reported by + // Vulkan. + size_t compatible_dev_index = cl_candidates_count; + for (size_t i = 0; i < cl_candidates_count + && compatible_dev_index == cl_candidates_count; + ++i) + { + compatible_dev_index = i; + for (uint32_t j = 0; j < CL_UUID_SIZE_KHR; ++j) + { + if (cl_candidates[i].uuid[j] != id_props.deviceUUID[j]) + { + compatible_dev_index = cl_candidates_count; + break; + } + } + } + + if (compatible_dev_index == cl_candidates_count) + { + return false; + } + + candidate->vk_props = props2.properties; + candidate->cl_candidate = cl_candidates[compatible_dev_index]; + } + + // Check if the device supports the required extensions. + if (!check_device_extensions(vk_device, required_device_extensions, + required_device_extensions_count)) + { + return false; + } + + candidate->vk_candidate = vk_device; + return true; +} + +// Check if a given OpenCL device supports a particular set of Khronos +// extensions. +bool check_khronos_extensions( + const cl_device_id cl_device, + const char* const* const required_khronos_extensions, + const size_t required_khronos_extensions_count) +{ + cl_int error = CL_SUCCESS; + size_t supported_extensions_count; + OCLERROR_RET(clGetDeviceInfo(cl_device, CL_DEVICE_EXTENSIONS, 0, NULL, + &supported_extensions_count), + error, ret); + char* supported_extensions = + (char*)malloc(supported_extensions_count * sizeof(char)); + OCLERROR_RET(clGetDeviceInfo(cl_device, CL_DEVICE_EXTENSIONS, + supported_extensions_count, + supported_extensions, NULL), + error, err); + + for (size_t i = 0; i < required_khronos_extensions_count; ++i) + { + if (!strstr(supported_extensions, required_khronos_extensions[i])) + { + free(supported_extensions); + return false; + } + } + free(supported_extensions); + return true; +err: + free(supported_extensions); +ret: + return false; +} + +// Find a suitable device for the example, that is, an OpenCL +// device that is also Vulkan-compatible and that supports the required +// Vulkan device extensions. +struct device_candidate +find_suitable_device(VkInstance instance, + const char* const* required_device_extensions, + const size_t required_device_extensions_count) +{ + // Query OpenCL devices available. + cl_int error = CL_SUCCESS; + bool candidate_found = false; + cl_uint platform_count = 0; + struct device_candidate found_candidate = {0}; + OCLERROR_RET(clGetPlatformIDs(0, NULL, &platform_count), error, ret); + + cl_platform_id* platforms = + (cl_platform_id*)malloc(platform_count * sizeof(cl_platform_id)); + OCLERROR_RET(clGetPlatformIDs(platform_count, platforms, NULL), error, + platforms); + + size_t cl_device_count = 0; + const char* uuid_khronos_extension[] = { + CL_KHR_DEVICE_UUID_EXTENSION_NAME + }; + for (cl_uint platform_id = 0; platform_id < platform_count; + ++platform_id) + { + cl_uint cl_platform_devices_count = 0; + OCLERROR_RET(clGetDeviceIDs(platforms[platform_id], + CL_DEVICE_TYPE_ALL, 0, NULL, + &cl_platform_devices_count), + error, platforms); + for (cl_uint device_id = 0; device_id < cl_platform_devices_count; + ++device_id) + { + cl_device_id device; + OCLERROR_PAR(device = cl_util_get_device( + platform_id, device_id, CL_DEVICE_TYPE_ALL, &error), error, platforms); + cl_device_count += + check_khronos_extensions(device, uuid_khronos_extension, 1); + } + } + + if (!cl_device_count) + { + printf("No suitable OpenCL Vulkan-compatible devices available\n"); + goto platforms; + } + + // For each OpenCL device, query its Vulkan-compatible device UUID and + // add it to the list of candidates. The device must support the + // cl_khr_device_uuid extension for us to be able to query the device's + // UUID. + struct cl_device_candidate* cl_candidates = + (struct cl_device_candidate*)malloc( + cl_device_count * sizeof(struct cl_device_candidate)); + cl_device_count = 0; + for (cl_uint platform_id = 0; platform_id < platform_count; + ++platform_id) + { + cl_uint cl_platform_devices_count = 0; + OCLERROR_RET(clGetDeviceIDs(platforms[platform_id], + CL_DEVICE_TYPE_ALL, 0, NULL, + &cl_platform_devices_count), + error, candidates); + + for (cl_uint cl_candidate_id = 0; + cl_candidate_id < cl_platform_devices_count; + ++cl_candidate_id) + { + cl_device_id device = cl_util_get_device( + platform_id, cl_candidate_id, CL_DEVICE_TYPE_ALL, &error); + if (check_khronos_extensions(device, uuid_khronos_extension, 1)) + { + cl_uchar vk_candidate_uuid[CL_UUID_SIZE_KHR]; + OCLERROR_RET(clGetDeviceInfo(device, CL_DEVICE_UUID_KHR, + CL_UUID_SIZE_KHR, + &vk_candidate_uuid, NULL), + error, candidates); + + struct cl_device_candidate candidate; + candidate.device = device; + memcpy(candidate.uuid, &vk_candidate_uuid, + sizeof(cl_uchar) * CL_UUID_SIZE_KHR); + cl_candidates[cl_device_count] = candidate; + cl_device_count++; + } + } + } + + // Query the Vulkan physical devices available. + uint32_t vk_device_count; + VK_CHECK(vkEnumeratePhysicalDevices(instance, &vk_device_count, NULL)); + + VkPhysicalDevice* vk_devices = + (VkPhysicalDevice*)malloc(vk_device_count * sizeof(VkPhysicalDevice)); + VK_CHECK( + vkEnumeratePhysicalDevices(instance, &vk_device_count, vk_devices)); + + // Find a suitable Vulkan physical device compatible with one of the OpenCL + // devices available. + for (cl_uint vk_device_id = 0; vk_device_id < vk_device_count; + ++vk_device_id) + { + VkPhysicalDevice vk_device = vk_devices[vk_device_id]; + if (is_vk_device_suitable(cl_candidates, cl_device_count, vk_device, + &found_candidate, required_device_extensions, + required_device_extensions_count)) + { + candidate_found = true; + break; + } + } + if (!candidate_found) + { + printf("No suitable OpenCL Vulkan-compatible devices available\n"); + } + + free(vk_devices); +candidates: + free(cl_candidates); +platforms: + free(platforms); +ret: + if (candidate_found) + { + return found_candidate; + } + exit(error); +} + + +// Check if a given Vulkan device supports a particular external memory handle +// type. +bool vk_check_external_memory_handle_type( + VkPhysicalDevice vk_physical_device, + VkExternalMemoryHandleTypeFlagBits vk_external_memory_handle_type) +{ + VkPhysicalDeviceExternalBufferInfo physical_device_external_buffer_info = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO + }; + physical_device_external_buffer_info.handleType = + vk_external_memory_handle_type; + + VkExternalBufferProperties external_buffer_properties; + + vkGetPhysicalDeviceExternalBufferProperties( + vk_physical_device, &physical_device_external_buffer_info, + &external_buffer_properties); + + return (vk_external_memory_handle_type + & external_buffer_properties.externalMemoryProperties + .compatibleHandleTypes); +} + +// Find Vulkan memory properties from Vulkan physical device property flags. +uint32_t find_vk_memory_type(VkPhysicalDevice vk_device, uint32_t type_filter, + VkMemoryPropertyFlags properties) +{ + VkPhysicalDeviceMemoryProperties mem_properties; + vkGetPhysicalDeviceMemoryProperties(vk_device, &mem_properties); + for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++) + { + if ((type_filter & (1 << i)) + && (mem_properties.memoryTypes[i].propertyFlags & properties) + == properties) + { + return i; + } + } + return 0; +} + +#endif // _SAMPLES_CORE_EXTERNALMEMORY_UTILS_H diff --git a/samples/extensions/khr/externalmemory/vulkan_utils.hpp b/samples/extensions/khr/externalmemory/vulkan_utils.hpp new file mode 100644 index 00000000..90090dfd --- /dev/null +++ b/samples/extensions/khr/externalmemory/vulkan_utils.hpp @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2023 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _SAMPLES_CORE_EXTERNALMEMORY_UTILS_HPP +#define _SAMPLES_CORE_EXTERNALMEMORY_UTILS_HPP + +// OpenCL C++ headers includes. +#include + +// OpenCL Utils includes. +#include + +// Vulkan includes. +#include + +// Standard header includes. +#include +#include +#include +#include + +// Check if the provided Vulkan error code is \p VK_SUCCESS. If not, prints an +// error message to the standard error output and terminates the program with an +// error code. +#define VK_CHECK(condition) \ + { \ + const VkResult error = condition; \ + if (error != VK_SUCCESS) \ + { \ + std::cerr << "A vulkan error encountered: " << error << " at " \ + << __FILE__ << ':' << __LINE__ << std::endl; \ + std::exit(EXIT_FAILURE); \ + } \ + } + +// OpenCL device that is suitable for this example. +struct cl_device_candidate +{ + /// The OpenCL device id representing the device. + cl::Device device; + + /// The Vulkan-compatible device UUID. + cl_uchar uuid[CL_UUID_SIZE_KHR]; +}; + +// OpenCL and Vulkan physical device suitable for the sample. +struct device_candidate +{ + /// The Vulkan physical device handle of the device to be used. + VkPhysicalDevice vk_candidate; + + /// The candidate device's Vulkan device properties. + VkPhysicalDeviceProperties vk_props; + + /// The OpenCL device candidate that this Vulkan device corresponds to. + struct cl_device_candidate cl_candidate; +}; + +// Check if the extensions supported by a Vulkan device includes a given set of +// required extensions. +template +bool extensions_supported( + const std::vector supported_extensions_properties, + const IteratorT required_device_extensions_begin, + const IteratorT required_device_extensions_end) +{ + IteratorT it = required_device_extensions_begin; + for (; it != required_device_extensions_end; ++it) + { + const auto supported_it = + std::find_if(supported_extensions_properties.begin(), + supported_extensions_properties.end(), + [&](const VkExtensionProperties& props) { + return std::strcmp(*it, props.extensionName) == 0; + }); + if (supported_it == supported_extensions_properties.end()) + { + return false; + } + } + return true; +} + +// Check if a given Vulkan device supports all the required Vulkan extensions. +bool check_device_extensions( + const VkPhysicalDevice vk_device, + const std::vector required_device_extensions) +{ + uint32_t supported_extensions_count; + VK_CHECK(vkEnumerateDeviceExtensionProperties( + vk_device, nullptr, &supported_extensions_count, nullptr)); + std::vector vk_supported_extensions_properties( + supported_extensions_count); + VK_CHECK(vkEnumerateDeviceExtensionProperties( + vk_device, nullptr, &supported_extensions_count, + vk_supported_extensions_properties.data())); + + return extensions_supported(vk_supported_extensions_properties, + required_device_extensions.begin(), + required_device_extensions.end()); +} + +// Check if a given Vulkan physical device is compatible with any of the OpenCL +// devices available. +bool is_vk_device_suitable( + const std::vector cl_candidates, + VkPhysicalDevice vk_device, device_candidate& candidate, + const std::vector required_device_extensions) +{ + // Check if the device supports OpenCL by checking if there is any device + // with the same UUID. + { + // Query the Vulkan device UUID using vkGetPhysicalDeviceProperties2. + VkPhysicalDeviceIDPropertiesKHR id_props = {}; + id_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2KHR props2 = {}; + props2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + props2.pNext = &id_props; + + vkGetPhysicalDeviceProperties2(vk_device, &props2); + + // Look for an OpenCL device which UUID matches the UUID reported by + // Vulkan. + const auto cmp_device_uuid = + [&](const cl_device_candidate& cl_candidate) { + return std::equal(std::begin(cl_candidate.uuid), + std::end(cl_candidate.uuid), + std::begin(id_props.deviceUUID), + std::end(id_props.deviceUUID)); + }; + const auto it = std::find_if(cl_candidates.begin(), cl_candidates.end(), + cmp_device_uuid); + if (it == cl_candidates.end()) + { + // This device does not support HIP. + return false; + } + + candidate.vk_props = props2.properties; + candidate.cl_candidate = *it; + } + + // Check if the device supports the required extensions. + if (!check_device_extensions(vk_device, required_device_extensions)) + { + return false; + } + + candidate.vk_candidate = vk_device; + return true; +} + +// Find a suitable device for the example, that is, an OpenCL +// device that is also Vulkan-compatible and that supports the required +// Vulkan device extensions. +struct device_candidate +find_suitable_device(VkInstance instance, + std::vector required_device_extensions) +{ + // Query OpenCL devices available. + std::vector platforms; + cl::Platform::get(&platforms); + + // For each OpenCL device, query its Vulkan-compatible device UUID and + // add it to the list of candidates. + std::vector cl_candidates; + for (const auto& platform : platforms) + { + std::vector platform_devices; + platform.getDevices(CL_DEVICE_TYPE_ALL, &platform_devices); + + for (const auto& device : platform_devices) + { + if (cl::util::supports_extension(device, "cl_khr_device_uuid")) + { + cl_uchar vk_candidate_uuid[CL_UUID_SIZE_KHR]; + device.getInfo(CL_DEVICE_UUID_KHR, &vk_candidate_uuid); + + cl_device_candidate candidate; + candidate.device = device; + std::memcpy(candidate.uuid, &vk_candidate_uuid, + sizeof(cl_uchar) * CL_UUID_SIZE_KHR); + cl_candidates.push_back(candidate); + } + } + } + + // Query the Vulkan physical devices available. + uint32_t vk_device_count; + VK_CHECK(vkEnumeratePhysicalDevices(instance, &vk_device_count, nullptr)); + + std::vector vk_devices(vk_device_count); + VK_CHECK(vkEnumeratePhysicalDevices(instance, &vk_device_count, + vk_devices.data())); + + // Find a suitable Vulkan physical device compatible with one of the OpenCL + // devices available. + device_candidate candidate; + for (const auto vk_device : vk_devices) + { + if (is_vk_device_suitable(cl_candidates, vk_device, candidate, + required_device_extensions)) + { + return candidate; + } + } + + std::cout << "No suitable OpenCL Vulkan-compatible devices available" + << std::endl; + exit(EXIT_SUCCESS); +} + +// Check if a given OpenCL device supports a particular external memory handle +// type. +bool vk_check_external_memory_handle_type( + VkPhysicalDevice vk_physical_device, + VkExternalMemoryHandleTypeFlagBits vk_external_memory_handle_type) +{ + VkPhysicalDeviceExternalBufferInfo physical_device_external_buffer_info{}; + physical_device_external_buffer_info.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO; + physical_device_external_buffer_info.handleType = + vk_external_memory_handle_type; + + VkExternalBufferProperties external_buffer_properties; + + vkGetPhysicalDeviceExternalBufferProperties( + vk_physical_device, &physical_device_external_buffer_info, + &external_buffer_properties); + + return (vk_external_memory_handle_type + & external_buffer_properties.externalMemoryProperties + .compatibleHandleTypes); +} + +// Find Vulkan memory properties from Vulkan physical device property flags. +uint32_t find_vk_memory_type(VkPhysicalDevice vk_device, uint32_t type_filter, + VkMemoryPropertyFlags properties) +{ + VkPhysicalDeviceMemoryProperties mem_properties; + vkGetPhysicalDeviceMemoryProperties(vk_device, &mem_properties); + for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++) + { + if ((type_filter & (1 << i)) + && (mem_properties.memoryTypes[i].propertyFlags & properties) + == properties) + { + return i; + } + } + return 0; +} + +#endif // _SAMPLES_CORE_EXTERNALMEMORY_UTILS_HPP diff --git a/samples/extensions/khr/histogram/main.cpp b/samples/extensions/khr/histogram/main.cpp index a4f33ec5..3409b962 100644 --- a/samples/extensions/khr/histogram/main.cpp +++ b/samples/extensions/khr/histogram/main.cpp @@ -194,7 +194,7 @@ int main(int argc, char* argv[]) std::exit(e.err()); } catch (cl::Error& e) { - std::cerr << "OpenCL rutnime error: " << e.what() << std::endl; + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; std::exit(e.err()); } catch (std::exception& e) { diff --git a/samples/extensions/khr/nbody/main.cpp b/samples/extensions/khr/nbody/main.cpp index c0af5d27..3c93fca9 100644 --- a/samples/extensions/khr/nbody/main.cpp +++ b/samples/extensions/khr/nbody/main.cpp @@ -225,8 +225,8 @@ void NBody::initializeGL() y_dist = uni(-y_abs_range, y_abs_range), z_dist = uni(-z_abs_range, z_abs_range), m_dist = uni(mass_min, mass_max)]() mutable { - return cl_float4{ x_dist(prng), y_dist(prng), - z_dist(prng), m_dist(prng) }; + return cl_float4{ { x_dist(prng), y_dist(prng), + z_dist(prng), m_dist(prng) } }; }); glUseProgram(gl_program); @@ -299,7 +299,7 @@ void NBody::initializeCL() // }); velocity_buffer = cl::Buffer{ opencl_context, CL_MEM_READ_WRITE, particle_count * sizeof(cl_float3), nullptr }; - queue.enqueueFillBuffer(velocity_buffer, cl_float4{ 0, 0, 0, 0 }, 0, + queue.enqueueFillBuffer(velocity_buffer, cl_float4{ { 0, 0, 0, 0 } }, 0, particle_count * sizeof(cl_float4)); queue.finish(); @@ -493,7 +493,7 @@ int main(int argc, char* argv[]) std::exit(e.err()); } catch (cl::Error& e) { - std::cerr << "OpenCL rutnime error: " << e.what() << std::endl; + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; std::exit(e.err()); } catch (std::exception& e) { diff --git a/scripts/check-format.sh b/scripts/check-format.sh index 2a9a78ff..d7096528 100755 --- a/scripts/check-format.sh +++ b/scripts/check-format.sh @@ -1,19 +1,110 @@ #!/usr/bin/env bash -# Arg used to specify non-'origin/main' comparison branch -ORIGIN_BRANCH=${1:-"origin/main"} +SOURCE_COMMIT="$1" +if [ "$#" -gt 0 ]; then + shift +fi + +# If no source commit is given target the default branch +if [ "x$SOURCE_COMMIT" = "x" ]; then + # If remote is not set use the remote of the current branch or fallback to "origin" + if [ "x$REMOTE" = "x" ]; then + BRANCH="$(git rev-parse --abbrev-ref HEAD)" + REMOTE="$(git config --local --get "branch.$BRANCH.remote" || echo 'origin')" + fi + SOURCE_COMMIT="remotes/$REMOTE/HEAD" +fi -# Run git-clang-format to check for violations -if [ "$TRAVIS" == "true" ]; then - EXTRA_OPTS="--binary `which clang-format-9`" +# Force colored diff output +DIFF_COLOR_SAVED="$(git config --local --get color.diff)" +if [ "x$DIFF_COLOR_SAVED" != "x" ]; then + git config --local --replace-all "color.diff" "always" +else + git config --local --add "color.diff" "always" fi -CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp $EXTRA_OPTS) + +scratch="$(mktemp -t check-format.XXXXXXXXXX)" +finish () { + # Remove temporary file + rm -rf "$scratch" + # Restore setting + if [ "x$DIFF_COLOR_SAVED" != "x" ]; then + git config --local --replace-all "color.diff" "$DIFF_COLOR_SAVED" + else + git config --local --unset "color.diff" + fi +} +# The trap will be invoked whenever the script exits, even due to a signal, this is a bash only +# feature +trap finish EXIT + +GIT_CLANG_FORMAT="${GIT_CLANG_FORMAT:-git-clang-format}" +"$GIT_CLANG_FORMAT" --style=file --extensions=cc,cp,cpp,c++,cxx,cu,cuh,hh,hpp,hxx,hip,vert,frag --diff "$@" "$SOURCE_COMMIT" > "$scratch" # Check for no-ops -grep '^no modified files to format$' <<<"$CLANG_FORMAT_OUTPUT" && exit 0 -grep '^clang-format did not modify any files$' <<<"$CLANG_FORMAT_OUTPUT" && exit 0 +grep '^no modified files to format$\|^clang-format did not modify any files$' \ + "$scratch" > /dev/null && exit 0 # Dump formatting diff and signal failure -echo -e "\n==== FORMATTING VIOLATIONS DETECTED ====\n" -echo "$CLANG_FORMAT_OUTPUT" +printf \ +"\033[31m==== FORMATTING VIOLATIONS DETECTED ====\033[0m +run '\033[33m%s --style=file %s %s\033[0m' to apply these formating changes\n\n" \ +"$GIT_CLANG_FORMAT" "$*" "$SOURCE_COMMIT" + +cat "$scratch" +exit 1 +#!/usr/bin/env bash + +SOURCE_COMMIT="$1" +if [ "$#" -gt 0 ]; then + shift +fi + +# If no source commit is given target the default branch +if [ "x$SOURCE_COMMIT" = "x" ]; then + # If remote is not set use the remote of the current branch or fallback to "origin" + if [ "x$REMOTE" = "x" ]; then + BRANCH="$(git rev-parse --abbrev-ref HEAD)" + REMOTE="$(git config --local --get "branch.$BRANCH.remote" || echo 'origin')" + fi + SOURCE_COMMIT="remotes/$REMOTE/HEAD" +fi + +# Force colored diff output +DIFF_COLOR_SAVED="$(git config --local --get color.diff)" +if [ "x$DIFF_COLOR_SAVED" != "x" ]; then + git config --local --replace-all "color.diff" "always" +else + git config --local --add "color.diff" "always" +fi + +scratch="$(mktemp -t check-format.XXXXXXXXXX)" +finish () { + # Remove temporary file + rm -rf "$scratch" + # Restore setting + if [ "x$DIFF_COLOR_SAVED" != "x" ]; then + git config --local --replace-all "color.diff" "$DIFF_COLOR_SAVED" + else + git config --local --unset "color.diff" + fi +} +# The trap will be invoked whenever the script exits, even due to a signal, this is a bash only +# feature +trap finish EXIT + +GIT_CLANG_FORMAT="${GIT_CLANG_FORMAT:-git-clang-format}" +"$GIT_CLANG_FORMAT" --style=file --extensions=cc,cp,cpp,c++,cxx,cu,cuh,hh,hpp,hxx,hip,vert,frag --diff "$@" "$SOURCE_COMMIT" > "$scratch" + +# Check for no-ops +grep '^no modified files to format$\|^clang-format did not modify any files$' \ + "$scratch" > /dev/null && exit 0 + +# Dump formatting diff and signal failure +printf \ +"\033[31m==== FORMATTING VIOLATIONS DETECTED ====\033[0m +run '\033[33m%s --style=file %s %s\033[0m' to apply these formating changes\n\n" \ +"$GIT_CLANG_FORMAT" "$*" "$SOURCE_COMMIT" + +cat "$scratch" exit 1 diff --git a/test/cmake/findmodule/CMakeLists.txt b/test/cmake/findmodule/CMakeLists.txt index 4fee1e80..6da08165 100644 --- a/test/cmake/findmodule/CMakeLists.txt +++ b/test/cmake/findmodule/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.16) project(FindModuleTest) diff --git a/test/cmake/pkgconfig/platformenum/CMakeLists.txt b/test/cmake/pkgconfig/platformenum/CMakeLists.txt index 926dccf3..84a6cb6b 100644 --- a/test/cmake/pkgconfig/platformenum/CMakeLists.txt +++ b/test/cmake/pkgconfig/platformenum/CMakeLists.txt @@ -1,7 +1,9 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.16) project(PkgConfigTest-PlatformEnum) +include(CTest) + find_package(OpenCL REQUIRED CONFIG diff --git a/test/cmake/pkgconfig/useutil/CMakeLists.txt b/test/cmake/pkgconfig/useutil/CMakeLists.txt index db0134b2..245621ae 100644 --- a/test/cmake/pkgconfig/useutil/CMakeLists.txt +++ b/test/cmake/pkgconfig/useutil/CMakeLists.txt @@ -1,7 +1,10 @@ -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.16) project(PkgConfigTest-UseUtil) +include(CTest) +include("${CMAKE_CURRENT_SOURCE_DIR}/../../../../cmake/Dependencies/whereami/whereami.cmake") + find_package(OpenCL REQUIRED CONFIG @@ -25,6 +28,7 @@ target_link_libraries(${PROJECT_NAME}_cpp OpenCL::HeadersCpp OpenCL::Headers OpenCL::OpenCL + whereami ) target_compile_definitions(${PROJECT_NAME}_cpp @@ -49,6 +53,7 @@ target_link_libraries(${PROJECT_NAME}_c OpenCL::Utils OpenCL::Headers OpenCL::OpenCL + whereami ) target_compile_definitions(${PROJECT_NAME}_c diff --git a/test/cmake/platformenum.c b/test/cmake/platformenum.c index 0c787caa..fe20e0fe 100644 --- a/test/cmake/platformenum.c +++ b/test/cmake/platformenum.c @@ -10,7 +10,7 @@ #define print(...) printf(__VA_ARGS__) #endif -int main() +int main(void) { cl_int CL_err = CL_SUCCESS; cl_uint numPlatforms = 0; diff --git a/test/cmake/platformenum.cpp b/test/cmake/platformenum.cpp index b91e630f..0c153b2c 100644 --- a/test/cmake/platformenum.cpp +++ b/test/cmake/platformenum.cpp @@ -5,7 +5,7 @@ #include // std::cout #include // EXIT_FAILURE -int main() +int main(void) { try { diff --git a/test/cmake/useutil.c b/test/cmake/useutil.c index 0c787caa..fe20e0fe 100644 --- a/test/cmake/useutil.c +++ b/test/cmake/useutil.c @@ -10,7 +10,7 @@ #define print(...) printf(__VA_ARGS__) #endif -int main() +int main(void) { cl_int CL_err = CL_SUCCESS; cl_uint numPlatforms = 0;