diff --git a/.github/workflows/continuous-integration-workflow-32bit.yml b/.github/workflows/continuous-integration-workflow-32bit.yml new file mode 100644 index 00000000000..7fab3b0e62f --- /dev/null +++ b/.github/workflows/continuous-integration-workflow-32bit.yml @@ -0,0 +1,37 @@ +name: github-Linux-32bit +on: [push, pull_request] + +concurrency: + group: ${ {github.event_name }}-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{github.event_name == 'pull_request'}} + +jobs: + CI-32bit: + name: Linux-32bit + runs-on: ubuntu-latest + container: + image: ghcr.io/kokkos/ci-containers/ubuntu:latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + - name: install_multilib + run: sudo apt-get update && sudo apt-get install -y gcc-multilib g++-multilib gfortran-multilib + - name: Configure Kokkos + run: | + cmake -B builddir \ + -DKokkos_ENABLE_OPENMP=ON \ + -DKokkos_ENABLE_TESTS=ON \ + -DKokkos_ENABLE_BENCHMARKS=ON \ + -DKokkos_ENABLE_EXAMPLES=ON \ + -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ + -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ + -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DCMAKE_CXX_FLAGS="-Werror -m32 -DKOKKOS_IMPL_32BIT" \ + -DCMAKE_CXX_COMPILER=g++ \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo + - name: Build + run: | + cmake --build builddir --parallel 2 + - name: Tests + working-directory: builddir + run: ctest --output-on-failure diff --git a/.github/workflows/continuous-integration-workflow-hpx.yml b/.github/workflows/continuous-integration-workflow-hpx.yml index 35bb5bb2cb2..0c7abd2fc1e 100644 --- a/.github/workflows/continuous-integration-workflow-hpx.yml +++ b/.github/workflows/continuous-integration-workflow-hpx.yml @@ -13,7 +13,7 @@ jobs: steps: - name: checkout code - uses: actions/checkout@v2.2.0 + uses: actions/checkout@v3 with: path: kokkos - name: setup hpx dependencies @@ -26,12 +26,12 @@ jobs: libboost-all-dev \ ninja-build - name: checkout hpx - uses: actions/checkout@v2.2.0 + uses: actions/checkout@v3 with: repository: STELLAR-GROUP/hpx - ref: 1.7.1 + ref: 1.8.0 path: hpx - - uses: actions/cache@v2 + - uses: actions/cache@v3 id: cache-hpx with: path: ./hpx/install @@ -69,12 +69,10 @@ jobs: -DCMAKE_CXX_COMPILER=clang++ \ -DCMAKE_CXX_FLAGS="-Werror" \ -DHPX_ROOT=$PWD/../../hpx/install \ - -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_HPX=ON \ - -DKokkos_ENABLE_HPX_ASYNC_DISPATCH=ON \ -DKokkos_ENABLE_SERIAL=OFF \ -DKokkos_ENABLE_TESTS=ON \ .. diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/continuous-integration-workflow.yml index 55b88179486..741f6b27469 100644 --- a/.github/workflows/continuous-integration-workflow.yml +++ b/.github/workflows/continuous-integration-workflow.yml @@ -12,30 +12,31 @@ jobs: matrix: distro: ['fedora:latest', 'fedora:rawhide', 'ubuntu:latest'] cxx: ['g++', 'clang++'] + cxx_extra_flags: [''] cmake_build_type: ['Release', 'Debug'] backend: ['OPENMP'] clang-tidy: [''] include: - distro: 'fedora:intel' cxx: 'icpc' + cxx_extra_flags: '-diag-disable=177,10441' cmake_build_type: 'Release' backend: 'OPENMP' - clang-tidy: '' - distro: 'fedora:intel' cxx: 'icpc' + cxx_extra_flags: '-diag-disable=177,10441' cmake_build_type: 'Debug' backend: 'OPENMP' - clang-tidy: '' - distro: 'fedora:intel' cxx: 'icpx' + cxx_extra_flags: '-fp-model=precise -Wno-pass-failed' cmake_build_type: 'Release' backend: 'OPENMP' - clang-tidy: '' - distro: 'fedora:intel' cxx: 'icpx' + cxx_extra_flags: '-fp-model=precise -Wno-pass-failed' cmake_build_type: 'Debug' backend: 'OPENMP' - clang-tidy: '' - distro: 'ubuntu:latest' cxx: 'clang++' cmake_build_type: 'RelWithDebInfo' @@ -48,11 +49,9 @@ jobs: runs-on: ubuntu-latest container: image: ghcr.io/kokkos/ci-containers/${{ matrix.distro }} - # see https://github.com/actions/virtual-environments/issues/3812 - options: --security-opt seccomp=unconfined steps: - name: Checkout desul - uses: actions/checkout@v2.2.0 + uses: actions/checkout@v3 with: repository: desul/desul ref: 477da9c8f40f8db369c28dd3f93a67e376d8511b @@ -67,21 +66,17 @@ jobs: cmake -DDESUL_ENABLE_TESTS=OFF -DCMAKE_INSTALL_PREFIX=/usr/desul-install .. sudo cmake --build . --target install --parallel 2 - name: Checkout code - uses: actions/checkout@v2.2.0 - - uses: actions/cache@v2 + uses: actions/checkout@v3 + - uses: actions/cache@v3 with: - path: ~/.ccache - key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${github.ref}-${{ github.sha }} - restore-keys: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{github.ref}} + path: ~/.cache/ccache + key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{ github.ref }}-${{ github.sha }} + restore-keys: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.cmake_build_type }}-${{ matrix.openmp }}-${{ github.ref }} - name: maybe_disable_death_tests if: ${{ matrix.distro == 'fedora:rawhide' }} run: echo "GTEST_FILTER=-*DeathTest*" >> $GITHUB_ENV -# Re-enable when latest is F37+ -# - name: maybe_use_flang -# if: ${{ matrix.cxx == 'clang++' && startsWith(matrix.distro,'fedora:') }} -# run: echo "FC=flang" >> $GITHUB_ENV - name: maybe_use_flang_new - if: ${{ matrix.cxx == 'clang++' && startsWith(matrix.distro,'fedora:rawhide') }} + if: ${{ matrix.cxx == 'clang++' && startsWith(matrix.distro,'fedora:') }} run: echo "FC=flang-new" >> $GITHUB_ENV - name: maybe_use_external_gtest if: ${{ matrix.distro == 'ubuntu:latest' }} @@ -95,7 +90,6 @@ jobs: -DCMAKE_INSTALL_PREFIX=/usr \ ${{ matrix.clang-tidy }} \ -Ddesul_ROOT=/usr/desul-install/ \ - -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_DESUL_ATOMICS_EXTERNAL=ON \ -DKokkos_ENABLE_HWLOC=ON \ -DKokkos_ENABLE_${{ matrix.backend }}=ON \ @@ -104,7 +98,10 @@ jobs: -DKokkos_ENABLE_EXAMPLES=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ + -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DCMAKE_CXX_FLAGS="-Werror ${{ matrix.cxx_extra_flags }}" \ -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} - name: Build run: | @@ -114,6 +111,12 @@ jobs: - name: Tests working-directory: builddir run: ctest --output-on-failure + - name: Test linking against build dir + working-directory: example/build_cmake_installed + run: | + cmake -B builddir_buildtree -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} -DKokkos_ROOT=../../builddir + cmake --build builddir_buildtree + cmake --build builddir_buildtree --target test - name: Test DESTDIR Install run: DESTDIR=${PWD}/install cmake --build builddir --target install && rm -rf ${PWD}/install/usr && rmdir ${PWD}/install - name: Install diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml index dae8343f20d..0ff32668488 100644 --- a/.github/workflows/osx.yml +++ b/.github/workflows/osx.yml @@ -24,14 +24,13 @@ jobs: cmake_build_type: "Release" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: configure run: cmake -B build . -DKokkos_ENABLE_${{ matrix.backend }}=On -DCMAKE_CXX_FLAGS="-Werror" -DCMAKE_CXX_STANDARD=17 - -DKokkos_ARCH_NATIVE=ON -DKokkos_ENABLE_COMPILER_WARNINGS=ON -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF -DKokkos_ENABLE_TESTS=On diff --git a/.github/workflows/performance-benchmark.yml b/.github/workflows/performance-benchmark.yml new file mode 100644 index 00000000000..205239e043d --- /dev/null +++ b/.github/workflows/performance-benchmark.yml @@ -0,0 +1,61 @@ +name: github-benchmarks +on: + push: + branches: + - develop + pull_request: + +jobs: + CI: + continue-on-error: true + strategy: + matrix: + distro: ['ubuntu:latest'] + cxx: ['g++', 'clang++'] + backend: ['OPENMP'] + runs-on: ubuntu-latest + container: + image: ghcr.io/kokkos/ci-containers/${{ matrix.distro }} + env: + BUILD_ID: ${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }} + steps: + - name: Checkout code + uses: actions/checkout@v3 + - uses: actions/cache@v3 + with: + path: ~/.cache/ccache + key: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }}-${{ github.ref }}-${{ github.sha }} + restore-keys: kokkos-${{ matrix.distro }}-${{ matrix.cxx }}-${{ matrix.backend }}-${{ github.ref }} + - name: Configure Kokkos + run: | + cmake -B builddir \ + -DKokkos_ENABLE_HWLOC=ON \ + -DKokkos_ENABLE_${{ matrix.backend }}=ON \ + -DKokkos_ENABLE_BENCHMARKS=ON \ + -DCMAKE_CXX_COMPILER=${{ matrix.cxx }} \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_BUILD_TYPE=Release + - name: Build + run: | + ccache -z + NUM_CPU=$(grep -c processor /proc/cpuinfo) + cmake --build builddir --parallel ${NUM_CPU} + ccache -s + - name: Tests + working-directory: builddir + run: ctest --output-on-failure + - name: Gather benchmark results + run: | + mkdir ${{ env.BUILD_ID }} + find builddir/core/perf_test/ -name "*.json" -exec mv {} ${{ env.BUILD_ID }}/ \; + - name: Push benchmark results + if: ${{ github.ref == 'refs/heads/develop' }} + uses: dmnemec/copy_file_to_another_repo_action@main + env: + API_TOKEN_GITHUB: ${{ secrets.DALG24_PUSH_BENCHMARK_RESULTS }} + with: + source_file: ${{ env.BUILD_ID }} + destination_repo: 'kokkos/kokkos-benchmark-results' + destination_branch: 'main' + user_email: 'kokkos@users.noreply.github.com' + user_name: 'Kokkos Developers' diff --git a/.jenkins b/.jenkins index 1775a57d3b2..c7d8ce533d0 100644 --- a/.jenkins +++ b/.jenkins @@ -17,7 +17,7 @@ pipeline { dockerfile { filename 'Dockerfile.clang' dir 'scripts/docker' - label 'nvidia-docker || docker' + label 'nvidia-docker || rocm-docker || docker' args '-v /tmp/ccache.kokkos:/tmp/ccache' } } @@ -101,12 +101,14 @@ pipeline { } steps { sh 'ccache --zero-stats' - sh '''rm -rf build && mkdir -p build && cd build && \ + sh '''. /opt/intel/oneapi/setvars.sh --include-intel-llvm && \ + rm -rf build && mkdir -p build && cd build && \ cmake \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DCMAKE_CXX_COMPILER=clang++ \ - -DCMAKE_CXX_FLAGS="-fsycl-device-code-split=per_kernel -Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-linker-warnings" \ + -DCMAKE_CXX_COMPILER=/opt/intel/oneapi/compiler/2023.0.0/linux/bin-llvm/clang++ \ + -DCMAKE_CXX_FLAGS="-fsycl-device-code-split=per_kernel -Wno-deprecated-declarations -Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-unknown-cuda-version -Wno-sycl-target" \ + -DKOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED=0 \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ARCH_VOLTA70=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ @@ -343,7 +345,7 @@ pipeline { --with-cuda \ --with-cuda-options=enable_lambda \ --arch=Volta70 \ - .. && \ + && \ make test -j8''' } post { @@ -487,6 +489,7 @@ pipeline { -DCMAKE_CXX_FLAGS=-Werror \ -DKokkos_ARCH_NATIVE=ON \ -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DKokkos_ENABLE_DEPRECATED_CODE_3=ON \ -DKokkos_ENABLE_DEPRECATED_CODE_4=ON \ -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_TESTS=ON \ diff --git a/.jenkins_nightly b/.jenkins_nightly new file mode 100644 index 00000000000..8bcdb75a2a5 --- /dev/null +++ b/.jenkins_nightly @@ -0,0 +1,76 @@ +pipeline { + agent none + + options { + timeout(time: 6, unit: 'HOURS') + } + + stages { + stage('Build') { + parallel { + stage('spack-serial') { + agent { + docker { + image 'ubuntu:22.04' + label 'docker' + } + } + steps { + sh ''' + DEBIAN_FRONTEND=noninteractive && \ + apt-get update && apt-get upgrade -y && apt-get install -y \ + build-essential \ + wget \ + git \ + bc \ + python3-dev \ + && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + + rm -rf spack && \ + git clone https://github.com/spack/spack.git && \ + . ./spack/share/spack/setup-env.sh && \ + spack install kokkos@develop+tests && \ + spack load cmake && \ + spack test run kokkos && \ + spack test results -l + ''' + } + } + stage('spack-cuda') { + agent { + docker { + image 'nvidia/cuda:12.1.0-devel-ubuntu22.04' + label 'nvidia-docker && ampere' + } + } + steps { + sh ''' + DEBIAN_FRONTEND=noninteractive && \ + apt-get update && apt-get upgrade -y && apt-get install -y \ + build-essential \ + wget \ + git \ + bc \ + python3-dev \ + gfortran \ + && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + + rm -rf spack && \ + git clone https://github.com/spack/spack.git && \ + . ./spack/share/spack/setup-env.sh && \ + spack install kokkos@develop+cuda+wrapper+tests cuda_arch=80 ^cuda@12.1.0 && \ + spack load cmake && \ + spack load kokkos-nvcc-wrapper && \ + spack load cuda && \ + spack load kokkos && \ + spack test run kokkos && \ + spack test results -l + ''' + } + } + } + } + } +} diff --git a/BUILD.md b/BUILD.md index b0d603e6db0..f80320e78b1 100644 --- a/BUILD.md +++ b/BUILD.md @@ -111,247 +111,4 @@ For dev-build details, consult the kokkos-spack repository [README](https://gith # Kokkos Keyword Listing -## Device Backends -Device backends can be enabled by specifying `-DKokkos_ENABLE_X`. - -* Kokkos_ENABLE_CUDA - * Whether to build CUDA backend - * BOOL Default: OFF -* Kokkos_ENABLE_HPX - * Whether to build HPX backend (experimental) - * BOOL Default: OFF -* Kokkos_ENABLE_OPENMP - * Whether to build OpenMP backend - * BOOL Default: OFF -* Kokkos_ENABLE_THREADS - * Whether to build C++ thread backend - * BOOL Default: OFF -* Kokkos_ENABLE_SERIAL - * Whether to build serial backend - * BOOL Default: ON -* Kokkos_ENABLE_HIP (Experimental) - * Whether to build HIP backend - * BOOL Default: OFF -* Kokkos_ENABLE_OPENMPTARGET (Experimental) - * Whether to build the OpenMP target backend - * BOOL Default: OFF - -## Enable Options -Options can be enabled by specifying `-DKokkos_ENABLE_X`. - -* Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION - * Whether to aggressively vectorize loops - * BOOL Default: OFF -* Kokkos_ENABLE_COMPILER_WARNINGS - * Whether to print all compiler warnings - * BOOL Default: OFF -* Kokkos_ENABLE_CUDA_CONSTEXPR - * Whether to activate experimental relaxed constexpr functions - * BOOL Default: OFF -* Kokkos_ENABLE_CUDA_LAMBDA - * Whether to activate experimental lambda features - * BOOL Default: OFF -* Kokkos_ENABLE_CUDA_LDG_INTRINSIC - * Deprecated since 4.0, LDG intrinsics are always enabled. - * Whether to use CUDA LDG intrinsics - * BOOL Default: OFF -* Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE - * Whether to enable relocatable device code (RDC) for CUDA - * BOOL Default: OFF -* Kokkos_ENABLE_CUDA_UVM - * Deprecated since 4.0 - * Whether to use unified memory (UM) by default for CUDA - * BOOL Default: OFF -* Kokkos_ENABLE_DEBUG - * Whether to activate extra debug features - may increase compile times - * BOOL Default: OFF -* Kokkos_ENABLE_DEBUG_BOUNDS_CHECK - * Whether to use bounds checking - will increase runtime - * BOOL Default: OFF -* Kokkos_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK - * Debug check on dual views - * BOOL Default: OFF -* Kokkos_ENABLE_EXAMPLES - * Whether to enable building examples - * BOOL Default: OFF -* Kokkos_ENABLE_HPX_ASYNC_DISPATCH - * Whether HPX supports asynchronous dispatch - * BOOL Default: OFF -* Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC - * Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2). This is an experimental performance feature and currently has issue when using with UCX. See https://github.com/kokkos/kokkos/issues/4228 for more details. - * BOOL Default: OFF -* Kokkos_ENABLE_LARGE_MEM_TESTS - * Whether to perform extra large memory tests - * BOOL_Default: OFF -* Kokkos_ENABLE_PROFILING_LOAD_PRINT - * Whether to print information about which profiling tools gotloaded - * BOOL Default: OFF -* Kokkos_ENABLE_TESTS - * Whether to enable test suite - * BOOL Default: OFF - - -## Third-party Libraries (TPLs) -The following options control enabling TPLs: -* Kokkos_ENABLE_HPX - * Whether to enable the HPX library - * BOOL Default: OFF -* Kokkos_ENABLE_HWLOC - * Whether to enable the HWLOC library - * BOOL Default: Off -* Kokkos_ENABLE_LIBNUMA - * Whether to enable the LIBNUMA library - * BOOL Default: Off -* Kokkos_ENABLE_MEMKIND - * Whether to enable the MEMKIND library - * BOOL Default: Off -* Kokkos_ENABLE_LIBDL - * Whether to enable the LIBDL library - * BOOL Default: On -* Kokkos_ENABLE_LIBRT - * Whether to enable the LIBRT library - * BOOL Default: Off - -The following options control finding and configuring non-CMake TPLs: -* Kokkos_CUDA_DIR or CUDA_ROOT - * Location of CUDA install prefix for libraries - * PATH Default: -* Kokkos_HWLOC_DIR or HWLOC_ROOT - * Location of HWLOC install prefix - * PATH Default: -* Kokkos_LIBNUMA_DIR or LIBNUMA_ROOT - * Location of LIBNUMA install prefix - * PATH Default: -* Kokkos_MEMKIND_DIR or MEMKIND_ROOT - * Location of MEMKIND install prefix - * PATH Default: -* Kokkos_LIBDL_DIR or LIBDL_ROOT - * Location of LIBDL install prefix - * PATH Default: -* Kokkos_LIBRT_DIR or LIBRT_ROOT - * Location of LIBRT install prefix - * PATH Default: - -The following options control `find_package` paths for CMake-based TPLs: -* HPX_DIR or HPX_ROOT - * Location of HPX prefix (ROOT) or CMake config file (DIR) - * PATH Default: - -## Architecture Keywords -Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_X`. - -* Kokkos_ARCH_NATIVE - * Whether to optimize for the the local CPU architecture - * BOOL Default: OFF -* Kokkos_ARCH_AMDAVX - * Whether to optimize for the AMDAVX architecture - * BOOL Default: OFF -* Kokkos_ARCH_ARMV80 - * Whether to optimize for the ARMV80 architecture - * BOOL Default: OFF -* Kokkos_ARCH_ARMV81 - * Whether to optimize for the ARMV81 architecture - * BOOL Default: OFF -* Kokkos_ARCH_ARMV8_THUNDERX - * Whether to optimize for the ARMV8_THUNDERX architecture - * BOOL Default: OFF -* Kokkos_ARCH_ARMV8_TX2 - * Whether to optimize for the ARMV8_TX2 architecture - * BOOL Default: OFF -* Kokkos_ARCH_BDW - * Whether to optimize for the BDW architecture - * BOOL Default: OFF -* Kokkos_ARCH_BGQ - * Whether to optimize for the BGQ architecture - * BOOL Default: OFF -* Kokkos_ARCH_ZEN - * Whether to optimize for the Zen architecture - * BOOL Default: OFF -* Kokkos_ARCH_ZEN2 - * Whether to optimize for the Zen2 architecture - * BOOL Default: OFF -* Kokkos_ARCH_ZEN3 - * Whether to optimize for the Zen3 architecture - * BOOL Default: OFF -* Kokkos_ARCH_HSW - * Whether to optimize for the HSW architecture - * BOOL Default: OFF -* Kokkos_ARCH_KEPLER30 - * Whether to optimize for the KEPLER30 architecture - * BOOL Default: OFF -* Kokkos_ARCH_KEPLER32 - * Whether to optimize for the KEPLER32 architecture - * BOOL Default: OFF -* Kokkos_ARCH_KEPLER35 - * Whether to optimize for the KEPLER35 architecture - * BOOL Default: OFF -* Kokkos_ARCH_KEPLER37 - * Whether to optimize for the KEPLER37 architecture - * BOOL Default: OFF -* Kokkos_ARCH_KNC - * Whether to optimize for the KNC architecture - * BOOL Default: OFF -* Kokkos_ARCH_KNL - * Whether to optimize for the KNL architecture - * BOOL Default: OFF -* Kokkos_ARCH_MAXWELL50 - * Whether to optimize for the MAXWELL50 architecture - * BOOL Default: OFF -* Kokkos_ARCH_MAXWELL52 - * Whether to optimize for the MAXWELL52 architecture - * BOOL Default: OFF -* Kokkos_ARCH_MAXWELL53 - * Whether to optimize for the MAXWELL53 architecture - * BOOL Default: OFF -* Kokkos_ARCH_PASCAL60 - * Whether to optimize for the PASCAL60 architecture - * BOOL Default: OFF -* Kokkos_ARCH_PASCAL61 - * Whether to optimize for the PASCAL61 architecture - * BOOL Default: OFF -* Kokkos_ARCH_POWER7 - * Whether to optimize for the POWER7 architecture - * BOOL Default: OFF -* Kokkos_ARCH_POWER8 - * Whether to optimize for the POWER8 architecture - * BOOL Default: OFF -* Kokkos_ARCH_POWER9 - * Whether to optimize for the POWER9 architecture - * BOOL Default: OFF -* Kokkos_ARCH_ICL - * Whether to optimize for the ICL architecture - * BOOL Default: OFF -* Kokkos_ARCH_ICX - * Whether to optimize for the ICX architecture - * BOOL Default: OFF -* Kokkos_ARCH_SKL - * Whether to optimize for the SKL architecture - * BOOL Default: OFF -* Kokkos_ARCH_SKX - * Whether to optimize for the SKX architecture - * BOOL Default: OFF -* Kokkos_ARCH_SNB - * Whether to optimize for the SNB architecture - * BOOL Default: OFF -* Kokkos_ARCH_SPR - * Whether to optimize for the SPR architecture - * BOOL Default: OFF -* Kokkos_ARCH_TURING75 - * Whether to optimize for the TURING75 architecture - * BOOL Default: OFF -* Kokkos_ARCH_VOLTA70 - * Whether to optimize for the VOLTA70 architecture - * BOOL Default: OFF -* Kokkos_ARCH_VOLTA72 - * Whether to optimize for the VOLTA72 architecture - * BOOL Default: OFF -* Kokkos_ARCH_WSM - * Whether to optimize for the WSM architecture - * BOOL Default: OFF - -##### [LICENSE](https://github.com/kokkos/kokkos/blob/devel/LICENSE) - -[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) - -Under the terms of Contract DE-NA0003525 with NTESS, -the U.S. Government retains certain rights in this software. +Please refer to our [wiki](https://kokkos.github.io/kokkos-core-wiki/keywords.html#cmake-keywords). diff --git a/CHANGELOG.md b/CHANGELOG.md index a381f16129f..4c145c44b38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,88 @@ -# Change Log +# CHANGELOG + +## [4.1.00](https://github.com/kokkos/kokkos/tree/4.0.01) (2023-06-16) +[Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.01...4.1.00) + +### Features: +* Add `` header [\#4577](https://github.com/kokkos/kokkos/pull/4577) [\#5907](https://github.com/kokkos/kokkos/pull/5907) [\#5967](https://github.com/kokkos/kokkos/pull/5967) [\#6101](https://github.com/kokkos/kokkos/pull/6101) +* Add `UnorderedMapInsertOpTypes` [\#5877](https://github.com/kokkos/kokkos/pull/5877) and documentation [\#350](https://github.com/kokkos/kokkos-core-wiki/pull/350) +* Add multiple reducers support for team-level parallel reduce [\#5727](https://github.com/kokkos/kokkos/pull/5727) + +### Backend and Architecture Enhancements: + +#### CUDA: + +* Allow NVCC 12 to compile using C++20 flag [\#5977](https://github.com/kokkos/kokkos/pull/5977) +* Remove ability to disable CMake option `Kokkos_ENABLE_CUDA_LAMBDA` and unconditionally enable CUDA extended lambda support. [\#5964](https://github.com/kokkos/kokkos/pull/5964) +* Drop unnecessary fences around the memory allocation when using `CudaUVMSpace` in views [\#6008](https://github.com/kokkos/kokkos/pull/6008) + +#### HIP: +* Improve performance for `parallel_reduce`. Use different parameters for `LightWeight` kernels [\#6029](https://github.com/kokkos/kokkos/pull/6029) and [\#6160](https://github.com/kokkos/kokkos/pull/6160) + +#### SYCL: +* Only pass one wrapper object in SYCL reductions [\#6047](https://github.com/kokkos/kokkos/pull/6047) +* Improve and simplify parallel_scan implementation [\#6064](https://github.com/kokkos/kokkos/pull/6064) +* Remove workaround for submit_barrier not being enqueued properly [\#5504](https://github.com/kokkos/kokkos/pull/5504) +* Fix guards for using scratch space with SYCL [\#6003](https://github.com/kokkos/kokkos/pull/6003) +* Fix compiling SYCL with KOKKOS_IMPL_DO_NOT_USE_PRINTF_USAGE [\#6219](https://github.com/kokkos/kokkos/pull/6219) + +#### OpenMPTarget: +* Improve hierarchical parallelism for Intel architectures [\#6043](https://github.com/kokkos/kokkos/pull/6043) +* Enable Cray compiler for the OpenMPTarget backend. [\#5889](https://github.com/kokkos/kokkos/pull/5889) + +#### HPX: +* Update HPX backend to use HPX's sender/receiver functionality [\#5628](https://github.com/kokkos/kokkos/pull/5628) +* Increase minimum required HPX version to 1.8.0 [\#6132](https://github.com/kokkos/kokkos/pull/6132) +* Implement HPX::in_parallel [\#6143](https://github.com/kokkos/kokkos/pull/6143) + +### General Enhancements +* Export CMake `Kokkos_{CUDA,HIP}_ARCHITECTURES` variables [\#5919](https://github.com/kokkos/kokkos/pull/5919) [\#5925](https://github.com/kokkos/kokkos/pull/5925) +* Add `Kokkos::Profiling::ScopedRegion` [\#5959](https://github.com/kokkos/kokkos/pull/5959) [\#5972](https://github.com/kokkos/kokkos/pull/5972) +* Add support for `View::rank[_dynamic]()`[\#5870](https://github.com/kokkos/kokkos/pull/5870) +* Detect incompatible relocatable device code mode to prevent ODR violations [\#5991](https://github.com/kokkos/kokkos/pull/5991) +* Add (experimental) support for 32-bit Darwin and PPC [\#5916](https://github.com/kokkos/kokkos/pull/5916) +* Add missing half and bhalf specialization of the infinity numeric trait [\#6055](https://github.com/kokkos/kokkos/pull/6055) +* Add `is_dual_view` trait and align further with regular view [\#6120](https://github.com/kokkos/kokkos/pull/6120) +* Allow templated functors in parallel_for, parallel_reduce and parallel_scan [\#5976](https://github.com/kokkos/kokkos/pull/5976) +* Define KOKKOS_COMPILER_INTEL_LLVM and only define at most one KOKKOS_COMPILER* macro [\#5906](https://github.com/kokkos/kokkos/pull/5906) +* Allow linking against build tree [\#6078](https://github.com/kokkos/kokkos/pull/6078) +* Allow passing a temporary std::vector to partition_space [\#6167](https://github.com/kokkos/kokkos/pull/6167) +* `Kokkos` can be used as an external dependency in `Trilinos` [\#6142](https://github.com/kokkos/kokkos/pull/6142), [\#6157](https://github.com/kokkos/kokkos/pull/6157) [\#6163](https://github.com/kokkos/kokkos/pull/6163) +* Left align demangled stacktrace output [\#6191](https://github.com/kokkos/kokkos/pull/6191) +* Improve OpenMP affinity warning to include MPI concerns [\#6185](https://github.com/kokkos/kokkos/pull/6185) + +### Build System Changes +* Drop `Kokkos_ENABLE_LAUNCH_COMPILER` option which had no effect [\#6148](https://github.com/kokkos/kokkos/pull/6148) +* Export variables for relevant Kokkos options with cmake[\#6142](https://github.com/kokkos/kokkos/pull/6142) + +### Incompatibilities (i.e. breaking changes) +* Desul atomics always enabled [\#5801](https://github.com/kokkos/kokkos/pull/5801) +* Drop `KOKKOS_ENABLE_CUDA_ASM*` and `KOKKOS_ENABLE_*_ATOMICS` macros [\#5940](https://github.com/kokkos/kokkos/pull/5940) +* Drop `KOKKOS_ENABLE_RFO_PREFETCH` macro [\#5944](https://github.com/kokkos/kokkos/pull/5944) +* Deprecate `Kokkos_ENABLE_CUDA_LAMBDA` configuration option and force it to `ON` [\#5964](https://github.com/kokkos/kokkos/pull/5964) +* Remove TriBITS Kokkos subpackages [\#6104](https://github.com/kokkos/kokkos/pull/6104) +* Cuda: Remove unused attach_texture_object [\#6129](https://github.com/kokkos/kokkos/pull/6129) +* Drop Kokkos_ENABLE_PROFILING_LOAD_PRINT configuration option [\#6150](https://github.com/kokkos/kokkos/pull/6150) +* Drop pointless Kokkos{Algorithms,Containers}_config.h files [\#6108](https://github.com/kokkos/kokkos/pull/6108) + +### Deprecations +* Deprecate `BinSort`, `BinOp1D`, and `BinOp3D` default constructors [\#6131](https://github.com/kokkos/kokkos/pull/6131) + +### Bug Fixes +* Fix `SYCLTeamMember` to take arguments for scratch sizes as `std::size_t` [\#5981](https://github.com/kokkos/kokkos/pull/5981) +* Fix Kokkos_SIMD with AVX2 on 64-bit architectures [\#6075](https://github.com/kokkos/kokkos/pull/6075) +* Fix an incorrectly returning size for SIMD uint64_t in AVX2 [\#6004](https://github.com/kokkos/kokkos/pull/6004) +* Fix missing avx512 header file with gcc versions before 10 [\#6183](https://github.com/kokkos/kokkos/pull/6183) +* Fix incorrect results of `parallel_reduce` of types smaller than `int` on CUDA and HIP: [\#5745](https://github.com/kokkos/kokkos/pull/5745) +* CMake: update package compatibility mode when building within Trilinos [\#6012](https://github.com/kokkos/kokkos/pull/6012) +* Fix warnings generated from internal uses of `ALL_t` rather than `Kokkos::ALL_t` [\#6028](https://github.com/kokkos/kokkos/pull/6028) +* Fix bug in `hpcbind` script: check for correct Slurm variable [\#6116](https://github.com/kokkos/kokkos/pull/6116) +* KokkosTools: Don't call callbacks before backends are initialized [\#6114](https://github.com/kokkos/kokkos/pull/6114) +* Fix global fence in Kokkos::resize(DynRankView) [\#6184](https://github.com/kokkos/kokkos/pull/6184) +* Fix `BinSort` support for strided views [\#6081](https://github.com/kokkos/kokkos/pull/6184) +* Fix missing `is_*_view` traits in containers [\#6195](https://github.com/kokkos/kokkos/pull/6195) +* Fix broken OpenMP target on NVHPC [\#6171](https://github.com/kokkos/kokkos/pull/6171) +* Sorting an empty view should exit early and not fail [\#6130](https://github.com/kokkos/kokkos/pull/6130) ## [4.0.01](https://github.com/kokkos/kokkos/tree/4.0.01) (2023-04-14) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.00...4.0.01) @@ -29,8 +113,9 @@ - Fix an incorrectly returning size for SIMD uint64_t in AVX2 [\#6011](https://github.com/kokkos/kokkos/pull/6011) - Desul atomics: wrong value for `desul::Impl::numeric_limits_max` [\#6018](https://github.com/kokkos/kokkos/pull/6018) - Fix warning in some user code when using std::memcpy [\#6000](https://github.com/kokkos/kokkos/pull/6000) +- Fix excessive build times using Makefile.kokkos [\#6068](https://github.com/kokkos/kokkos/pull/6068) -## [4.0.0](https://github.com/kokkos/kokkos/tree/4.0.0) (2023-02-21) +## [4.0.0](https://github.com/kokkos/kokkos/tree/4.0.00) (2023-02-21) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...4.0.00) ### Features: @@ -38,6 +123,7 @@ - `parallel_scan` with `View` as result type. [\#5146](https://github.com/kokkos/kokkos/pull/5146) - Introduced `SharedSpace`, an alias for a `MemorySpace` that is accessible by every `ExecutionSpace`. The memory is moved and then accessed locally. [\#5289](https://github.com/kokkos/kokkos/pull/5289) - Introduced `SharedHostPinnedSpace`, an alias for a `MemorySpace` that is accessible by every `ExecutionSpace`. The memory is pinned to the host and accessed via zero-copy access. [\#5405](https://github.com/kokkos/kokkos/pull/5405) +- Add team- and thread-level `sort`, `sort_by_key` algorithms. [\#5317](https://github.com/kokkos/kokkos/pull/5317) - Groundwork for `MDSpan` integration. [\#4973](https://github.com/kokkos/kokkos/pull/4973) and [\#5304](https://github.com/kokkos/kokkos/pull/5304) - Introduced MD version of hierarchical parallelism: `TeamThreadMDRange`, `ThreadVectorMDRange` and `TeamVectorMDRange`. [\#5238](https://github.com/kokkos/kokkos/pull/5238) @@ -121,7 +207,27 @@ - Add missing `ReductionIdentity` specialization [\#5798](https://github.com/kokkos/kokkos/pull/5798) - Don't install standard algorithms headers multiple times [\#5670](https://github.com/kokkos/kokkos/pull/5670) - Fix max scratch size calculation for level 0 scratch in CUDA and HIP [\#5718](https://github.com/kokkos/kokkos/pull/5718) -- Fix excessive build times using Makefile.kokkos [\#6068](https://github.com/kokkos/kokkos/pull/6068) + +## [3.7.02](https://github.com/kokkos/kokkos/tree/3.7.02) (2023-05-17) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...3.7.02) + +### Backends and Archs Enhancements: +#### CUDA +- Add Hopper support and update nvcc_wrapper to work with CUDA-12 [\#5693](https://github.com/kokkos/kokkos/pull/5693) +### General Enhancements: +- sprintf -> snprintf [\#5787](https://github.com/kokkos/kokkos/pull/5787) +### Build System: +- Add error message when not using `hipcc` and when `CMAKE_CXX_STANDARD` is not set [\#5945](https://github.com/kokkos/kokkos/pull/5945) +### Bug Fixes: +- Fix Scratch allocation alignment issues [\#5692](https://github.com/kokkos/kokkos/pull/5692) +- Fix Intel Classic Compiler ICE [\#5710](https://github.com/kokkos/kokkos/pull/5710) +- Don't install std algorithm headers multiple times [\#5711](https://github.com/kokkos/kokkos/pull/5711) +- Fix static init order issue in InitalizationSettings [\#5721](https://github.com/kokkos/kokkos/pull/5721) +- Fix src/dst Properties in deep_copy(DynamicView,View) [\#5732](https://github.com/kokkos/kokkos/pull/5732) +- Fix build on Fedora Rawhide [\#5782](https://github.com/kokkos/kokkos/pull/5782) +- Finalize HIP lock arrays [\#5694](https://github.com/kokkos/kokkos/pull/5694) +- Fix CUDA lock arrays for current Desul [\#5812](https://github.com/kokkos/kokkos/pull/5812) +- Set the correct device/context in InterOp tests [\#5701](https://github.com/kokkos/kokkos/pull/5701) ## [3.7.01](https://github.com/kokkos/kokkos/tree/3.7.01) (2022-12-01) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.00...3.7.01) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa712f56127..895cee6a089 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,8 +5,8 @@ if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" ) message( FATAL_ERROR "FATAL: In-source builds are not allowed. You should create a separate directory for build files and delete CMakeCache.txt." ) endif() -if (COMMAND TRIBITS_PACKAGE_DECL) - TRIBITS_PACKAGE_DECL(Kokkos) +if (COMMAND TRIBITS_PACKAGE) + TRIBITS_PACKAGE(Kokkos) endif() # We want to determine if options are given with the wrong case @@ -37,6 +37,8 @@ IF(COMMAND TRIBITS_PACKAGE_DECL) SET(KOKKOS_HAS_TRILINOS ON) ELSE() SET(KOKKOS_HAS_TRILINOS OFF) + SET(PACKAGE_NAME Kokkos) + SET(PACKAGE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") ENDIF() # Is this build a subdirectory of another project GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY) @@ -138,14 +140,20 @@ IF (NOT CMAKE_SIZEOF_VOID_P) MESSAGE(FATAL_ERROR "Kokkos did not configure correctly and failed to validate compiler. The most likely cause is linkage errors during CMake compiler validation. Please consult the CMake error log shown below for the exact error during compiler validation") ENDIF() ELSEIF (NOT CMAKE_SIZEOF_VOID_P EQUAL 8) - MESSAGE(FATAL_ERROR "Kokkos assumes a 64-bit build; i.e., 8-byte pointers, but found ${CMAKE_SIZEOF_VOID_P}-byte pointers instead") + IF(CMAKE_SIZEOF_VOID_P EQUAL 4) + MESSAGE(WARNING "32-bit builds are experimental and not officially supported.") + SET(KOKKOS_IMPL_32BIT ON) + ELSE() + MESSAGE(FATAL_ERROR "Kokkos assumes a 64-bit build, i.e., 8-byte pointers, but found ${CMAKE_SIZEOF_VOID_P}-byte pointers instead;") + ENDIF() ENDIF() set(Kokkos_VERSION_MAJOR 4) -set(Kokkos_VERSION_MINOR 0) -set(Kokkos_VERSION_PATCH 1) +set(Kokkos_VERSION_MINOR 1) +set(Kokkos_VERSION_PATCH 00) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") +message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") # mathematical expressions below are not stricly necessary but they eliminate # the rather aggravating leading 0 in the releases patch version number, and, @@ -293,10 +301,6 @@ IF (KOKKOS_HAS_TRILINOS) $<$:${KOKKOS_ALL_COMPILE_OPTIONS}>) ENDIF() -if (NOT COMMAND TRIBITS_PACKAGE_DECL) - KOKKOS_PACKAGE_DECL() -endif() - #------------------------------------------------------------------------------ # @@ -310,7 +314,6 @@ KOKKOS_PROCESS_SUBPACKAGES() # E) If Kokkos itself is enabled, process the Kokkos package # -KOKKOS_PACKAGE_DEF() KOKKOS_EXCLUDE_AUTOTOOLS_FILES() KOKKOS_PACKAGE_POSTPROCESS() KOKKOS_CONFIGURE_CORE() @@ -320,6 +323,8 @@ IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING) #Make sure in-tree projects can reference this as Kokkos:: #to match the installed target names ADD_LIBRARY(Kokkos::kokkos ALIAS kokkos) + # all_libs target is required for TriBITS-compliance + ADD_LIBRARY(Kokkos::all_libs ALIAS kokkos) TARGET_LINK_LIBRARIES(kokkos INTERFACE ${KOKKOS_COMPONENT_LIBRARIES}) KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos) ENDIF() diff --git a/Makefile.kokkos b/Makefile.kokkos index 60cef6c7f30..9436b75b9e4 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -1,8 +1,8 @@ # Default settings common options. KOKKOS_VERSION_MAJOR = 4 -KOKKOS_VERSION_MINOR = 0 -KOKKOS_VERSION_PATCH = 1 +KOKKOS_VERSION_MINOR = 1 +KOKKOS_VERSION_PATCH = 00 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -23,7 +23,7 @@ KOKKOS_DEBUG ?= "no" KOKKOS_USE_TPLS ?= "" # Options: c++17,c++1z,c++20,c++2a,c++23,c++2b KOKKOS_CXX_STANDARD ?= "c++17" -# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align,disable_deprecated_code,enable_deprecation_warnings,disable_desul_atomics +# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align,disable_deprecated_code,enable_deprecation_warnings KOKKOS_OPTIONS ?= "" KOKKOS_CMAKE ?= "no" KOKKOS_TRIBITS ?= "no" @@ -75,7 +75,6 @@ KOKKOS_INTERNAL_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OP KOKKOS_INTERNAL_ENABLE_TUNING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_tuning) KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align) KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check) -KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print) KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_large_mem_tests) # deprecated KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),use_ldg) @@ -86,6 +85,7 @@ KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPT KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) # deprecated KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics) +# deprecated KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_desul_atomics) KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),impl_disable_bundled_mdspan) KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code) @@ -265,15 +265,16 @@ else KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp endif endif -ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - #KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp -Wno-openmp-mapping - KOKKOS_INTERNAL_OPENMPTARGET_LIB := -lomptarget -else ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1) - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fiopenmp -Wno-openmp-mapping -else - #Assume GCC - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fiopenmp -Wno-openmp-mapping + else ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 1) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -mp=gpu + else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 0) + #Assume GCC + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none + endif endif ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) @@ -576,10 +577,6 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN), 0) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_COMPLEX_ALIGN") endif -ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_PROFILING_LOAD_PRINT") -endif - ifeq ($(KOKKOS_INTERNAL_ENABLE_TUNING), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TUNING") endif @@ -668,15 +665,13 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif endif - ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA") - KOKKOS_CXXFLAGS += -expt-extended-lambda - endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA") + KOKKOS_CXXFLAGS += -extended-lambda + endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA") - endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA") endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1) @@ -697,7 +692,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_HPX_ASYNC_DISPATCH") endif endif @@ -973,143 +968,144 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64 -Xopenmp-target -march + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 1) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp + else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp --offload-arch endif - KOKKOS_INTERNAL_USE_CUDA_ARCH = 1 endif -ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1) - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE86), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ADA89), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ADA89") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_89 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HOPPER90), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER90") - KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_90 +# Do not add this flag if its the cray compiler or the nvhpc compiler. +ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 0) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVHPC), 0) + # Lets start with adding architecture defines + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER30") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER32") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER35") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KEPLER37") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL50") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL52") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_MAXWELL53") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL60") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_PASCAL61") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA70") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VOLTA72") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_TURING75), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_TURING75") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_75 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE80), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE80") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_80 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMPERE86), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMPERE86") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_86 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ADA89), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ADA89") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_89 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HOPPER90), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HOPPER90") + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_90 + endif endif +endif - ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) +ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) + endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) - endif - endif endif endif # Figure out the architecture flag for ROCm. -ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) - # Lets start with adding architecture defines - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1030), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1030") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1030 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1100), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1100") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 - endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA906") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1030), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1030") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1030 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NAVI1100), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI1100") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_NAVI") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100 +endif +ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp) + KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp) - ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp - endif KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) @@ -1182,12 +1178,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) endif -ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_DESUL_ATOMICS") - KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/desul/include -else ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) - $(error Contradictory Desul atomics options: KOKKOS_OPTIONS=$(KOKKOS_OPTIONS) ) +ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 1) + $(warning disable_desul_atomics option has been removed. Desul atomics cannot be disabled.) + KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS := 0 +endif +ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) + $(warning enable_desul_atomics option has been removed. Desul atomics are always enabled.) endif +KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/desul/include ifeq ($(KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN), 0) KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/mdspan/include @@ -1229,6 +1227,7 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_SetupBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") @@ -1240,8 +1239,8 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1) - tmp := $(call kokkos_append_config_header,"\#include ","KokkosCore_Config_FwdBackend.hpp") - tmp := $(call kokkos_append_config_header,"\#include ","KokkosCore_Config_DeclareBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") + tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_DeclareBackend.hpp") endif ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) tmp := $(call kokkos_append_config_header,"$H""include ","KokkosCore_Config_FwdBackend.hpp") @@ -1272,9 +1271,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) - ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp - endif + KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) ifneq ($(CUDA_PATH),) KOKKOS_CPPLAGS += -I$(CUDA_PATH)/include @@ -1390,11 +1387,7 @@ KOKKOS_LIBS := -lkokkos ${KOKKOS_LIBS} # Generating the header DESUL_INTERNAL_CONFIG_TMP=Desul_Config.tmp -ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - DESUL_CONFIG_HEADER=desul/atomics/Config.hpp -else - DESUL_CONFIG_HEADER=NothingToSeeHereMoveAlong -endif +DESUL_CONFIG_HEADER=desul/atomics/Config.hpp desul_append_header = $(shell echo $1 >> $(DESUL_INTERNAL_CONFIG_TMP)) tmp := $(call desul_append_header, "// generated by on-demand build system by crtrott" > $(DESUL_INTERNAL_CONFIG_TMP)) tmp := $(call desul_append_header, "$H""ifndef DESUL_ATOMICS_CONFIG_HPP_") @@ -1405,12 +1398,22 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) else tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_CUDA */") endif +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) + tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION") +else + tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_CUDA_SEPARABLE_COMPILATION */") +endif ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_HIP") else tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_HIP */") endif +ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1) + tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION") +else + tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_HIP_SEPARABLE_COMPILATION */") +endif ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_SYCL") diff --git a/Makefile.targets b/Makefile.targets index 32b1fab2615..4e08a46c695 100644 --- a/Makefile.targets +++ b/Makefile.targets @@ -51,8 +51,6 @@ Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp -Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp endif @@ -77,8 +75,6 @@ Kokkos_HIP_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp -Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp endif @@ -89,6 +85,8 @@ Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokk endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) +Kokkos_OpenMP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP.cpp Kokkos_OpenMP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp diff --git a/algorithms/CMakeLists.txt b/algorithms/CMakeLists.txt index f32363dc9ac..ab557ab66ae 100644 --- a/algorithms/CMakeLists.txt +++ b/algorithms/CMakeLists.txt @@ -1,7 +1,3 @@ - - -KOKKOS_SUBPACKAGE(Algorithms) - IF (NOT Kokkos_INSTALL_TESTING) ADD_SUBDIRECTORY(src) ENDIF() @@ -9,7 +5,3 @@ ENDIF() IF(NOT ((KOKKOS_ENABLE_OPENMPTARGET OR KOKKOS_ENABLE_OPENACC) AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)) KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) ENDIF() - -KOKKOS_SUBPACKAGE_POSTPROCESS() - - diff --git a/algorithms/cmake/Dependencies.cmake b/algorithms/cmake/Dependencies.cmake deleted file mode 100644 index c36b62523fa..00000000000 --- a/algorithms/cmake/Dependencies.cmake +++ /dev/null @@ -1,5 +0,0 @@ -TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - LIB_REQUIRED_PACKAGES KokkosCore KokkosContainers - LIB_OPTIONAL_TPLS Pthread CUDA HWLOC - TEST_OPTIONAL_TPLS CUSPARSE - ) diff --git a/algorithms/cmake/KokkosAlgorithms_config.h.in b/algorithms/cmake/KokkosAlgorithms_config.h.in deleted file mode 100644 index 67334b70f36..00000000000 --- a/algorithms/cmake/KokkosAlgorithms_config.h.in +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef KOKKOS_ALGORITHMS_CONFIG_H -#define KOKKOS_ALGORITHMS_CONFIG_H - -#endif diff --git a/algorithms/src/CMakeLists.txt b/algorithms/src/CMakeLists.txt index 606d83d18b5..16957789472 100644 --- a/algorithms/src/CMakeLists.txt +++ b/algorithms/src/CMakeLists.txt @@ -1,6 +1,3 @@ - -KOKKOS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) - #I have to leave these here for tribits KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) @@ -9,7 +6,6 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) FILE(GLOB ALGO_HEADERS *.hpp) FILE(GLOB ALGO_SOURCES *.cpp) -LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp) APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/impl/*.hpp) diff --git a/algorithms/src/Kokkos_Random.hpp b/algorithms/src/Kokkos_Random.hpp index 91e9ce6fc84..abb028d28ea 100644 --- a/algorithms/src/Kokkos_Random.hpp +++ b/algorithms/src/Kokkos_Random.hpp @@ -1514,7 +1514,7 @@ void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g, "Kokkos::fill_random", Kokkos::RangePolicy(exec, 0, (LDA + 127) / 128), Impl::fill_random_functor_begin_end( + ViewType::rank, IndexType>( a, g, begin, end)); } diff --git a/algorithms/src/Kokkos_Sort.hpp b/algorithms/src/Kokkos_Sort.hpp index 033de221649..10f9ad64626 100644 --- a/algorithms/src/Kokkos_Sort.hpp +++ b/algorithms/src/Kokkos_Sort.hpp @@ -66,11 +66,16 @@ #endif +#if defined(KOKKOS_ENABLE_ONEDPL) +#include +#include +#endif + namespace Kokkos { namespace Impl { -template +template struct CopyOp; template @@ -141,8 +146,12 @@ class BinSort { Kokkos::is_view::value, Kokkos::View >, + typename SrcViewType::device_type +#if !defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC + , + Kokkos::MemoryTraits +#endif + >, typename SrcViewType::const_type>; using perm_view_type = typename PermuteViewType::const_type; @@ -221,7 +230,11 @@ class BinSort { bool sort_within_bins; public: - BinSort() = default; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinSort() = default; +#else + BinSort() = delete; +#endif //---------------------------------------- // Constructor: takes the keys, the binning_operator and optionally whether to @@ -324,6 +337,10 @@ class BinSort { template void sort(const ExecutionSpace& exec, ValuesViewType const& values, int values_range_begin, int values_range_end) const { + if (values.extent(0) == 0) { + return; + } + static_assert( Kokkos::SpaceAccessibility::accessible, @@ -335,11 +352,6 @@ class BinSort { "The provided execution space must be able to access the memory space " "of the View argument!"); - using scratch_view_type = - Kokkos::View; - const size_t len = range_end - range_begin; const size_t values_len = values_range_end - values_range_begin; if (len != values_len) { @@ -347,6 +359,9 @@ class BinSort { "BinSort::sort: values range length != permutation vector length"); } + using scratch_view_type = + Kokkos::View; scratch_view_type sorted_values( view_alloc(exec, WithoutInitializing, "Kokkos::SortImpl::BinSortFunctor::sorted_values"), @@ -451,24 +466,29 @@ class BinSort { void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { auto bin_size = bin_count_const(i); if (bin_size <= 1) return; - int upper_bound = bin_offsets(i) + bin_size; - bool sorted = false; - while (!sorted) { - sorted = true; - int old_idx = sort_order(bin_offsets(i)); - int new_idx = 0; - for (int k = bin_offsets(i) + 1; k < upper_bound; k++) { - new_idx = sort_order(k); - - if (!bin_op(keys_rnd, old_idx, new_idx)) { - sort_order(k - 1) = new_idx; - sort_order(k) = old_idx; - sorted = false; - } else { - old_idx = new_idx; + constexpr bool use_std_sort = + std::is_same_v; + int lower_bound = bin_offsets(i); + int upper_bound = lower_bound + bin_size; + // Switching to std::sort for more than 10 elements has been found + // reasonable experimentally. + if (use_std_sort && bin_size > 10) { + if constexpr (use_std_sort) { + std::sort(&sort_order(lower_bound), &sort_order(upper_bound), + [this](int p, int q) { return bin_op(keys_rnd, p, q); }); + } + } else { + for (int k = lower_bound + 1; k < upper_bound; ++k) { + int old_idx = sort_order(k); + int j = k - 1; + while (j >= lower_bound) { + int new_idx = sort_order(j); + if (!bin_op(keys_rnd, old_idx, new_idx)) break; + sort_order(j + 1) = new_idx; + --j; } + sort_order(j + 1) = old_idx; } - upper_bound--; } } }; @@ -481,7 +501,11 @@ struct BinOp1D { double mul_ = {}; double min_ = {}; - BinOp1D() = default; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp1D() = default; +#else + BinOp1D() = delete; +#endif // Construct BinOp with number of bins, minimum value and maximum value BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, @@ -525,7 +549,11 @@ struct BinOp3D { double mul_[3] = {}; double min_[3] = {}; - BinOp3D() = default; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4 + KOKKOS_DEPRECATED BinOp3D() = default; +#else + BinOp3D() = delete; +#endif BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], typename KeyViewType::const_value_type max[]) { @@ -596,6 +624,10 @@ std::enable_if_t<(Kokkos::is_execution_space::value) && memory_space>::accessible)> sort(const ExecutionSpace& exec, const Kokkos::View& view) { + if (view.extent(0) == 0) { + return; + } + using ViewType = Kokkos::View; using CompType = BinOp1D; @@ -634,12 +666,44 @@ sort(const ExecutionSpace& exec, bin_sort.sort(exec, view); } +#if defined(KOKKOS_ENABLE_ONEDPL) +template +void sort(const Experimental::SYCL& space, + const Kokkos::View& view) { + if (view.extent(0) == 0) { + return; + } + + using ViewType = Kokkos::View; + static_assert(SpaceAccessibility::accessible, + "SYCL execution space is not able to access the memory space " + "of the View argument!"); + + auto queue = space.sycl_queue(); + auto policy = oneapi::dpl::execution::make_device_policy(queue); + + // Can't use Experimental::begin/end here since the oneDPL then assumes that + // the data is on the host. + static_assert( + ViewType::rank == 1 && + (std::is_same::value || + std::is_same::value), + "SYCL sort only supports contiguous 1D Views."); + const int n = view.extent(0); + oneapi::dpl::sort(policy, view.data(), view.data() + n); +} +#endif + template std::enable_if_t<(Kokkos::is_execution_space::value) && (SpaceAccessibility< HostSpace, typename Kokkos::View:: memory_space>::accessible)> sort(const ExecutionSpace&, const Kokkos::View& view) { + if (view.extent(0) == 0) { + return; + } auto first = Experimental::begin(view); auto last = Experimental::end(view); std::sort(first, last); @@ -649,6 +713,9 @@ sort(const ExecutionSpace&, const Kokkos::View& view) { template void sort(const Cuda& space, const Kokkos::View& view) { + if (view.extent(0) == 0) { + return; + } const auto exec = thrust::cuda::par.on(space.cuda_stream()); auto first = Experimental::begin(view); auto last = Experimental::end(view); @@ -659,6 +726,11 @@ void sort(const Cuda& space, template void sort(ViewType const& view) { Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) == 0) { + return; + } + typename ViewType::execution_space exec; sort(exec, view); exec.fence("Kokkos::sort: fence after sorting"); @@ -668,6 +740,10 @@ template std::enable_if_t::value> sort( const ExecutionSpace& exec, ViewType view, size_t const begin, size_t const end) { + if (view.extent(0) == 0) { + return; + } + using range_policy = Kokkos::RangePolicy; using CompType = BinOp1D; @@ -690,6 +766,11 @@ std::enable_if_t::value> sort( template void sort(ViewType view, size_t const begin, size_t const end) { Kokkos::fence("Kokkos::sort: before"); + + if (view.extent(0) == 0) { + return; + } + typename ViewType::execution_space exec; sort(exec, view, begin, end); exec.fence("Kokkos::Sort: fence after sorting"); diff --git a/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp b/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp index cc6b63f0287..dd785e603b5 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_AdjacentFind.hpp @@ -42,12 +42,13 @@ struct StdAdjacentFindFunctor { const auto& next_value = m_first[i + 1]; const bool are_equal = m_p(my_value, next_value); - auto rv = - are_equal - ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type value = {::Kokkos::reduction_identity::min()}; + if (are_equal) { + value.min_loc_true = i; + } - m_reducer.join(red_value, rv); + m_reducer.join(red_value, value); } KOKKOS_FUNCTION diff --git a/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp b/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp index 52e7625e4d2..0376100410b 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp @@ -29,7 +29,7 @@ struct is_admissible_to_kokkos_std_algorithms : std::false_type {}; template struct is_admissible_to_kokkos_std_algorithms< - T, std::enable_if_t< ::Kokkos::is_view::value && T::rank == 1 && + T, std::enable_if_t< ::Kokkos::is_view::value && T::rank() == 1 && (std::is_same::value || std::is_same::max()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::max()}; + if (found) { + rv.max_loc_true = i; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp b/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp index df10da2fd55..5f22d2ad138 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_FindFirstOf.hpp @@ -52,10 +52,11 @@ struct StdFindFirstOfFunctor { } } - const auto rv = - found ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; - + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::min()}; + if (found) { + rv.min_loc_true = i; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp b/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp index f7ec4b1110c..9c0b0c0ccdf 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_FindIfOrNot.hpp @@ -44,10 +44,11 @@ struct StdFindIfOrNotFunctor { // if doing find_if_not, look for when predicate is false const bool found_condition = is_find_if ? m_p(my_value) : !m_p(my_value); - auto rv = - found_condition - ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::min()}; + if (found_condition) { + rv.min_loc_true = i; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp b/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp index 55e1a78695d..ecd6ff39cd5 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp @@ -90,6 +90,8 @@ struct InclusiveScanDefaultFunctor { KOKKOS_FUNCTION void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + if (update.is_initial) { update.val = input.val; } else { diff --git a/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp b/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp index 92a22f3c3a8..0fe2d246ff2 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_IsPartitioned.hpp @@ -43,8 +43,12 @@ struct StdIsPartitionedFunctor { ::Kokkos::reduction_identity::min(); constexpr index_type m_red_id_max = ::Kokkos::reduction_identity::max(); - auto rv = predicate_value ? red_value_type{i, m_red_id_min} - : red_value_type{m_red_id_max, i}; + + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {m_red_id_max, i}; + if (predicate_value) { + rv = {i, m_red_id_min}; + } m_reducer.join(redValue, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp b/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp index fe52e18a33d..2a0c112bf5a 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_IsSortedUntil.hpp @@ -28,33 +28,30 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template +template struct StdIsSortedUntilFunctor { using index_type = typename IteratorType::difference_type; + using value_type = typename ReducerType::value_type; + IteratorType m_first; - IndicatorViewType m_indicator; ComparatorType m_comparator; + ReducerType m_reducer; KOKKOS_FUNCTION - void operator()(const index_type i, int& update, const bool final) const { + void operator()(const index_type i, value_type& reduction_result) const { const auto& val_i = m_first[i]; const auto& val_ip1 = m_first[i + 1]; - if (m_comparator(val_ip1, val_i)) { - ++update; - } - - if (final) { - m_indicator(i) = update; + m_reducer.join(reduction_result, i); } } KOKKOS_FUNCTION - StdIsSortedUntilFunctor(IteratorType _first1, IndicatorViewType indicator, - ComparatorType comparator) - : m_first(std::move(_first1)), - m_indicator(std::move(indicator)), - m_comparator(std::move(comparator)) {} + StdIsSortedUntilFunctor(IteratorType first, ComparatorType comparator, + ReducerType reducer) + : m_first(std::move(first)), + m_comparator(std::move(comparator)), + m_reducer(std::move(reducer)) {} }; template @@ -73,40 +70,31 @@ IteratorType is_sorted_until_impl(const std::string& label, } /* - use scan and a helper "indicator" view - such that we scan the data and fill the indicator with - partial sum that is always 0 unless we find a pair that - breaks the sorting, so in that case the indicator will - have a 1 starting at the location where the sorting breaks. - So finding that 1 means finding the location we want. - */ - - // aliases - using indicator_value_type = std::size_t; - using indicator_view_type = - ::Kokkos::View; - using functor_type = - StdIsSortedUntilFunctor; - - // do scan - // use num_elements-1 because each index handles i and i+1 - const auto num_elements_minus_one = num_elements - 1; - indicator_view_type indicator("is_sorted_until_indicator_helper", - num_elements_minus_one); - ::Kokkos::parallel_scan( - label, RangePolicy(ex, 0, num_elements_minus_one), - functor_type(first, indicator, std::move(comp))); - - // try to find the first sentinel value, which indicates - // where the sorting condition breaks - namespace KE = ::Kokkos::Experimental; - constexpr indicator_value_type sentinel_value = 1; - auto r = - KE::find(ex, KE::cbegin(indicator), KE::cend(indicator), sentinel_value); - const auto shift = r - ::Kokkos::Experimental::cbegin(indicator); - - return first + (shift + 1); + Do a par_reduce computing the *min* index that breaks the sorting. + If such an index is found, then the range is sorted until that element. + If no such index is found, then the range is sorted until the end. + */ + using index_type = typename IteratorType::difference_type; + index_type reduction_result; + ::Kokkos::Min reducer(reduction_result); + ::Kokkos::parallel_reduce( + label, + // use num_elements-1 because each index handles i and i+1 + RangePolicy(ex, 0, num_elements - 1), + // use CTAD + StdIsSortedUntilFunctor(first, comp, reducer), reducer); + + /* If the reduction result is equal to the initial value, + it means the range is sorted until the end */ + index_type reduction_result_init; + reducer.init(reduction_result_init); + if (reduction_result == reduction_result_init) { + return last; + } else { + /* If such an index is found, then the range is sorted until there and + we need to return an iterator past the element found so do +1 */ + return first + (reduction_result + 1); + } } template diff --git a/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp b/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp index 170ec9f2911..ad7f59232ec 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_LexicographicalCompare.hpp @@ -63,12 +63,14 @@ struct StdLexicographicalCompareFunctor { const auto& my_value1 = m_first1[i]; const auto& my_value2 = m_first2[i]; - bool different = m_comparator(my_value1, my_value2) || - m_comparator(my_value2, my_value1); - auto rv = - different - ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + const bool different = m_comparator(my_value1, my_value2) || + m_comparator(my_value2, my_value1); + + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::min()}; + if (different) { + rv.min_loc_true = i; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp b/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp index 9d2e31f63fc..b7426844670 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_Mismatch.hpp @@ -42,10 +42,11 @@ struct StdMismatchRedFunctor { const auto& my_value1 = m_first1[i]; const auto& my_value2 = m_first2[i]; - auto rv = - !m_predicate(my_value1, my_value2) - ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {i}; + if (m_predicate(my_value1, my_value2)) { + rv = {::Kokkos::reduction_identity::min()}; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp b/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp index 5457ae25084..54f7c5b612a 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_PartitionCopy.hpp @@ -31,25 +31,6 @@ template struct StdPartitionCopyScalar { ValueType true_count_; ValueType false_count_; - - // Here we implement the copy assignment operators explicitly for consistency - // with how the Scalar structs are implemented inside - // Kokkos_Parallel_Reduce.hpp. - KOKKOS_FUNCTION - void operator=(const StdPartitionCopyScalar& other) { - true_count_ = other.true_count_; - false_count_ = other.false_count_; - } - - // this is needed for - // OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp:699:21: error: no viable - // overloaded '=' m_returnvalue = 0; - // - KOKKOS_FUNCTION - void operator=(const ValueType value) { - true_count_ = value; - false_count_ = value; - } }; template ::min()} - : red_value_type{i}; + + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {i}; + if (predicate_value) { + rv = {::Kokkos::reduction_identity::min()}; + } + m_reducer.join(redValue, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp b/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp index a4aaba26b98..7c75899cb8a 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_Reverse.hpp @@ -39,16 +39,7 @@ struct StdReverseFunctor { KOKKOS_FUNCTION void operator()(index_type i) const { - // the swap below is doing the same thing, but - // for Intel 18.0.5 does not work. - // But putting the impl directly here, it works. -#ifdef KOKKOS_COMPILER_INTEL - typename InputIterator::value_type tmp = std::move(m_first[i]); - m_first[i] = std::move(m_last[-i - 1]); - m_last[-i - 1] = std::move(tmp); -#else ::Kokkos::Experimental::swap(m_first[i], m_last[-i - 1]); -#endif } StdReverseFunctor(InputIterator first, InputIterator last) diff --git a/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp b/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp index a612a57231f..2780151f29f 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_Search.hpp @@ -60,9 +60,11 @@ struct StdSearchFunctor { } } - const auto rv = - found ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::min()}; + if (found) { + rv = {i}; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp b/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp index 0d3b6bc7060..98640136d42 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_SearchN.hpp @@ -59,9 +59,11 @@ struct StdSearchNFunctor { } } - const auto rv = - found ? red_value_type{i} - : red_value_type{::Kokkos::reduction_identity::min()}; + // FIXME_NVHPC using a ternary operator causes problems + red_value_type rv = {::Kokkos::reduction_identity::min()}; + if (found) { + rv.min_loc_true = i; + } m_reducer.join(red_value, rv); } diff --git a/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp b/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp index 438acb989f9..a5e4786d04c 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_SwapRanges.hpp @@ -35,16 +35,7 @@ struct StdSwapRangesFunctor { KOKKOS_FUNCTION void operator()(IndexType i) const { - // the swap below is doing the same thing, but - // for Intel 18.0.5 does not work. - // But putting the impl directly here, it works. -#ifdef KOKKOS_COMPILER_INTEL - typename IteratorType1::value_type tmp = std::move(m_first1[i]); - m_first1[i] = std::move(m_first2[i]); - m_first2[i] = std::move(tmp); -#else ::Kokkos::Experimental::swap(m_first1[i], m_first2[i]); -#endif } KOKKOS_FUNCTION diff --git a/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp b/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp index 773e8c2f883..3bb337de36f 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_TransformExclusiveScan.hpp @@ -76,6 +76,8 @@ struct TransformExclusiveScanFunctor { KOKKOS_FUNCTION void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + if (update.is_initial) { update.val = input.val; } else { diff --git a/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp b/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp index 9dde2b0fb12..05f8589086f 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_TransformInclusiveScan.hpp @@ -67,6 +67,8 @@ struct TransformInclusiveScanNoInitValueFunctor { KOKKOS_FUNCTION void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + if (update.is_initial) { update.val = input.val; } else { @@ -118,6 +120,8 @@ struct TransformInclusiveScanWithInitValueFunctor { KOKKOS_FUNCTION void join(value_type& update, const value_type& input) const { + if (input.is_initial) return; + if (update.is_initial) { update.val = input.val; } else { diff --git a/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp b/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp index 9b0d4d8244f..8a73b8e0f1d 100644 --- a/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp +++ b/algorithms/src/std_algorithms/impl/Kokkos_ValueWrapperForNoNeutralElement.hpp @@ -29,12 +29,6 @@ template struct ValueWrapperForNoNeutralElement { Scalar val; bool is_initial = true; - - KOKKOS_FUNCTION - void operator=(const ValueWrapperForNoNeutralElement& rhs) { - val = rhs.val; - is_initial = rhs.is_initial; - } }; } // namespace Impl diff --git a/algorithms/unit_tests/CMakeLists.txt b/algorithms/unit_tests/CMakeLists.txt index 0fe9c2006ee..92d9f072c18 100644 --- a/algorithms/unit_tests/CMakeLists.txt +++ b/algorithms/unit_tests/CMakeLists.txt @@ -16,35 +16,45 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget) set(dir ${CMAKE_CURRENT_BINARY_DIR}/${dir}) file(MAKE_DIRECTORY ${dir}) - # ------------------------- - # Sort1d,3d, Random - # ------------------------- - set(SOURCES_A) - if(Tag STREQUAL "OpenMP") - LIST(APPEND SOURCES_A - TestOpenMP_Sort1D.cpp - TestOpenMP_Sort3D.cpp - TestOpenMP_SortDynamicView.cpp - ) - endif() - + # ------------------------------------------ + # Sort + # ------------------------------------------ # Each of these inputs is an .hpp file. # Generate a .cpp file for each one that runs it on the current backend (Tag), # and add this .cpp file to the sources for UnitTest_RandomAndSort. - foreach(SOURCES_A_Input - TestRandomCommon - TestSortCommon - TestNestedSort - ) - set(file ${dir}/${SOURCES_A_Input}.cpp) + set(ALGO_SORT_SOURCES) + foreach(SOURCE_Input + TestSort + TestBinSortA + TestBinSortB + TestNestedSort + ) + set(file ${dir}/${SOURCE_Input}.cpp) # Write to a temporary intermediate file and call configure_file to avoid # updating timestamps triggering unnecessary rebuilds on subsequent cmake runs. file(WRITE ${dir}/dummy.cpp "#include \n" - "#include <${SOURCES_A_Input}.hpp>\n" + "#include <${SOURCE_Input}.hpp>\n" + ) + configure_file(${dir}/dummy.cpp ${file}) + list(APPEND ALGO_SORT_SOURCES ${file}) + endforeach() + + # ------------------------------------------ + # Random + # ------------------------------------------ + # do as above + set(ALGO_RANDOM_SOURCES) + foreach(SOURCE_Input + TestRandom + ) + set(file ${dir}/${SOURCE_Input}.cpp) + file(WRITE ${dir}/dummy.cpp + "#include \n" + "#include <${SOURCE_Input}.hpp>\n" ) configure_file(${dir}/dummy.cpp ${file}) - list(APPEND SOURCES_A ${file}) + list(APPEND ALGO_RANDOM_SOURCES ${file}) endforeach() # ------------------------------------------ @@ -145,6 +155,26 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget) endif() endforeach() +# FIXME_OPENMPTARGET This test causes internal compiler errors as of 09/01/22 +# when compiling for Intel's Xe-HP GPUs. +# FRIZZI: 04/26/2023: not sure if the compilation error is still applicable +# but we conservatively leave this guard on +if(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM)) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Sort + SOURCES + UnitTestMain.cpp + ${ALGO_SORT_SOURCES} + ) + + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Random + SOURCES + UnitTestMain.cpp + ${ALGO_RANDOM_SOURCES} + ) +endif() + # FIXME_OPENMPTARGET These tests cause internal compiler errors as of 09/01/22 # when compiling for Intel's Xe-HP GPUs. if(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM) @@ -160,20 +190,9 @@ if(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM) ) endif() -# FIXME_OPENMPTARGET This test causes internal compiler errors as of 09/01/22 -# when compiling for Intel's Xe-HP GPUs. -if(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM)) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_RandomAndSort - SOURCES - UnitTestMain.cpp - ${SOURCES_A} - ) -endif() - foreach(ID A;B;C;D;E) KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_StdSet_${ID} + AlgorithmsUnitTest_StdSet_${ID} SOURCES UnitTestMain.cpp ${STDALGO_SOURCES_${ID}} @@ -184,7 +203,7 @@ endforeach() # when compiling for Intel's Xe-HP GPUs. if(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM)) KOKKOS_ADD_EXECUTABLE( - UnitTest_StdAlgoCompileOnly + AlgorithmsUnitTest_StdAlgoCompileOnly SOURCES TestStdAlgorithmsCompileOnly.cpp ) endif() diff --git a/algorithms/unit_tests/Makefile b/algorithms/unit_tests/Makefile index e961e7ba2c1..9e0f1d60a03 100644 --- a/algorithms/unit_tests/Makefile +++ b/algorithms/unit_tests/Makefile @@ -27,10 +27,8 @@ TARGETS = tmp := $(foreach device, $(KOKKOS_DEVICELIST), \ $(if $(filter Test$(device).cpp, $(shell ls Test$(device).cpp 2>/dev/null)),,\ - $(shell echo "\#include " > Test$(device).cpp); \ - $(shell echo "\#include " >> Test$(device).cpp); \ - $(shell echo "\#include " >> Test$(device).cpp); \ - ) \ + $(shell echo "\#include " > Test$(device).cpp); \ + ) \ ) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) @@ -52,7 +50,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - OBJ_OPENMP = TestOpenMP.o TestOpenMP_Sort1D.o TestOpenMP_Sort3D.o TestOpenMP_SortDynamicView.o UnitTestMain.o gtest-all.o + OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o TARGETS += KokkosAlgorithms_UnitTest_OpenMP TEST_TARGETS += test-openmp endif diff --git a/algorithms/unit_tests/TestBinSortA.hpp b/algorithms/unit_tests/TestBinSortA.hpp new file mode 100644 index 00000000000..46f6486cdce --- /dev/null +++ b/algorithms/unit_tests/TestBinSortA.hpp @@ -0,0 +1,280 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTA_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTA_HPP + +#include +#include +#include +#include +#include + +namespace Test { +namespace BinSortSetA { + +template +struct bin3d_is_sorted_struct { + using value_type = unsigned int; + using execution_space = ExecutionSpace; + + Kokkos::View keys; + + int max_bins; + Scalar min; + Scalar max; + + bin3d_is_sorted_struct(Kokkos::View keys_, + int max_bins_, Scalar min_, Scalar max_) + : keys(keys_), max_bins(max_bins_), min(min_), max(max_) {} + KOKKOS_INLINE_FUNCTION + void operator()(int i, unsigned int& count) const { + int ix1 = int((keys(i, 0) - min) / max * max_bins); + int iy1 = int((keys(i, 1) - min) / max * max_bins); + int iz1 = int((keys(i, 2) - min) / max * max_bins); + int ix2 = int((keys(i + 1, 0) - min) / max * max_bins); + int iy2 = int((keys(i + 1, 1) - min) / max * max_bins); + int iz2 = int((keys(i + 1, 2) - min) / max * max_bins); + + if (ix1 > ix2) + count++; + else if (ix1 == ix2) { + if (iy1 > iy2) + count++; + else if ((iy1 == iy2) && (iz1 > iz2)) + count++; + } + } +}; + +template +struct sum3D { + using value_type = double; + using execution_space = ExecutionSpace; + + Kokkos::View keys; + + sum3D(Kokkos::View keys_) : keys(keys_) {} + KOKKOS_INLINE_FUNCTION + void operator()(int i, double& count) const { + count += keys(i, 0); + count += keys(i, 1); + count += keys(i, 2); + } +}; + +template +void test_3D_sort_impl(unsigned int n) { + using KeyViewType = Kokkos::View; + + KeyViewType keys("Keys", n * n * n); + + Kokkos::Random_XorShift64_Pool g(1931); + Kokkos::fill_random(keys, g, 100.0); + + double sum_before = 0.0; + double sum_after = 0.0; + unsigned int sort_fails = 0; + + ExecutionSpace exec; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(exec, 0, keys.extent(0)), + sum3D(keys), sum_before); + + int bin_1d = 1; + while (bin_1d * bin_1d * bin_1d * 4 < (int)keys.extent(0)) bin_1d *= 2; + int bin_max[3] = {bin_1d, bin_1d, bin_1d}; + typename KeyViewType::value_type min[3] = {0, 0, 0}; + typename KeyViewType::value_type max[3] = {100, 100, 100}; + + using BinOp = Kokkos::BinOp3D; + BinOp bin_op(bin_max, min, max); + Kokkos::BinSort Sorter(keys, bin_op, false); + Sorter.create_permute_vector(exec); + Sorter.sort(exec, keys); + + Kokkos::parallel_reduce( + Kokkos::RangePolicy(exec, 0, keys.extent(0)), + sum3D(keys), sum_after); + Kokkos::parallel_reduce( + Kokkos::RangePolicy(exec, 0, keys.extent(0) - 1), + bin3d_is_sorted_struct(keys, bin_1d, min[0], + max[0]), + sort_fails); + + double ratio = sum_before / sum_after; + double epsilon = 1e-10; + unsigned int equal_sum = + (ratio > (1.0 - epsilon)) && (ratio < (1.0 + epsilon)) ? 1 : 0; + + if (sort_fails) + printf("3D Sort Sum: %f %f Fails: %u\n", sum_before, sum_after, sort_fails); + + ASSERT_EQ(sort_fails, 0u); + ASSERT_EQ(equal_sum, 1u); +} + +template +void test_issue_1160_impl() { + Kokkos::View element_("element", 10); + Kokkos::View x_("x", 10); + Kokkos::View v_("y", 10); + + auto h_element = Kokkos::create_mirror_view(element_); + auto h_x = Kokkos::create_mirror_view(x_); + auto h_v = Kokkos::create_mirror_view(v_); + + h_element(0) = 9; + h_element(1) = 8; + h_element(2) = 7; + h_element(3) = 6; + h_element(4) = 5; + h_element(5) = 4; + h_element(6) = 3; + h_element(7) = 2; + h_element(8) = 1; + h_element(9) = 0; + + for (int i = 0; i < 10; ++i) { + h_v.access(i, 0) = h_x.access(i, 0) = double(h_element(i)); + } + ExecutionSpace exec; + Kokkos::deep_copy(exec, element_, h_element); + Kokkos::deep_copy(exec, x_, h_x); + Kokkos::deep_copy(exec, v_, h_v); + + using KeyViewType = decltype(element_); + using BinOp = Kokkos::BinOp1D; + + int begin = 3; + int end = 8; + auto max = h_element(begin); + auto min = h_element(end - 1); + BinOp binner(end - begin, min, max); + + Kokkos::BinSort Sorter(element_, begin, end, binner, + false); + Sorter.create_permute_vector(exec); + Sorter.sort(exec, element_, begin, end); + + Sorter.sort(exec, x_, begin, end); + Sorter.sort(exec, v_, begin, end); + + Kokkos::deep_copy(exec, h_element, element_); + Kokkos::deep_copy(exec, h_x, x_); + Kokkos::deep_copy(exec, h_v, v_); + exec.fence(); + + ASSERT_EQ(h_element(0), 9); + ASSERT_EQ(h_element(1), 8); + ASSERT_EQ(h_element(2), 7); + ASSERT_EQ(h_element(3), 2); + ASSERT_EQ(h_element(4), 3); + ASSERT_EQ(h_element(5), 4); + ASSERT_EQ(h_element(6), 5); + ASSERT_EQ(h_element(7), 6); + ASSERT_EQ(h_element(8), 1); + ASSERT_EQ(h_element(9), 0); + + for (int i = 0; i < 10; ++i) { + ASSERT_EQ(h_element(i), int(h_x.access(i, 0))); + ASSERT_EQ(h_element(i), int(h_v.access(i, 0))); + } +} + +template +void test_sort_integer_overflow() { + // FIXME: this test is meant to test something for BinSort, + // but actually uses the kokkos::sort API with the assumption + // that underneath it calls binsort. I don't think this is correct, + // because if the kokkos::sort API chages impl, this test is not testing + // what it meants to test... so need to change this to actually use BinSort + // directly. + + // array with two extrema in reverse order to expose integer overflow bug in + // bin calculation + T a[2] = {Kokkos::Experimental::finite_max::value, + Kokkos::Experimental::finite_min::value}; + auto vd = Kokkos::create_mirror_view_and_copy( + ExecutionSpace(), Kokkos::View(a)); + Kokkos::sort(vd); + auto vh = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), vd); + EXPECT_TRUE(std::is_sorted(vh.data(), vh.data() + 2)) + << "view (" << vh[0] << ", " << vh[1] << ") is not sorted"; +} + +} // namespace BinSortSetA + +TEST(TEST_CATEGORY, BinSortGenericTests) { + using ExecutionSpace = TEST_EXECSPACE; + using key_type = unsigned; + constexpr int N = 171; + +#if defined(KOKKOS_ENABLE_CUDA) && \ + defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC + if (!std::is_same_v) +#endif + BinSortSetA::test_3D_sort_impl(N); + +#if defined(KOKKOS_ENABLE_CUDA) && \ + defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC + if (!std::is_same_v) +#endif + BinSortSetA::test_issue_1160_impl(); + + BinSortSetA::test_sort_integer_overflow(); + BinSortSetA::test_sort_integer_overflow(); + BinSortSetA::test_sort_integer_overflow(); +} + +TEST(TEST_CATEGORY, BinSortEmptyView) { + using ExecutionSpace = TEST_EXECSPACE; + + // the bounds and extents used below are totally arbitrary + // and, in theory, should have no impact + + using KeyViewType = Kokkos::View; + KeyViewType kv("kv", 20); + + using BinOp_t = Kokkos::BinOp1D; + BinOp_t binOp(5, 0, 10); + Kokkos::BinSort Sorter(ExecutionSpace{}, kv, binOp); + + // does not matter if we use int or something else + Kokkos::View v("v", 0); + + // test all exposed public sort methods + ASSERT_NO_THROW(Sorter.sort(ExecutionSpace(), v, 0, 0)); + ASSERT_NO_THROW(Sorter.sort(v, 0, 0)); + ASSERT_NO_THROW(Sorter.sort(ExecutionSpace(), v)); + ASSERT_NO_THROW(Sorter.sort(v)); +} + +TEST(TEST_CATEGORY, BinSortEmptyKeysView) { + using ExecutionSpace = TEST_EXECSPACE; + + using KeyViewType = Kokkos::View; + KeyViewType kv("kv", 0); + + using BinOp_t = Kokkos::BinOp1D; + BinOp_t binOp(5, 0, 10); + Kokkos::BinSort Sorter(ExecutionSpace{}, kv, binOp); + + ASSERT_NO_THROW(Sorter.create_permute_vector(ExecutionSpace{})); +} + +} // namespace Test +#endif diff --git a/algorithms/unit_tests/TestBinSortB.hpp b/algorithms/unit_tests/TestBinSortB.hpp new file mode 100644 index 00000000000..0707411f59c --- /dev/null +++ b/algorithms/unit_tests/TestBinSortB.hpp @@ -0,0 +1,262 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTB_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_BINSORTB_HPP + +#include +#include +#include +#include +#include +#include +#include +#include //needed for iota + +namespace Test { +namespace BinSortSetB { + +template +struct CopyFunctorRank2 { + ViewTypeFrom m_view_from; + ViewTypeTo m_view_to; + + CopyFunctorRank2() = delete; + + CopyFunctorRank2(const ViewTypeFrom view_from, const ViewTypeTo view_to) + : m_view_from(view_from), m_view_to(view_to) {} + + KOKKOS_INLINE_FUNCTION + void operator()(int k) const { + const auto i = k / m_view_from.extent(1); + const auto j = k % m_view_from.extent(1); + m_view_to(i, j) = m_view_from(i, j); + } +}; + +template +auto create_deep_copyable_compatible_view_with_same_extent( + Kokkos::View view) { + using view_type = Kokkos::View; + using view_value_type = typename view_type::value_type; + using view_exespace = typename view_type::execution_space; + const std::size_t ext0 = view.extent(0); + using view_deep_copyable_t = Kokkos::View; + return view_deep_copyable_t{"view_dc", ext0}; +} + +template +auto create_deep_copyable_compatible_view_with_same_extent( + Kokkos::View view) { + using view_type = Kokkos::View; + using view_value_type = typename view_type::value_type; + using view_exespace = typename view_type::execution_space; + using view_deep_copyable_t = Kokkos::View; + const std::size_t ext0 = view.extent(0); + const std::size_t ext1 = view.extent(1); + return view_deep_copyable_t{"view_dc", ext0, ext1}; +} + +template +auto create_deep_copyable_compatible_clone(ViewType view) { + static_assert(ViewType::rank <= 2); + + auto view_dc = create_deep_copyable_compatible_view_with_same_extent(view); + using view_dc_t = decltype(view_dc); + if constexpr (ViewType::rank == 1) { + Test::stdalgos::CopyFunctor F1(view, view_dc); + Kokkos::parallel_for("copy", view.extent(0), F1); + } else { + static_assert(ViewType::rank == 2, "Only rank 1 or 2 supported."); + CopyFunctorRank2 F1(view, view_dc); + Kokkos::parallel_for("copy", view.extent(0) * view.extent(1), F1); + } + return view_dc; +} + +template +auto create_host_space_copy(ViewType view) { + auto view_dc = create_deep_copyable_compatible_clone(view); + return create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); +} + +template +auto create_rank1_dev_and_host_views_of_keys(const ExecutionSpace& exec, + int N) { + namespace KE = Kokkos::Experimental; + Kokkos::DefaultHostExecutionSpace defaultHostExeSpace; + + using KeyViewType = Kokkos::View; + KeyViewType keys("keys", N); + auto keys_h = Kokkos::create_mirror_view(keys); + std::iota(KE::begin(keys_h), KE::end(keys_h), KeyType(0)); + KE::reverse(defaultHostExeSpace, keys_h); + // keys now is = [N-1,N-2,...,2,1,0], shuffle it for avoid trivial case + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(KE::begin(keys_h), KE::end(keys_h), g); + Kokkos::deep_copy(exec, keys, keys_h); + + return std::make_pair(keys, keys_h); +} + +template = 0> +auto create_strided_view(std::size_t numRows, std::size_t /*numCols*/) { + Kokkos::LayoutStride layout{numRows, 2}; + using v_t = Kokkos::View; + v_t v("v", layout); + return v; +} + +template = 0> +auto create_strided_view(std::size_t numRows, std::size_t numCols) { + Kokkos::LayoutStride layout{numRows, 2, numCols, numRows * 2}; + using v_t = Kokkos::View; + v_t v("v", layout); + return v; +} + +template +void test_on_view_with_stride(std::size_t numRows, std::size_t indB, + std::size_t indE, std::size_t numCols = 1) { + ExecutionSpace exec; + Kokkos::DefaultHostExecutionSpace defaultHostExeSpace; + namespace KE = Kokkos::Experimental; + + // 1. generate 1D view of keys + auto [keys, keys_h] = + create_rank1_dev_and_host_views_of_keys(exec, numRows); + using KeyViewType = decltype(keys); + + // need this map key->row to use later for checking + std::unordered_map keyToRowBeforeSort; + for (std::size_t i = 0; i < numRows; ++i) { + keyToRowBeforeSort[keys_h(i)] = i; + } + + // 2. create binOp + using BinOp = Kokkos::BinOp1D; + auto itB = KE::cbegin(keys_h) + indB; + auto itE = itB + indE - indB; + auto it = KE::minmax_element(defaultHostExeSpace, itB, itE); + // seems like the behavior is odd when we use # buckets = # keys + // so use +5 for using more buckets than keys. + // This is something to investigate. + BinOp binner(indE - indB + 5, *it.first, *it.second); + + // 3. create sorter + Kokkos::BinSort sorter(keys, indB, indE, binner, false); + sorter.create_permute_vector(exec); + sorter.sort(exec, keys, indB, indE); + Kokkos::deep_copy(exec, keys_h, keys); + + auto v = create_strided_view( + numRows, numCols); + + Kokkos::Random_XorShift64_Pool pool(73931); + Kokkos::fill_random(v, pool, ValueType(545)); + auto v_before_sort_h = create_host_space_copy(v); + sorter.sort(exec, v, indB, indE); + auto v_after_sort_h = create_host_space_copy(v); + + for (size_t i = 0; i < v.extent(0); ++i) { + // if i within [indB,indE), the sorting was done + // so we need to do proper checking since rows have changed + if (i >= size_t(indB) && i < size_t(indE)) { + const KeyType key = keys_h(i); + if constexpr (ValuesViewRank == 1) { + ASSERT_TRUE(v_before_sort_h(keyToRowBeforeSort.at(key)) == + v_after_sort_h(i)); + } else { + for (size_t j = 0; j < v.extent(1); ++j) { + ASSERT_TRUE(v_before_sort_h(keyToRowBeforeSort.at(key), j) == + v_after_sort_h(i, j)); + } + } + } + // outside the target bounds, then the i-th row remains unchanged + else { + if constexpr (ValuesViewRank == 1) { + ASSERT_TRUE(v_before_sort_h(i) == v_after_sort_h(i)); + } else { + for (size_t j = 0; j < v.extent(1); ++j) { + ASSERT_TRUE(v_before_sort_h(i, j) == v_after_sort_h(i, j)); + } + } + } + } +} + +template +void run_for_rank1() { + constexpr int rank = 1; + + // trivial case + test_on_view_with_stride(1, 0, 1); + + // nontrivial cases + for (std::size_t N : {311, 710017}) { + // various cases for bounds + test_on_view_with_stride(N, 0, N); + test_on_view_with_stride(N, 3, N); + test_on_view_with_stride(N, 0, + N - 4); + test_on_view_with_stride(N, 4, + N - 3); + } +} + +template +void run_for_rank2() { + constexpr int rank = 2; + + // trivial case + test_on_view_with_stride(1, 0, 1, + 1); + + // nontrivial cases + for (std::size_t Nr : {11, 1157, 710017}) { + for (std::size_t Nc : {3, 51}) { + // various cases for bounds + test_on_view_with_stride( + Nr, 0, Nr, Nc); + test_on_view_with_stride( + Nr, 3, Nr, Nc); + test_on_view_with_stride( + Nr, 0, Nr - 4, Nc); + test_on_view_with_stride( + Nr, 4, Nr - 3, Nc); + } + } +} + +} // namespace BinSortSetB + +TEST(TEST_CATEGORY, BinSortUnsignedKeyLayoutStrideValues) { + using ExeSpace = TEST_EXECSPACE; + using key_type = unsigned; + BinSortSetB::run_for_rank1(); + BinSortSetB::run_for_rank1(); + + BinSortSetB::run_for_rank2(); + BinSortSetB::run_for_rank2(); +} + +} // namespace Test +#endif diff --git a/algorithms/unit_tests/TestNestedSort.hpp b/algorithms/unit_tests/TestNestedSort.hpp index 37ee211b42a..1b7a3f48fc5 100644 --- a/algorithms/unit_tests/TestNestedSort.hpp +++ b/algorithms/unit_tests/TestNestedSort.hpp @@ -17,14 +17,14 @@ #ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_NESTED_SORT_HPP #define KOKKOS_ALGORITHMS_UNITTESTS_TEST_NESTED_SORT_HPP +#include #include #include #include #include namespace Test { - -namespace Impl { +namespace NestedSortImpl { // Comparator for sorting in descending order template @@ -383,24 +383,28 @@ void test_nested_sort_by_key(unsigned int N, KeyType minKey, KeyType maxKey, test_nested_sort_by_key_impl( N, N, false, true, minKey, maxKey, minVal, maxVal); } -} // namespace Impl +} // namespace NestedSortImpl TEST(TEST_CATEGORY, NestedSort) { - Impl::test_nested_sort(171, 0U, UINT_MAX); - Impl::test_nested_sort(42, -1e6f, 1e6f); - Impl::test_nested_sort(67, CHAR_MIN, CHAR_MAX); + using ExecutionSpace = TEST_EXECSPACE; + NestedSortImpl::test_nested_sort(171, 0U, UINT_MAX); + NestedSortImpl::test_nested_sort(42, -1e6f, 1e6f); + NestedSortImpl::test_nested_sort(67, CHAR_MIN, + CHAR_MAX); } TEST(TEST_CATEGORY, NestedSortByKey) { + using ExecutionSpace = TEST_EXECSPACE; + // Second/third template arguments are key and value respectively. // In sort_by_key_X functions, a key view and a value view are both permuted // to make the keys sorted. This means that the value type doesn't need to be // ordered, unlike key - Impl::test_nested_sort_by_key( + NestedSortImpl::test_nested_sort_by_key( 161, 0U, UINT_MAX, 0U, UINT_MAX); - Impl::test_nested_sort_by_key( + NestedSortImpl::test_nested_sort_by_key( 267, -1e6f, 1e6f, CHAR_MIN, CHAR_MAX); - Impl::test_nested_sort_by_key( + NestedSortImpl::test_nested_sort_by_key( 11, CHAR_MIN, CHAR_MAX, 2.718, 3.14); } diff --git a/algorithms/unit_tests/TestOpenMP_Sort1D.cpp b/algorithms/unit_tests/TestOpenMP_Sort1D.cpp deleted file mode 100644 index e06486618f3..00000000000 --- a/algorithms/unit_tests/TestOpenMP_Sort1D.cpp +++ /dev/null @@ -1,39 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#ifdef KOKKOS_ENABLE_OPENMP - -#include -#include - -//---------------------------------------------------------------------------- -#include -#include -#include - -namespace Test { - -TEST(openmp, SortUnsigned1D) { - Impl::test_1D_sort(171); -} - -TEST(openmp, SortIssue1160) { Impl::test_issue_1160_sort(); } - -} // namespace Test -#else -void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} -#endif diff --git a/algorithms/unit_tests/TestOpenMP_SortDynamicView.cpp b/algorithms/unit_tests/TestOpenMP_SortDynamicView.cpp deleted file mode 100644 index 549d09f1f24..00000000000 --- a/algorithms/unit_tests/TestOpenMP_SortDynamicView.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#ifdef KOKKOS_ENABLE_OPENMP - -#include -#include - -//---------------------------------------------------------------------------- -#include -#include -#include - -namespace Test { - -TEST(openmp, SortUnsignedDynamicView) { - Impl::test_dynamic_view_sort(171); -} - -} // namespace Test -#else -void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} -#endif diff --git a/algorithms/unit_tests/TestRandom.hpp b/algorithms/unit_tests/TestRandom.hpp index 607e94c7845..e9dc3327a6c 100644 --- a/algorithms/unit_tests/TestRandom.hpp +++ b/algorithms/unit_tests/TestRandom.hpp @@ -14,8 +14,8 @@ // //@HEADER -#ifndef KOKKOS_TEST_DUALVIEW_HPP -#define KOKKOS_TEST_DUALVIEW_HPP +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_RANDOM_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_RANDOM_HPP #include #include @@ -29,8 +29,7 @@ #include namespace Test { - -namespace Impl { +namespace AlgoRandomImpl { // This test runs the random number generators and uses some statistic tests to // check the 'goodness' of the random numbers: @@ -469,42 +468,46 @@ struct TestDynRankView { ASSERT_LE(val.max_val, max); } }; -} // namespace Impl -template -void test_random_xorshift64() { +} // namespace AlgoRandomImpl + +TEST(TEST_CATEGORY, Random_XorShift64) { + using ExecutionSpace = TEST_EXECSPACE; + #if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \ defined(KOKKOS_ENABLE_HIP) const int num_draws = 132141141; #else // SERIAL, HPX, OPENMP const int num_draws = 10240000; #endif - Impl::test_random>(num_draws); - Impl::test_random>( + num_draws); + AlgoRandomImpl::test_random>>( num_draws); - Impl::TestDynRankView>(10000) + AlgoRandomImpl::TestDynRankView< + ExecutionSpace, Kokkos::Random_XorShift64_Pool>(10000) .run(); } -template -void test_random_xorshift1024() { +TEST(TEST_CATEGORY, Random_XorShift1024_0) { + using ExecutionSpace = TEST_EXECSPACE; + #if defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_CUDA) || \ defined(KOKKOS_ENABLE_HIP) const int num_draws = 52428813; #else // SERIAL, HPX, OPENMP const int num_draws = 10130144; #endif - Impl::test_random>( + AlgoRandomImpl::test_random>( num_draws); - Impl::test_random>>( num_draws); - Impl::TestDynRankView>(10000) + AlgoRandomImpl::TestDynRankView< + ExecutionSpace, Kokkos::Random_XorShift1024_Pool>(10000) .run(); } -} // namespace Test -#endif // KOKKOS_TEST_UNORDERED_MAP_HPP +} // namespace Test +#endif diff --git a/algorithms/unit_tests/TestRandomAccessIterator.cpp b/algorithms/unit_tests/TestRandomAccessIterator.cpp index 439d171c8ae..fd3a875b1e3 100644 --- a/algorithms/unit_tests/TestRandomAccessIterator.cpp +++ b/algorithms/unit_tests/TestRandomAccessIterator.cpp @@ -54,7 +54,7 @@ void test_random_access_it_verify(IteratorType it, ValueType gold_value) { Kokkos::parallel_for("_std_algo_copy", 1, cf); auto v_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkView); - EXPECT_EQ(v_h(), gold_value); + ASSERT_EQ(v_h(), gold_value); } TEST_F(random_access_iterator_test, dereference) { @@ -96,9 +96,9 @@ void test_random_access_it_subscript_op_verify(IteratorType it) { auto v_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkView); - EXPECT_EQ(v_h(0), (value_t)0); - EXPECT_EQ(v_h(1), (value_t)1); - EXPECT_EQ(v_h(2), (value_t)2); + ASSERT_EQ(v_h(0), (value_t)0); + ASSERT_EQ(v_h(1), (value_t)1); + ASSERT_EQ(v_h(2), (value_t)2); } TEST_F(random_access_iterator_test, subscript_operator) { @@ -188,9 +188,9 @@ TEST_F(random_access_iterator_test, operatorsSet4) { auto it7 = KE::Impl::RandomAccessIterator(m_static_view, 3); auto it8 = KE::Impl::RandomAccessIterator(m_dynamic_view, 3); auto it9 = KE::Impl::RandomAccessIterator(m_strided_view, 3); - EXPECT_EQ(it1, it7); - EXPECT_EQ(it2, it8); - EXPECT_EQ(it3, it9); + ASSERT_EQ(it1, it7); + ASSERT_EQ(it2, it8); + ASSERT_EQ(it3, it9); EXPECT_GE(it1, it7); EXPECT_GE(it2, it8); EXPECT_GE(it3, it9); @@ -205,16 +205,16 @@ TEST_F(random_access_iterator_test, assignment_operator) { EXPECT_NE(it1, it2); it2 = it1; - EXPECT_EQ(it1, it2); + ASSERT_EQ(it1, it2); } TEST_F(random_access_iterator_test, distance) { auto first = KE::begin(m_dynamic_view); auto last = KE::end(m_dynamic_view); - EXPECT_EQ(0, KE::distance(first, first)); - EXPECT_EQ(1, KE::distance(first, first + 1)); - EXPECT_EQ(m_dynamic_view.extent(0), size_t(KE::distance(first, last))); + ASSERT_EQ(0, KE::distance(first, first)); + ASSERT_EQ(1, KE::distance(first, first + 1)); + ASSERT_EQ(m_dynamic_view.extent(0), size_t(KE::distance(first, last))); } } // namespace stdalgos diff --git a/algorithms/unit_tests/TestSort.hpp b/algorithms/unit_tests/TestSort.hpp index d903888878c..968fb8950b7 100644 --- a/algorithms/unit_tests/TestSort.hpp +++ b/algorithms/unit_tests/TestSort.hpp @@ -14,8 +14,8 @@ // //@HEADER -#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP -#define KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_HPP #include #include @@ -24,8 +24,7 @@ #include namespace Test { - -namespace Impl { +namespace SortImpl { template struct is_sorted_struct { @@ -53,56 +52,6 @@ struct sum { void operator()(int i, double& count) const { count += keys(i); } }; -template -struct bin3d_is_sorted_struct { - using value_type = unsigned int; - using execution_space = ExecutionSpace; - - Kokkos::View keys; - - int max_bins; - Scalar min; - Scalar max; - - bin3d_is_sorted_struct(Kokkos::View keys_, - int max_bins_, Scalar min_, Scalar max_) - : keys(keys_), max_bins(max_bins_), min(min_), max(max_) {} - KOKKOS_INLINE_FUNCTION - void operator()(int i, unsigned int& count) const { - int ix1 = int((keys(i, 0) - min) / max * max_bins); - int iy1 = int((keys(i, 1) - min) / max * max_bins); - int iz1 = int((keys(i, 2) - min) / max * max_bins); - int ix2 = int((keys(i + 1, 0) - min) / max * max_bins); - int iy2 = int((keys(i + 1, 1) - min) / max * max_bins); - int iz2 = int((keys(i + 1, 2) - min) / max * max_bins); - - if (ix1 > ix2) - count++; - else if (ix1 == ix2) { - if (iy1 > iy2) - count++; - else if ((iy1 == iy2) && (iz1 > iz2)) - count++; - } - } -}; - -template -struct sum3D { - using value_type = double; - using execution_space = ExecutionSpace; - - Kokkos::View keys; - - sum3D(Kokkos::View keys_) : keys(keys_) {} - KOKKOS_INLINE_FUNCTION - void operator()(int i, double& count) const { - count += keys(i, 0); - count += keys(i, 1); - count += keys(i, 2); - } -}; - template void test_1D_sort_impl(unsigned int n) { using KeyViewType = Kokkos::View; @@ -142,57 +91,6 @@ void test_1D_sort_impl(unsigned int n) { ASSERT_EQ(equal_sum, 1u); } -template -void test_3D_sort_impl(unsigned int n) { - using KeyViewType = Kokkos::View; - - KeyViewType keys("Keys", n * n * n); - - Kokkos::Random_XorShift64_Pool g(1931); - Kokkos::fill_random(keys, g, 100.0); - - double sum_before = 0.0; - double sum_after = 0.0; - unsigned int sort_fails = 0; - - ExecutionSpace exec; - Kokkos::parallel_reduce( - Kokkos::RangePolicy(exec, 0, keys.extent(0)), - sum3D(keys), sum_before); - - int bin_1d = 1; - while (bin_1d * bin_1d * bin_1d * 4 < (int)keys.extent(0)) bin_1d *= 2; - int bin_max[3] = {bin_1d, bin_1d, bin_1d}; - typename KeyViewType::value_type min[3] = {0, 0, 0}; - typename KeyViewType::value_type max[3] = {100, 100, 100}; - - using BinOp = Kokkos::BinOp3D; - BinOp bin_op(bin_max, min, max); - Kokkos::BinSort Sorter(keys, bin_op, false); - Sorter.create_permute_vector(exec); - Sorter.sort(exec, keys); - - Kokkos::parallel_reduce( - Kokkos::RangePolicy(exec, 0, keys.extent(0)), - sum3D(keys), sum_after); - Kokkos::parallel_reduce( - Kokkos::RangePolicy(exec, 0, keys.extent(0) - 1), - bin3d_is_sorted_struct(keys, bin_1d, min[0], - max[0]), - sort_fails); - - double ratio = sum_before / sum_after; - double epsilon = 1e-10; - unsigned int equal_sum = - (ratio > (1.0 - epsilon)) && (ratio < (1.0 + epsilon)) ? 1 : 0; - - if (sort_fails) - printf("3D Sort Sum: %f %f Fails: %u\n", sum_before, sum_after, sort_fails); - - ASSERT_EQ(sort_fails, 0u); - ASSERT_EQ(equal_sum, 1u); -} - //---------------------------------------------------------------------------- template @@ -259,74 +157,6 @@ void test_dynamic_view_sort_impl(unsigned int n) { //---------------------------------------------------------------------------- -template -void test_issue_1160_impl() { - Kokkos::View element_("element", 10); - Kokkos::View x_("x", 10); - Kokkos::View v_("y", 10); - - auto h_element = Kokkos::create_mirror_view(element_); - auto h_x = Kokkos::create_mirror_view(x_); - auto h_v = Kokkos::create_mirror_view(v_); - - h_element(0) = 9; - h_element(1) = 8; - h_element(2) = 7; - h_element(3) = 6; - h_element(4) = 5; - h_element(5) = 4; - h_element(6) = 3; - h_element(7) = 2; - h_element(8) = 1; - h_element(9) = 0; - - for (int i = 0; i < 10; ++i) { - h_v.access(i, 0) = h_x.access(i, 0) = double(h_element(i)); - } - ExecutionSpace exec; - Kokkos::deep_copy(exec, element_, h_element); - Kokkos::deep_copy(exec, x_, h_x); - Kokkos::deep_copy(exec, v_, h_v); - - using KeyViewType = decltype(element_); - using BinOp = Kokkos::BinOp1D; - - int begin = 3; - int end = 8; - auto max = h_element(begin); - auto min = h_element(end - 1); - BinOp binner(end - begin, min, max); - - Kokkos::BinSort Sorter(element_, begin, end, binner, - false); - Sorter.create_permute_vector(exec); - Sorter.sort(exec, element_, begin, end); - - Sorter.sort(exec, x_, begin, end); - Sorter.sort(exec, v_, begin, end); - - Kokkos::deep_copy(exec, h_element, element_); - Kokkos::deep_copy(exec, h_x, x_); - Kokkos::deep_copy(exec, h_v, v_); - exec.fence(); - - ASSERT_EQ(h_element(0), 9); - ASSERT_EQ(h_element(1), 8); - ASSERT_EQ(h_element(2), 7); - ASSERT_EQ(h_element(3), 2); - ASSERT_EQ(h_element(4), 3); - ASSERT_EQ(h_element(5), 4); - ASSERT_EQ(h_element(6), 5); - ASSERT_EQ(h_element(7), 6); - ASSERT_EQ(h_element(8), 1); - ASSERT_EQ(h_element(9), 0); - - for (int i = 0; i < 10; ++i) { - ASSERT_EQ(h_element(i), int(h_x.access(i, 0))); - ASSERT_EQ(h_element(i), int(h_v.access(i, 0))); - } -} - template void test_issue_4978_impl() { Kokkos::View element_("element", 9); @@ -376,55 +206,33 @@ void test_sort_integer_overflow() { << "view (" << vh[0] << ", " << vh[1] << ") is not sorted"; } -//---------------------------------------------------------------------------- +} // namespace SortImpl -template -void test_1D_sort(unsigned int N) { - test_1D_sort_impl(N * N * N); -} +TEST(TEST_CATEGORY, SortUnsignedValueType) { + using ExecutionSpace = TEST_EXECSPACE; + using key_type = unsigned; + constexpr int N = 171; -template -void test_3D_sort(unsigned int N) { - test_3D_sort_impl(N); -} + SortImpl::test_1D_sort_impl(N * N * N); -template -void test_dynamic_view_sort(unsigned int N) { - test_dynamic_view_sort_impl(N * N); -} +#ifndef KOKKOS_ENABLE_OPENMPTARGET + // FIXME_OPENMPTARGET: OpenMPTarget doesn't support DynamicView yet. + SortImpl::test_dynamic_view_sort_impl(N * N); +#endif -template -void test_issue_1160_sort() { - test_issue_1160_impl(); + SortImpl::test_issue_4978_impl(); } -template -void test_issue_4978_sort() { - test_issue_4978_impl(); -} +TEST(TEST_CATEGORY, SortEmptyView) { + using ExecutionSpace = TEST_EXECSPACE; -template -void test_sort(unsigned int N) { - test_1D_sort(N); -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if (!std::is_same_v) -#endif - test_3D_sort(N); -// FIXME_OPENMPTARGET: OpenMPTarget doesn't support DynamicView yet. -#ifndef KOKKOS_ENABLE_OPENMPTARGET - test_dynamic_view_sort(N); -#endif -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if (!std::is_same_v) -#endif - test_issue_1160_sort(); - test_issue_4978_sort(); - test_sort_integer_overflow(); - test_sort_integer_overflow(); - test_sort_integer_overflow(); + // does not matter if we use int or something else + Kokkos::View v("v", 0); + + // TODO check the synchronous behavior of the calls below + ASSERT_NO_THROW(Kokkos::sort(ExecutionSpace(), v)); + ASSERT_NO_THROW(Kokkos::sort(v)); } -} // namespace Impl + } // namespace Test -#endif /* KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP */ +#endif diff --git a/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp b/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp index d414d524b61..75ad533f6ee 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsAdjacentDifference.cpp @@ -157,7 +157,7 @@ void verify_data(TestViewType test_view, GoldViewType gold) { const auto gold_h = create_mirror_view_and_copy(Kokkos::HostSpace(), gold); for (std::size_t i = 0; i < test_view.extent(0); ++i) { - EXPECT_EQ(gold_h(i), test_view_dc_h(i)); + ASSERT_EQ(gold_h(i), test_view_dc_h(i)); } } @@ -197,7 +197,7 @@ void run_single_scenario(const InfoType& scenario_info, auto res1 = KE::adjacent_difference(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); - EXPECT_EQ(res1, KE::end(view_dest)); + ASSERT_EQ(res1, KE::end(view_dest)); verify_data(view_dest, gold); } @@ -207,7 +207,7 @@ void run_single_scenario(const InfoType& scenario_info, auto res2 = KE::adjacent_difference( "label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); - EXPECT_EQ(res2, KE::end(view_dest)); + ASSERT_EQ(res2, KE::end(view_dest)); verify_data(view_dest, gold); } @@ -216,7 +216,7 @@ void run_single_scenario(const InfoType& scenario_info, create_view(Tag{}, view_ext, "adj_diff_dest_view"); auto res3 = KE::adjacent_difference(exespace(), view_from, view_dest, args...); - EXPECT_EQ(res3, KE::end(view_dest)); + ASSERT_EQ(res3, KE::end(view_dest)); verify_data(view_dest, gold); } @@ -225,7 +225,7 @@ void run_single_scenario(const InfoType& scenario_info, create_view(Tag{}, view_ext, "adj_diff_dest_view"); auto res4 = KE::adjacent_difference("label", exespace(), view_from, view_dest, args...); - EXPECT_EQ(res4, KE::end(view_dest)); + ASSERT_EQ(res4, KE::end(view_dest)); verify_data(view_dest, gold); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp b/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp index ee347612650..fa4ff48dbef 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsAdjacentFind.cpp @@ -229,7 +229,7 @@ void verify(DiffType my_diff, ViewType view, Args... args) { my_std_adjacent_find(KE::cbegin(view_h), KE::cend(view_h), args...); const auto std_diff = std_r - KE::cbegin(view_h); - EXPECT_EQ(my_diff, std_diff); + ASSERT_EQ(my_diff, std_diff); } template @@ -287,12 +287,6 @@ void run_all_scenarios() { } TEST(std_algorithms_nonmod_seq_ops, adjacent_find) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp b/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp index 1c39a4735e6..cccc0f6c18b 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsAllAnyNoneOf.cpp @@ -147,12 +147,6 @@ void run_all_scenarios() { } TEST(std_algorithms_all_any_none_of_test, test) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp b/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp index 694676a878a..5b30b9eda7c 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp +++ b/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp @@ -110,7 +110,7 @@ verify_values(ValueType expected, const ViewType view) { "Non-matching value types of view and reference value"); auto view_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), view); for (std::size_t i = 0; i < view_h.extent(0); i++) { - EXPECT_EQ(expected, view_h(i)); + ASSERT_EQ(expected, view_h(i)); } } @@ -130,7 +130,7 @@ verify_values(ValueType expected, const ViewType view) { auto view_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), tmpView); for (std::size_t i = 0; i < view_h.extent(0); i++) { - EXPECT_EQ(expected, view_h(i)); + ASSERT_EQ(expected, view_h(i)); } } @@ -147,7 +147,7 @@ compare_views(ViewType1 expected, const ViewType2 actual) { Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), actual); for (std::size_t i = 0; i < expected_h.extent(0); i++) { - EXPECT_EQ(expected_h(i), actual_h(i)); + ASSERT_EQ(expected_h(i), actual_h(i)); } } @@ -171,7 +171,7 @@ compare_views(ViewType1 expected, const ViewType2 actual) { Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), expected); for (std::size_t i = 0; i < expected_h.extent(0); i++) { - EXPECT_EQ(expected_h(i), actual_h(i)); + ASSERT_EQ(expected_h(i), actual_h(i)); } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp b/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp index 5d551998012..386d533f7a8 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp @@ -42,8 +42,8 @@ TEST(std_algorithms, is_admissible_to_std_algorithms) { using strided_view_1d_t = Kokkos::View; Kokkos::LayoutStride layout1d{extent0, 2}; strided_view_1d_t strided_view_1d{"std-algo-test-1d-strided-view", layout1d}; - EXPECT_EQ(layout1d.dimension[0], 13u); - EXPECT_EQ(layout1d.stride[0], 2u); + ASSERT_EQ(layout1d.dimension[0], 13u); + ASSERT_EQ(layout1d.stride[0], 2u); // they are admissible KE::Impl::static_assert_is_admissible_to_kokkos_std_algorithms( static_view_1d); diff --git a/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp b/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp index e21d50f69b9..5778e37be04 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsCopyIf.cpp @@ -135,49 +135,49 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_EQ(view_test_h(0), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(0)); } else if (name == "one-element-b") { - EXPECT_EQ(view_test_h(0), static_cast(2)); + ASSERT_EQ(view_test_h(0), static_cast(2)); } else if (name == "two-elements-a") { - EXPECT_EQ(view_test_h(0), static_cast(2)); - EXPECT_EQ(view_test_h(1), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(2)); + ASSERT_EQ(view_test_h(1), static_cast(0)); } else if (name == "two-elements-b") { - EXPECT_EQ(view_test_h(0), static_cast(2)); - EXPECT_EQ(view_test_h(1), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(2)); + ASSERT_EQ(view_test_h(1), static_cast(0)); } else if (name == "small-a") { - EXPECT_EQ(view_test_h(0), static_cast(-4)); - EXPECT_EQ(view_test_h(1), static_cast(-2)); - EXPECT_EQ(view_test_h(2), static_cast(0)); - EXPECT_EQ(view_test_h(3), static_cast(2)); - EXPECT_EQ(view_test_h(4), static_cast(4)); - EXPECT_EQ(view_test_h(5), static_cast(0)); - EXPECT_EQ(view_test_h(6), static_cast(0)); - EXPECT_EQ(view_test_h(7), static_cast(0)); - EXPECT_EQ(view_test_h(8), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(-4)); + ASSERT_EQ(view_test_h(1), static_cast(-2)); + ASSERT_EQ(view_test_h(2), static_cast(0)); + ASSERT_EQ(view_test_h(3), static_cast(2)); + ASSERT_EQ(view_test_h(4), static_cast(4)); + ASSERT_EQ(view_test_h(5), static_cast(0)); + ASSERT_EQ(view_test_h(6), static_cast(0)); + ASSERT_EQ(view_test_h(7), static_cast(0)); + ASSERT_EQ(view_test_h(8), static_cast(0)); } else if (name == "small-b") { - EXPECT_EQ(view_test_h(0), static_cast(22)); - EXPECT_EQ(view_test_h(1), static_cast(-12)); - EXPECT_EQ(view_test_h(2), static_cast(22)); - EXPECT_EQ(view_test_h(3), static_cast(-12)); - EXPECT_EQ(view_test_h(4), static_cast(22)); - EXPECT_EQ(view_test_h(5), static_cast(-12)); - EXPECT_EQ(view_test_h(6), static_cast(22)); - EXPECT_EQ(view_test_h(7), static_cast(-12)); - EXPECT_EQ(view_test_h(8), static_cast(22)); - EXPECT_EQ(view_test_h(9), static_cast(-12)); - EXPECT_EQ(view_test_h(10), static_cast(22)); - EXPECT_EQ(view_test_h(11), static_cast(-12)); - EXPECT_EQ(view_test_h(12), static_cast(22)); + ASSERT_EQ(view_test_h(0), static_cast(22)); + ASSERT_EQ(view_test_h(1), static_cast(-12)); + ASSERT_EQ(view_test_h(2), static_cast(22)); + ASSERT_EQ(view_test_h(3), static_cast(-12)); + ASSERT_EQ(view_test_h(4), static_cast(22)); + ASSERT_EQ(view_test_h(5), static_cast(-12)); + ASSERT_EQ(view_test_h(6), static_cast(22)); + ASSERT_EQ(view_test_h(7), static_cast(-12)); + ASSERT_EQ(view_test_h(8), static_cast(22)); + ASSERT_EQ(view_test_h(9), static_cast(-12)); + ASSERT_EQ(view_test_h(10), static_cast(22)); + ASSERT_EQ(view_test_h(11), static_cast(-12)); + ASSERT_EQ(view_test_h(12), static_cast(22)); } else if (name == "medium" || name == "large") { @@ -190,14 +190,14 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, std::size_t count = 0; for (std::size_t i = 0; i < view_from_h.extent(0); ++i) { if (pred(view_from_h(i))) { - EXPECT_EQ(view_test_h(count), view_from_h(i)); + ASSERT_EQ(view_test_h(count), view_from_h(i)); count++; } } // all other entries of test view should be zero for (; count < view_test_h.extent(0); ++count) { // std::cout << count << '\n'; - EXPECT_EQ(view_test_h(count), value_type(0)); + ASSERT_EQ(view_test_h(count), value_type(0)); } } @@ -226,7 +226,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::copy_if(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), pred); verify_data(name, view_from, view_dest, pred); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -235,7 +235,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::copy_if("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), pred); verify_data(name, view_from, view_dest, pred); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -243,7 +243,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto view_dest = create_view(Tag{}, view_ext, "copy_if_dest"); auto rit = KE::copy_if(exespace(), view_from, view_dest, pred); verify_data(name, view_from, view_dest, pred); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -251,7 +251,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto view_dest = create_view(Tag{}, view_ext, "copy_if_dest"); auto rit = KE::copy_if("label", exespace(), view_from, view_dest, pred); verify_data(name, view_from, view_dest, pred); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } Kokkos::fence(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsCount.cpp b/algorithms/unit_tests/TestStdAlgorithmsCount.cpp index 9423d2e15a4..32e98837090 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsCount.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsCount.cpp @@ -35,13 +35,13 @@ void test_count(const ViewType view) { const value_t count_value = 0; const auto std_result = std::count(KE::cbegin(expected), KE::cend(expected), count_value); - EXPECT_EQ(view.extent(0), size_t(std_result)); + ASSERT_EQ(view.extent(0), size_t(std_result)); // pass const iterators - EXPECT_EQ(std_result, KE::count(exespace(), KE::cbegin(view), + ASSERT_EQ(std_result, KE::count(exespace(), KE::cbegin(view), KE::cend(view), count_value)); // pass view - EXPECT_EQ(std_result, KE::count(exespace(), view, count_value)); + ASSERT_EQ(std_result, KE::count(exespace(), view, count_value)); } { @@ -50,10 +50,10 @@ void test_count(const ViewType view) { std::count(KE::cbegin(expected), KE::cend(expected), count_value); // pass iterators - EXPECT_EQ(std_result, KE::count("label", exespace(), KE::begin(view), + ASSERT_EQ(std_result, KE::count("label", exespace(), KE::begin(view), KE::end(view), count_value)); // pass view - EXPECT_EQ(std_result, KE::count("label", exespace(), view, count_value)); + ASSERT_EQ(std_result, KE::count("label", exespace(), view, count_value)); } } @@ -67,24 +67,24 @@ void test_count_if(const ViewType view) { // no positive elements (all zeroes) const auto predicate = IsPositiveFunctor(); - EXPECT_EQ(0, + ASSERT_EQ(0, std::count_if(KE::begin(expected), KE::end(expected), predicate)); // pass iterators - EXPECT_EQ( + ASSERT_EQ( 0, KE::count_if(exespace(), KE::begin(view), KE::end(view), predicate)); // pass view - EXPECT_EQ(0, KE::count_if(exespace(), view, predicate)); + ASSERT_EQ(0, KE::count_if(exespace(), view, predicate)); fill_views_inc(view, expected); const auto std_result = std::count_if(KE::begin(expected), KE::end(expected), predicate); // pass const iterators - EXPECT_EQ(std_result, KE::count_if("label", exespace(), KE::cbegin(view), + ASSERT_EQ(std_result, KE::count_if("label", exespace(), KE::cbegin(view), KE::cend(view), predicate)); // pass view - EXPECT_EQ(std_result, KE::count_if("label", exespace(), view, predicate)); + ASSERT_EQ(std_result, KE::count_if("label", exespace(), view, predicate)); } template diff --git a/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp b/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp index 4969541a023..799de8b0c49 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp @@ -157,7 +157,7 @@ void verify_data(ViewType1 data_view, // contains data // << gold_h(i) << " " << test_view_h(i) << " " // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same::value) { - EXPECT_EQ(gold_h(i), test_view_h(i)); + ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = std::abs(static_cast(gold_h(i) - test_view_h(i))); @@ -213,7 +213,7 @@ void run_single_scenario_default_op(const InfoType& scenario_info, auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } @@ -222,14 +222,14 @@ void run_single_scenario_default_op(const InfoType& scenario_info, auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } { fill_zero(view_dest); auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } @@ -237,7 +237,7 @@ void run_single_scenario_default_op(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest, init_value); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, default_op()); } @@ -263,7 +263,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -272,7 +272,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -280,7 +280,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value, bop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -288,7 +288,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest, init_value, bop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop); } @@ -344,6 +344,46 @@ TEST(std_algorithms_numeric_ops_test, exclusive_scan) { run_exclusive_scan_all_scenarios(); } +TEST(std_algorithms_numeric_ops_test, exclusive_scan_functor) { + int dummy = 0; + using view_type = Kokkos::View; + view_type dummy_view("dummy_view", 0); + using functor_type = Kokkos::Experimental::Impl::ExclusiveScanDefaultFunctor< + exespace, int, int, view_type, view_type>; + functor_type functor(dummy, dummy_view, dummy_view); + using value_type = functor_type::value_type; + + value_type value1; + functor.init(value1); + ASSERT_EQ(value1.val, 0); + ASSERT_EQ(value1.is_initial, true); + + value_type value2; + value2.val = 1; + value2.is_initial = false; + functor.join(value1, value2); + ASSERT_EQ(value1.val, 1); + ASSERT_EQ(value1.is_initial, false); + + functor.init(value1); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 1); + ASSERT_EQ(value2.is_initial, false); + + functor.init(value2); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 0); + ASSERT_EQ(value2.is_initial, true); + + value1.val = 1; + value1.is_initial = false; + value2.val = 2; + value2.is_initial = false; + functor.join(value2, value1); + ASSERT_EQ(value2.val, 3); + ASSERT_EQ(value2.is_initial, false); +} + } // namespace EScan } // namespace stdalgos } // namespace Test diff --git a/algorithms/unit_tests/TestStdAlgorithmsFind.cpp b/algorithms/unit_tests/TestStdAlgorithmsFind.cpp index 3b8b5e85af4..2692df69821 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsFind.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsFind.cpp @@ -34,14 +34,14 @@ void test_find(const ViewType view) { constexpr value_t find_value = 13; // value not found, return last - EXPECT_EQ(KE::end(expected), + ASSERT_EQ(KE::end(expected), std::find(KE::begin(expected), KE::end(expected), find_value)); // pass const iterators, returns const iterator - EXPECT_EQ(KE::cend(view), + ASSERT_EQ(KE::cend(view), KE::find(exespace(), KE::cbegin(view), KE::cend(view), find_value)); // pass view, returns iterator - EXPECT_EQ(KE::end(view), KE::find(exespace(), view, find_value)); + ASSERT_EQ(KE::end(view), KE::find(exespace(), view, find_value)); fill_views_inc(view, expected); @@ -50,10 +50,10 @@ void test_find(const ViewType view) { auto distance = std::distance(KE::begin(expected), std_result); // pass iterators, returns iterator - EXPECT_EQ(KE::begin(view) + distance, + ASSERT_EQ(KE::begin(view) + distance, KE::find(exespace(), KE::begin(view), KE::end(view), find_value)); // pass view, returns iterator - EXPECT_EQ(KE::begin(view) + distance, KE::find(exespace(), view, find_value)); + ASSERT_EQ(KE::begin(view) + distance, KE::find(exespace(), view, find_value)); } template @@ -67,15 +67,15 @@ void test_find_if(const ViewType view) { const auto not_equals_zero = NotEqualsZeroFunctor(); // value not found, return last - EXPECT_EQ( + ASSERT_EQ( KE::end(expected), std::find_if(KE::begin(expected), KE::end(expected), not_equals_zero)); // pass iterators, returns iterator - EXPECT_EQ(KE::end(view), KE::find_if(exespace(), KE::begin(view), + ASSERT_EQ(KE::end(view), KE::find_if(exespace(), KE::begin(view), KE::end(view), not_equals_zero)); // pass view, returns iterator - EXPECT_EQ(KE::end(view), KE::find_if(exespace(), view, not_equals_zero)); + ASSERT_EQ(KE::end(view), KE::find_if(exespace(), view, not_equals_zero)); fill_views_inc(view, expected); @@ -86,11 +86,11 @@ void test_find_if(const ViewType view) { auto distance = std::distance(KE::begin(expected), std_result); // pass const iterators, returns const iterator - EXPECT_EQ( + ASSERT_EQ( KE::cbegin(view) + distance, KE::find_if(exespace(), KE::cbegin(view), KE::cend(view), equals_val)); // pass view, returns iterator - EXPECT_EQ(KE::begin(view) + distance, + ASSERT_EQ(KE::begin(view) + distance, KE::find_if(exespace(), view, equals_val)); } @@ -105,15 +105,15 @@ void test_find_if_not(const ViewType view) { const auto not_equals_zero = NotEqualsZeroFunctor(); // first value matches - EXPECT_EQ(KE::begin(expected), + ASSERT_EQ(KE::begin(expected), std::find_if_not(KE::begin(expected), KE::end(expected), not_equals_zero)); // pass iterators, returns iterator - EXPECT_EQ(KE::begin(view), KE::find_if_not(exespace(), KE::begin(view), + ASSERT_EQ(KE::begin(view), KE::find_if_not(exespace(), KE::begin(view), KE::end(view), not_equals_zero)); // pass view, returns iterator - EXPECT_EQ(KE::begin(view), + ASSERT_EQ(KE::begin(view), KE::find_if_not(exespace(), view, not_equals_zero)); fill_views_inc(view, expected); @@ -124,11 +124,11 @@ void test_find_if_not(const ViewType view) { auto distance = std::distance(KE::begin(expected), std_result); // pass const iterators, returns const iterator - EXPECT_EQ(KE::cbegin(view) + distance, + ASSERT_EQ(KE::cbegin(view) + distance, KE::find_if_not(exespace(), KE::cbegin(view), KE::cend(view), equals_zero)); // pass view, returns const iterator - EXPECT_EQ(KE::begin(view) + distance, + ASSERT_EQ(KE::begin(view) + distance, KE::find_if_not(exespace(), view, equals_zero)); } @@ -151,12 +151,6 @@ void run_all_scenarios() { } TEST(std_algorithms_find_test, test) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); run_all_scenarios(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp b/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp index ddc4bc1ba67..5a5359b0b23 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsFindEnd.cpp @@ -282,7 +282,7 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); // std::cout << "result : " << mydiff << " " << stddiff << std::endl; - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { @@ -291,21 +291,21 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::find_end(exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::find_end("label", exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } Kokkos::fence(); @@ -348,12 +348,6 @@ void run_all_scenarios() { } TEST(std_algorithms_non_mod_seq_ops, find_end) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp b/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp index c2f7a2fdb8f..d77edb5fed3 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsFindFirstOf.cpp @@ -201,7 +201,7 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { @@ -210,21 +210,21 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::find_first_of(exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::find_first_of("label", exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } Kokkos::fence(); @@ -264,12 +264,6 @@ void run_all_scenarios() { } TEST(std_algorithms_non_mod_seq_ops, find_first_of) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp b/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp index 83b44f01aa7..793b98a67f1 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsForEach.cpp @@ -91,23 +91,23 @@ void test_for_each_n(const ViewType view) { const auto non_mod_functor = NoOpNonMutableFunctor(); // pass const iterators, functor takes const ref - EXPECT_EQ(KE::cbegin(view) + n, + ASSERT_EQ(KE::cbegin(view) + n, KE::for_each_n(exespace(), KE::cbegin(view), n, non_mod_functor)); verify_values(value_t{0}, view); // pass view, functor takes const ref - EXPECT_EQ(KE::begin(view) + n, + ASSERT_EQ(KE::begin(view) + n, KE::for_each_n(exespace(), view, n, non_mod_functor)); verify_values(value_t{0}, view); // pass iterators, functor takes non-const ref const auto mod_functor = IncrementElementWiseFunctor(); - EXPECT_EQ(KE::begin(view) + n, + ASSERT_EQ(KE::begin(view) + n, KE::for_each_n(exespace(), KE::begin(view), n, mod_functor)); verify_values(value_t{1}, view); // pass view, functor takes non-const ref - EXPECT_EQ(KE::begin(view) + n, + ASSERT_EQ(KE::begin(view) + n, KE::for_each_n("label", exespace(), view, n, mod_functor)); verify_values(value_t{2}, view); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp b/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp index 510f1d195a1..8e60a43e5ff 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp @@ -171,7 +171,7 @@ void verify_data(ViewType1 data_view, // contains data // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same::value) { - EXPECT_EQ(gold_h(i), test_view_h(i)); + ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = std::abs(static_cast(gold_h(i) - test_view_h(i))); @@ -224,7 +224,7 @@ void run_single_scenario_default_op(const InfoType& scenario_info) { fill_zero(view_dest); auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, default_op()); } @@ -232,21 +232,21 @@ void run_single_scenario_default_op(const InfoType& scenario_info) { fill_zero(view_dest); auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest)); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, default_op()); } { fill_zero(view_dest); auto r = KE::inclusive_scan(exespace(), view_from, view_dest); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, default_op()); } { fill_zero(view_dest); auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, default_op()); } @@ -279,7 +279,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop, auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bop, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } @@ -288,14 +288,14 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop, auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), bop, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } { fill_zero(view_dest); auto r = KE::inclusive_scan(exespace(), view_from, view_dest, bop, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } @@ -303,7 +303,7 @@ void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop, fill_zero(view_dest); auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest, bop, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, bop, args...); } @@ -353,6 +353,45 @@ TEST(std_algorithms_numeric_ops_test, inclusive_scan) { run_inclusive_scan_all_scenarios(); } +TEST(std_algorithms_numeric_ops_test, inclusive_scan_functor) { + using view_type = Kokkos::View; + view_type dummy_view("dummy_view", 0); + using functor_type = Kokkos::Experimental::Impl::InclusiveScanDefaultFunctor< + exespace, int, int, view_type, view_type>; + functor_type functor(dummy_view, dummy_view); + using value_type = functor_type::value_type; + + value_type value1; + functor.init(value1); + ASSERT_EQ(value1.val, 0); + ASSERT_EQ(value1.is_initial, true); + + value_type value2; + value2.val = 1; + value2.is_initial = false; + functor.join(value1, value2); + ASSERT_EQ(value1.val, 1); + ASSERT_EQ(value1.is_initial, false); + + functor.init(value1); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 1); + ASSERT_EQ(value2.is_initial, false); + + functor.init(value2); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 0); + ASSERT_EQ(value2.is_initial, true); + + value1.val = 1; + value1.is_initial = false; + value2.val = 2; + value2.is_initial = false; + functor.join(value2, value1); + ASSERT_EQ(value2.val, 3); + ASSERT_EQ(value2.is_initial, false); +} + } // namespace IncScan } // namespace stdalgos } // namespace Test diff --git a/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp b/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp index ce8669a84f2..dcfe8ad67e1 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp @@ -145,10 +145,10 @@ void run_single_scenario(const InfoType& scenario_info) { KE::is_sorted_until("label", exespace(), KE::begin(view), KE::end(view)); auto r3 = KE::is_sorted_until(exespace(), view); auto r4 = KE::is_sorted_until("label", exespace(), view); - EXPECT_EQ(r1, gold); - EXPECT_EQ(r2, gold); - EXPECT_EQ(r3, gold); - EXPECT_EQ(r4, gold); + ASSERT_EQ(r1, gold); + ASSERT_EQ(r2, gold); + ASSERT_EQ(r3, gold); + ASSERT_EQ(r4, gold); #if !defined KOKKOS_ENABLE_OPENMPTARGET CustomLessThanComparator comp; @@ -160,10 +160,10 @@ void run_single_scenario(const InfoType& scenario_info) { auto r8 = KE::is_sorted_until("label", exespace(), view, comp); #endif - EXPECT_EQ(r1, gold); - EXPECT_EQ(r2, gold); - EXPECT_EQ(r3, gold); - EXPECT_EQ(r4, gold); + ASSERT_EQ(r1, gold); + ASSERT_EQ(r2, gold); + ASSERT_EQ(r3, gold); + ASSERT_EQ(r4, gold); Kokkos::fence(); } @@ -185,12 +185,6 @@ void run_is_sorted_until_all_scenarios() { } TEST(std_algorithms_sorting_ops_test, is_sorted_until) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_is_sorted_until_all_scenarios(); run_is_sorted_until_all_scenarios(); run_is_sorted_until_all_scenarios(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp b/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp index 2acd4934acc..5d9e7db803c 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsLexicographicalCompare.cpp @@ -44,16 +44,16 @@ void test_lexicographical_compare(const ViewType1 view_1, ViewType2 view_2) { std::lexicographical_compare(h_first_1, h_last_1, h_first_2, h_last_2); // pass iterators - EXPECT_EQ(std_result, KE::lexicographical_compare(exespace(), first_1, + ASSERT_EQ(std_result, KE::lexicographical_compare(exespace(), first_1, last_1, first_2, last_2)); - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare("label", exespace(), first_1, last_1, first_2, last_2)); // pass views - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare(exespace(), view_1, view_2)); - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare("label", exespace(), view_1, view_2)); } @@ -67,17 +67,17 @@ void test_lexicographical_compare(const ViewType1 view_1, ViewType2 view_2) { h_first_1, h_last_1, h_first_2, h_last_2, custom_comparator); // pass iterators - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare(exespace(), first_1, last_1, first_2, last_2, custom_comparator)); - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare("label", exespace(), first_1, last_1, first_2, last_2, custom_comparator)); // pass views - EXPECT_EQ(std_result, KE::lexicographical_compare( + ASSERT_EQ(std_result, KE::lexicographical_compare( exespace(), view_1, view_2, custom_comparator)); - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare("label", exespace(), view_1, view_2, custom_comparator)); } @@ -86,7 +86,7 @@ void test_lexicographical_compare(const ViewType1 view_1, ViewType2 view_2) { // empty vs non-empty auto std_result = std::lexicographical_compare(h_first_1, h_first_1, h_first_2, h_last_2); - EXPECT_EQ(std_result, KE::lexicographical_compare( + ASSERT_EQ(std_result, KE::lexicographical_compare( exespace(), first_1, first_1, first_2, last_2)); } @@ -95,7 +95,7 @@ void test_lexicographical_compare(const ViewType1 view_1, ViewType2 view_2) { if (view_1.extent(0) > 1) { auto std_result = std::lexicographical_compare(h_first_1, h_last_1 - 1, h_first_2, h_last_2); - EXPECT_EQ(std_result, + ASSERT_EQ(std_result, KE::lexicographical_compare(exespace(), first_1, last_1 - 1, first_2, last_2)); } @@ -140,12 +140,6 @@ void run_all_scenarios() { } TEST(std_algorithms_lexicographical_compare_test, test) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif // FIXME: should this disable only custom comparator tests? #if !defined KOKKOS_ENABLE_OPENMPTARGET run_all_scenarios(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp b/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp index f8634ffafe2..bc432317842 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsMinMaxElementOps.cpp @@ -173,7 +173,7 @@ void std_algo_min_max_test_verify(Kokkos::pair goldPair, const ItType result, TestedViewType testedView) { // check that iterator is pointing to right element - EXPECT_EQ(result - KE::begin(testedView), goldPair.first); + ASSERT_EQ(result - KE::begin(testedView), goldPair.first); // create a view for the result to copy into it the iterator's value using result_view_t = Kokkos::View; @@ -184,7 +184,7 @@ void std_algo_min_max_test_verify(Kokkos::pair goldPair, Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), resultView); // use the host mirror of the result view to check that the values match - EXPECT_EQ(result_v_h(), goldPair.second); + ASSERT_EQ(result_v_h(), goldPair.second); } template @@ -199,39 +199,39 @@ template void test_max_element_trivial_data(ViewType view) { /* if we pass empty range, should return last */ auto result = KE::max_element(exespace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_EQ(result, KE::cbegin(view)); + ASSERT_EQ(result, KE::cbegin(view)); /* if we pass empty range, should return last */ auto it0 = KE::cbegin(view) + 3; auto it1 = it0; auto result2 = KE::max_element(exespace(), it0, it1); - EXPECT_EQ(result2, it1); + ASSERT_EQ(result2, it1); } template void test_min_element_trivial_data(ViewType view) { /* if we pass empty range, should return last */ auto result = KE::min_element(exespace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_EQ(result, KE::cbegin(view)); + ASSERT_EQ(result, KE::cbegin(view)); /* if we pass empty range, should return last */ auto it0 = KE::cbegin(view) + 3; auto it1 = it0; auto result2 = KE::min_element(exespace(), it0, it1); - EXPECT_EQ(result2, it1); + ASSERT_EQ(result2, it1); } template void test_minmax_element_empty_range(ViewType view) { auto result = KE::minmax_element(exespace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_EQ(result.first, KE::cbegin(view)); - EXPECT_EQ(result.second, KE::cbegin(view)); + ASSERT_EQ(result.first, KE::cbegin(view)); + ASSERT_EQ(result.second, KE::cbegin(view)); auto it0 = KE::cbegin(view) + 3; auto it1 = it0; auto result2 = KE::minmax_element(exespace(), it0, it1); - EXPECT_EQ(result2.first, it1); - EXPECT_EQ(result2.second, it1); + ASSERT_EQ(result2.first, it1); + ASSERT_EQ(result2.second, it1); } template diff --git a/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp b/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp index bb4b6fb2a2a..f3b3e269c44 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsMismatch.cpp @@ -120,10 +120,10 @@ void run_single_scenario(ViewType view1, ViewType view2, const auto my_diff12 = my_res1.second - f2; const auto my_diff21 = my_res2.first - f1; const auto my_diff22 = my_res2.second - f2; - EXPECT_EQ(my_diff11, std_diff1); - EXPECT_EQ(my_diff12, std_diff2); - EXPECT_EQ(my_diff21, std_diff1); - EXPECT_EQ(my_diff22, std_diff2); + ASSERT_EQ(my_diff11, std_diff1); + ASSERT_EQ(my_diff12, std_diff2); + ASSERT_EQ(my_diff21, std_diff1); + ASSERT_EQ(my_diff22, std_diff2); } { @@ -134,10 +134,10 @@ void run_single_scenario(ViewType view1, ViewType view2, const auto my_diff12 = my_res1.second - KE::begin(view2); const auto my_diff21 = my_res2.first - KE::begin(view1); const auto my_diff22 = my_res2.second - KE::begin(view2); - EXPECT_EQ(my_diff11, std_diff1); - EXPECT_EQ(my_diff12, std_diff2); - EXPECT_EQ(my_diff21, std_diff1); - EXPECT_EQ(my_diff22, std_diff2); + ASSERT_EQ(my_diff11, std_diff1); + ASSERT_EQ(my_diff12, std_diff2); + ASSERT_EQ(my_diff21, std_diff1); + ASSERT_EQ(my_diff22, std_diff2); } } @@ -189,12 +189,6 @@ void run_all_scenarios() { } TEST(std_algorithms_mismatch_test, test) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp b/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp index 4fce044bcf7..4604764097e 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp @@ -52,14 +52,14 @@ TEST(std_algorithms_mod_ops_test, move) { // move constr MyMovableType b(std::move(a)); - EXPECT_EQ(b.m_value, 11); - EXPECT_EQ(a.m_value, -2); + ASSERT_EQ(b.m_value, 11); + ASSERT_EQ(a.m_value, -2); // move assign MyMovableType c; c = std::move(b); - EXPECT_EQ(c.m_value, 11); - EXPECT_EQ(b.m_value, -4); + ASSERT_EQ(c.m_value, 11); + ASSERT_EQ(b.m_value, -4); } template @@ -97,8 +97,8 @@ TEST(std_algorithms_mod_ops_test, swap) { int a = 1; int b = 2; KE::swap(a, b); - EXPECT_EQ(a, 2); - EXPECT_EQ(b, 1); + ASSERT_EQ(a, 2); + ASSERT_EQ(b, 1); } { @@ -151,17 +151,17 @@ void test_iter_swap(ViewType view) { using value_type = typename ViewType::value_type; auto a_dc = create_deep_copyable_compatible_clone(view); auto a_h = create_mirror_view_and_copy(Kokkos::HostSpace(), a_dc); - EXPECT_EQ(view.extent_int(0), 10); - EXPECT_EQ(a_h(0), value_type(3)); - EXPECT_EQ(a_h(1), value_type(1)); - EXPECT_EQ(a_h(2), value_type(2)); - EXPECT_EQ(a_h(3), value_type(0)); - EXPECT_EQ(a_h(4), value_type(6)); - EXPECT_EQ(a_h(5), value_type(5)); - EXPECT_EQ(a_h(6), value_type(4)); - EXPECT_EQ(a_h(7), value_type(7)); - EXPECT_EQ(a_h(8), value_type(8)); - EXPECT_EQ(a_h(9), value_type(9)); + ASSERT_EQ(view.extent_int(0), 10); + ASSERT_EQ(a_h(0), value_type(3)); + ASSERT_EQ(a_h(1), value_type(1)); + ASSERT_EQ(a_h(2), value_type(2)); + ASSERT_EQ(a_h(3), value_type(0)); + ASSERT_EQ(a_h(4), value_type(6)); + ASSERT_EQ(a_h(5), value_type(5)); + ASSERT_EQ(a_h(6), value_type(4)); + ASSERT_EQ(a_h(7), value_type(7)); + ASSERT_EQ(a_h(8), value_type(8)); + ASSERT_EQ(a_h(9), value_type(9)); } TEST(std_algorithms_mod_ops_test, iter_swap_static_view) { diff --git a/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp b/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp index 6b806d7bc5c..f80f30797e4 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsModSeqOps.cpp @@ -34,21 +34,21 @@ struct std_algorithms_mod_seq_ops_test : std_algorithms_test { TEST_F(std_algorithms_mod_seq_ops_test, copy) { auto result = KE::copy(exespace(), KE::begin(m_static_view), KE::end(m_static_view), KE::begin(m_strided_view)); - EXPECT_EQ(KE::end(m_strided_view), result); + ASSERT_EQ(KE::end(m_strided_view), result); compare_views(m_static_view, m_strided_view); auto result2 = KE::copy(exespace(), KE::begin(m_strided_view), KE::end(m_strided_view), KE::begin(m_dynamic_view)); - EXPECT_EQ(KE::end(m_dynamic_view), result2); + ASSERT_EQ(KE::end(m_dynamic_view), result2); compare_views(m_dynamic_view, m_strided_view); } TEST_F(std_algorithms_mod_seq_ops_test, copy_view) { - EXPECT_EQ(KE::end(m_dynamic_view), + ASSERT_EQ(KE::end(m_dynamic_view), KE::copy(exespace(), m_static_view, m_dynamic_view)); compare_views(m_static_view, m_dynamic_view); - EXPECT_EQ(KE::end(m_strided_view), + ASSERT_EQ(KE::end(m_strided_view), KE::copy(exespace(), m_dynamic_view, m_strided_view)); compare_views(m_dynamic_view, m_strided_view); } @@ -70,11 +70,11 @@ TEST_F(std_algorithms_mod_seq_ops_test, copy_n) { // pass iterators auto first = KE::begin(m_static_view); auto dest = KE::begin(m_dynamic_view); - EXPECT_EQ(dest + n, KE::copy_n(exespace(), first, n, dest)); + ASSERT_EQ(dest + n, KE::copy_n(exespace(), first, n, dest)); compare_views(expected, m_dynamic_view); // pass views - EXPECT_EQ(KE::begin(m_strided_view) + n, + ASSERT_EQ(KE::begin(m_strided_view) + n, KE::copy_n(exespace(), m_static_view, n, m_strided_view)); compare_views(expected, m_strided_view); } @@ -85,12 +85,12 @@ TEST_F(std_algorithms_mod_seq_ops_test, copy_backward) { auto dest = KE::end(m_dynamic_view); // pass iterators - EXPECT_EQ(KE::begin(m_dynamic_view), + ASSERT_EQ(KE::begin(m_dynamic_view), KE::copy_backward(exespace(), first, last, dest)); compare_views(m_static_view, m_dynamic_view); // pass views - EXPECT_EQ(KE::begin(m_strided_view), + ASSERT_EQ(KE::begin(m_strided_view), KE::copy_backward(exespace(), m_static_view, m_strided_view)); compare_views(m_static_view, m_strided_view); } @@ -112,11 +112,11 @@ TEST_F(std_algorithms_mod_seq_ops_test, reverse_copy) { auto last = KE::end(m_static_view); auto dest = KE::begin(m_dynamic_view); - EXPECT_EQ(KE::end(m_dynamic_view), + ASSERT_EQ(KE::end(m_dynamic_view), KE::reverse_copy(exespace(), first, last, dest)); compare_views(expected, m_dynamic_view); - EXPECT_EQ(KE::end(m_strided_view), + ASSERT_EQ(KE::end(m_strided_view), KE::reverse_copy(exespace(), m_static_view, m_strided_view)); compare_views(expected, m_strided_view); } @@ -151,25 +151,25 @@ TEST_F(std_algorithms_mod_seq_ops_test, fill_n) { // fill all elements // pass iterator - EXPECT_EQ(KE::end(m_static_view), + ASSERT_EQ(KE::end(m_static_view), KE::fill_n(exespace(), KE::begin(m_static_view), m_static_view.extent(0), fill_n_value)); verify_values(fill_n_value, m_static_view); // pass view - EXPECT_EQ(KE::end(m_strided_view), + ASSERT_EQ(KE::end(m_strided_view), KE::fill_n(exespace(), m_strided_view, m_strided_view.extent(0), fill_n_value)); verify_values(fill_n_value, m_strided_view); // fill zero elements // pass view - EXPECT_EQ(KE::begin(m_dynamic_view), + ASSERT_EQ(KE::begin(m_dynamic_view), KE::fill_n(exespace(), m_dynamic_view, 0, fill_n_new_value)); // fill single element // pass iterator - EXPECT_EQ( + ASSERT_EQ( KE::begin(m_static_view) + 1, KE::fill_n(exespace(), KE::begin(m_static_view), 1, fill_n_new_value)); @@ -212,21 +212,21 @@ TEST_F(std_algorithms_mod_seq_ops_test, transform_from_fixture_unary_op) { auto r1 = KE::transform(exespace(), KE::begin(m_static_view), KE::end(m_static_view), KE::begin(m_dynamic_view), TransformFunctor()); - EXPECT_EQ(r1, KE::end(m_dynamic_view)); + ASSERT_EQ(r1, KE::end(m_dynamic_view)); compare_views(gold_source, m_static_view); verify_values(-1., m_dynamic_view); // transform dynamic view, store results in strided view auto r2 = KE::transform(exespace(), m_dynamic_view, m_strided_view, TransformFunctor()); - EXPECT_EQ(r2, KE::end(m_strided_view)); + ASSERT_EQ(r2, KE::end(m_strided_view)); verify_values(-1., m_dynamic_view); verify_values(-1., m_strided_view); // transform strided view, store results in static view auto r3 = KE::transform(exespace(), m_strided_view, m_static_view, TransformFunctor()); - EXPECT_EQ(r3, KE::end(m_static_view)); + ASSERT_EQ(r3, KE::end(m_static_view)); verify_values(-1., m_static_view); verify_values(-1., m_strided_view); } @@ -254,7 +254,7 @@ TEST_F(std_algorithms_mod_seq_ops_test, transform_from_fixture_binary_op) { auto r1 = KE::transform(exespace(), KE::begin(m_static_view), KE::end(m_static_view), KE::begin(m_dynamic_view), KE::begin(m_strided_view), TransformBinaryFunctor()); - EXPECT_EQ(r1, KE::end(m_strided_view)); + ASSERT_EQ(r1, KE::end(m_strided_view)); compare_views(expected, m_strided_view); expected(0) = 0; @@ -269,7 +269,7 @@ TEST_F(std_algorithms_mod_seq_ops_test, transform_from_fixture_binary_op) { expected(9) = 18; auto r2 = KE::transform("label", exespace(), m_static_view, m_strided_view, m_dynamic_view, TransformBinaryFunctor()); - EXPECT_EQ(r2, KE::end(m_dynamic_view)); + ASSERT_EQ(r2, KE::end(m_dynamic_view)); compare_views(expected, m_dynamic_view); } @@ -296,19 +296,19 @@ TEST_F(std_algorithms_mod_seq_ops_test, generate) { TEST_F(std_algorithms_mod_seq_ops_test, generate_n) { // iterator + functor - EXPECT_EQ(KE::end(m_static_view), + ASSERT_EQ(KE::end(m_static_view), KE::generate_n(exespace(), KE::begin(m_static_view), m_static_view.extent(0), GenerateFunctor())); verify_values(generated_value, m_static_view); // view + functor - EXPECT_EQ(KE::end(m_dynamic_view), + ASSERT_EQ(KE::end(m_dynamic_view), KE::generate_n(exespace(), m_dynamic_view, m_dynamic_view.extent(0), GenerateFunctor())); verify_values(generated_value, m_dynamic_view); // view + functor, negative n - EXPECT_EQ(KE::begin(m_strided_view), + ASSERT_EQ(KE::begin(m_strided_view), KE::generate_n(exespace(), m_strided_view, -1, GenerateFunctor())); } @@ -352,7 +352,7 @@ void test_swap_ranges(ViewType view) { auto last1 = first1 + 4; auto first2 = KE::begin(viewB) + 1; auto r = KE::swap_ranges(exespace(), first1, last1, first2); - EXPECT_EQ(r, first2 + 4); + ASSERT_EQ(r, first2 + 4); /* check VIEW_A */ static_view_type checkViewA("tmp"); @@ -360,16 +360,16 @@ void test_swap_ranges(ViewType view) { parallel_for(ext, cp_func_a_t(view, checkViewA)); auto cvA_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkViewA); - EXPECT_EQ(cvA_h(0), 0); - EXPECT_EQ(cvA_h(1), 1); - EXPECT_EQ(cvA_h(2), 99); - EXPECT_EQ(cvA_h(3), 98); - EXPECT_EQ(cvA_h(4), 97); - EXPECT_EQ(cvA_h(5), 96); - EXPECT_EQ(cvA_h(6), 6); - EXPECT_EQ(cvA_h(7), 7); - EXPECT_EQ(cvA_h(8), 8); - EXPECT_EQ(cvA_h(9), 9); + ASSERT_EQ(cvA_h(0), 0); + ASSERT_EQ(cvA_h(1), 1); + ASSERT_EQ(cvA_h(2), 99); + ASSERT_EQ(cvA_h(3), 98); + ASSERT_EQ(cvA_h(4), 97); + ASSERT_EQ(cvA_h(5), 96); + ASSERT_EQ(cvA_h(6), 6); + ASSERT_EQ(cvA_h(7), 7); + ASSERT_EQ(cvA_h(8), 8); + ASSERT_EQ(cvA_h(9), 9); /* check viewB */ static_view_type checkViewB("tmpB"); @@ -377,16 +377,16 @@ void test_swap_ranges(ViewType view) { Kokkos::parallel_for(ext, cp_func_b_t(viewB, checkViewB)); auto cvB_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), checkViewB); - EXPECT_EQ(cvB_h(0), 100); - EXPECT_EQ(cvB_h(1), 2); - EXPECT_EQ(cvB_h(2), 3); - EXPECT_EQ(cvB_h(3), 4); - EXPECT_EQ(cvB_h(4), 5); - EXPECT_EQ(cvB_h(5), 95); - EXPECT_EQ(cvB_h(6), 94); - EXPECT_EQ(cvB_h(7), 93); - EXPECT_EQ(cvB_h(8), 92); - EXPECT_EQ(cvB_h(9), 91); + ASSERT_EQ(cvB_h(0), 100); + ASSERT_EQ(cvB_h(1), 2); + ASSERT_EQ(cvB_h(2), 3); + ASSERT_EQ(cvB_h(3), 4); + ASSERT_EQ(cvB_h(4), 5); + ASSERT_EQ(cvB_h(5), 95); + ASSERT_EQ(cvB_h(6), 94); + ASSERT_EQ(cvB_h(7), 93); + ASSERT_EQ(cvB_h(8), 92); + ASSERT_EQ(cvB_h(9), 91); } TEST_F(std_algorithms_mod_seq_ops_test, swap_ranges) { diff --git a/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp b/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp index 635714eb545..b201ab95c1a 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsMoveBackward.cpp @@ -53,20 +53,20 @@ void run_single_scenario(const InfoType& scenario_info, int apiId) { auto rit = KE::move_backward(exespace(), KE::begin(v), KE::end(v), KE::end(v2)); const int dist = KE::distance(KE::begin(v2), rit); - EXPECT_EQ(dist, 5); + ASSERT_EQ(dist, 5); } else if (apiId == 1) { auto rit = KE::move_backward("mylabel", exespace(), KE::begin(v), KE::end(v), KE::end(v2)); const int dist = KE::distance(KE::begin(v2), rit); - EXPECT_EQ(dist, 5); + ASSERT_EQ(dist, 5); } else if (apiId == 2) { auto rit = KE::move_backward(exespace(), v, v2); const int dist = KE::distance(KE::begin(v2), rit); - EXPECT_EQ(dist, 5); + ASSERT_EQ(dist, 5); } else if (apiId == 3) { auto rit = KE::move_backward("mylabel", exespace(), v, v2); const int dist = KE::distance(KE::begin(v2), rit); - EXPECT_EQ(dist, 5); + ASSERT_EQ(dist, 5); } // check diff --git a/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp b/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp index 288a67c3695..0933c4e135f 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp @@ -151,8 +151,8 @@ void run_and_check_transform_reduce_default(ViewType1 first_view, const auto r2 = KE::transform_reduce( "MYLABEL", ExecutionSpace(), KE::cbegin(first_view), KE::cbegin(first_view), KE::cbegin(second_view), init_value); - EXPECT_EQ(r1, init_value); - EXPECT_EQ(r2, init_value); + ASSERT_EQ(r1, init_value); + ASSERT_EQ(r2, init_value); // non-trivial cases const auto r3 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(first_view), @@ -168,10 +168,10 @@ void run_and_check_transform_reduce_default(ViewType1 first_view, const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, second_view, init_value); - EXPECT_EQ(r3, result_value); - EXPECT_EQ(r4, result_value); - EXPECT_EQ(r5, result_value); - EXPECT_EQ(r6, result_value); + ASSERT_EQ(r3, result_value); + ASSERT_EQ(r4, result_value); + ASSERT_EQ(r5, result_value); + ASSERT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -254,8 +254,8 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view, KE::cbegin(first_view), KE::cbegin(second_view), init_value, std::forward(args)...); - EXPECT_EQ(r1, init_value); - EXPECT_EQ(r2, init_value); + ASSERT_EQ(r1, init_value); + ASSERT_EQ(r2, init_value); // non trivial cases const auto r3 = KE::transform_reduce( @@ -273,10 +273,10 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view, KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, second_view, init_value, std::forward(args)...); - EXPECT_EQ(r3, result_value); - EXPECT_EQ(r4, result_value); - EXPECT_EQ(r5, result_value); - EXPECT_EQ(r6, result_value); + ASSERT_EQ(r3, result_value); + ASSERT_EQ(r4, result_value); + ASSERT_EQ(r5, result_value); + ASSERT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -373,8 +373,8 @@ void run_and_check_transform_reduce_overloadB(ViewType view, KE::cbegin(view), KE::cbegin(view), init_value, std::forward(args)...); - EXPECT_EQ(r1, init_value); - EXPECT_EQ(r2, init_value); + ASSERT_EQ(r1, init_value); + ASSERT_EQ(r2, init_value); // non trivial const auto r3 = @@ -390,10 +390,10 @@ void run_and_check_transform_reduce_overloadB(ViewType view, const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), view, init_value, std::forward(args)...); - EXPECT_EQ(r3, result_value); - EXPECT_EQ(r4, result_value); - EXPECT_EQ(r5, result_value); - EXPECT_EQ(r6, result_value); + ASSERT_EQ(r3, result_value); + ASSERT_EQ(r4, result_value); + ASSERT_EQ(r5, result_value); + ASSERT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -447,8 +447,8 @@ void run_and_check_reduce_overloadA(ViewType view, ValueType non_trivial_result, KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cbegin(view)); const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), KE::cbegin(view)); - EXPECT_EQ(r1, trivial_result); - EXPECT_EQ(r2, trivial_result); + ASSERT_EQ(r1, trivial_result); + ASSERT_EQ(r2, trivial_result); // non trivial cases const auto r3 = @@ -458,10 +458,10 @@ void run_and_check_reduce_overloadA(ViewType view, ValueType non_trivial_result, const auto r5 = KE::reduce(ExecutionSpace(), view); const auto r6 = KE::reduce("MYLABEL", ExecutionSpace(), view); - EXPECT_EQ(r3, non_trivial_result); - EXPECT_EQ(r4, non_trivial_result); - EXPECT_EQ(r5, non_trivial_result); - EXPECT_EQ(r6, non_trivial_result); + ASSERT_EQ(r3, non_trivial_result); + ASSERT_EQ(r4, non_trivial_result); + ASSERT_EQ(r5, non_trivial_result); + ASSERT_EQ(r6, non_trivial_result); } TEST_F(std_algorithms_numerics_test, @@ -503,8 +503,8 @@ void run_and_check_reduce_overloadB(ViewType view, ValueType result_value, KE::cbegin(view), init_value); const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), KE::cbegin(view), init_value); - EXPECT_EQ(r1, init_value); - EXPECT_EQ(r2, init_value); + ASSERT_EQ(r1, init_value); + ASSERT_EQ(r2, init_value); // non trivial cases const auto r3 = KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view), @@ -514,10 +514,10 @@ void run_and_check_reduce_overloadB(ViewType view, ValueType result_value, const auto r5 = KE::reduce(ExecutionSpace(), view, init_value); const auto r6 = KE::reduce("MYLABEL", ExecutionSpace(), view, init_value); - EXPECT_EQ(r3, result_value); - EXPECT_EQ(r4, result_value); - EXPECT_EQ(r5, result_value); - EXPECT_EQ(r6, result_value); + ASSERT_EQ(r3, result_value); + ASSERT_EQ(r4, result_value); + ASSERT_EQ(r5, result_value); + ASSERT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, @@ -553,8 +553,8 @@ void run_and_check_reduce_overloadC(ViewType view, ValueType result_value, KE::cbegin(view), init_value, joiner); const auto r2 = KE::reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), KE::cbegin(view), init_value, joiner); - EXPECT_EQ(r1, init_value); - EXPECT_EQ(r2, init_value); + ASSERT_EQ(r1, init_value); + ASSERT_EQ(r2, init_value); // non trivial cases const auto r3 = KE::reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view), @@ -565,10 +565,10 @@ void run_and_check_reduce_overloadC(ViewType view, ValueType result_value, const auto r6 = KE::reduce("MYLABEL", ExecutionSpace(), view, init_value, joiner); - EXPECT_EQ(r3, result_value); - EXPECT_EQ(r4, result_value); - EXPECT_EQ(r5, result_value); - EXPECT_EQ(r6, result_value); + ASSERT_EQ(r3, result_value); + ASSERT_EQ(r4, result_value); + ASSERT_EQ(r5, result_value); + ASSERT_EQ(r6, result_value); } TEST_F(std_algorithms_numerics_test, diff --git a/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp b/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp index 0399e9eee4d..f169fd9ce88 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsPartitionCopy.cpp @@ -130,12 +130,12 @@ void verify_data(const std::string& name, ResultType my_result, const std::size_t my_diff_true = my_result.first - KE::begin(view_dest_true); const std::size_t my_diff_false = my_result.second - KE::begin(view_dest_false); - EXPECT_EQ(std_diff_true, my_diff_true); - EXPECT_EQ(std_diff_false, my_diff_false); + ASSERT_EQ(std_diff_true, my_diff_true); + ASSERT_EQ(std_diff_false, my_diff_false); auto view_dest_true_h = create_host_space_copy(view_dest_true); for (std::size_t i = 0; i < std_diff_true; ++i) { - EXPECT_EQ(std_vec_true[i], view_dest_true_h(i)); + ASSERT_EQ(std_vec_true[i], view_dest_true_h(i)); // std::cout << "i= " << i << " " // << " std_true = " << std_vec_true[i] << " " // << " mine = " << view_dest_true_h(i) << '\n'; @@ -143,45 +143,45 @@ void verify_data(const std::string& name, ResultType my_result, auto view_dest_false_h = create_host_space_copy(view_dest_false); for (std::size_t i = 0; i < std_diff_false; ++i) { - EXPECT_EQ(std_vec_false[i], view_dest_false_h(i)); + ASSERT_EQ(std_vec_false[i], view_dest_false_h(i)); // std::cout << "i= " << i << " " // << " std_false = " << std_vec_false[i] << " " // << " mine = " << view_dest_false_h(i) << '\n'; } if (name == "empty") { - EXPECT_EQ(my_diff_true, 0u); - EXPECT_EQ(my_diff_false, 0u); + ASSERT_EQ(my_diff_true, 0u); + ASSERT_EQ(my_diff_false, 0u); } else if (name == "one-element-a") { - EXPECT_EQ(my_diff_true, 0u); - EXPECT_EQ(my_diff_false, 1u); + ASSERT_EQ(my_diff_true, 0u); + ASSERT_EQ(my_diff_false, 1u); } else if (name == "one-element-b") { - EXPECT_EQ(my_diff_true, 1u); - EXPECT_EQ(my_diff_false, 0u); + ASSERT_EQ(my_diff_true, 1u); + ASSERT_EQ(my_diff_false, 0u); } else if (name == "two-elements-a") { - EXPECT_EQ(my_diff_true, 1u); - EXPECT_EQ(my_diff_false, 1u); + ASSERT_EQ(my_diff_true, 1u); + ASSERT_EQ(my_diff_false, 1u); } else if (name == "two-elements-b") { - EXPECT_EQ(my_diff_true, 1u); - EXPECT_EQ(my_diff_false, 1u); + ASSERT_EQ(my_diff_true, 1u); + ASSERT_EQ(my_diff_false, 1u); } else if (name == "small-b") { - EXPECT_EQ(my_diff_true, 13u); - EXPECT_EQ(my_diff_false, 0u); + ASSERT_EQ(my_diff_true, 13u); + ASSERT_EQ(my_diff_false, 0u); } else if (name == "small-c") { - EXPECT_EQ(my_diff_true, 0u); - EXPECT_EQ(my_diff_false, 15u); + ASSERT_EQ(my_diff_true, 0u); + ASSERT_EQ(my_diff_false, 15u); } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp b/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp index 1bfb536c2c7..33a1326c474 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsPartitioningOps.cpp @@ -148,12 +148,6 @@ struct std_algorithms_partitioning_test : public std_algorithms_test { }; TEST_F(std_algorithms_partitioning_test, is_partitioned_trivial) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif IsNegativeFunctor p; const auto result1 = KE::is_partitioned(exespace(), KE::cbegin(m_static_view), KE::cbegin(m_static_view), p); @@ -169,12 +163,6 @@ TEST_F(std_algorithms_partitioning_test, is_partitioned_trivial) { } TEST_F(std_algorithms_partitioning_test, is_partitioned_accepting_iterators) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif const IsNegativeFunctor p; for (int id = 0; id < FixtureViews::Count; ++id) { @@ -183,25 +171,19 @@ TEST_F(std_algorithms_partitioning_test, is_partitioned_accepting_iterators) { goldSolutionIsPartitioned(static_cast(id)); const auto result1 = KE::is_partitioned( exespace(), KE::cbegin(m_static_view), KE::cend(m_static_view), p); - EXPECT_EQ(goldBool, result1); + ASSERT_EQ(goldBool, result1); const auto result2 = KE::is_partitioned( exespace(), KE::cbegin(m_dynamic_view), KE::cend(m_dynamic_view), p); - EXPECT_EQ(goldBool, result2); + ASSERT_EQ(goldBool, result2); const auto result3 = KE::is_partitioned( exespace(), KE::cbegin(m_strided_view), KE::cend(m_strided_view), p); - EXPECT_EQ(goldBool, result3); + ASSERT_EQ(goldBool, result3); } } TEST_F(std_algorithms_partitioning_test, is_partitioned_accepting_view) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif const IsNegativeFunctor p; for (int id = 0; id < FixtureViews::Count; ++id) { @@ -209,23 +191,17 @@ TEST_F(std_algorithms_partitioning_test, is_partitioned_accepting_view) { const bool goldBool = goldSolutionIsPartitioned(static_cast(id)); const auto result1 = KE::is_partitioned(exespace(), m_static_view, p); - EXPECT_EQ(goldBool, result1); + ASSERT_EQ(goldBool, result1); const auto result2 = KE::is_partitioned(exespace(), m_dynamic_view, p); - EXPECT_EQ(goldBool, result2); + ASSERT_EQ(goldBool, result2); const auto result3 = KE::is_partitioned(exespace(), m_strided_view, p); - EXPECT_EQ(goldBool, result3); + ASSERT_EQ(goldBool, result3); } } TEST_F(std_algorithms_partitioning_test, partition_point) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif const IsNegativeFunctor p; for (int id = 0; id < FixtureViews::Count; ++id) { @@ -235,17 +211,17 @@ TEST_F(std_algorithms_partitioning_test, partition_point) { auto first1 = KE::cbegin(m_static_view); auto last1 = KE::cend(m_static_view); const auto result1 = KE::partition_point(exespace(), first1, last1, p); - EXPECT_EQ(goldIndex, result1 - first1); + ASSERT_EQ(goldIndex, result1 - first1); auto first2 = KE::cbegin(m_dynamic_view); auto last2 = KE::cend(m_dynamic_view); const auto result2 = KE::partition_point(exespace(), first2, last2, p); - EXPECT_EQ(goldIndex, result2 - first2); + ASSERT_EQ(goldIndex, result2 - first2); auto first3 = KE::cbegin(m_strided_view); auto last3 = KE::cend(m_strided_view); const auto result3 = KE::partition_point(exespace(), first3, last3, p); - EXPECT_EQ(goldIndex, result3 - first3); + ASSERT_EQ(goldIndex, result3 - first3); } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp b/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp index 8832d71f953..c35fc5c24b2 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRemove.cpp @@ -117,12 +117,12 @@ void verify_data(ViewTypeData view_data_h, ViewTypeTest view_test, // check that returned iterators are correct const std::size_t std_diff = std_result - KE::begin(view_data_h); const std::size_t my_diff = my_result - KE::begin(view_test); - EXPECT_EQ(std_diff, my_diff); + ASSERT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_test_h = create_host_space_copy(view_test); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_EQ(view_test_h(i), view_data_h[i]); + ASSERT_EQ(view_test_h(i), view_data_h[i]); // std::cout << "i= " << i << " " // << "mine: " << view_test_h(i) << " " // << "std: " << view_data_h(i) diff --git a/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp b/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp index 949f8f60c93..3d7c52108be 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRemoveCopy.cpp @@ -135,12 +135,12 @@ void verify_data(ViewFromType view_from, ViewDestType view_dest, // check that returned iterators are correct const std::size_t std_diff = std_result - gold_dest_std.begin(); const std::size_t my_diff = my_result - KE::begin(view_dest); - EXPECT_EQ(std_diff, my_diff); + ASSERT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_dest_h = create_host_space_copy(view_dest); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_EQ(view_dest_h(i), gold_dest_std[i]); + ASSERT_EQ(view_dest_h(i), gold_dest_std[i]); // std::cout << "i= " << i << " " // << "mine: " << view_dest_h(i) << " " // << "std: " << gold_dest_std[i] diff --git a/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp b/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp index 9dc1e4a7e16..cb699aa9235 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRemoveCopyIf.cpp @@ -119,12 +119,12 @@ void verify_data(ViewTypeFrom view_from, ViewTypeDest view_dest, // check that returned iterators are correct const std::size_t std_diff = std_result - gold_dest_std.begin(); const std::size_t my_diff = my_result - KE::begin(view_dest); - EXPECT_EQ(std_diff, my_diff); + ASSERT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_dest_h = create_host_space_copy(view_dest); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_EQ(view_dest_h(i), gold_dest_std[i]); + ASSERT_EQ(view_dest_h(i), gold_dest_std[i]); // std::cout << "i= " << i << " " // << "mine: " << view_dest_h(i) << " " // << "std: " << gold_dest_std[i] diff --git a/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp b/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp index e9d15f29d88..f06f2234eed 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRemoveIf.cpp @@ -112,12 +112,12 @@ void verify_data(ViewTypeData view_data_h, ViewTypeTest view_test, // check that returned iterators are correct const std::size_t std_diff = std_result - KE::begin(view_data_h); const std::size_t my_diff = my_result - KE::begin(view_test); - EXPECT_EQ(std_diff, my_diff); + ASSERT_EQ(std_diff, my_diff); // check the actual data after algo has been applied auto view_test_h = create_host_space_copy(view_test); for (std::size_t i = 0; i < my_diff; ++i) { - EXPECT_EQ(view_test_h(i), view_data_h[i]); + ASSERT_EQ(view_test_h(i), view_data_h[i]); // std::cout << "i= " << i << " " // << "mine: " << view_test_h(i) << " " // << "std: " << view_data_h(i) diff --git a/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp b/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp index b226de5535f..a22ab32d764 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsReplace.cpp @@ -104,30 +104,30 @@ void verify_data(const std::string& name, ViewType1 test_view, } else if (name == "one-element-a") { - EXPECT_EQ(view_h(0), ValueType{1}); + ASSERT_EQ(view_h(0), ValueType{1}); } else if (name == "one-element-b") { - EXPECT_EQ(view_h(0), new_value); + ASSERT_EQ(view_h(0), new_value); } else if (name == "two-elements-a") { - EXPECT_EQ(view_h(0), ValueType{1}); - EXPECT_EQ(view_h(1), new_value); + ASSERT_EQ(view_h(0), ValueType{1}); + ASSERT_EQ(view_h(1), new_value); } else if (name == "two-elements-b") { - EXPECT_EQ(view_h(0), new_value); - EXPECT_EQ(view_h(1), ValueType{-1}); + ASSERT_EQ(view_h(0), new_value); + ASSERT_EQ(view_h(1), ValueType{-1}); } else if (name == "small-a") { for (std::size_t i = 0; i < view_h.extent(0); ++i) { if (i == 0 || i == 3 || i == 5 || i == 6) { - EXPECT_EQ(view_h(i), new_value); + ASSERT_EQ(view_h(i), new_value); } else { const auto gold = ValueType{-5} + static_cast(i + 1); - EXPECT_EQ(view_h(i), gold); + ASSERT_EQ(view_h(i), gold); } } } @@ -135,9 +135,9 @@ void verify_data(const std::string& name, ViewType1 test_view, else if (name == "small-b") { for (std::size_t i = 0; i < view_h.extent(0); ++i) { if (i < 4) { - EXPECT_EQ(view_h(i), ValueType{-1}); + ASSERT_EQ(view_h(i), ValueType{-1}); } else { - EXPECT_EQ(view_h(i), new_value); + ASSERT_EQ(view_h(i), new_value); } } } @@ -145,9 +145,9 @@ void verify_data(const std::string& name, ViewType1 test_view, else if (name == "medium" || name == "large") { for (std::size_t i = 0; i < view_h.extent(0); ++i) { if (i % 2 == 0) { - EXPECT_EQ(view_h(i), ValueType{-1}); + ASSERT_EQ(view_h(i), ValueType{-1}); } else { - EXPECT_EQ(view_h(i), new_value); + ASSERT_EQ(view_h(i), new_value); } } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp b/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp index 16b181fdd22..a964ec8e173 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsReplaceCopy.cpp @@ -112,40 +112,40 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_EQ(view_from_h(0), ValueType{1}); - EXPECT_EQ(view_test_h(0), view_from_h(0)); + ASSERT_EQ(view_from_h(0), ValueType{1}); + ASSERT_EQ(view_test_h(0), view_from_h(0)); } else if (name == "one-element-b") { - EXPECT_EQ(view_from_h(0), ValueType{2}); - EXPECT_EQ(view_test_h(0), new_value); + ASSERT_EQ(view_from_h(0), ValueType{2}); + ASSERT_EQ(view_test_h(0), new_value); } else if (name == "two-elements-a") { - EXPECT_EQ(view_from_h(0), ValueType{1}); - EXPECT_EQ(view_from_h(1), ValueType{2}); + ASSERT_EQ(view_from_h(0), ValueType{1}); + ASSERT_EQ(view_from_h(1), ValueType{2}); - EXPECT_EQ(view_test_h(0), view_from_h(0)); - EXPECT_EQ(view_test_h(1), new_value); + ASSERT_EQ(view_test_h(0), view_from_h(0)); + ASSERT_EQ(view_test_h(1), new_value); } else if (name == "two-elements-b") { - EXPECT_EQ(view_from_h(0), ValueType{2}); - EXPECT_EQ(view_from_h(1), ValueType{-1}); + ASSERT_EQ(view_from_h(0), ValueType{2}); + ASSERT_EQ(view_from_h(1), ValueType{-1}); - EXPECT_EQ(view_test_h(0), new_value); - EXPECT_EQ(view_test_h(1), view_from_h(1)); + ASSERT_EQ(view_test_h(0), new_value); + ASSERT_EQ(view_test_h(1), view_from_h(1)); } else if (name == "small-a") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i == 0 || i == 3 || i == 5 || i == 6) { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } else { const auto gold = ValueType{-5} + static_cast(i + 1); - EXPECT_EQ(view_from_h(i), gold); - EXPECT_EQ(view_test_h(i), gold); + ASSERT_EQ(view_from_h(i), gold); + ASSERT_EQ(view_test_h(i), gold); } } } @@ -153,11 +153,11 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "small-b") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i < 4) { - EXPECT_EQ(view_from_h(i), ValueType{-1}); - EXPECT_EQ(view_test_h(i), view_from_h(i)); + ASSERT_EQ(view_from_h(i), ValueType{-1}); + ASSERT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } } } @@ -165,11 +165,11 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "medium" || name == "large") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i % 2 == 0) { - EXPECT_EQ(view_from_h(i), ValueType{-1}); - EXPECT_EQ(view_test_h(i), view_from_h(i)); + ASSERT_EQ(view_from_h(i), ValueType{-1}); + ASSERT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } } } @@ -202,7 +202,7 @@ void run_single_scenario(const InfoType& scenario_info) { KE::replace_copy(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -215,7 +215,7 @@ void run_single_scenario(const InfoType& scenario_info) { KE::cend(view_from), KE::begin(view_dest), old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -227,7 +227,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy(exespace(), view_from, view_dest, old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -239,7 +239,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy("label", exespace(), view_from, view_dest, old_value, new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } Kokkos::fence(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp b/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp index a402e30ad9c..ceeba889711 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsReplaceCopyIf.cpp @@ -112,40 +112,40 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_EQ(view_from_h(0), ValueType{1}); - EXPECT_EQ(view_test_h(0), view_from_h(0)); + ASSERT_EQ(view_from_h(0), ValueType{1}); + ASSERT_EQ(view_test_h(0), view_from_h(0)); } else if (name == "one-element-b") { - EXPECT_EQ(view_from_h(0), ValueType{2}); - EXPECT_EQ(view_test_h(0), new_value); + ASSERT_EQ(view_from_h(0), ValueType{2}); + ASSERT_EQ(view_test_h(0), new_value); } else if (name == "two-elements-a") { - EXPECT_EQ(view_from_h(0), ValueType{1}); - EXPECT_EQ(view_from_h(1), ValueType{2}); + ASSERT_EQ(view_from_h(0), ValueType{1}); + ASSERT_EQ(view_from_h(1), ValueType{2}); - EXPECT_EQ(view_test_h(0), view_from_h(0)); - EXPECT_EQ(view_test_h(1), new_value); + ASSERT_EQ(view_test_h(0), view_from_h(0)); + ASSERT_EQ(view_test_h(1), new_value); } else if (name == "two-elements-b") { - EXPECT_EQ(view_from_h(0), ValueType{2}); - EXPECT_EQ(view_from_h(1), ValueType{-1}); + ASSERT_EQ(view_from_h(0), ValueType{2}); + ASSERT_EQ(view_from_h(1), ValueType{-1}); - EXPECT_EQ(view_test_h(0), new_value); - EXPECT_EQ(view_test_h(1), view_from_h(1)); + ASSERT_EQ(view_test_h(0), new_value); + ASSERT_EQ(view_test_h(1), view_from_h(1)); } else if (name == "small-a") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i == 0 || i == 3 || i == 5 || i == 6) { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } else { const auto gold = ValueType{-5} + static_cast(i + 1); - EXPECT_EQ(view_from_h(i), gold); - EXPECT_EQ(view_test_h(i), gold); + ASSERT_EQ(view_from_h(i), gold); + ASSERT_EQ(view_test_h(i), gold); } } } @@ -153,11 +153,11 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "small-b") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i < 4) { - EXPECT_EQ(view_from_h(i), ValueType{-1}); - EXPECT_EQ(view_test_h(i), view_from_h(i)); + ASSERT_EQ(view_from_h(i), ValueType{-1}); + ASSERT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } } } @@ -165,11 +165,11 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, else if (name == "medium" || name == "large") { for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { if (i % 2 == 0) { - EXPECT_EQ(view_from_h(i), ValueType{-1}); - EXPECT_EQ(view_test_h(i), view_from_h(i)); + ASSERT_EQ(view_from_h(i), ValueType{-1}); + ASSERT_EQ(view_test_h(i), view_from_h(i)); } else { - EXPECT_EQ(view_from_h(i), ValueType{2}); - EXPECT_EQ(view_test_h(i), new_value); + ASSERT_EQ(view_from_h(i), ValueType{2}); + ASSERT_EQ(view_test_h(i), new_value); } } } @@ -209,7 +209,7 @@ void run_single_scenario(const InfoType& scenario_info) { KE::cend(view_from), KE::begin(view_dest), pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -220,7 +220,7 @@ void run_single_scenario(const InfoType& scenario_info) { KE::cend(view_from), KE::begin(view_dest), pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -230,7 +230,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy_if(exespace(), view_from, view_dest, pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -240,7 +240,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto rit = KE::replace_copy_if("label", exespace(), view_from, view_dest, pred_type(), new_value); verify_data(name, view_from, view_dest, new_value); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } Kokkos::fence(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp b/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp index f481144e1ce..802c0093c5c 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsReplaceIf.cpp @@ -138,7 +138,7 @@ void verify_data(ViewType1 data_view, // contains data // << data_view_dc(i) << " " // << data_view_h(i) << " " // << test_view_h(i) << std::endl; - EXPECT_EQ(data_view_h(i), test_view_h(i)); + ASSERT_EQ(data_view_h(i), test_view_h(i)); } } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp b/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp index 7d16e54029d..6e6ca727830 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsReverse.cpp @@ -77,7 +77,7 @@ void verify_data(ViewType1 test_view, ViewType2 orig_view) { const std::size_t ext = test_view.extent(0); for (std::size_t i = 0; i < ext; ++i) { - EXPECT_EQ(tv_h(i), ov_h(ext - i - 1)); + ASSERT_EQ(tv_h(i), ov_h(ext - i - 1)); } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp b/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp index a5a6f99bac3..5638cbee4a6 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp @@ -136,13 +136,13 @@ void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host, // make sure results match const auto my_diff = result_it - KE::begin(view); const auto std_diff = std_rit - KE::begin(data_view_host); - EXPECT_EQ(my_diff, std_diff); + ASSERT_EQ(my_diff, std_diff); // check views match auto view_h = create_host_space_copy(view); const std::size_t ext = view_h.extent(0); for (std::size_t i = 0; i < ext; ++i) { - EXPECT_EQ(view_h(i), data_view_host[i]); + ASSERT_EQ(view_h(i), data_view_host[i]); // std::cout << "i= " << i << " " // << "mine: " << view_h(i) << " " // << "std: " << data_view_host(i) diff --git a/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp b/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp index 27451a1d049..d0caca7cea3 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsRotateCopy.cpp @@ -139,7 +139,7 @@ void verify_data(ViewTypeFrom view_from, ViewTypeTest view_test, std_gold_h.begin()); for (std::size_t i = 0; i < ext; ++i) { - EXPECT_EQ(view_test_h(i), std_gold_h[i]); + ASSERT_EQ(view_test_h(i), std_gold_h[i]); // std::cout << "i= " << i << " " // << "from: " << view_from_h(i) << " " // << "mine: " << view_test_h(i) << " " @@ -177,7 +177,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = KE::rotate_copy(exespace(), KE::cbegin(view_from), n_it, KE::cend(view_from), KE::begin(view_dest)); verify_data(view_from, view_dest, rotation_point); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -187,7 +187,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = KE::rotate_copy("label", exespace(), KE::cbegin(view_from), n_it, KE::cend(view_from), KE::begin(view_dest)); verify_data(view_from, view_dest, rotation_point); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -196,7 +196,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = KE::rotate_copy(exespace(), view_from, rotation_point, view_dest); verify_data(view_from, view_dest, rotation_point); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } { @@ -205,7 +205,7 @@ void run_single_scenario(const InfoType& scenario_info, auto rit = KE::rotate_copy("label", exespace(), view_from, rotation_point, view_dest); verify_data(view_from, view_dest, rotation_point); - EXPECT_EQ(rit, (KE::begin(view_dest) + view_ext)); + ASSERT_EQ(rit, (KE::begin(view_dest) + view_ext)); } Kokkos::fence(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp b/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp index ab4bf507136..021609c444d 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsSearch.cpp @@ -259,7 +259,7 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { @@ -268,21 +268,21 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t seq_ext, KE::cbegin(s_view), KE::cend(s_view), args...); const auto mydiff = myrit - KE::cbegin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::search(exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::search("label", exespace(), view, s_view, args...); const auto mydiff = myrit - KE::begin(view); const auto stddiff = stdrit - KE::cbegin(view_h); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } Kokkos::fence(); @@ -325,12 +325,6 @@ void run_all_scenarios() { } TEST(std_algorithms_non_mod_seq_ops, search) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp b/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp index a6fe9c1e896..53ad8daa2ec 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsSearch_n.cpp @@ -203,26 +203,26 @@ void run_single_scenario(const InfoType& scenario_info, std::size_t count, auto myrit = KE::search_n(exespace(), KE::cbegin(view), KE::cend(view), count, value, args...); const auto mydiff = myrit - KE::cbegin(view); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::search_n("label", exespace(), KE::cbegin(view), KE::cend(view), count, value, args...); const auto mydiff = myrit - KE::cbegin(view); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::search_n("label", exespace(), view, count, value, args...); const auto mydiff = myrit - KE::begin(view); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } { auto myrit = KE::search_n(exespace(), view, count, value, args...); const auto mydiff = myrit - KE::begin(view); - EXPECT_EQ(mydiff, stddiff); + ASSERT_EQ(mydiff, stddiff); } Kokkos::fence(); @@ -297,12 +297,6 @@ void run_all_scenarios() { } TEST(std_algorithms_non_mod_seq_ops, search_n) { -#if defined(KOKKOS_ENABLE_CUDA) && \ - defined(KOKKOS_COMPILER_NVHPC) // FIXME_NVHPC - if constexpr (std::is_same_v) { - GTEST_SKIP() << "FIXME wrong result"; - } -#endif run_all_scenarios(); run_all_scenarios(); } diff --git a/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp b/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp index 8e4ced96358..0b5fe9216ea 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsShiftLeft.cpp @@ -103,12 +103,12 @@ void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host, // make sure results match const auto my_diff = result_it - KE::begin(view); const auto std_diff = std_rit - KE::begin(data_view_host); - EXPECT_EQ(my_diff, std_diff); + ASSERT_EQ(my_diff, std_diff); // check views match auto view_h = create_host_space_copy(view); for (std::size_t i = 0; i < (std::size_t)my_diff; ++i) { - EXPECT_EQ(view_h(i), data_view_host[i]); + ASSERT_EQ(view_h(i), data_view_host[i]); // std::cout << "i= " << i << " " // << "mine: " << view_h(i) << " " // << "std: " << data_view_host(i) diff --git a/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp b/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp index a1614be027b..8e4ae943759 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsShiftRight.cpp @@ -101,14 +101,14 @@ void verify_data(ResultIt result_it, ViewType view, ViewHostType data_view_host, // make sure results match const auto my_diff = KE::end(view) - result_it; const auto std_diff = KE::end(data_view_host) - std_rit; - EXPECT_EQ(my_diff, std_diff); + ASSERT_EQ(my_diff, std_diff); // check views match auto view_h = create_host_space_copy(view); auto it1 = KE::cbegin(view_h); auto it2 = KE::cbegin(data_view_host); for (std::size_t i = 0; i < (std::size_t)my_diff; ++i) { - EXPECT_EQ(it1[i], it2[i]); + ASSERT_EQ(it1[i], it2[i]); // std::cout << "i= " << i << " " // << "mine: " << it1[i] << " " // << "std: " << it2[i] diff --git a/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp b/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp index 70c04dbafa2..75525b3b0f9 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp @@ -165,7 +165,7 @@ void verify_data(ViewType1 data_view, // contains data // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same::value) { - EXPECT_EQ(gold_h(i), test_view_h(i)); + ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = std::abs(gold_h(i) - test_view_h(i)); if (error > 1e-10) { @@ -221,7 +221,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, auto r = KE::transform_exclusive_scan( exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop, uop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -230,7 +230,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, auto r = KE::transform_exclusive_scan( "label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), init_value, bop, uop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -238,7 +238,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, fill_zero(view_dest); auto r = KE::transform_exclusive_scan(exespace(), view_from, view_dest, init_value, bop, uop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -246,7 +246,7 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value, fill_zero(view_dest); auto r = KE::transform_exclusive_scan("label", exespace(), view_from, view_dest, init_value, bop, uop); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, init_value, bop, uop); } @@ -279,6 +279,59 @@ TEST(std_algorithms_numeric_ops_test, transform_exclusive_scan) { } #endif +template +struct MultiplyFunctor { + KOKKOS_INLINE_FUNCTION + ValueType operator()(const ValueType& a, const ValueType& b) const { + return (a * b); + } +}; + +TEST(std_algorithms_numeric_ops_test, transform_exclusive_scan_functor) { + int dummy = 0; + using view_type = Kokkos::View; + view_type dummy_view("dummy_view", 0); + using unary_op_type = + Kokkos::Experimental::Impl::StdNumericScanIdentityReferenceUnaryFunctor< + int>; + using functor_type = + Kokkos::Experimental::Impl::TransformExclusiveScanFunctor< + exespace, int, int, view_type, view_type, MultiplyFunctor, + unary_op_type>; + functor_type functor(dummy, dummy_view, dummy_view, {}, {}); + using value_type = functor_type::value_type; + + value_type value1; + functor.init(value1); + ASSERT_EQ(value1.val, 0); + ASSERT_EQ(value1.is_initial, true); + + value_type value2; + value2.val = 1; + value2.is_initial = false; + functor.join(value1, value2); + ASSERT_EQ(value1.val, 1); + ASSERT_EQ(value1.is_initial, false); + + functor.init(value1); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 1); + ASSERT_EQ(value2.is_initial, false); + + functor.init(value2); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 0); + ASSERT_EQ(value2.is_initial, true); + + value1.val = 3; + value1.is_initial = false; + value2.val = 2; + value2.is_initial = false; + functor.join(value2, value1); + ASSERT_EQ(value2.val, 6); + ASSERT_EQ(value2.is_initial, false); +} + } // namespace TransformEScan } // namespace stdalgos } // namespace Test diff --git a/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp b/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp index 80ff8132519..5d122ac5e89 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp @@ -177,7 +177,7 @@ void verify_data(ViewType1 data_view, // contains data // << std::abs(gold_h(i) - test_view_h(i)) << std::endl; if (std::is_same::value) { - EXPECT_EQ(gold_h(i), test_view_h(i)); + ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = std::abs(gold_h(i) - test_view_h(i)); if (error > 1e-10) { @@ -246,7 +246,7 @@ void run_single_scenario(const InfoType& scenario_info, auto r = KE::transform_inclusive_scan(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -255,7 +255,7 @@ void run_single_scenario(const InfoType& scenario_info, auto r = KE::transform_inclusive_scan( "label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -263,7 +263,7 @@ void run_single_scenario(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::transform_inclusive_scan(exespace(), view_from, view_dest, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -271,7 +271,7 @@ void run_single_scenario(const InfoType& scenario_info, fill_zero(view_dest); auto r = KE::transform_inclusive_scan("label", exespace(), view_from, view_dest, args...); - EXPECT_EQ(r, KE::end(view_dest)); + ASSERT_EQ(r, KE::end(view_dest)); verify_data(view_from, view_dest, args...); } @@ -306,6 +306,73 @@ TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan) { } #endif +template +struct MultiplyFunctor { + KOKKOS_INLINE_FUNCTION + ValueType operator()(const ValueType& a, const ValueType& b) const { + return (a * b); + } +}; + +TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan_functor) { + using value_type = KE::Impl::ValueWrapperForNoNeutralElement; + + auto test_lambda = [&](auto& functor) { + value_type value1; + functor.init(value1); + ASSERT_EQ(value1.val, 0); + ASSERT_EQ(value1.is_initial, true); + + value_type value2; + value2.val = 1; + value2.is_initial = false; + functor.join(value1, value2); + ASSERT_EQ(value1.val, 1); + ASSERT_EQ(value1.is_initial, false); + + functor.init(value1); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 1); + ASSERT_EQ(value2.is_initial, false); + + functor.init(value2); + functor.join(value2, value1); + ASSERT_EQ(value2.val, 0); + ASSERT_EQ(value2.is_initial, true); + + value1.val = 3; + value1.is_initial = false; + value2.val = 2; + value2.is_initial = false; + functor.join(value2, value1); + ASSERT_EQ(value2.val, 6); + ASSERT_EQ(value2.is_initial, false); + }; + + int dummy = 0; + using view_type = Kokkos::View; + view_type dummy_view("dummy_view", 0); + using unary_op_type = + KE::Impl::StdNumericScanIdentityReferenceUnaryFunctor; + { + using functor_type = KE::Impl::TransformInclusiveScanNoInitValueFunctor< + exespace, int, int, view_type, view_type, MultiplyFunctor, + unary_op_type>; + functor_type functor(dummy_view, dummy_view, {}, {}); + + test_lambda(functor); + } + + { + using functor_type = KE::Impl::TransformInclusiveScanWithInitValueFunctor< + exespace, int, int, view_type, view_type, MultiplyFunctor, + unary_op_type>; + functor_type functor(dummy_view, dummy_view, {}, {}, dummy); + + test_lambda(functor); + } +} + } // namespace TransformIncScan } // namespace stdalgos } // namespace Test diff --git a/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp b/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp index dab81b8f1e3..6070c1a60d3 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsTransformUnaryOp.cpp @@ -58,7 +58,7 @@ void verify_data(ViewTypeFrom view_from, ViewTypeTest view_test) { create_mirror_view_and_copy(Kokkos::HostSpace(), view_from_dc); for (std::size_t i = 0; i < view_test_h.extent(0); ++i) { - EXPECT_EQ(view_test_h(i), view_from_h(i) + value_type(1)); + ASSERT_EQ(view_test_h(i), view_from_h(i) + value_type(1)); } } @@ -89,7 +89,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto r1 = KE::transform(exespace(), KE::begin(view_from), KE::end(view_from), KE::begin(view_dest), unOp); verify_data(view_from, view_dest); - EXPECT_EQ(r1, KE::end(view_dest)); + ASSERT_EQ(r1, KE::end(view_dest)); } { @@ -98,7 +98,7 @@ void run_single_scenario(const InfoType& scenario_info) { auto r1 = KE::transform("label", exespace(), KE::begin(view_from), KE::end(view_from), KE::begin(view_dest), unOp); verify_data(view_from, view_dest); - EXPECT_EQ(r1, KE::end(view_dest)); + ASSERT_EQ(r1, KE::end(view_dest)); } { @@ -106,7 +106,7 @@ void run_single_scenario(const InfoType& scenario_info) { create_view(Tag{}, view_ext, "transform_uop_dest"); auto r1 = KE::transform(exespace(), view_from, view_dest, unOp); verify_data(view_from, view_dest); - EXPECT_EQ(r1, KE::end(view_dest)); + ASSERT_EQ(r1, KE::end(view_dest)); } { @@ -114,7 +114,7 @@ void run_single_scenario(const InfoType& scenario_info) { create_view(Tag{}, view_ext, "transform_uop_dest"); auto r1 = KE::transform("label", exespace(), view_from, view_dest, unOp); verify_data(view_from, view_dest); - EXPECT_EQ(r1, KE::end(view_dest)); + ASSERT_EQ(r1, KE::end(view_dest)); } Kokkos::fence(); diff --git a/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp b/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp index a810d31d820..9c5ae0cf8a1 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsUnique.cpp @@ -157,7 +157,7 @@ void verify_data(const std::string& name, ResultIt my_result_it, // const auto std_diff = (std::size_t)(std_r - KE::begin(data_v_h)); const auto my_diff = (std::size_t)(my_result_it - KE::begin(view_test)); - EXPECT_EQ(my_diff, std_diff); + ASSERT_EQ(my_diff, std_diff); // // check the data in the view @@ -170,14 +170,14 @@ void verify_data(const std::string& name, ResultIt my_result_it, // << " my = " << view_test_h(i) << " " // << " std = " << data_v_h(i) // << '\n'; - EXPECT_EQ(view_test_h(i), data_v_h(i)); + ASSERT_EQ(view_test_h(i), data_v_h(i)); } if (name == "medium-b") { using value_type = typename ViewType1::value_type; - EXPECT_EQ(my_diff, (std::size_t)2); - EXPECT_EQ(view_test_h(0), (value_type)22); - EXPECT_EQ(view_test_h(1), (value_type)44); + ASSERT_EQ(my_diff, (std::size_t)2); + ASSERT_EQ(view_test_h(0), (value_type)22); + ASSERT_EQ(view_test_h(1), (value_type)44); } } diff --git a/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp b/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp index f609d8517e6..3cf43ad4db8 100644 --- a/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp +++ b/algorithms/unit_tests/TestStdAlgorithmsUniqueCopy.cpp @@ -174,51 +174,51 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, } else if (name == "one-element-a") { - EXPECT_EQ(view_test_h(0), static_cast(1)); + ASSERT_EQ(view_test_h(0), static_cast(1)); } else if (name == "one-element-b") { - EXPECT_EQ(view_test_h(0), static_cast(2)); + ASSERT_EQ(view_test_h(0), static_cast(2)); } else if (name == "two-elements-a") { - EXPECT_EQ(view_test_h(0), static_cast(1)); - EXPECT_EQ(view_test_h(1), static_cast(2)); + ASSERT_EQ(view_test_h(0), static_cast(1)); + ASSERT_EQ(view_test_h(1), static_cast(2)); } else if (name == "two-elements-b") { - EXPECT_EQ(view_test_h(0), static_cast(2)); - EXPECT_EQ(view_test_h(1), static_cast(-1)); + ASSERT_EQ(view_test_h(0), static_cast(2)); + ASSERT_EQ(view_test_h(1), static_cast(-1)); } else if (name == "small-a") { - EXPECT_EQ(view_test_h(0), static_cast(0)); - EXPECT_EQ(view_test_h(1), static_cast(1)); - EXPECT_EQ(view_test_h(2), static_cast(2)); - EXPECT_EQ(view_test_h(3), static_cast(3)); - EXPECT_EQ(view_test_h(4), static_cast(4)); - EXPECT_EQ(view_test_h(5), static_cast(5)); - EXPECT_EQ(view_test_h(6), static_cast(6)); - EXPECT_EQ(view_test_h(7), static_cast(0)); - EXPECT_EQ(view_test_h(8), static_cast(0)); - EXPECT_EQ(view_test_h(9), static_cast(0)); - EXPECT_EQ(view_test_h(10), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(0)); + ASSERT_EQ(view_test_h(1), static_cast(1)); + ASSERT_EQ(view_test_h(2), static_cast(2)); + ASSERT_EQ(view_test_h(3), static_cast(3)); + ASSERT_EQ(view_test_h(4), static_cast(4)); + ASSERT_EQ(view_test_h(5), static_cast(5)); + ASSERT_EQ(view_test_h(6), static_cast(6)); + ASSERT_EQ(view_test_h(7), static_cast(0)); + ASSERT_EQ(view_test_h(8), static_cast(0)); + ASSERT_EQ(view_test_h(9), static_cast(0)); + ASSERT_EQ(view_test_h(10), static_cast(0)); } else if (name == "small-b") { - EXPECT_EQ(view_test_h(0), static_cast(1)); - EXPECT_EQ(view_test_h(1), static_cast(2)); - EXPECT_EQ(view_test_h(2), static_cast(3)); - EXPECT_EQ(view_test_h(3), static_cast(4)); - EXPECT_EQ(view_test_h(4), static_cast(5)); - EXPECT_EQ(view_test_h(5), static_cast(6)); - EXPECT_EQ(view_test_h(6), static_cast(8)); - EXPECT_EQ(view_test_h(7), static_cast(9)); - EXPECT_EQ(view_test_h(8), static_cast(8)); - EXPECT_EQ(view_test_h(9), static_cast(0)); - EXPECT_EQ(view_test_h(10), static_cast(0)); - EXPECT_EQ(view_test_h(11), static_cast(0)); - EXPECT_EQ(view_test_h(12), static_cast(0)); + ASSERT_EQ(view_test_h(0), static_cast(1)); + ASSERT_EQ(view_test_h(1), static_cast(2)); + ASSERT_EQ(view_test_h(2), static_cast(3)); + ASSERT_EQ(view_test_h(3), static_cast(4)); + ASSERT_EQ(view_test_h(4), static_cast(5)); + ASSERT_EQ(view_test_h(5), static_cast(6)); + ASSERT_EQ(view_test_h(6), static_cast(8)); + ASSERT_EQ(view_test_h(7), static_cast(9)); + ASSERT_EQ(view_test_h(8), static_cast(8)); + ASSERT_EQ(view_test_h(9), static_cast(0)); + ASSERT_EQ(view_test_h(10), static_cast(0)); + ASSERT_EQ(view_test_h(11), static_cast(0)); + ASSERT_EQ(view_test_h(12), static_cast(0)); } else if (name == "medium" || name == "large") { @@ -230,7 +230,7 @@ void verify_data(const std::string& name, ViewTypeFrom view_from, (void)std_r; for (std::size_t i = 0; i < view_from_h.extent(0); ++i) { - EXPECT_EQ(view_test_h(i), tmp[i]); + ASSERT_EQ(view_test_h(i), tmp[i]); } } @@ -273,7 +273,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... args) { KE::unique_copy(exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); verify_data(name, view_from, view_dest, args...); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -283,7 +283,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... args) { KE::unique_copy("label", exespace(), KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), args...); verify_data(name, view_from, view_dest, args...); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -291,7 +291,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... args) { create_view(Tag{}, view_ext, "unique_copy_dest"); auto rit = KE::unique_copy(exespace(), view_from, view_dest, args...); verify_data(name, view_from, view_dest, args...); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } { @@ -300,7 +300,7 @@ void run_single_scenario(const InfoType& scenario_info, Args... args) { auto rit = KE::unique_copy("label", exespace(), view_from, view_dest, args...); verify_data(name, view_from, view_dest, args...); - EXPECT_EQ(rit, (KE::begin(view_dest) + n)); + ASSERT_EQ(rit, (KE::begin(view_dest) + n)); } Kokkos::fence(); diff --git a/bin/hpcbind b/bin/hpcbind index cb2af2c4b51..b6db270128c 100755 --- a/bin/hpcbind +++ b/bin/hpcbind @@ -36,8 +36,14 @@ fi ################################################################################ declare -i HPCBIND_HAS_NVIDIA=0 type nvidia-smi >/dev/null 2>&1 -HPCBIND_HAS_NVIDIA=$((!$?)) +HPCBIND_HAS_NVIDIA=$((! $?)) +################################################################################ +# Check if rocm-smi exist +################################################################################ +declare -i HPCBIND_HAS_AMD=0 +type rocm-smi >/dev/null 2>&1 +HPCBIND_HAS_AMD=$((! $?)) ################################################################################ # Get visible gpu @@ -45,11 +51,30 @@ HPCBIND_HAS_NVIDIA=$((!$?)) declare -i NUM_GPUS=0 HPCBIND_VISIBLE_GPUS="" if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then - NUM_GPUS=$(nvidia-smi -L | wc -l); - HPCBIND_HAS_NVIDIA=$((!$?)) + nvidia-smi >/dev/null 2>&1 + HPCBIND_HAS_NVIDIA=$((! $?)) if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then - GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )" - HPCBIND_VISIBLE_GPUS=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}} + NUM_GPUS=$(nvidia-smi -L | wc -l); + HPCBIND_HAS_NVIDIA=$((! $?)) + if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then + GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )" + HPCBIND_VISIBLE_GPUS=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}} + fi + fi +fi + +if [[ ${HPCBIND_HAS_AMD} -eq 1 ]]; then + # rocm-smi doesn't have an error code if there is no hardware + # check for /sys/module/amdgpu/initstate instead + stat /sys/module/amdgpu/initstate >/dev/null 2>&1 + HPCBIND_HAS_AMD=$((! $?)) + if [[ ${HPCBIND_HAS_AMD} -eq 1 ]]; then + NUM_GPUS=$(rocm-smi -i --csv | sed '/^$/d' | tail -n +2 | wc -l); + HPCBIND_HAS_AMD=$((! $?)) + if [[ ${HPCBIND_HAS_AMD} -eq 1 ]]; then + GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )" + HPCBIND_VISIBLE_GPUS=${ROCR_VISIBLE_DEVICES:-${GPU_LIST}} + fi fi fi @@ -80,7 +105,7 @@ elif [[ ! -z "${MV2_COMM_WORLD_RANK}" ]]; then HPCBIND_QUEUE_NAME="mvapich2" HPCBIND_QUEUE_RANK=${MV2_COMM_WORLD_RANK} HPCBIND_QUEUE_SIZE=${MV2_COMM_WORLD_SIZE} -elif [[ ! -z "${SLURM_LOCAL_ID}" ]]; then +elif [[ ! -z "${SLURM_LOCALID}" ]]; then HPCBIND_QUEUE_MAPPING=1 HPCBIND_QUEUE_NAME="slurm" HPCBIND_QUEUE_RANK=${SLURM_PROCID} @@ -101,8 +126,8 @@ fi function show_help { local cmd=$(basename "$0") echo "Usage: ${cmd} -- command ..." - echo " Set the process mask, OMP environment variables and CUDA environment" - echo " variables to sane values if possible. Uses hwloc and nvidia-smi if" + echo " Set the process mask, OMP environment variables and CUDA/ROCm environment" + echo " variables to sane values if possible. Uses hwloc and nvidia-smi/rocm-smi if" echo " available. Will preserve the current process binding, so it is safe" echo " to use with a queuing system or mpiexec." echo "" @@ -116,10 +141,10 @@ function show_help { echo " --distribute-partition=I" echo " Use the i'th partition (zero based)" echo " --visible-gpus= Comma separated list of gpu ids" - echo " Default: CUDA_VISIBLE_DEVICES or all gpus in" + echo " Default: CUDA_VISIBLE_DEVICES/ROCR_VISIBLE_DEVICES or all gpus in" echo " sequential order" echo " --ignore-queue Ignore queue job id when choosing visible GPU and partition" - echo " --no-gpu-mapping Do not set CUDA_VISIBLE_DEVICES" + echo " --no-gpu-mapping Do not set CUDA_VISIBLE_DEVICES/ROCR_VISIBLE_DEVICES" echo " --openmp=M.m Set env variables for the given OpenMP version" echo " Default: 4.0" echo " --openmp-ratio=N/D Ratio of the cpuset to use for OpenMP" @@ -525,13 +550,24 @@ fi ################################################################################ if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then - if [[ ${HPCBIND_QUEUE_MAPPING} -eq 0 ]]; then - declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS)) - export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" - else - declare -i MY_TASK_ID=$((HPCBIND_QUEUE_RANK * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION)) - declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS)) - export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" + if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then + if [[ ${HPCBIND_QUEUE_MAPPING} -eq 0 ]]; then + declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS)) + export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" + else + declare -i MY_TASK_ID=$((HPCBIND_QUEUE_RANK * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION)) + declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS)) + export CUDA_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" + fi + elif [[ ${HPCBIND_HAS_AMD} -eq 1 ]]; then + if [[ ${HPCBIND_QUEUE_MAPPING} -eq 0 ]]; then + declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS)) + export ROCR_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" + else + declare -i MY_TASK_ID=$((HPCBIND_QUEUE_RANK * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION)) + declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS)) + export ROCR_VISIBLE_DEVICES="${HPCBIND_VISIBLE_GPUS[${GPU_ID}]}" + fi fi fi @@ -541,6 +577,7 @@ fi export HPCBIND_HWLOC_VERSION=${HPCBIND_HWLOC_VERSION} export HPCBIND_HAS_HWLOC=${HPCBIND_HAS_HWLOC} export HPCBIND_HAS_NVIDIA=${HPCBIND_HAS_NVIDIA} +export HPCBIND_HAS_AMD=${HPCBIND_HAS_AMD} export HPCBIND_NUM_PUS=${HPCBIND_NUM_PUS} export HPCBIND_NUM_CORES=${HPCBIND_NUM_CORES} export HPCBIND_NUM_NUMAS=${HPCBIND_NUM_NUMAS} @@ -555,8 +592,14 @@ else export HPCBIND_HWLOC_PARENT_CPUSET="${HPCBIND_HWLOC_PARENT_CPUSET}" fi export HPCBIND_HWLOC_PROC_BIND="${HPCBIND_PROC_BIND}" -export HPCBIND_NVIDIA_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING} -export HPCBIND_NVIDIA_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',') +if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then + export HPCBIND_NVIDIA_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING} + export HPCBIND_NVIDIA_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',') +fi +if [[ ${HPCBIND_HAS_AMD} -eq 1 ]]; then + export HPCBIND_AMD_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING} + export HPCBIND_AMD_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',') +fi export HPCBIND_OPENMP_VERSION="${HPCBIND_OPENMP_VERSION}" if [[ "${HPCBIND_QUEUE_NAME}" != "" ]]; then export HPCBIND_QUEUE_RANK=${HPCBIND_QUEUE_RANK} @@ -580,6 +623,9 @@ if [[ ${HPCBIND_TEE} -eq 0 || ${HPCBIND_VERBOSE} -eq 0 ]]; then echo "${TMP_ENV}" | grep -E "^HWLOC_" >> ${HPCBIND_LOG} echo "[CUDA]" >> ${HPCBIND_LOG} echo "${TMP_ENV}" | grep -E "^CUDA_" >> ${HPCBIND_LOG} + echo "[ROCM]" >> ${HPCBIND_LOG} + echo "${TMP_ENV}" | grep -E "^ROCM_" >> ${HPCBIND_LOG} + echo "${TMP_ENV}" | grep -E "^ROCR_" >> ${HPCBIND_LOG} echo "[OPENMP]" >> ${HPCBIND_LOG} echo "${TMP_ENV}" | grep -E "^OMP_" >> ${HPCBIND_LOG} echo "[GOMP] (gcc, g++, and gfortran)" >> ${HPCBIND_LOG} @@ -602,6 +648,9 @@ else echo "${TMP_ENV}" | grep -E "^HWLOC_" > >(tee -a ${HPCBIND_LOG}) echo "[CUDA]" > >(tee -a ${HPCBIND_LOG}) echo "${TMP_ENV}" | grep -E "^CUDA_" > >(tee -a ${HPCBIND_LOG}) + echo "[ROCM]" > >(tee -a ${HPCBIND_LOG}) + echo "${TMP_ENV}" | grep -E "^ROCM_" > >(tee -a ${HPCBIND_LOG}) + echo "${TMP_ENV}" | grep -E "^ROCR_" > >(tee -a ${HPCBIND_LOG}) echo "[OPENMP]" > >(tee -a ${HPCBIND_LOG}) echo "${TMP_ENV}" | grep -E "^OMP_" > >(tee -a ${HPCBIND_LOG}) echo "[GOMP] (gcc, g++, and gfortran)" > >(tee -a ${HPCBIND_LOG}) diff --git a/bin/nvcc_wrapper b/bin/nvcc_wrapper index 0c55651460a..13971481417 100755 --- a/bin/nvcc_wrapper +++ b/bin/nvcc_wrapper @@ -407,7 +407,7 @@ do -Woverloaded-virtual) ;; #strip -Xcompiler because we add it - -Xcompiler) + -Xcompiler|--compiler-options) if [[ $2 != "-o" ]]; then if [ $first_xcompiler_arg -eq 1 ]; then xcompiler_args="$2" diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 23b473ce247..611c089b2e3 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1,10 +1,6 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS - #SubPackageName Directory Class Req/Opt - # - # New Kokkos subpackages: - Core core PS REQUIRED - Containers containers PS OPTIONAL - Algorithms algorithms PS OPTIONAL - Simd simd PT OPTIONAL + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC DLlib + TEST_OPTIONAL_TPLS CUSPARSE ) + +TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib) diff --git a/cmake/KokkosConfigCommon.cmake.in b/cmake/KokkosConfigCommon.cmake.in index bb5ce5ff819..446d12fa5f5 100644 --- a/cmake/KokkosConfigCommon.cmake.in +++ b/cmake/KokkosConfigCommon.cmake.in @@ -6,10 +6,37 @@ SET(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@") SET(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@") SET(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@) -# These are needed by KokkosKernels +# Required to be a TriBITS-compliant external package +IF(NOT TARGET Kokkos::all_libs) + # CMake Error at /lib/cmake/Kokkos/KokkosConfigCommon.cmake:10 (ADD_LIBRARY): + # ADD_LIBRARY cannot create ALIAS target "Kokkos::all_libs" because target + # "Kokkos::kokkos" is imported but not globally visible. + IF(CMAKE_VERSION VERSION_LESS "3.18") + SET_TARGET_PROPERTIES(Kokkos::kokkos PROPERTIES IMPORTED_GLOBAL ON) + ENDIF() + ADD_LIBRARY(Kokkos::all_libs ALIAS Kokkos::kokkos) +ENDIF() + +# Export Kokkos_ENABLE_ for each backend that was enabled. +# NOTE: "Devices" is a little bit of a misnomer here. These are really +# backends, e.g. Kokkos_ENABLE_OPENMP, Kokkos_ENABLE_CUDA, Kokkos_ENABLE_HIP, +# or Kokkos_ENABLE_SYCL. FOREACH(DEV ${Kokkos_DEVICES}) SET(Kokkos_ENABLE_${DEV} ON) ENDFOREACH() +# Export relevant Kokkos_ENABLE