From b6e6eccd5bc64fbfa6f09f997180af4570752e6c Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches <61422851+Beanavil@users.noreply.github.com> Date: Tue, 27 Aug 2024 00:23:19 +0200 Subject: [PATCH] rocFFT and hipFFT examples (part I) (#141) * Resolve "rocFFT callback Example" * feat: add hipFFT plan examples * Resolve "rocFFT multi_gpu Example" * Fixed CMake linting * Added rocFFT callback and multi_gpu VS files * Resolve "Generate VS files from external meta-data repository" * Fixed Markdown linting * Fixed installed target in hipFFT plan examples * Added explicit result scaling * Renamed hipFFT's main programs to use C++ extension * Renamed rocFFT/callback's main program to use HIP extension --------- Co-authored-by: Nick Breed Co-authored-by: Nara Prasetya Co-authored-by: Robin Voetter --- Common/hipfft_utils.hpp | 101 ++++++++ Common/rocfft_utils.hpp | 67 +++++ Libraries/CMakeLists.txt | 2 + Libraries/Makefile | 2 + Libraries/hipFFT/CMakeLists.txt | 52 ++++ Libraries/hipFFT/Makefile | 35 +++ Libraries/hipFFT/README.md | 56 +++++ Libraries/hipFFT/plan_d2z/.gitignore | 1 + Libraries/hipFFT/plan_d2z/CMakeLists.txt | 87 +++++++ Libraries/hipFFT/plan_d2z/Makefile | 67 +++++ Libraries/hipFFT/plan_d2z/README.md | 53 ++++ Libraries/hipFFT/plan_d2z/main.cpp | 142 +++++++++++ Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.sln | 24 ++ .../hipFFT/plan_d2z/plan_d2z_vs2017.vcxproj | 124 ++++++++++ .../plan_d2z/plan_d2z_vs2017.vcxproj.filters | 33 +++ Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.sln | 24 ++ .../hipFFT/plan_d2z/plan_d2z_vs2019.vcxproj | 124 ++++++++++ .../plan_d2z/plan_d2z_vs2019.vcxproj.filters | 33 +++ Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.sln | 24 ++ .../hipFFT/plan_d2z/plan_d2z_vs2022.vcxproj | 124 ++++++++++ .../plan_d2z/plan_d2z_vs2022.vcxproj.filters | 33 +++ Libraries/hipFFT/plan_z2z/.gitignore | 1 + Libraries/hipFFT/plan_z2z/CMakeLists.txt | 87 +++++++ Libraries/hipFFT/plan_z2z/Makefile | 67 +++++ Libraries/hipFFT/plan_z2z/README.md | 49 ++++ Libraries/hipFFT/plan_z2z/main.cpp | 139 +++++++++++ Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.sln | 24 ++ .../hipFFT/plan_z2z/plan_z2z_vs2017.vcxproj | 124 ++++++++++ .../plan_z2z/plan_z2z_vs2017.vcxproj.filters | 33 +++ Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.sln | 24 ++ .../hipFFT/plan_z2z/plan_z2z_vs2019.vcxproj | 124 ++++++++++ .../plan_z2z/plan_z2z_vs2019.vcxproj.filters | 33 +++ Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.sln | 24 ++ .../hipFFT/plan_z2z/plan_z2z_vs2022.vcxproj | 124 ++++++++++ .../plan_z2z/plan_z2z_vs2022.vcxproj.filters | 33 +++ Libraries/rocFFT/CMakeLists.txt | 57 +++++ Libraries/rocFFT/Makefile | 38 +++ Libraries/rocFFT/README.md | 58 +++++ Libraries/rocFFT/callback/.gitignore | 1 + Libraries/rocFFT/callback/CMakeLists.txt | 53 ++++ Libraries/rocFFT/callback/Makefile | 64 +++++ Libraries/rocFFT/callback/README.md | 68 +++++ Libraries/rocFFT/callback/callback_vs2017.sln | 24 ++ .../rocFFT/callback/callback_vs2017.vcxproj | 120 +++++++++ .../callback/callback_vs2017.vcxproj.filters | 30 +++ Libraries/rocFFT/callback/callback_vs2019.sln | 24 ++ .../rocFFT/callback/callback_vs2019.vcxproj | 120 +++++++++ .../callback/callback_vs2019.vcxproj.filters | 30 +++ Libraries/rocFFT/callback/callback_vs2022.sln | 24 ++ .../rocFFT/callback/callback_vs2022.vcxproj | 120 +++++++++ .../callback/callback_vs2022.vcxproj.filters | 30 +++ Libraries/rocFFT/callback/main.hip | 182 ++++++++++++++ Libraries/rocFFT/multi_gpu/.gitignore | 1 + Libraries/rocFFT/multi_gpu/CMakeLists.txt | 48 ++++ Libraries/rocFFT/multi_gpu/Makefile | 61 +++++ Libraries/rocFFT/multi_gpu/README.md | 76 ++++++ Libraries/rocFFT/multi_gpu/main.cpp | 233 ++++++++++++++++++ .../rocFFT/multi_gpu/multi_gpu_vs2017.sln | 24 ++ .../rocFFT/multi_gpu/multi_gpu_vs2017.vcxproj | 121 +++++++++ .../multi_gpu_vs2017.vcxproj.filters | 33 +++ .../rocFFT/multi_gpu/multi_gpu_vs2019.sln | 24 ++ .../rocFFT/multi_gpu/multi_gpu_vs2019.vcxproj | 121 +++++++++ .../multi_gpu_vs2019.vcxproj.filters | 33 +++ .../rocFFT/multi_gpu/multi_gpu_vs2022.sln | 24 ++ .../rocFFT/multi_gpu/multi_gpu_vs2022.vcxproj | 121 +++++++++ .../multi_gpu_vs2022.vcxproj.filters | 33 +++ README.md | 6 + ROCm-Examples-VS2017.sln | 34 +++ ROCm-Examples-VS2019.sln | 34 +++ ROCm-Examples-VS2022.sln | 34 +++ 70 files changed, 4198 insertions(+) create mode 100644 Common/hipfft_utils.hpp create mode 100644 Common/rocfft_utils.hpp create mode 100644 Libraries/hipFFT/CMakeLists.txt create mode 100644 Libraries/hipFFT/Makefile create mode 100644 Libraries/hipFFT/README.md create mode 100644 Libraries/hipFFT/plan_d2z/.gitignore create mode 100644 Libraries/hipFFT/plan_d2z/CMakeLists.txt create mode 100644 Libraries/hipFFT/plan_d2z/Makefile create mode 100644 Libraries/hipFFT/plan_d2z/README.md create mode 100644 Libraries/hipFFT/plan_d2z/main.cpp create mode 100644 Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.sln create mode 100644 Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.vcxproj create mode 100644 Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.vcxproj.filters create mode 100644 Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.sln create mode 100644 Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.vcxproj create mode 100644 Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.vcxproj.filters create mode 100644 Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.sln create mode 100644 Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.vcxproj create mode 100644 Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.vcxproj.filters create mode 100644 Libraries/hipFFT/plan_z2z/.gitignore create mode 100644 Libraries/hipFFT/plan_z2z/CMakeLists.txt create mode 100644 Libraries/hipFFT/plan_z2z/Makefile create mode 100644 Libraries/hipFFT/plan_z2z/README.md create mode 100644 Libraries/hipFFT/plan_z2z/main.cpp create mode 100644 Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.sln create mode 100644 Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.vcxproj create mode 100644 Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.vcxproj.filters create mode 100644 Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.sln create mode 100644 Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.vcxproj create mode 100644 Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.vcxproj.filters create mode 100644 Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.sln create mode 100644 Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.vcxproj create mode 100644 Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.vcxproj.filters create mode 100644 Libraries/rocFFT/CMakeLists.txt create mode 100644 Libraries/rocFFT/Makefile create mode 100644 Libraries/rocFFT/README.md create mode 100644 Libraries/rocFFT/callback/.gitignore create mode 100644 Libraries/rocFFT/callback/CMakeLists.txt create mode 100644 Libraries/rocFFT/callback/Makefile create mode 100644 Libraries/rocFFT/callback/README.md create mode 100644 Libraries/rocFFT/callback/callback_vs2017.sln create mode 100644 Libraries/rocFFT/callback/callback_vs2017.vcxproj create mode 100644 Libraries/rocFFT/callback/callback_vs2017.vcxproj.filters create mode 100644 Libraries/rocFFT/callback/callback_vs2019.sln create mode 100644 Libraries/rocFFT/callback/callback_vs2019.vcxproj create mode 100644 Libraries/rocFFT/callback/callback_vs2019.vcxproj.filters create mode 100644 Libraries/rocFFT/callback/callback_vs2022.sln create mode 100644 Libraries/rocFFT/callback/callback_vs2022.vcxproj create mode 100644 Libraries/rocFFT/callback/callback_vs2022.vcxproj.filters create mode 100644 Libraries/rocFFT/callback/main.hip create mode 100644 Libraries/rocFFT/multi_gpu/.gitignore create mode 100644 Libraries/rocFFT/multi_gpu/CMakeLists.txt create mode 100644 Libraries/rocFFT/multi_gpu/Makefile create mode 100644 Libraries/rocFFT/multi_gpu/README.md create mode 100644 Libraries/rocFFT/multi_gpu/main.cpp create mode 100644 Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.sln create mode 100644 Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.vcxproj create mode 100644 Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.vcxproj.filters create mode 100644 Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.sln create mode 100644 Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.vcxproj create mode 100644 Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.vcxproj.filters create mode 100644 Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.sln create mode 100644 Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.vcxproj create mode 100644 Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.vcxproj.filters diff --git a/Common/hipfft_utils.hpp b/Common/hipfft_utils.hpp new file mode 100644 index 00000000..ebdd8930 --- /dev/null +++ b/Common/hipfft_utils.hpp @@ -0,0 +1,101 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef COMMON_HIPFFT_UTILS_HPP +#define COMMON_HIPFFT_UTILS_HPP + +#include + +#include +#include + +/// \brief Converts a \p hipfftResult_t variable to its correspondent string. +inline const char* hipfftResultToString(hipfftResult_t status) +{ + switch(status) + { + case HIPFFT_SUCCESS: return "HIPFFT_SUCCESS"; + case HIPFFT_INVALID_PLAN: return "HIPFFT_INVALID_PLAN"; + case HIPFFT_ALLOC_FAILED: return "HIPFFT_ALLOC_FAILED"; + case HIPFFT_INVALID_TYPE: return "HIPFFT_INVALID_TYPE"; + case HIPFFT_INVALID_VALUE: return "HIPFFT_INVALID_VALUE"; + case HIPFFT_INTERNAL_ERROR: return "HIPFFT_INTERNAL_ERROR"; + case HIPFFT_EXEC_FAILED: return "HIPFFT_EXEC_FAILED"; + case HIPFFT_SETUP_FAILED: return "HIPFFT_SETUP_FAILED"; + case HIPFFT_INVALID_SIZE: return "HIPFFT_INVALID_SIZE"; + case HIPFFT_UNALIGNED_DATA: return "HIPFFT_UNALIGNED_DATA"; + case HIPFFT_INCOMPLETE_PARAMETER_LIST: return "HIPFFT_INCOMPLETE_PARAMETER_LIST"; + case HIPFFT_INVALID_DEVICE: return "HIPFFT_INVALID_DEVICE"; + case HIPFFT_PARSE_ERROR: return "HIPFFT_PARSE_ERROR"; + case HIPFFT_NO_WORKSPACE: return "HIPFFT_NO_WORKSPACE"; + case HIPFFT_NOT_IMPLEMENTED: return "HIPFFT_NOT_IMPLEMENTED"; + case HIPFFT_NOT_SUPPORTED: return "HIPFFT_NOT_SUPPORTED"; + + // We do use default because we are not in control of these enumeration values. + // Ideally this function is something hipFFT would provide + default: return ""; + } +} + +/// \brief Checks if the provided status code is \p HIPFFT_SUCCESS and if not, +/// prints an error message to the standard error output and terminates the program +/// with an error code. +#define HIPFFT_CHECK(condition) \ + { \ + const hipfftResult status = condition; \ + if(status != HIPFFT_SUCCESS) \ + { \ + std::cerr << "hipFFT error encountered: \"" << hipfftResultToString(status) \ + << "\" at " << __FILE__ << ':' << __LINE__ << std::endl; \ + std::exit(error_exit_code); \ + } \ + } + +/// \brief Prints an {1,2,3}-dimensional array. The last dimension (fastest-index) specified in +/// \p n will be printed horizontally. +template +void print_nd_data(const std::vector data, const std::vector n, const int column_width = 4) +{ + // Note: we want to print the last dimension horizontally (on the x-axis)! + int size_x = n[n.size() - 1]; + int size_y = n.size() > 1 ? n[n.size() - 2] : 1; + int size_z = n.size() > 2 ? n[n.size() - 3] : 1; + for(int z = 0; z < size_z; ++z) + { + for(int y = 0; y < size_y; ++y) + { + for(int x = 0; x < size_x; ++x) + { + auto index = (z * size_y + y) * size_x + x; + std::cout << std::setfill(' ') << std::setw(column_width) << data[index] << " "; + } + std::cout << "\n"; + } + if(z != size_z - 1) + { + std::cout << "\n"; + } + } + std::cout << std::flush; +} + +#endif // COMMON_HIPFFT_UTILS_HPP diff --git a/Common/rocfft_utils.hpp b/Common/rocfft_utils.hpp new file mode 100644 index 00000000..304cb68c --- /dev/null +++ b/Common/rocfft_utils.hpp @@ -0,0 +1,67 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef COMMON_ROCFFT_UTILS_HPP +#define COMMON_ROCFFT_UTILS_HPP + +#include "example_utils.hpp" + +#include + +#include + +/// \brief Converts a \p rocfft_status variable to its correspondent string. +inline const char* rocfftStatusToString(rocfft_status status) +{ + switch(status) + { + case rocfft_status_success: return "rocfft_status_success"; + case rocfft_status_failure: return "rocfft_status_failure"; + case rocfft_status_invalid_arg_value: return "rocfft_status_invalid_arg_value"; + case rocfft_status_invalid_dimensions: return "rocfft_status_invalid_dimensions"; + case rocfft_status_invalid_array_type: return "rocfft_status_invalid_array_type"; + case rocfft_status_invalid_strides: return "rocfft_status_invalid_strides"; + case rocfft_status_invalid_distance: return "rocfft_status_invalid_distance"; + case rocfft_status_invalid_offset: return "rocfft_status_invalid_offset"; + case rocfft_status_invalid_work_buffer: return "rocfft_status_invalid_work_buffer"; + + // We do use default because we are not in control of these enumeration values. + // Ideally this function is something rocFFT would provide + default: return ""; + } +} + +/// \brief Checks if the provided status code is \p rocfft_status_success and if not, +/// prints an error message to the standard error output and terminates the program +/// with an error code. +#define ROCFFT_CHECK(condition) \ + { \ + const rocfft_status status = condition; \ + if(status != rocfft_status_success) \ + { \ + std::cerr << "rocFFT error encountered: \"" << rocfftStatusToString(status) \ + << "\" at " << __FILE__ << ':' << __LINE__ << std::endl; \ + std::exit(error_exit_code); \ + } \ + } + +#endif // COMMON_ROCFFT_UTILS_HPP diff --git a/Libraries/CMakeLists.txt b/Libraries/CMakeLists.txt index 1f31d69c..0d779b9a 100644 --- a/Libraries/CMakeLists.txt +++ b/Libraries/CMakeLists.txt @@ -31,8 +31,10 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${folder_bin}) if(NOT (CMAKE_SYSTEM_NAME MATCHES Windows AND "${GPU_RUNTIME}" STREQUAL "CUDA")) add_subdirectory(hipBLAS) add_subdirectory(hipCUB) + add_subdirectory(hipFFT) add_subdirectory(hipSOLVER) add_subdirectory(rocBLAS) + add_subdirectory(rocFFT) add_subdirectory(rocPRIM) add_subdirectory(rocRAND) add_subdirectory(rocSOLVER) diff --git a/Libraries/Makefile b/Libraries/Makefile index 5ea07fbc..9c0ce1db 100644 --- a/Libraries/Makefile +++ b/Libraries/Makefile @@ -23,12 +23,14 @@ LIBRARIES := \ hipBLAS \ hipCUB \ + hipFFT \ hipSOLVER \ rocRAND ifneq ($(GPU_RUNTIME), CUDA) LIBRARIES += \ rocBLAS \ + rocFFT \ rocPRIM \ rocSOLVER \ rocSPARSE \ diff --git a/Libraries/hipFFT/CMakeLists.txt b/Libraries/hipFFT/CMakeLists.txt new file mode 100644 index 00000000..123e0a6d --- /dev/null +++ b/Libraries/hipFFT/CMakeLists.txt @@ -0,0 +1,52 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(hipFFT_examples LANGUAGES CXX) + +file(RELATIVE_PATH folder_bin ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${folder_bin}) + +if(WIN32) + set(ROCM_ROOT + "$ENV{HIP_PATH}" + CACHE PATH + "Root directory of the ROCm installation" + ) +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(hipfft) +if(NOT hipfft_FOUND) + message(STATUS "hipFFT could not be found, not building hipFFT examples") + return() +endif() + +add_subdirectory(plan_d2z) +add_subdirectory(plan_z2z) diff --git a/Libraries/hipFFT/Makefile b/Libraries/hipFFT/Makefile new file mode 100644 index 00000000..ddd62f31 --- /dev/null +++ b/Libraries/hipFFT/Makefile @@ -0,0 +1,35 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +EXAMPLES := \ + plan_d2z \ + plan_z2z + +all: $(EXAMPLES) + +clean: TARGET=clean +clean: all + +$(EXAMPLES): + $(MAKE) -C $@ $(TARGET) + +.PHONY: all clean $(EXAMPLES) diff --git a/Libraries/hipFFT/README.md b/Libraries/hipFFT/README.md new file mode 100644 index 00000000..a0c2ad7b --- /dev/null +++ b/Libraries/hipFFT/README.md @@ -0,0 +1,56 @@ +# hipFFT Examples + +## Summary + +The examples in this subdirectory showcase the functionality of [hipFFT](https://github.com/ROCm/hipFFT), a (Fast Fourier Transform) FFT marshalling library for rocFFT and cuFFT. + +## Prerequisites + +### Linux + +- [CMake](https://cmake.org/download/) (at least version 3.21) +- OR GNU Make - available via the distribution's package manager +- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) +- [hipFFT](https://github.com/ROCm/hipFFT) + +### Windows + +- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload +- ROCm toolchain for Windows (No public release yet) + - The Visual Studio ROCm extension needs to be installed to build with the solution files. +- [hipFFT](https://github.com/ROCm/hipFFT) +- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21) +- [Ninja](https://ninja-build.org/) (optional, to build with CMake) + +## Building + +### Linux + +Make sure that the dependencies are installed, or use one of the [provided Dockerfiles](../../Dockerfiles/) to build and run the examples in a containerized environment. + +#### Using CMake + +All examples in the `hipFFT` subdirectory can either be built by a single CMake project or be built independently. + +- `$ cd Libraries/hipFFT` +- `$ cmake -S . -B build` (on ROCm) or `$ cmake -S . -B build -D GPU_RUNTIME=CUDA` (on CUDA) +- `$ cmake --build build` + +#### Using Make + +All examples can be built by a single invocation to Make or be built independently. + +- `$ cd Libraries/hipFFT` +- `$ make` (on ROCm) or `$ make GPU_RUNTIME=CUDA` (on CUDA) + +### Windows + +#### Visual Studio + +Visual Studio solution files are available for the individual examples. To build all examples for hipFFT open the top level solution file [ROCm-Examples-VS2019.sln](../../ROCm-Examples-VS2019.sln) and filter for hipFFT. + +For more detailed build instructions refer to the top level [README.md](../../README.md#visual-studio). + +#### CMake + +All examples in the `hipFFT` subdirectory can either be built by a single CMake project or be built independently. For build instructions refer to the top-level [README.md](../../README.md#cmake-2). diff --git a/Libraries/hipFFT/plan_d2z/.gitignore b/Libraries/hipFFT/plan_d2z/.gitignore new file mode 100644 index 00000000..7691de86 --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/.gitignore @@ -0,0 +1 @@ +hipfft_plan_d2z diff --git a/Libraries/hipFFT/plan_d2z/CMakeLists.txt b/Libraries/hipFFT/plan_d2z/CMakeLists.txt new file mode 100644 index 00000000..75e7ad67 --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/CMakeLists.txt @@ -0,0 +1,87 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +set(example_name hipfft_plan_d2z) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(hipfft_plan_d2z LANGUAGES CXX) + +set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA") +set(GPU_RUNTIMES "HIP" "CUDA") +set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES}) + +if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES) + message( + FATAL_ERROR + "Only the following values are accepted for GPU_RUNTIME: ${GPU_RUNTIMES}" + ) +endif() + +enable_language(${GPU_RUNTIME}) +set(CMAKE_${GPU_RUNTIME}_STANDARD 17) +set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF) +set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON) + +if(WIN32) + set(ROCM_ROOT + "$ENV{HIP_PATH}" + CACHE PATH + "Root directory of the ROCm installation" + ) +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +# Duplicate 'find_package(hipfft)' calls do not convert to 'nop' properly. +if(NOT hipfft_FOUND) + find_package(hipfft REQUIRED) +endif() + +add_executable(${example_name} main.cpp) +# Make example runnable using ctest +add_test(NAME ${example_name} COMMAND ${example_name}) + +target_link_libraries(${example_name} PRIVATE hip::hipfft) + +target_include_directories(${example_name} PRIVATE "../../../Common") +set_source_files_properties(main.cpp PROPERTIES LANGUAGE ${GPU_RUNTIME}) + +if(WIN32) + target_compile_definitions(${example_name} PRIVATE WIN32) +endif() + +install(TARGETS ${example_name}) +if(CMAKE_SYSTEM_NAME MATCHES Windows) + install(IMPORTED_RUNTIME_ARTIFACTS hip::hipfft) + if(GPU_RUNTIME STREQUAL "HIP") + find_package(rocfft REQUIRED) + install(IMPORTED_RUNTIME_ARTIFACTS roc::rocfft) + elseif(GPU_RUNTIME STREQUAL "CUDA") + find_package(CUDAToolkit REQUIRED) + install(IMPORTED_RUNTIME_ARTIFACTS CUDA::cufft) + endif() +endif() diff --git a/Libraries/hipFFT/plan_d2z/Makefile b/Libraries/hipFFT/plan_d2z/Makefile new file mode 100644 index 00000000..36690008 --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/Makefile @@ -0,0 +1,67 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +EXAMPLE := hipfft_plan_d2z +COMMON_INCLUDE_DIR := ../../../Common +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +CUDA_INSTALL_DIR := /usr/local/cuda + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +HIPCUB_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc +CUDACXX ?= $(CUDA_INSTALL_DIR)/bin/nvcc + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(HIPCUB_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lhipfft + +ifeq ($(GPU_RUNTIME), CUDA) + ICXXFLAGS += -x cu + ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR) -D__HIP_PLATFORM_NVIDIA__ + COMPILER := $(CUDACXX) +else ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + ICPPFLAGS += -D__HIP_PLATFORM_AMD__ + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +$(EXAMPLE): main.cpp $(COMMON_INCLUDE_DIR)/example_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/hipFFT/plan_d2z/README.md b/Libraries/hipFFT/plan_d2z/README.md new file mode 100644 index 00000000..afbbf916 --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/README.md @@ -0,0 +1,53 @@ +# hipFFT Real to Complex FFT Plan Example + +## Description + +This example showcases how to execute a 1, 2, and 3-dimensional real-to-complex fast Fourier +transform (FFT) on the GPU. There are only slight differences in planning and executing FFT on +different dimensional data. + +### Application flow + +1. Define the various input parameters. +2. Generate the input data on host. +3. Allocate memory on device for the input and output. +4. Copy the input data from host to device. +5. Create the FFT plan. +6. Execute the plan. +7. Allocate memory on host for the output. +8. Copy output data from device to host. +9. Print the output +10. Clean up. + +## Key APIs and Concepts + +### hipFFT + +- `hipfftPlan[n]d` is used to create a plan for a $n \in \{ 1, 2, 3 \}$-dimensional FFT. +- The output size of a real-to-complex FFT is dependent on the dimensionality: + - 1D: $\lfloor\frac{x}{2}\rfloor + 1$ + - 2D: $\big(x, \lfloor\frac{y}{2}\rfloor + 1\big)$ + - 3D: $\big(x, y, \lfloor\frac{z}{2}\rfloor + 1\big)$ +- The `hipfftHandle` needs to be created with `hipfftCreate(...)` before use and destroyed with `hipfftDestroy(...)` after use. + +## Used API surface + +### hipFFT + +- `hipfftCreate` +- `hipfftDestroy` +- `hipfftDoubleComplex` +- `hipfftExecD2Z` +- `hipfftHandle` +- `hipfftPlan1d` +- `hipfftPlan2d` +- `hipfftPlan3d` +- `hipfftType::HIPFFT_D2Z` + +### HIP runtime + +- `hipFree` +- `hipMalloc` +- `hipMemcpy` +- `hipMemcpyDeviceToHost` +- `hipMemcpyHostToDevice` diff --git a/Libraries/hipFFT/plan_d2z/main.cpp b/Libraries/hipFFT/plan_d2z/main.cpp new file mode 100644 index 00000000..a1dc6183 --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/main.cpp @@ -0,0 +1,142 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "cmdparser.hpp" +#include "example_utils.hpp" +#include "hipfft_utils.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include + +void fft_example(const int dimension, const int size = 8) +{ + using input_t = double; + using output_t = std::complex; + + std::cout << "hipFFT " << dimension << "D double-precision real to complex transform." + << std::endl; + + // 1. Define inputs. + + // 1a. Input dimensions, ordered as { Nx, Ny, Nz } + std::vector n(dimension); + std::fill(n.begin(), n.end(), size); + + // 1b. Output dimensions + std::vector m(n); + m.back() = (n.back() / 2 + 1); + + // 1c. Calculate size of arrays + const int m_total = std::accumulate(m.begin(), m.end(), 1, std::multiplies{}); + const int n_total = std::accumulate(n.begin(), n.end(), 1, std::multiplies{}); + + // 2. Generate input and print + std::vector input(n_total); + + std::mt19937 generator{}; + std::uniform_real_distribution distribution{}; + std::generate(input.begin(), input.end(), [&]() { return distribution(generator); }); + + std::cout << "Input:\n" << std::setprecision(3); + print_nd_data(input, n, 6); + + // 3. Alocate device memory + input_t* d_input; + hipfftDoubleComplex* d_output; + + HIP_CHECK(hipMalloc(&d_input, n_total * sizeof(*d_input))); + HIP_CHECK(hipMalloc(&d_output, m_total * sizeof(*d_output))); + + // 4. Copy host to device + HIP_CHECK(hipMemcpy(d_input, input.data(), n_total * sizeof(*d_input), hipMemcpyHostToDevice)); + + // 5. Create FFT plan + + // 5a. Allocate plan handle + hipfftHandle plan; + HIPFFT_CHECK(hipfftCreate(&plan)); + + // 5b. Create {1, 2, 3}-dimensional plan + switch(dimension) + { + case 1: HIPFFT_CHECK(hipfftPlan1d(&plan, n[0], hipfftType::HIPFFT_D2Z, 1)); break; + case 2: HIPFFT_CHECK(hipfftPlan2d(&plan, n[0], n[1], hipfftType::HIPFFT_D2Z)); break; + case 3: HIPFFT_CHECK(hipfftPlan3d(&plan, n[0], n[1], n[2], hipfftType::HIPFFT_D2Z)); break; + } + + // 6. Execute plan + HIPFFT_CHECK(hipfftExecD2Z(plan, d_input, d_output)); + + // 7. Allocate output on host + std::vector output(m_total); + + // 8. Copy device to host + HIP_CHECK( + hipMemcpy(output.data(), d_output, m_total * sizeof(*d_output), hipMemcpyDeviceToHost)); + + // 9. Print output + std::cout << "Output:\n" << std::setprecision(3); + print_nd_data(output, m, 16); + + // 10. Clean up + HIPFFT_CHECK(hipfftDestroy(plan)); + HIP_CHECK(hipFree(d_input)); + HIP_CHECK(hipFree(d_output)); +} + +int main(const int argc, const char* argv[]) +{ + cli::Parser parser(argc, argv); + parser.set_optional>("d", + "dimensions", + {1, 2, 3}, + "number of dimensions. must be {1, 2, 3}"); + parser.set_optional("n", "size", 4, "size of each dimension"); + parser.run_and_exit_if_error(); + + const std::vector dimensions = parser.get>("d"); + const int size = parser.get("n"); + + // Verify passed dimensions + for(const int dimension : dimensions) + { + if(dimension < 1 || dimension > 3) + { + std::cout << "Only 1D, 2D, and 3D FFT transformations are supported!" << std::endl; + return -1; + } + } + + for(const int dimension : dimensions) + { + fft_example(dimension, size); + } + + return 0; +} diff --git a/Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.sln b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.sln new file mode 100644 index 00000000..71cc0003 --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.33026.149 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_d2z_vs2017", "plan_d2z_vs2017.vcxproj", "{AF790582-9E56-4CAA-BBD0-9C9F5B99FDEE}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {AF790582-9E56-4CAA-BBD0-9C9F5B99FDEE}.Debug|x64.ActiveCfg = Debug|x64 + {AF790582-9E56-4CAA-BBD0-9C9F5B99FDEE}.Debug|x64.Build.0 = Debug|x64 + {AF790582-9E56-4CAA-BBD0-9C9F5B99FDEE}.Release|x64.ActiveCfg = Release|x64 + {AF790582-9E56-4CAA-BBD0-9C9F5B99FDEE}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {1FF4DC24-9C67-4A78-87D1-C1572BCE909D} + EndGlobalSection +EndGlobal diff --git a/Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.vcxproj b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.vcxproj new file mode 100644 index 00000000..2f7180db --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.vcxproj @@ -0,0 +1,124 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 15.0 + {af790582-9e56-4caa-bbd0-9c9f5b99fdee} + Win32Proj + plan_d2z_vs2017 + $(LatestTargetPlatformVersion) + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + hipfft_$(ProjectName) + + + false + hipfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + \ No newline at end of file diff --git a/Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.vcxproj.filters b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.vcxproj.filters new file mode 100644 index 00000000..70adfc19 --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2017.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {ee485f7d-1bb9-460e-add6-735302715dd0} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {d586a49a-4c83-4516-8f7d-ab580dea88a9} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {83e15b95-4045-4acf-9068-abb641be00e7} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.sln b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.sln new file mode 100644 index 00000000..58cc5362 --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.32630.194 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_d2z_vs2019", "plan_d2z_vs2019.vcxproj", "{401073F8-4631-442C-A62E-F90C704AFF1C}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {401073F8-4631-442C-A62E-F90C704AFF1C}.Debug|x64.ActiveCfg = Debug|x64 + {401073F8-4631-442C-A62E-F90C704AFF1C}.Debug|x64.Build.0 = Debug|x64 + {401073F8-4631-442C-A62E-F90C704AFF1C}.Release|x64.ActiveCfg = Release|x64 + {401073F8-4631-442C-A62E-F90C704AFF1C}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {1EEAB13F-3305-4D59-BC31-034F4FAFBABC} + EndGlobalSection +EndGlobal diff --git a/Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.vcxproj b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.vcxproj new file mode 100644 index 00000000..78070521 --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.vcxproj @@ -0,0 +1,124 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 16.0 + {401073f8-4631-442c-a62e-f90c704aff1c} + Win32Proj + plan_d2z_vs2019 + $(LatestTargetPlatformVersion) + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + hipfft_$(ProjectName) + + + false + hipfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + diff --git a/Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.vcxproj.filters b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.vcxproj.filters new file mode 100644 index 00000000..c2a1e77f --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2019.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {36eb0dee-8b71-4514-9a24-fa80c8cf71b0} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {8c667cd4-9c53-4f5f-94f0-e909b8958a07} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {e8b245df-366d-4dc4-a18a-9a37cc5aefb6} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.sln b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.sln new file mode 100644 index 00000000..737d87fc --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.4.33213.308 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_d2z_vs2022", "plan_d2z_vs2022.vcxproj", "{F68640C9-872F-4ECA-8D29-54C4E83AD24E}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F68640C9-872F-4ECA-8D29-54C4E83AD24E}.Debug|x64.ActiveCfg = Debug|x64 + {F68640C9-872F-4ECA-8D29-54C4E83AD24E}.Debug|x64.Build.0 = Debug|x64 + {F68640C9-872F-4ECA-8D29-54C4E83AD24E}.Release|x64.ActiveCfg = Release|x64 + {F68640C9-872F-4ECA-8D29-54C4E83AD24E}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {2E9C2E42-FEBF-4F86-88AA-B07CC6F017FE} + EndGlobalSection +EndGlobal diff --git a/Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.vcxproj b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.vcxproj new file mode 100644 index 00000000..8470bd2d --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.vcxproj @@ -0,0 +1,124 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 17.0 + {f68640c9-872f-4eca-8d29-54c4e83ad24e} + Win32Proj + plan_d2z_vs2022 + $(LatestTargetPlatformVersion) + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + hipfft_$(ProjectName) + + + false + hipfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + diff --git a/Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.vcxproj.filters b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.vcxproj.filters new file mode 100644 index 00000000..4176309e --- /dev/null +++ b/Libraries/hipFFT/plan_d2z/plan_d2z_vs2022.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {e4e0d53c-6451-4d93-85f1-e301cf6ae228} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {e973eba2-14e2-4ee9-86ba-a89d782d4704} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {bddb4b88-3366-4909-abd7-31f3a768e176} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Libraries/hipFFT/plan_z2z/.gitignore b/Libraries/hipFFT/plan_z2z/.gitignore new file mode 100644 index 00000000..7b206c38 --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/.gitignore @@ -0,0 +1 @@ +hipfft_plan_z2z diff --git a/Libraries/hipFFT/plan_z2z/CMakeLists.txt b/Libraries/hipFFT/plan_z2z/CMakeLists.txt new file mode 100644 index 00000000..b5b77849 --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/CMakeLists.txt @@ -0,0 +1,87 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +set(example_name hipfft_plan_z2z) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(hipfft_plan_z2z LANGUAGES CXX) + +set(GPU_RUNTIME "HIP" CACHE STRING "Switches between HIP and CUDA") +set(GPU_RUNTIMES "HIP" "CUDA") +set_property(CACHE GPU_RUNTIME PROPERTY STRINGS ${GPU_RUNTIMES}) + +if(NOT "${GPU_RUNTIME}" IN_LIST GPU_RUNTIMES) + message( + FATAL_ERROR + "Only the following values are accepted for GPU_RUNTIME: ${GPU_RUNTIMES}" + ) +endif() + +enable_language(${GPU_RUNTIME}) +set(CMAKE_${GPU_RUNTIME}_STANDARD 17) +set(CMAKE_${GPU_RUNTIME}_EXTENSIONS OFF) +set(CMAKE_${GPU_RUNTIME}_STANDARD_REQUIRED ON) + +if(WIN32) + set(ROCM_ROOT + "$ENV{HIP_PATH}" + CACHE PATH + "Root directory of the ROCm installation" + ) +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +# Duplicate 'find_package(hipfft)' calls do not convert to 'nop' properly. +if(NOT hipfft_FOUND) + find_package(hipfft REQUIRED) +endif() + +add_executable(${example_name} main.cpp) +# Make example runnable using ctest +add_test(NAME ${example_name} COMMAND ${example_name}) + +target_link_libraries(${example_name} PRIVATE hip::hipfft) + +target_include_directories(${example_name} PRIVATE "../../../Common") +set_source_files_properties(main.cpp PROPERTIES LANGUAGE ${GPU_RUNTIME}) + +if(WIN32) + target_compile_definitions(${example_name} PRIVATE WIN32) +endif() + +install(TARGETS ${example_name}) +if(CMAKE_SYSTEM_NAME MATCHES Windows) + install(IMPORTED_RUNTIME_ARTIFACTS hip::hipfft) + if(GPU_RUNTIME STREQUAL "HIP") + find_package(rocfft REQUIRED) + install(IMPORTED_RUNTIME_ARTIFACTS roc::rocfft) + elseif(GPU_RUNTIME STREQUAL "CUDA") + find_package(CUDAToolkit REQUIRED) + install(IMPORTED_RUNTIME_ARTIFACTS CUDA::cufft) + endif() +endif() diff --git a/Libraries/hipFFT/plan_z2z/Makefile b/Libraries/hipFFT/plan_z2z/Makefile new file mode 100644 index 00000000..0ccc454e --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/Makefile @@ -0,0 +1,67 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +EXAMPLE := hipfft_plan_z2z +COMMON_INCLUDE_DIR := ../../../Common +GPU_RUNTIME := HIP + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +CUDA_INSTALL_DIR := /usr/local/cuda + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +HIPCUB_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc +CUDACXX ?= $(CUDA_INSTALL_DIR)/bin/nvcc + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(HIPCUB_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lhipfft + +ifeq ($(GPU_RUNTIME), CUDA) + ICXXFLAGS += -x cu + ICPPFLAGS += -isystem $(HIP_INCLUDE_DIR) -D__HIP_PLATFORM_NVIDIA__ + COMPILER := $(CUDACXX) +else ifeq ($(GPU_RUNTIME), HIP) + CXXFLAGS ?= -Wall -Wextra + ICPPFLAGS += -D__HIP_PLATFORM_AMD__ + COMPILER := $(HIPCXX) +else + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be either CUDA or HIP) +endif + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +$(EXAMPLE): main.cpp $(COMMON_INCLUDE_DIR)/example_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/hipFFT/plan_z2z/README.md b/Libraries/hipFFT/plan_z2z/README.md new file mode 100644 index 00000000..5409a81e --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/README.md @@ -0,0 +1,49 @@ +# hipFFT Complex to Complex FFT Plan Example + +## Description + +This example showcases how to execute a 1, 2, and 3-dimensional complex-to-complex fast Fourier +transform (FFT) on the GPU. There are only slight differences in planning and executing FFT on +different dimensional data. + +### Application flow + +1. Define the various input parameters. +2. Generate the input data on host. +3. Allocate memory on device for the input and output. +4. Copy the input data from host to device. +5. Create the FFT plan. +6. Execute the plan. +7. Allocate memory on host for the output. +8. Copy output data from device to host. +9. Print the output +10. Clean up. + +## Key APIs and Concepts + +### hipFFT + +- `hipfftPlan[n]d` is used to create a plan for a $n \in \{ 1, 2, 3 \}$-dimensional FFT. +- The `hipfftHandle` needs to be created with `hipfftCreate(...)` before use and destroyed with `hipfftDestroy(...)` after use. + +## Used API surface + +### hipFFT + +- `hipfftCreate` +- `hipfftDestroy` +- `hipfftDoubleComplex` +- `hipfftExecZ2Z` +- `hipfftHandle` +- `hipfftPlan1d` +- `hipfftPlan2d` +- `hipfftPlan3d` +- `hipfftType::HIPFFT_Z2Z` + +### HIP runtime + +- `hipFree` +- `hipMalloc` +- `hipMemcpy` +- `hipMemcpyDeviceToHost` +- `hipMemcpyHostToDevice` diff --git a/Libraries/hipFFT/plan_z2z/main.cpp b/Libraries/hipFFT/plan_z2z/main.cpp new file mode 100644 index 00000000..e21e079a --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/main.cpp @@ -0,0 +1,139 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "cmdparser.hpp" +#include "example_utils.hpp" +#include "hipfft_utils.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include + +void fft_example(const int dimension, const int size = 4, const int direction = HIPFFT_FORWARD) +{ + using input_t = std::complex; + using output_t = std::complex; + + std::cout << "hipFFT " << dimension << "D double-precision real to complex transform." + << std::endl; + + // 1. Define input dimensions, ordered as { Nx, Ny, Nz } + std::vector n(dimension); + std::fill(n.begin(), n.end(), size); + + // 1c. Calculate size of arrays + const int n_total = std::accumulate(n.begin(), n.end(), 1, std::multiplies{}); + + // 2. Generate input and print + std::vector input(n_total); + + std::mt19937 generator{}; + std::uniform_real_distribution distribution{}; + std::generate(input.begin(), + input.end(), + [&]() { + return input_t{distribution(generator), distribution(generator)}; + }); + + std::cout << "Input:\n" << std::setprecision(3); + print_nd_data(input, n, 16); + + // 3. Alocate device memory + hipfftDoubleComplex* d_input; + hipfftDoubleComplex* d_output; + + HIP_CHECK(hipMalloc(&d_input, n_total * sizeof(*d_input))); + HIP_CHECK(hipMalloc(&d_output, n_total * sizeof(*d_output))); + + // 4. Copy host to device + HIP_CHECK(hipMemcpy(d_input, input.data(), n_total * sizeof(*d_input), hipMemcpyHostToDevice)); + + // 5. Create FFT plan + + // 5a. Allocate plan handle + hipfftHandle plan; + HIPFFT_CHECK(hipfftCreate(&plan)); + + // 5b. Create {1, 2, 3}-dimensional plan + switch(dimension) + { + case 1: HIPFFT_CHECK(hipfftPlan1d(&plan, n[0], hipfftType::HIPFFT_Z2Z, 1)); break; + case 2: HIPFFT_CHECK(hipfftPlan2d(&plan, n[0], n[1], hipfftType::HIPFFT_Z2Z)); break; + case 3: HIPFFT_CHECK(hipfftPlan3d(&plan, n[0], n[1], n[2], hipfftType::HIPFFT_Z2Z)); break; + } + + // 6. Execute plan + HIPFFT_CHECK(hipfftExecZ2Z(plan, d_input, d_output, direction)); + + // 7. Allocate output on host + std::vector output(n_total); + + // 8. Copy device to host + HIP_CHECK( + hipMemcpy(output.data(), d_output, n_total * sizeof(*d_output), hipMemcpyDeviceToHost)); + + // 9. Print output + std::cout << "Output:\n" << std::setprecision(3); + print_nd_data(output, n, 16); + + // 10. Clean up + HIPFFT_CHECK(hipfftDestroy(plan)); + HIP_CHECK(hipFree(d_input)); + HIP_CHECK(hipFree(d_output)); +} + +int main(const int argc, const char* argv[]) +{ + cli::Parser parser(argc, argv); + parser.set_optional>("d", + "dimensions", + {1, 2, 3}, + "number of dimensions. must be {1, 2, 3}"); + parser.set_optional("n", "size", 4, "size of each dimension"); + parser.run_and_exit_if_error(); + + const std::vector dimensions = parser.get>("d"); + const int size = parser.get("n"); + + // Verify passed dimensions + for(const int dimension : dimensions) + { + if(dimension < 1 || dimension > 3) + { + std::cout << "Only 1D, 2D, and 3D FFT transformations are supported!" << std::endl; + return -1; + } + } + + for(const int dimension : dimensions) + { + fft_example(dimension, size); + } + + return 0; +} diff --git a/Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.sln b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.sln new file mode 100644 index 00000000..11274ddc --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.33026.149 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_z2z_vs2017", "plan_z2z_vs2017.vcxproj", "{790D456B-B80A-479D-B5D2-145F4363F4F3}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {790D456B-B80A-479D-B5D2-145F4363F4F3}.Debug|x64.ActiveCfg = Debug|x64 + {790D456B-B80A-479D-B5D2-145F4363F4F3}.Debug|x64.Build.0 = Debug|x64 + {790D456B-B80A-479D-B5D2-145F4363F4F3}.Release|x64.ActiveCfg = Release|x64 + {790D456B-B80A-479D-B5D2-145F4363F4F3}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {19841116-4A06-4AA5-8183-663086C0D08D} + EndGlobalSection +EndGlobal diff --git a/Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.vcxproj b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.vcxproj new file mode 100644 index 00000000..0925c383 --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.vcxproj @@ -0,0 +1,124 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 15.0 + {790d456b-b80a-479d-b5d2-145f4363f4f3} + Win32Proj + plan_z2z_vs2017 + $(LatestTargetPlatformVersion) + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + hipfft_$(ProjectName) + + + false + hipfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + \ No newline at end of file diff --git a/Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.vcxproj.filters b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.vcxproj.filters new file mode 100644 index 00000000..fdb16c77 --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2017.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {014ce972-b60d-4231-b200-269d9c7446ac} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {d58f8b40-c416-46f3-a01d-a8fc07be4960} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {3931aa89-9491-4b33-8657-8a3ae171f7ae} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.sln b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.sln new file mode 100644 index 00000000..0a90bad4 --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.32630.194 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_z2z_vs2019", "plan_z2z_vs2019.vcxproj", "{2D984972-6F80-4EC6-ABCE-9169E45371A7}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {2D984972-6F80-4EC6-ABCE-9169E45371A7}.Debug|x64.ActiveCfg = Debug|x64 + {2D984972-6F80-4EC6-ABCE-9169E45371A7}.Debug|x64.Build.0 = Debug|x64 + {2D984972-6F80-4EC6-ABCE-9169E45371A7}.Release|x64.ActiveCfg = Release|x64 + {2D984972-6F80-4EC6-ABCE-9169E45371A7}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {AE54D63F-95BB-4AFF-BD77-5CDA727425D9} + EndGlobalSection +EndGlobal diff --git a/Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.vcxproj b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.vcxproj new file mode 100644 index 00000000..dc679e7a --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.vcxproj @@ -0,0 +1,124 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 16.0 + {2d984972-6f80-4ec6-abce-9169e45371a7} + Win32Proj + plan_z2z_vs2019 + $(LatestTargetPlatformVersion) + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + hipfft_$(ProjectName) + + + false + hipfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + diff --git a/Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.vcxproj.filters b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.vcxproj.filters new file mode 100644 index 00000000..99962c98 --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2019.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {f79519f5-ca0d-4dea-acc9-92e2ca8a047f} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {c545c4ac-2cfe-4147-b887-969060f1aa0c} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {62fc95e9-42ea-4177-b16a-149076d4e461} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.sln b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.sln new file mode 100644 index 00000000..f273aeee --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.4.33213.308 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_z2z_vs2022", "plan_z2z_vs2022.vcxproj", "{C64E34C7-D9C9-4D90-8137-DB06D7EEF979}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {C64E34C7-D9C9-4D90-8137-DB06D7EEF979}.Debug|x64.ActiveCfg = Debug|x64 + {C64E34C7-D9C9-4D90-8137-DB06D7EEF979}.Debug|x64.Build.0 = Debug|x64 + {C64E34C7-D9C9-4D90-8137-DB06D7EEF979}.Release|x64.ActiveCfg = Release|x64 + {C64E34C7-D9C9-4D90-8137-DB06D7EEF979}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {F45DC423-E58A-48D5-AD60-B73EC2F509A3} + EndGlobalSection +EndGlobal diff --git a/Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.vcxproj b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.vcxproj new file mode 100644 index 00000000..2d6f900d --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.vcxproj @@ -0,0 +1,124 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 17.0 + {c64e34c7-d9c9-4d90-8137-db06d7eef979} + Win32Proj + plan_z2z_vs2022 + $(LatestTargetPlatformVersion) + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + hipfft_$(ProjectName) + + + false + hipfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + hipfft.lib;rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + diff --git a/Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.vcxproj.filters b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.vcxproj.filters new file mode 100644 index 00000000..b123cbd7 --- /dev/null +++ b/Libraries/hipFFT/plan_z2z/plan_z2z_vs2022.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {95f9c786-d401-464b-8f2b-c32d069bf460} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {e026187b-3f80-4996-88a1-89889c5e76da} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {5c127a11-2a4b-4f85-aa3c-10e080601068} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Libraries/rocFFT/CMakeLists.txt b/Libraries/rocFFT/CMakeLists.txt new file mode 100644 index 00000000..157f41a8 --- /dev/null +++ b/Libraries/rocFFT/CMakeLists.txt @@ -0,0 +1,57 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(rocFFT_examples LANGUAGES CXX) + +file(RELATIVE_PATH folder_bin ${CMAKE_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/${folder_bin}) + +if(GPU_RUNTIME STREQUAL "CUDA") + message(STATUS "rocFFT examples do not support the CUDA runtime") + return() +endif() + +if(WIN32) + set(ROCM_ROOT + "$ENV{HIP_PATH}" + CACHE PATH + "Root directory of the ROCm installation" + ) +else() + set(ROCM_ROOT + "/opt/rocm" + CACHE PATH + "Root directory of the ROCm installation" + ) +endif() + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocFFT) +if(NOT rocFFT_FOUND) + message(STATUS "rocFFT could not be found, not building rocFFT examples") + return() +endif() + +add_subdirectory(callback) +add_subdirectory(multi_gpu) diff --git a/Libraries/rocFFT/Makefile b/Libraries/rocFFT/Makefile new file mode 100644 index 00000000..c900db09 --- /dev/null +++ b/Libraries/rocFFT/Makefile @@ -0,0 +1,38 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm + +EXAMPLES := \ + callback \ + multi_gpu + +all: $(EXAMPLES) + +clean: TARGET=clean +clean: all + +$(EXAMPLES): + $(MAKE) -C $@ $(TARGET) + +.PHONY: all clean $(EXAMPLES) diff --git a/Libraries/rocFFT/README.md b/Libraries/rocFFT/README.md new file mode 100644 index 00000000..415cffc2 --- /dev/null +++ b/Libraries/rocFFT/README.md @@ -0,0 +1,58 @@ +# rocFFT Examples + +## Summary + +The examples in this subdirectory showcase the functionality of the [rocFFT](https://github.com/ROCm/rocFFT/) library. The examples build on Linux for ROCm (AMD GPU). + +## Prerequisites + +### Linux + +- [CMake](https://cmake.org/download/) (at least version 3.21) +- OR GNU Make - available via the distribution's package manager +- [ROCm](https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.1.3/page/Overview_of_ROCm_Installation_Methods.html) (at least version 5.x.x) +- [rocFFT](https://github.com/ROCm/rocFFT/) + - `rocfft` package available from [repo.radeon.com](https://repo.radeon.com/rocm/). The repository is added during the standard ROCm [install procedure](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/). + +### Windows + +- [Visual Studio](https://visualstudio.microsoft.com/) 2019 or 2022 with the "Desktop Development with C++" workload +- ROCm toolchain for Windows (No public release yet) + - The Visual Studio ROCm extension needs to be installed to build with the solution files. +- [rocFFT](https://github.com/ROCm/rocFFT/) + - Installed as part of the ROCm SDK on Windows for ROCm platform. +- [CMake](https://cmake.org/download/) (optional, to build with CMake. Requires at least version 3.21) +- [Ninja](https://ninja-build.org/) (optional, to build with CMake) + +## Building + +### Linux + +Make sure that the dependencies are installed, or use one of the [provided Dockerfiles](../../Dockerfiles/) to build and run the examples in a containerized environment. + +#### Using CMake + +All examples in the `rocFFT` subdirectory can either be built by a single CMake project or be built independently. + +- `$ cd Libraries/rocFFT` +- `$ cmake -S . -B build` +- `$ cmake --build build` + +#### Using Make + +All examples can be built by a single invocation to Make or be built independently. + +- `$ cd Libraries/rocFFT` +- `$ make` + +### Windows + +#### Visual Studio + +Visual Studio solution files are available for the individual examples. To build all examples for rocFFT open the top level solution file [ROCm-Examples-VS2019.sln](../../ROCm-Examples-VS2019.sln) and filter for rocFFT. + +For more detailed build instructions refer to the top level [README.md](../../README.md#visual-studio). + +#### CMake + +All examples in the `rocFFT` subdirectory can either be built by a single CMake project or be built independently. For build instructions refer to the top-level [README.md](../../README.md#cmake-2). diff --git a/Libraries/rocFFT/callback/.gitignore b/Libraries/rocFFT/callback/.gitignore new file mode 100644 index 00000000..bbd4a2a9 --- /dev/null +++ b/Libraries/rocFFT/callback/.gitignore @@ -0,0 +1 @@ +rocfft_callback diff --git a/Libraries/rocFFT/callback/CMakeLists.txt b/Libraries/rocFFT/callback/CMakeLists.txt new file mode 100644 index 00000000..de9d7f2d --- /dev/null +++ b/Libraries/rocFFT/callback/CMakeLists.txt @@ -0,0 +1,53 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +set(example_name rocfft_callback) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX HIP) + +if(GPU_RUNTIME STREQUAL "CUDA") + message(STATUS "rocFFT examples do not support the CUDA runtime") + return() +endif() + +set(CMAKE_HIP_STANDARD 17) +set(CMAKE_HIP_EXTENSIONS OFF) +set(CMAKE_HIP_STANDARD_REQUIRED ON) + +set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation") + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocfft REQUIRED) + +add_executable(${example_name} main.hip) +# Make example runnable using ctest +add_test(NAME ${example_name} COMMAND ${example_name}) + +set(include_dirs "../../../Common") + +target_link_libraries(${example_name} PRIVATE roc::rocfft) +target_include_directories(${example_name} PRIVATE ${include_dirs}) +set_source_files_properties(main.hip PROPERTIES LANGUAGE HIP) + +install(TARGETS ${example_name}) diff --git a/Libraries/rocFFT/callback/Makefile b/Libraries/rocFFT/callback/Makefile new file mode 100644 index 00000000..2c358319 --- /dev/null +++ b/Libraries/rocFFT/callback/Makefile @@ -0,0 +1,64 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +EXAMPLE := rocfft_callback +COMMON_INCLUDE_DIR := ../../../Common +GPU_RUNTIME := HIP + +ifneq ($(GPU_RUNTIME), HIP) + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.) +endif + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm + +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCFFT_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCFFT_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocfft + + +CXXFLAGS ?= -Wall -Wextra +ICPPFLAGS += -D__HIP_PLATFORM_AMD__ -isystem $(HIP_INCLUDE_DIR) +ILDLIBS += -lamdhip64 +COMPILER := $(HIPCXX) + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +$(EXAMPLE): main.hip $(COMMON_INCLUDE_DIR)/rocfft_utils.hpp $(COMMON_INCLUDE_DIR)/example_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocFFT/callback/README.md b/Libraries/rocFFT/callback/README.md new file mode 100644 index 00000000..4a0ae21d --- /dev/null +++ b/Libraries/rocFFT/callback/README.md @@ -0,0 +1,68 @@ +# rocFFT callback Example (C++) + +## Description + +This example illustrates the use of rocFFT `callback` functionality. It shows how to use load callback, a user-defined callback function that is run to load input from global memory at the start of the transform, with rocFFT. Additionally, it shows how to make use of rocFFT's result scaling functionality. + +### Application flow + +1. Set up rocFFT. +2. Allocate and initialize the host data and filter. +3. Allocate device memory. +4. Copy data and filter from host to device. +5. Set up scaling factor and create an FFT plan. +6. Check if FFT plan requires a work buffer, if true: + - Allocate and set work buffer on device. +7. Allocate and initialize callback data on host. +8. Copy callback data from host to device. +9. Get a host pointer to the callback device function. +10. Set the callback with the callback data and device function. +11. Execute FFT plan which multiplies each element by filter element and scales. +12. Clean up work buffer and FFT plan. +13. Copy the results from device to host. +14. Print results. +15. Free device memory. +16. The cleanup of the rocFFT enviroment. + +## Key APIs and Concepts + +- rocFFT is initialized by calling `rocfft_setup()` and it is cleaned up by calling `rocfft_cleanup()`. +- rocFFT creates a plan with `rocfft_plan_create`. This function takes many of the fundamental parameters needed to specify a transform. The plan is then executed with `rocfft_execute` and destroyed with `rocfft_plan_destroy`. +- rocFFT can add work buffers and can control plan execution with `rocfft_execution_info` from `rocfft_execution_info_create(rocfft_execution_info *info)`. For this example specifically a load callback with `rocfft_execution_info_set_load_callback` and work buffer with `rocfft_execution_info_set_work_buffer`. +- [Callbacks](https://rocm.docs.amd.com/projects/rocFFT/en/latest/index.html#load-and-store-callbacks) is an experimental functionality in rocFFT. It requires a pointer to the shared memory, but did not support shared memory when this example was created. +- rocFFT provides explicit API for [result scaling](https://rocm.docs.amd.com/projects/rocFFT/en/latest/how-to/working-with-rocfft.html#result-scaling), which offers better performance than callbacks for this operation as the compiler can optimize the extra scaling multiplication. The API exposed is `rocfft_plan_description_set_scale_factor`, which is to be used _before_ creating the plan. In this example, callbacks are still being used for filtering, so the performance improvement from using the scaling factor API is not noticeable. + +## Demonstrated API Calls + +### rocFFT + +- `rocfft_cleanup` +- `rocfft_execute` +- `rocfft_execution_info_create` +- `rocfft_execution_info_destroy` +- `rocfft_execution_info_set_load_callback` +- `rocfft_execution_info_set_work_buffer` +- `rocfft_placement_inplace` +- `rocfft_plan_create` +- `rocfft_plan_description` +- `rocfft_plan_description_create` +- `rocfft_plan_description_destroy` +- `rocfft_plan_description_set_scale_factor` +- `rocfft_plan_destroy` +- `rocfft_plan_get_work_buffer_size` +- `rocfft_precision_double` +- `rocfft_setup` +- `rocfft_transform_type_complex_forward` + +### HIP runtime + +- `HIP_SYMBOL` +- `hipCmul` +- `hipFree` +- `hipGetErrorString` +- `hipMalloc` +- `hipMemcpy` +- `hipMemcpyDeviceToHost` +- `hipMemcpyFromSymbol` +- `hipMemcpyHostToDevice` +- `make_hipDoubleComplex` diff --git a/Libraries/rocFFT/callback/callback_vs2017.sln b/Libraries/rocFFT/callback/callback_vs2017.sln new file mode 100644 index 00000000..61917c9e --- /dev/null +++ b/Libraries/rocFFT/callback/callback_vs2017.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.33026.149 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "callback_vs2017", "callback_vs2017.vcxproj", "{65A100E5-7ABE-4EC5-B625-767778DDF2B2}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {65A100E5-7ABE-4EC5-B625-767778DDF2B2}.Debug|x64.ActiveCfg = Debug|x64 + {65A100E5-7ABE-4EC5-B625-767778DDF2B2}.Debug|x64.Build.0 = Debug|x64 + {65A100E5-7ABE-4EC5-B625-767778DDF2B2}.Release|x64.ActiveCfg = Release|x64 + {65A100E5-7ABE-4EC5-B625-767778DDF2B2}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {07E28DA0-E2EA-4D5D-B41F-02A6D8E1CDE9} + EndGlobalSection +EndGlobal diff --git a/Libraries/rocFFT/callback/callback_vs2017.vcxproj b/Libraries/rocFFT/callback/callback_vs2017.vcxproj new file mode 100644 index 00000000..8c96c31c --- /dev/null +++ b/Libraries/rocFFT/callback/callback_vs2017.vcxproj @@ -0,0 +1,120 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 15.0 + {65a100e5-7abe-4ec5-b625-767778ddf2b2} + Win32Proj + callback_vs2017 + $(LatestTargetPlatformVersion) + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + rocfft_$(ProjectName) + + + false + rocfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + \ No newline at end of file diff --git a/Libraries/rocFFT/callback/callback_vs2017.vcxproj.filters b/Libraries/rocFFT/callback/callback_vs2017.vcxproj.filters new file mode 100644 index 00000000..f04568e5 --- /dev/null +++ b/Libraries/rocFFT/callback/callback_vs2017.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {8c9bcdcb-6890-49c4-b93f-df05890753a6} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {4b6d3f12-a435-4514-863d-da192e4aefc1} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {e715f1f0-c7ec-4ee1-8fc8-fc74286201cd} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Libraries/rocFFT/callback/callback_vs2019.sln b/Libraries/rocFFT/callback/callback_vs2019.sln new file mode 100644 index 00000000..c56a57f6 --- /dev/null +++ b/Libraries/rocFFT/callback/callback_vs2019.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.32630.194 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "callback_vs2019", "callback_vs2019.vcxproj", "{52BD229D-4300-4CB4-A241-21B5A4531F9F}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {52BD229D-4300-4CB4-A241-21B5A4531F9F}.Debug|x64.ActiveCfg = Debug|x64 + {52BD229D-4300-4CB4-A241-21B5A4531F9F}.Debug|x64.Build.0 = Debug|x64 + {52BD229D-4300-4CB4-A241-21B5A4531F9F}.Release|x64.ActiveCfg = Release|x64 + {52BD229D-4300-4CB4-A241-21B5A4531F9F}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {E42979BB-9837-4D64-87EC-610FC41F7F94} + EndGlobalSection +EndGlobal diff --git a/Libraries/rocFFT/callback/callback_vs2019.vcxproj b/Libraries/rocFFT/callback/callback_vs2019.vcxproj new file mode 100644 index 00000000..894f5864 --- /dev/null +++ b/Libraries/rocFFT/callback/callback_vs2019.vcxproj @@ -0,0 +1,120 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 16.0 + {52bd229d-4300-4cb4-a241-21b5a4531f9f} + Win32Proj + callback_vs2019 + $(LatestTargetPlatformVersion) + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + rocfft_$(ProjectName) + + + false + rocfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + diff --git a/Libraries/rocFFT/callback/callback_vs2019.vcxproj.filters b/Libraries/rocFFT/callback/callback_vs2019.vcxproj.filters new file mode 100644 index 00000000..3184778b --- /dev/null +++ b/Libraries/rocFFT/callback/callback_vs2019.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {3b2ff5a4-6fe7-4855-82c2-8009546fd2e9} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {9a49342a-14cb-4de3-be47-062042facd85} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {f4b36920-a41a-4280-9884-ab556d6b0103} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Libraries/rocFFT/callback/callback_vs2022.sln b/Libraries/rocFFT/callback/callback_vs2022.sln new file mode 100644 index 00000000..ef6b732e --- /dev/null +++ b/Libraries/rocFFT/callback/callback_vs2022.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.4.33213.308 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "callback_vs2022", "callback_vs2022.vcxproj", "{44A60ED3-BF12-4190-8242-442946300C3E}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {44A60ED3-BF12-4190-8242-442946300C3E}.Debug|x64.ActiveCfg = Debug|x64 + {44A60ED3-BF12-4190-8242-442946300C3E}.Debug|x64.Build.0 = Debug|x64 + {44A60ED3-BF12-4190-8242-442946300C3E}.Release|x64.ActiveCfg = Release|x64 + {44A60ED3-BF12-4190-8242-442946300C3E}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {8D9A3A79-175C-4F9F-AA4E-726D75ADA70D} + EndGlobalSection +EndGlobal diff --git a/Libraries/rocFFT/callback/callback_vs2022.vcxproj b/Libraries/rocFFT/callback/callback_vs2022.vcxproj new file mode 100644 index 00000000..bdf10639 --- /dev/null +++ b/Libraries/rocFFT/callback/callback_vs2022.vcxproj @@ -0,0 +1,120 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 17.0 + {44a60ed3-bf12-4190-8242-442946300c3e} + Win32Proj + callback_vs2022 + $(LatestTargetPlatformVersion) + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + rocfft_$(ProjectName) + + + false + rocfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + diff --git a/Libraries/rocFFT/callback/callback_vs2022.vcxproj.filters b/Libraries/rocFFT/callback/callback_vs2022.vcxproj.filters new file mode 100644 index 00000000..c34a26b7 --- /dev/null +++ b/Libraries/rocFFT/callback/callback_vs2022.vcxproj.filters @@ -0,0 +1,30 @@ + + + + + {60d23288-3e0a-41b4-9f02-310a07d3c5cf} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {6d532177-84e6-4bf4-b749-25715c79913e} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {f149ca87-b0dc-4de5-bc87-b8bc9218282e} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Libraries/rocFFT/callback/main.hip b/Libraries/rocFFT/callback/main.hip new file mode 100644 index 00000000..6726b0c1 --- /dev/null +++ b/Libraries/rocFFT/callback/main.hip @@ -0,0 +1,182 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "example_utils.hpp" +#include "rocfft_utils.hpp" + +#include + +#include +#include +#include + +#include +#include +#include +#include + +// example of using load/store callbacks with rocfft + +struct load_callback_data +{ + double2* filter; +}; + +__device__ double2 load_callback(double2* input, + size_t offset, + void* callback_data, + void* /*sharedMem*/) +{ + auto data = static_cast(callback_data); + + // multiply each element by filter element + return hipCmul(input[offset], data->filter[offset]); +} + +// Can not give __device__ function to HIP_SYMBOL +__device__ auto load_callback_dev = load_callback; + +int main() +{ + constexpr size_t N = 8; + + std::vector input(N), callback_filter(N); + + // Initialize data and filter + std::random_device rd; + std::default_random_engine gen(rd()); + std::uniform_real_distribution distribution(0.0, 1.0); + + for(size_t i = 0; i < N; i++) + { + input[i].x = i; + input[i].y = i; + callback_filter[i].x = distribution(gen); + } + + // Rocfft gpu compute + ROCFFT_CHECK(rocfft_setup()); + + const size_t Nbytes = N * sizeof(double2); + + // Create HIP device object. + double2 *data_dev, *callback_filter_dev; + + // Create buffers + HIP_CHECK(hipMalloc(&data_dev, Nbytes)); + HIP_CHECK(hipMalloc(&callback_filter_dev, Nbytes)); + + // Copy data to device + HIP_CHECK(hipMemcpy(data_dev, input.data(), Nbytes, hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy(callback_filter_dev, callback_filter.data(), Nbytes, hipMemcpyHostToDevice)); + + // Set up scaling + rocfft_plan_description description = nullptr; + const double scale_factor = 1.0 / static_cast(N); + ROCFFT_CHECK(rocfft_plan_description_create(&description)); + ROCFFT_CHECK(rocfft_plan_description_set_scale_factor(description, scale_factor)); + + // Create plan + rocfft_plan plan = nullptr; + ROCFFT_CHECK(rocfft_plan_create(&plan, + rocfft_placement_inplace, + rocfft_transform_type_complex_forward, + rocfft_precision_double, + 1, + &N, + 1, + description)); + + // Check if the plan requires a work buffer + size_t work_buf_size = 0; + ROCFFT_CHECK(rocfft_plan_get_work_buffer_size(plan, &work_buf_size)); + rocfft_execution_info info = nullptr; + ROCFFT_CHECK(rocfft_execution_info_create(&info)); + + void* work_buf = nullptr; + if(work_buf_size) + { + HIP_CHECK(hipMalloc(&work_buf, work_buf_size)); + ROCFFT_CHECK(rocfft_execution_info_set_work_buffer(info, work_buf, work_buf_size)); + } + + // Prepare callback + load_callback_data callback_data_host; + callback_data_host.filter = callback_filter_dev; + + void* callback_data_dev; + HIP_CHECK(hipMalloc(&callback_data_dev, sizeof(load_callback_data))); + HIP_CHECK(hipMemcpy(callback_data_dev, + &callback_data_host, + sizeof(load_callback_data), + hipMemcpyHostToDevice)); + + // Get a properly-typed host pointer to the device function, as + // rocfft_execution_info_set_load_callback expects void*. + void* callback_ptr_host = nullptr; + HIP_CHECK( + hipMemcpyFromSymbol(&callback_ptr_host, HIP_SYMBOL(load_callback_dev), sizeof(void*))); + + // Set callback + ROCFFT_CHECK( + rocfft_execution_info_set_load_callback(info, &callback_ptr_host, &callback_data_dev, 0)); + + // Execute plan + ROCFFT_CHECK(rocfft_execute(plan, (void**)&data_dev, nullptr, info)); + + // Clean up work buffer + if(work_buf_size) + { + HIP_CHECK(hipFree(work_buf)); + } + + // Destroy description + ROCFFT_CHECK(rocfft_plan_description_destroy(description)); + description = nullptr; + + // Destroy info + ROCFFT_CHECK(rocfft_execution_info_destroy(info)); + info = nullptr; + + // Destroy plan + ROCFFT_CHECK(rocfft_plan_destroy(plan)); + plan = nullptr; + + // Copy result back to host + std::vector output(N); + HIP_CHECK(hipMemcpy(output.data(), data_dev, Nbytes, hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < N; i++) + { + std::cout << "element " << i << " input: (" << input[i].x << "," << input[i].y << ")" + << " output: (" << output[i].x << "," << output[i].y << ")" << std::endl; + } + + HIP_CHECK(hipFree(callback_data_dev)); + HIP_CHECK(hipFree(callback_filter_dev)); + HIP_CHECK(hipFree(data_dev)); + + ROCFFT_CHECK(rocfft_cleanup()); + + return 0; +} diff --git a/Libraries/rocFFT/multi_gpu/.gitignore b/Libraries/rocFFT/multi_gpu/.gitignore new file mode 100644 index 00000000..36e9a4bf --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/.gitignore @@ -0,0 +1 @@ +rocfft_multi_gpu diff --git a/Libraries/rocFFT/multi_gpu/CMakeLists.txt b/Libraries/rocFFT/multi_gpu/CMakeLists.txt new file mode 100644 index 00000000..d97ee9af --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/CMakeLists.txt @@ -0,0 +1,48 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +set(example_name rocfft_multi_gpu) + +cmake_minimum_required(VERSION 3.21 FATAL_ERROR) +project(${example_name} LANGUAGES CXX HIP) + +set(CMAKE_HIP_STANDARD 17) +set(CMAKE_HIP_EXTENSIONS OFF) +set(CMAKE_HIP_STANDARD_REQUIRED ON) + +set(ROCM_ROOT "/opt/rocm" CACHE PATH "Root directory of the ROCm installation") + +list(APPEND CMAKE_PREFIX_PATH "${ROCM_ROOT}") + +find_package(rocfft REQUIRED) + +add_executable(${example_name} main.cpp) +# Make example runnable using ctest +add_test(NAME ${example_name} COMMAND ${example_name}) + +set(include_dirs "../../../Common") + +target_link_libraries(${example_name} PRIVATE roc::rocfft) +target_include_directories(${example_name} PRIVATE ${include_dirs}) +set_source_files_properties(main.cpp PROPERTIES LANGUAGE HIP) + +install(TARGETS ${example_name}) diff --git a/Libraries/rocFFT/multi_gpu/Makefile b/Libraries/rocFFT/multi_gpu/Makefile new file mode 100644 index 00000000..5d6c0f43 --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/Makefile @@ -0,0 +1,61 @@ +# MIT License +# +# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +EXAMPLE := rocfft_multi_gpu +COMMON_INCLUDE_DIR := ../../../Common +GPU_RUNTIME := HIP + +ifneq ($(GPU_RUNTIME), HIP) + $(error GPU_RUNTIME is set to "$(GPU_RUNTIME)". GPU_RUNTIME must be HIP.) +endif + +# HIP variables +ROCM_INSTALL_DIR := /opt/rocm +HIP_INCLUDE_DIR := $(ROCM_INSTALL_DIR)/include +ROCFFT_INCLUDE_DIR := $(HIP_INCLUDE_DIR) + +HIPCXX ?= $(ROCM_INSTALL_DIR)/bin/hipcc + +# Common variables and flags +CXX_STD := c++17 +ICXXFLAGS := -std=$(CXX_STD) +ICPPFLAGS := -isystem $(ROCFFT_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) +ILDFLAGS := -L $(ROCM_INSTALL_DIR)/lib +ILDLIBS := -lrocfft + +CXXFLAGS ?= -Wall -Wextra +CPPFLAGS ?= -D__HIP_PLATFORM_AMD__ -isystem $(HIP_INCLUDE_DIR) +LDLIBS ?= -lamdhip64 +COMPILER := $(HIPCXX) + +ICXXFLAGS += $(CXXFLAGS) +ICPPFLAGS += $(CPPFLAGS) +ILDFLAGS += $(LDFLAGS) +ILDLIBS += $(LDLIBS) + +$(EXAMPLE): main.cpp $(COMMON_INCLUDE_DIR)/rocfft_utils.hpp $(COMMON_INCLUDE_DIR)/example_utils.hpp + $(COMPILER) $(ICXXFLAGS) $(ICPPFLAGS) $(ILDFLAGS) -o $@ $< $(ILDLIBS) + +clean: + $(RM) $(EXAMPLE) + +.PHONY: clean diff --git a/Libraries/rocFFT/multi_gpu/README.md b/Libraries/rocFFT/multi_gpu/README.md new file mode 100644 index 00000000..6883e592 --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/README.md @@ -0,0 +1,76 @@ +# rocFFT Mutli GPU Example (C++) + +## Description + +This example illustrates the use of rocFFT multi-GPU functionality. It shows how to use multiple GPUs with rocFFT by using `rocfft_brick` and `rocfft_field` to divide the work between multiple devices. At least requires rocm version 6.0.0. + +### Application flow + +1. Read in command-line parameters. +2. Check if there are two device with 3-D inputs. +3. Check if there device ids that do not exist. +4. Create a plan description for multi-GPU plan. +5. Define infield geometry for both gpus and add bricks. +6. Add infield to the plan description. +7. Allocate and initialize GPU input. +8. Define outfield geometry for both gpus and add bricks. +9. Add outfield to the plan description. +10. Allocate and initialize GPU output. +11. Create multi-gpu `rocFFT` plan with the created plan description. +12. Get execution information and allocate work buffer. +13. Execute multi-gpu plan. +14. Get results from the first device. +15. Destroy plan and free device memory. + +### Command line interface + +The application provides the following optional command line arguments: + +- `-l` or `--length`. The 3-D FFT size separated by spaces. It default value is `8 8 8`. +- `-d` or `--devices`. The list of devices to use separated by spaces. It default value is `0 1`. + +## Key APIs and Concepts + +- rocFFT is initialized by calling `rocfft_setup()` and it is cleaned up by calling `rocfft_cleanup()`. +- rocFFT creates a plan with `rocfft_plan_create`. This function takes many of the fundamental parameters needed to specify a transform. The plan is then executed with `rocfft_execute` and destroyed with `rocfft_plan_destroy`. +- `rocfft_field` is used to hold data decomposition information which is then passed to a `rocfft_plan` via a `rocfft_plan_description` +- `rocfft_brick` is used to describe the data decomposition of fields +- To execute HIP functions on different gpus `hipSetDevice` can be used with the id of the gpu to switch beteen gpus. + +## Demonstrated API Calls + +### rocFFT + +- `rocfft_array_type_complex_interleaved` +- `rocfft_brick_create` +- `rocfft_brick_destroy` +- `rocfft_cleanup` +- `rocfft_execute` +- `rocfft_execution_info_create` +- `rocfft_execution_info_destroy` +- `rocfft_execution_info_set_work_buffer` +- `rocfft_field_add_brick` +- `rocfft_field_create` +- `rocfft_placement_notinplace` +- `rocfft_plan_create` +- `rocfft_plan_description_add_infield` +- `rocfft_plan_description_add_outfield` +- `rocfft_plan_description_destroy` +- `rocfft_plan_description_create` +- `rocfft_plan_description_set_data_layout` +- `rocfft_plan_destroy` +- `rocfft_plan_get_work_buffer_size` +- `rocfft_precision_double` +- `rocfft_setup` +- `rocfft_transform_type_complex_forward` + +### HIP runtime + +- `hipFree` +- `hipGetDeviceCount` +- `hipGetErrorString` +- `hipMalloc` +- `hipMemcpy` +- `hipMemcpyHostToDevice` +- `hipMemcpyDeviceToHost` +- `hipSetDevice` diff --git a/Libraries/rocFFT/multi_gpu/main.cpp b/Libraries/rocFFT/multi_gpu/main.cpp new file mode 100644 index 00000000..3e2f24b5 --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/main.cpp @@ -0,0 +1,233 @@ +// MIT License +// +// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "cmdparser.hpp" +#include "example_utils.hpp" +#include "rocfft_utils.hpp" + +#include +#include +#include + +#include +#include +#include +#include + +int main(int argc, char* argv[]) +{ + std::cout << "rocfft single-node multi-gpu complex-to-complex 3D FFT example\n"; + + // Command-line options: + cli::Parser parser(argc, argv); + // Length of transform, first dimension must be greater than number of GPU devices + parser.set_optional>("l", + "length", + {8, 8, 8}, + "3-D FFT size (eg: --length 8 8 8)."); + parser.set_optional>( + "d", + "devices", + {0, 1}, + "List of devices to use separated by spaces (eg: --devices 0 1)"); + parser.run_and_exit_if_error(); + + const auto length = parser.get>("l"); + const auto devices = parser.get>("d"); + + int deviceCount = devices.size(); + if(length.size() != 3 || deviceCount != 2) + { + std::cout << "This example is designed to run on two devices with 3-D inputs!" << std::endl; + return 0; + } + + const size_t fftSize = length[0] * length[1] * length[2]; // Must evenly divide deviceCount + int nDevices; + HIP_CHECK(hipGetDeviceCount(&nDevices)); + + std::cout << "Number of available GPUs: " << nDevices << " \n"; + if(nDevices <= static_cast(*std::max_element(devices.begin(), devices.end()))) + { + std::cout << "device ID greater than number of available devices" << std::endl; + return 0; + } + + ROCFFT_CHECK(rocfft_setup()); + + rocfft_plan_description description = nullptr; + ROCFFT_CHECK(rocfft_plan_description_create(&description)); + // Do not set stride information via the descriptor, they are to be defined during field creation below + ROCFFT_CHECK(rocfft_plan_description_set_data_layout(description, + rocfft_array_type_complex_interleaved, + rocfft_array_type_complex_interleaved, + nullptr, + nullptr, + 0, + nullptr, + 0, + 0, + nullptr, + 0)); + + // Define infield geometry + // First entry of upper dimension is the batch size + const std::vector inbrick0_lower = {0, 0, 0, 0}; + const std::vector inbrick0_upper = {1, length[0] / deviceCount, length[1], length[2]}; + const std::vector inbrick1_lower = {0, length[0] / deviceCount, 0, 0}; + const std::vector inbrick1_upper = {1, length[0], length[1], length[2]}; + + // Row-major stride for brick data layout in memory + std::vector brick_stride = {fftSize, length[0] * length[1], length[0], 1}; + + rocfft_field infield = nullptr; + ROCFFT_CHECK(rocfft_field_create(&infield)); + + rocfft_brick inbrick0 = nullptr; + ROCFFT_CHECK(rocfft_brick_create(&inbrick0, + inbrick0_lower.data(), + inbrick0_upper.data(), + brick_stride.data(), + inbrick0_lower.size(), + devices[0])); // Device id + ROCFFT_CHECK(rocfft_field_add_brick(infield, inbrick0)); + ROCFFT_CHECK(rocfft_brick_destroy(inbrick0)); + + rocfft_brick inbrick1 = nullptr; + ROCFFT_CHECK(rocfft_brick_create(&inbrick1, + inbrick1_lower.data(), + inbrick1_upper.data(), + brick_stride.data(), + inbrick1_lower.size(), + devices[1])); // Device id + ROCFFT_CHECK(rocfft_field_add_brick(infield, inbrick1)); + ROCFFT_CHECK(rocfft_brick_destroy(inbrick1)); + + ROCFFT_CHECK(rocfft_plan_description_add_infield(description, infield)); + + // Allocate and initialize GPU input + std::vector gpu_in(2); + const size_t bufferSize = fftSize / deviceCount; + constexpr std::complex input_data = {0.1, 0.1}; + const std::vector> input(bufferSize, input_data); // Host test input + const size_t memSize = sizeof(std::complex) * bufferSize; + + HIP_CHECK(hipSetDevice(devices[0])); + HIP_CHECK(hipMalloc(&gpu_in[0], memSize)); + HIP_CHECK(hipMemcpy(gpu_in[0], input.data(), memSize, hipMemcpyHostToDevice)); + + HIP_CHECK(hipSetDevice(devices[1])); + HIP_CHECK(hipMalloc(&gpu_in[1], memSize)); + HIP_CHECK(hipMemcpy(gpu_in[1], input.data(), memSize, hipMemcpyHostToDevice)); + + // Data decomposition for output + rocfft_field outfield = nullptr; + ROCFFT_CHECK(rocfft_field_create(&outfield)); + + std::vector gpu_out(2); + const std::vector outbrick0_lower = {0, 0, 0, 0}; + const std::vector outbrick0_upper = {1, length[0] / deviceCount, length[1], length[2]}; + const std::vector outbrick1_lower = {0, length[0] / deviceCount, 0, 0}; + const std::vector outbrick1_upper = {1, length[0], length[1], length[2]}; + + rocfft_brick outbrick0 = nullptr; + ROCFFT_CHECK(rocfft_brick_create(&outbrick0, + outbrick0_lower.data(), + outbrick0_upper.data(), + brick_stride.data(), + outbrick0_lower.size(), + devices[0])); // Device id + ROCFFT_CHECK(rocfft_field_add_brick(outfield, outbrick0)); + ROCFFT_CHECK(rocfft_brick_destroy(outbrick0)); + + rocfft_brick outbrick1 = nullptr; + ROCFFT_CHECK(rocfft_brick_create(&outbrick1, + outbrick1_lower.data(), + outbrick1_upper.data(), + brick_stride.data(), + outbrick1_lower.size(), + devices[1])); // Device id + ROCFFT_CHECK(rocfft_field_add_brick(outfield, outbrick1)); + ROCFFT_CHECK(rocfft_brick_destroy(outbrick1)); + + ROCFFT_CHECK(rocfft_plan_description_add_outfield(description, outfield)); + + // Allocate GPU output + HIP_CHECK(hipSetDevice(devices[0])); + HIP_CHECK(hipMalloc(&gpu_out[0], memSize)); + HIP_CHECK(hipSetDevice(devices[1])); + HIP_CHECK(hipMalloc(&gpu_out[1], memSize)); + + // Create a multi-gpu plan + HIP_CHECK(hipSetDevice(devices[0])); + rocfft_plan gpu_plan = nullptr; + ROCFFT_CHECK(rocfft_plan_create(&gpu_plan, + rocfft_placement_notinplace, // Placeness for the transform + rocfft_transform_type_complex_forward, // Direction of transform + rocfft_precision_double, + length.size(), // Dimension + length.data(), // Lengths + 1, // Number of transforms + description); // Description + ); + + // Get execution information and allocate work buffer + + size_t work_buf_size = 0; + ROCFFT_CHECK(rocfft_plan_get_work_buffer_size(gpu_plan, &work_buf_size)); + + void* work_buf = nullptr; + rocfft_execution_info planinfo = nullptr; + ROCFFT_CHECK(rocfft_execution_info_create(&planinfo)); + if(work_buf_size) + { + HIP_CHECK(hipMalloc(&work_buf, work_buf_size)); + ROCFFT_CHECK(rocfft_execution_info_set_work_buffer(planinfo, work_buf, work_buf_size)); + } + + // Execute plan + ROCFFT_CHECK(rocfft_execute(gpu_plan, (void**)gpu_in.data(), (void**)gpu_out.data(), planinfo)); + + // Get results from device + std::complex output; + HIP_CHECK(hipSetDevice(devices[0])); + HIP_CHECK(hipMemcpy(&output, gpu_out[0], sizeof(std::complex), hipMemcpyDeviceToHost)); + + const auto expected = static_cast>(fftSize) * input_data; + std::cout << "Expected result: " << expected << std::endl; + std::cout << "Actual result: " << output << std::endl; + + // Destroy plan + ROCFFT_CHECK(rocfft_execution_info_destroy(planinfo)); + ROCFFT_CHECK(rocfft_plan_description_destroy(description)); + ROCFFT_CHECK(rocfft_plan_destroy(gpu_plan)); + + ROCFFT_CHECK(rocfft_cleanup()); + + // Free device memory + HIP_CHECK(hipFree(gpu_in[0])); + HIP_CHECK(hipFree(gpu_in[1])); + HIP_CHECK(hipFree(gpu_out[0])); + HIP_CHECK(hipFree(gpu_out[1])); + + return 0; +} diff --git a/Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.sln b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.sln new file mode 100644 index 00000000..70bad1bf --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.33026.149 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "multi_gpu_vs2017", "multi_gpu_vs2017.vcxproj", "{5A9F936C-2A90-4B40-A798-3683A38CB7A3}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {5A9F936C-2A90-4B40-A798-3683A38CB7A3}.Debug|x64.ActiveCfg = Debug|x64 + {5A9F936C-2A90-4B40-A798-3683A38CB7A3}.Debug|x64.Build.0 = Debug|x64 + {5A9F936C-2A90-4B40-A798-3683A38CB7A3}.Release|x64.ActiveCfg = Release|x64 + {5A9F936C-2A90-4B40-A798-3683A38CB7A3}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {F47935F9-605A-4F60-A21D-2123C3DA122E} + EndGlobalSection +EndGlobal diff --git a/Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.vcxproj b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.vcxproj new file mode 100644 index 00000000..62fbf229 --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.vcxproj @@ -0,0 +1,121 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 15.0 + {5a9f936c-2a90-4b40-a798-3683a38cb7a3} + Win32Proj + multi_gpu_vs2017 + $(LatestTargetPlatformVersion) + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + rocfft_$(ProjectName) + + + false + rocfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + \ No newline at end of file diff --git a/Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.vcxproj.filters b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.vcxproj.filters new file mode 100644 index 00000000..d90e0339 --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2017.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {43363bf8-7a8e-4698-b9fe-8fc15e79e68b} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {f112e630-6729-451b-8a89-5afe0f42a4a3} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {00a440a6-f9f3-4ef9-9162-49c58898e32b} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.sln b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.sln new file mode 100644 index 00000000..7772b6b3 --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.32630.194 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "multi_gpu_vs2019", "multi_gpu_vs2019.vcxproj", "{A9CE29D8-8FCD-4250-ADA6-12237914A593}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A9CE29D8-8FCD-4250-ADA6-12237914A593}.Debug|x64.ActiveCfg = Debug|x64 + {A9CE29D8-8FCD-4250-ADA6-12237914A593}.Debug|x64.Build.0 = Debug|x64 + {A9CE29D8-8FCD-4250-ADA6-12237914A593}.Release|x64.ActiveCfg = Release|x64 + {A9CE29D8-8FCD-4250-ADA6-12237914A593}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {C6824BBD-DEBF-4191-834D-4A50E0818CBF} + EndGlobalSection +EndGlobal diff --git a/Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.vcxproj b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.vcxproj new file mode 100644 index 00000000..073b33d6 --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.vcxproj @@ -0,0 +1,121 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 16.0 + {a9ce29d8-8fcd-4250-ada6-12237914a593} + Win32Proj + multi_gpu_vs2019 + $(LatestTargetPlatformVersion) + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + rocfft_$(ProjectName) + + + false + rocfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + diff --git a/Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.vcxproj.filters b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.vcxproj.filters new file mode 100644 index 00000000..1940d74a --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2019.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {bb10866a-26f9-4e2c-9e6c-947d86c023b3} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {f427b295-07c2-45a2-b178-ac83bace6b89} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {af763c6d-5d07-4aaf-b26f-f5165b509b3c} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.sln b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.sln new file mode 100644 index 00000000..4cb129f6 --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.sln @@ -0,0 +1,24 @@ +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.4.33213.308 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "multi_gpu_vs2022", "multi_gpu_vs2022.vcxproj", "{AEB1E9B9-2C24-46AA-A78B-A6F2531E14F4}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {AEB1E9B9-2C24-46AA-A78B-A6F2531E14F4}.Debug|x64.ActiveCfg = Debug|x64 + {AEB1E9B9-2C24-46AA-A78B-A6F2531E14F4}.Debug|x64.Build.0 = Debug|x64 + {AEB1E9B9-2C24-46AA-A78B-A6F2531E14F4}.Release|x64.ActiveCfg = Release|x64 + {AEB1E9B9-2C24-46AA-A78B-A6F2531E14F4}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {0A6395A7-2DCB-4CB7-9DBA-F5C02B1B80C4} + EndGlobalSection +EndGlobal diff --git a/Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.vcxproj b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.vcxproj new file mode 100644 index 00000000..4fa55c55 --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.vcxproj @@ -0,0 +1,121 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 6.1 + 17.0 + {aeb1e9b9-2c24-46aa-a78b-a6f2531e14f4} + Win32Proj + multi_gpu_vs2022 + $(LatestTargetPlatformVersion) + + + + + + + + + + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + + + Application + true + HIP clang 6.1 + Unicode + + + Application + false + HIP clang 6.1 + Unicode + + + + + + + + + + + + + + + + true + + + + true + rocfft_$(ProjectName) + + + false + rocfft_$(ProjectName) + + + + true + + + + + Level2 + __clang__;__HIP__;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level2 + true + __clang__;__HIP__;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(MSBuildProjectDirectory)\..\..\..\Common;%(AdditionalIncludeDirectories) + stdcpp17 + true + + + Console + true + true + rocfft.lib;hiprtc.lib;hiprtc-builtins.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + diff --git a/Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.vcxproj.filters b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.vcxproj.filters new file mode 100644 index 00000000..e46410d1 --- /dev/null +++ b/Libraries/rocFFT/multi_gpu/multi_gpu_vs2022.vcxproj.filters @@ -0,0 +1,33 @@ + + + + + {fa5cb5fa-7cd8-4b17-8b5f-67d51f0b92d9} + cpp;c;cc;cxx;c++;def;odl;idl;hpj;bat;asm;asmx;hip;cu + + + {cf82bb03-81a5-41bb-a9ec-afd99c408840} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd;cuh + + + {50448845-5065-4981-9f7d-968e56354e44} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/README.md b/README.md index 66c7a257..7a084709 100644 --- a/README.md +++ b/README.md @@ -78,9 +78,15 @@ A collection of examples to enable new users to start using ROCm. Advanced users - [level_3](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocBLAS/level_3/): Operations between matrices and matrices. - [gemm](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocBLAS/level_3/gemm/): Showcases the general matrix product operation. - [gemm_strided_batched](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocBLAS/level_3/gemm_strided_batched/): Showcases the general matrix product operation with strided and batched matrices. + - [rocFFT](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocFFT/) + - [callback](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocFFT/callback/): Program that showcases the use of rocFFT `callback` functionality. + - [multi_gpu](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocFFT/multi_gpu/): Program that showcases the use of rocFFT multi-GPU functionality. - [rocPRIM](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocPRIM/) - [block_sum](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocPRIM/block_sum/): Simple program that showcases `rocprim::block_reduce` with an addition operator. - [device_sum](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocPRIM/device_sum/): Simple program that showcases `rocprim::reduce` with an addition operator. + - [hipFFT](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/hipFFT/) + - [plan_d2z](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/hipFFT/plan_d2z): Forward fast Fourier transform for 1D, 2D, and 3D real input using a simple plan in hipFFT. + - [plan_z2z](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/hipFFT/plan_z2z): Forward fast Fourier transform for 1D, 2D, and 3D complex input using a simple plan in hipFFT. - [rocRAND](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocRAND/) - [simple_distributions_cpp](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocRAND/simple_distributions_cpp/): A command-line app to compare random number generation on the CPU and on the GPU with rocRAND. - [rocSOLVER](https://github.com/ROCm/rocm-examples/tree/develop/Libraries/rocSOLVER/) diff --git a/ROCm-Examples-VS2017.sln b/ROCm-Examples-VS2017.sln index d3525180..a7372425 100644 --- a/ROCm-Examples-VS2017.sln +++ b/ROCm-Examples-VS2017.sln @@ -238,6 +238,18 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spitsv_vs2017", "Libraries\ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gpsv_vs2017", "Libraries\rocSPARSE\preconditioner\gpsv\gpsv_vs2017.vcxproj", "{FBD46E48-5689-44EA-817A-BBAA6EB006BD}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "hipFFT", "hipFFT", "{BA403F99-C412-457C-8DD9-EF064E53C359}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_d2z_vs2017", "Libraries\hipFFT\plan_d2z\plan_d2z_vs2017.vcxproj", "{AF790582-9E56-4CAA-BBD0-9C9F5B99FDEE}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_z2z_vs2017", "Libraries\hipFFT\plan_z2z\plan_z2z_vs2017.vcxproj", "{790D456B-B80A-479D-B5D2-145F4363F4F3}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "rocFFT", "rocFFT", "{E026A88D-1461-4FA5-80D0-4BF79D190720}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "callback_vs2017", "Libraries\rocFFT\callback\callback_vs2017.vcxproj", "{65A100E5-7ABE-4EC5-B625-767778DDF2B2}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "multi_gpu_vs2017", "Libraries\rocFFT\multi_gpu\multi_gpu_vs2017.vcxproj", "{5A9F936C-2A90-4B40-A798-3683A38CB7A3}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -644,6 +656,22 @@ Global {FBD46E48-5689-44EA-817A-BBAA6EB006BD}.Debug|x64.Build.0 = Debug|x64 {FBD46E48-5689-44EA-817A-BBAA6EB006BD}.Release|x64.ActiveCfg = Release|x64 {FBD46E48-5689-44EA-817A-BBAA6EB006BD}.Release|x64.Build.0 = Release|x64 + {AF790582-9E56-4CAA-BBD0-9C9F5B99FDEE}.Debug|x64.ActiveCfg = Debug|x64 + {AF790582-9E56-4CAA-BBD0-9C9F5B99FDEE}.Debug|x64.Build.0 = Debug|x64 + {AF790582-9E56-4CAA-BBD0-9C9F5B99FDEE}.Release|x64.ActiveCfg = Release|x64 + {AF790582-9E56-4CAA-BBD0-9C9F5B99FDEE}.Release|x64.Build.0 = Release|x64 + {790D456B-B80A-479D-B5D2-145F4363F4F3}.Debug|x64.ActiveCfg = Debug|x64 + {790D456B-B80A-479D-B5D2-145F4363F4F3}.Debug|x64.Build.0 = Debug|x64 + {790D456B-B80A-479D-B5D2-145F4363F4F3}.Release|x64.ActiveCfg = Release|x64 + {790D456B-B80A-479D-B5D2-145F4363F4F3}.Release|x64.Build.0 = Release|x64 + {65A100E5-7ABE-4EC5-B625-767778DDF2B2}.Debug|x64.ActiveCfg = Debug|x64 + {65A100E5-7ABE-4EC5-B625-767778DDF2B2}.Debug|x64.Build.0 = Debug|x64 + {65A100E5-7ABE-4EC5-B625-767778DDF2B2}.Release|x64.ActiveCfg = Release|x64 + {65A100E5-7ABE-4EC5-B625-767778DDF2B2}.Release|x64.Build.0 = Release|x64 + {5A9F936C-2A90-4B40-A798-3683A38CB7A3}.Debug|x64.ActiveCfg = Debug|x64 + {5A9F936C-2A90-4B40-A798-3683A38CB7A3}.Debug|x64.Build.0 = Debug|x64 + {5A9F936C-2A90-4B40-A798-3683A38CB7A3}.Release|x64.ActiveCfg = Release|x64 + {5A9F936C-2A90-4B40-A798-3683A38CB7A3}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -764,6 +792,12 @@ Global {F0AF1DEB-4B07-4FDC-8566-FB53F60D10B7} = {4581A6EF-211D-4B00-A65E-C29F55CEE886} {EFD1A0EC-2699-443C-BC18-8A3ACFEFB807} = {4581A6EF-211D-4B00-A65E-C29F55CEE886} {FBD46E48-5689-44EA-817A-BBAA6EB006BD} = {2586BC68-9BEF-4AC4-9096-353D503EABA6} + {BA403F99-C412-457C-8DD9-EF064E53C359} = {7BFB14C7-DDB4-4583-9261-8450600CDE29} + {AF790582-9E56-4CAA-BBD0-9C9F5B99FDEE} = {BA403F99-C412-457C-8DD9-EF064E53C359} + {790D456B-B80A-479D-B5D2-145F4363F4F3} = {BA403F99-C412-457C-8DD9-EF064E53C359} + {E026A88D-1461-4FA5-80D0-4BF79D190720} = {7BFB14C7-DDB4-4583-9261-8450600CDE29} + {65A100E5-7ABE-4EC5-B625-767778DDF2B2} = {E026A88D-1461-4FA5-80D0-4BF79D190720} + {5A9F936C-2A90-4B40-A798-3683A38CB7A3} = {E026A88D-1461-4FA5-80D0-4BF79D190720} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {5C96FD63-6F26-4E6F-B6D0-7FB9E1833081} diff --git a/ROCm-Examples-VS2019.sln b/ROCm-Examples-VS2019.sln index 6862e489..e86e3736 100644 --- a/ROCm-Examples-VS2019.sln +++ b/ROCm-Examples-VS2019.sln @@ -238,6 +238,18 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spitsv_vs2019", "Libraries\ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gpsv_vs2019", "Libraries\rocSPARSE\preconditioner\gpsv\gpsv_vs2019.vcxproj", "{17E97A94-213D-413B-A2EB-0164CEEFDEFC}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "hipFFT", "hipFFT", "{432A18C5-7A31-4211-81F5-A8E014AD8C85}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_d2z_vs2019", "Libraries\hipFFT\plan_d2z\plan_d2z_vs2019.vcxproj", "{401073F8-4631-442C-A62E-F90C704AFF1C}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_z2z_vs2019", "Libraries\hipFFT\plan_z2z\plan_z2z_vs2019.vcxproj", "{2D984972-6F80-4EC6-ABCE-9169E45371A7}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "rocFFT", "rocFFT", "{8E73922C-E4AA-4075-A074-B0AFF626BAB6}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "callback_vs2019", "Libraries\rocFFT\callback\callback_vs2019.vcxproj", "{52BD229D-4300-4CB4-A241-21B5A4531F9F}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "multi_gpu_vs2019", "Libraries\rocFFT\multi_gpu\multi_gpu_vs2019.vcxproj", "{A9CE29D8-8FCD-4250-ADA6-12237914A593}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -644,6 +656,22 @@ Global {17E97A94-213D-413B-A2EB-0164CEEFDEFC}.Debug|x64.Build.0 = Debug|x64 {17E97A94-213D-413B-A2EB-0164CEEFDEFC}.Release|x64.ActiveCfg = Release|x64 {17E97A94-213D-413B-A2EB-0164CEEFDEFC}.Release|x64.Build.0 = Release|x64 + {401073F8-4631-442C-A62E-F90C704AFF1C}.Debug|x64.ActiveCfg = Debug|x64 + {401073F8-4631-442C-A62E-F90C704AFF1C}.Debug|x64.Build.0 = Debug|x64 + {401073F8-4631-442C-A62E-F90C704AFF1C}.Release|x64.ActiveCfg = Release|x64 + {401073F8-4631-442C-A62E-F90C704AFF1C}.Release|x64.Build.0 = Release|x64 + {2D984972-6F80-4EC6-ABCE-9169E45371A7}.Debug|x64.ActiveCfg = Debug|x64 + {2D984972-6F80-4EC6-ABCE-9169E45371A7}.Debug|x64.Build.0 = Debug|x64 + {2D984972-6F80-4EC6-ABCE-9169E45371A7}.Release|x64.ActiveCfg = Release|x64 + {2D984972-6F80-4EC6-ABCE-9169E45371A7}.Release|x64.Build.0 = Release|x64 + {52BD229D-4300-4CB4-A241-21B5A4531F9F}.Debug|x64.ActiveCfg = Debug|x64 + {52BD229D-4300-4CB4-A241-21B5A4531F9F}.Debug|x64.Build.0 = Debug|x64 + {52BD229D-4300-4CB4-A241-21B5A4531F9F}.Release|x64.ActiveCfg = Release|x64 + {52BD229D-4300-4CB4-A241-21B5A4531F9F}.Release|x64.Build.0 = Release|x64 + {A9CE29D8-8FCD-4250-ADA6-12237914A593}.Debug|x64.ActiveCfg = Debug|x64 + {A9CE29D8-8FCD-4250-ADA6-12237914A593}.Debug|x64.Build.0 = Debug|x64 + {A9CE29D8-8FCD-4250-ADA6-12237914A593}.Release|x64.ActiveCfg = Release|x64 + {A9CE29D8-8FCD-4250-ADA6-12237914A593}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -764,6 +792,12 @@ Global {99A25D0A-93FE-47F2-8223-7313E53E7951} = {F0B0FD83-2B22-47F8-92B1-7A5ED88B8B5E} {E92723FC-411A-4656-9C0F-88D5D9F01EBD} = {F0B0FD83-2B22-47F8-92B1-7A5ED88B8B5E} {17E97A94-213D-413B-A2EB-0164CEEFDEFC} = {8B7AD0F4-4288-4ACF-9980-3C500A00EF31} + {432A18C5-7A31-4211-81F5-A8E014AD8C85} = {052412EF-7CEB-4E32-96F9-AADBC70945D7} + {401073F8-4631-442C-A62E-F90C704AFF1C} = {432A18C5-7A31-4211-81F5-A8E014AD8C85} + {2D984972-6F80-4EC6-ABCE-9169E45371A7} = {432A18C5-7A31-4211-81F5-A8E014AD8C85} + {8E73922C-E4AA-4075-A074-B0AFF626BAB6} = {052412EF-7CEB-4E32-96F9-AADBC70945D7} + {52BD229D-4300-4CB4-A241-21B5A4531F9F} = {8E73922C-E4AA-4075-A074-B0AFF626BAB6} + {A9CE29D8-8FCD-4250-ADA6-12237914A593} = {8E73922C-E4AA-4075-A074-B0AFF626BAB6} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {90580497-38BF-428E-A951-6EC6CFC68193} diff --git a/ROCm-Examples-VS2022.sln b/ROCm-Examples-VS2022.sln index 0e77061a..f8e6d5c1 100644 --- a/ROCm-Examples-VS2022.sln +++ b/ROCm-Examples-VS2022.sln @@ -238,6 +238,18 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "spitsv_vs2022", "Libraries\ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gpsv_vs2022", "Libraries\rocSPARSE\preconditioner\gpsv\gpsv_vs2022.vcxproj", "{65DD89E3-AB8C-4EAE-B0AB-65FD1B120DC6}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "hipFFT", "hipFFT", "{25C8260E-C82B-40B5-A814-AAAEE15F136B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_d2z_vs2022", "Libraries\hipFFT\plan_d2z\plan_d2z_vs2022.vcxproj", "{F68640C9-872F-4ECA-8D29-54C4E83AD24E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "plan_z2z_vs2022", "Libraries\hipFFT\plan_z2z\plan_z2z_vs2022.vcxproj", "{C64E34C7-D9C9-4D90-8137-DB06D7EEF979}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "rocFFT", "rocFFT", "{B719FEA3-73EB-4365-B552-D232766B40BD}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "callback_vs2022", "Libraries\rocFFT\callback\callback_vs2022.vcxproj", "{44A60ED3-BF12-4190-8242-442946300C3E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "multi_gpu_vs2022", "Libraries\rocFFT\multi_gpu\multi_gpu_vs2022.vcxproj", "{AEB1E9B9-2C24-46AA-A78B-A6F2531E14F4}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -644,6 +656,22 @@ Global {65DD89E3-AB8C-4EAE-B0AB-65FD1B120DC6}.Debug|x64.Build.0 = Debug|x64 {65DD89E3-AB8C-4EAE-B0AB-65FD1B120DC6}.Release|x64.ActiveCfg = Release|x64 {65DD89E3-AB8C-4EAE-B0AB-65FD1B120DC6}.Release|x64.Build.0 = Release|x64 + {F68640C9-872F-4ECA-8D29-54C4E83AD24E}.Debug|x64.ActiveCfg = Debug|x64 + {F68640C9-872F-4ECA-8D29-54C4E83AD24E}.Debug|x64.Build.0 = Debug|x64 + {F68640C9-872F-4ECA-8D29-54C4E83AD24E}.Release|x64.ActiveCfg = Release|x64 + {F68640C9-872F-4ECA-8D29-54C4E83AD24E}.Release|x64.Build.0 = Release|x64 + {C64E34C7-D9C9-4D90-8137-DB06D7EEF979}.Debug|x64.ActiveCfg = Debug|x64 + {C64E34C7-D9C9-4D90-8137-DB06D7EEF979}.Debug|x64.Build.0 = Debug|x64 + {C64E34C7-D9C9-4D90-8137-DB06D7EEF979}.Release|x64.ActiveCfg = Release|x64 + {C64E34C7-D9C9-4D90-8137-DB06D7EEF979}.Release|x64.Build.0 = Release|x64 + {44A60ED3-BF12-4190-8242-442946300C3E}.Debug|x64.ActiveCfg = Debug|x64 + {44A60ED3-BF12-4190-8242-442946300C3E}.Debug|x64.Build.0 = Debug|x64 + {44A60ED3-BF12-4190-8242-442946300C3E}.Release|x64.ActiveCfg = Release|x64 + {44A60ED3-BF12-4190-8242-442946300C3E}.Release|x64.Build.0 = Release|x64 + {AEB1E9B9-2C24-46AA-A78B-A6F2531E14F4}.Debug|x64.ActiveCfg = Debug|x64 + {AEB1E9B9-2C24-46AA-A78B-A6F2531E14F4}.Debug|x64.Build.0 = Debug|x64 + {AEB1E9B9-2C24-46AA-A78B-A6F2531E14F4}.Release|x64.ActiveCfg = Release|x64 + {AEB1E9B9-2C24-46AA-A78B-A6F2531E14F4}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -764,6 +792,12 @@ Global {DC1DF216-BC97-4797-8EA7-8DDCC38DFDCF} = {F91F4254-0ADD-4955-BDFE-53CB4EDBF601} {A987BF4A-988D-410A-B3EF-1140AEA10960} = {F91F4254-0ADD-4955-BDFE-53CB4EDBF601} {65DD89E3-AB8C-4EAE-B0AB-65FD1B120DC6} = {0AFB7E3F-4173-4F47-A068-17CAB93DA563} + {25C8260E-C82B-40B5-A814-AAAEE15F136B} = {7676633F-925E-4AEF-9F60-7A715A1EFBFE} + {F68640C9-872F-4ECA-8D29-54C4E83AD24E} = {25C8260E-C82B-40B5-A814-AAAEE15F136B} + {C64E34C7-D9C9-4D90-8137-DB06D7EEF979} = {25C8260E-C82B-40B5-A814-AAAEE15F136B} + {B719FEA3-73EB-4365-B552-D232766B40BD} = {7676633F-925E-4AEF-9F60-7A715A1EFBFE} + {44A60ED3-BF12-4190-8242-442946300C3E} = {B719FEA3-73EB-4365-B552-D232766B40BD} + {AEB1E9B9-2C24-46AA-A78B-A6F2531E14F4} = {B719FEA3-73EB-4365-B552-D232766B40BD} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {D648FD37-D8CB-4EA5-8445-38BEF36F6736}