diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c index a24b5e9e6022bd..373ba00a4db55a 100644 --- a/clang/test/Driver/linker-wrapper-image.c +++ b/clang/test/Driver/linker-wrapper-image.c @@ -30,8 +30,8 @@ // OPENMP: define internal void @.omp_offloading.descriptor_reg() section ".text.startup" { // OPENMP-NEXT: entry: -// OPENMP-NEXT: %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg) // OPENMP-NEXT: call void @__tgt_register_lib(ptr @.omp_offloading.descriptor) +// OPENMP-NEXT: %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg) // OPENMP-NEXT: ret void // OPENMP-NEXT: } diff --git a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp index 7241d15ed1c670..8b6f9ea1f4cca3 100644 --- a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp +++ b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp @@ -232,12 +232,13 @@ void createRegisterFunction(Module &M, GlobalVariable *BinDesc, // Construct function body IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func)); + Builder.CreateCall(RegFuncC, BinDesc); + // Register the destructors with 'atexit'. This is expected by the CUDA // runtime and ensures that we clean up before dynamic objects are destroyed. - // This needs to be done before the runtime is called and registers its own. + // This needs to be done after plugin initialization to ensure that it is + // called before the plugin runtime is destroyed. Builder.CreateCall(AtExit, UnregFunc); - - Builder.CreateCall(RegFuncC, BinDesc); Builder.CreateRetVoid(); // Add this function to constructors. diff --git a/offload/include/PluginManager.h b/offload/include/PluginManager.h index 6557623b0afabd..f76014ab37375b 100644 --- a/offload/include/PluginManager.h +++ b/offload/include/PluginManager.h @@ -13,10 +13,11 @@ #ifndef OMPTARGET_PLUGIN_MANAGER_H #define OMPTARGET_PLUGIN_MANAGER_H +#include "PluginInterface.h" + #include "DeviceImage.h" #include "ExclusiveAccess.h" #include "Shared/APITypes.h" -#include "Shared/PluginAPI.h" #include "Shared/Requirements.h" #include "device.h" @@ -34,38 +35,7 @@ #include #include -struct PluginManager; - -/// Plugin adaptors should be created via `PluginAdaptorTy::create` which will -/// invoke the constructor and call `PluginAdaptorTy::init`. Eventual errors are -/// reported back to the caller, otherwise a valid and initialized adaptor is -/// returned. -struct PluginAdaptorTy { - /// Try to create a plugin adaptor from a filename. - static llvm::Expected> - create(const std::string &Name); - - /// Name of the shared object file representing the plugin. - std::string Name; - - /// Access to the shared object file representing the plugin. - std::unique_ptr LibraryHandler; - -#define PLUGIN_API_HANDLE(NAME) \ - using NAME##_ty = decltype(__tgt_rtl_##NAME); \ - NAME##_ty *NAME = nullptr; - -#include "Shared/PluginAPI.inc" -#undef PLUGIN_API_HANDLE - - /// Create a plugin adaptor for filename \p Name with a dynamic library \p DL. - PluginAdaptorTy(const std::string &Name, - std::unique_ptr DL); - - /// Initialize the plugin adaptor, this can fail in which case the adaptor is - /// useless. - llvm::Error init(); -}; +using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; /// Struct for the data required to handle plugins struct PluginManager { @@ -80,6 +50,8 @@ struct PluginManager { void init(); + void deinit(); + // Register a shared library with all (compatible) RTLs. void registerLib(__tgt_bin_desc *Desc); @@ -92,10 +64,9 @@ struct PluginManager { std::make_unique(TgtBinDesc, TgtDeviceImage)); } - /// Initialize as many devices as possible for this plugin adaptor. Devices - /// that fail to initialize are ignored. Returns the offset the devices were - /// registered at. - void initDevices(PluginAdaptorTy &RTL); + /// Initialize as many devices as possible for this plugin. Devices that fail + /// to initialize are ignored. + void initDevices(GenericPluginTy &RTL); /// Return the device presented to the user as device \p DeviceNo if it is /// initialized and ready. Otherwise return an error explaining the problem. @@ -151,8 +122,8 @@ struct PluginManager { // Initialize all plugins. void initAllPlugins(); - /// Iterator range for all plugin adaptors (in use or not, but always valid). - auto pluginAdaptors() { return llvm::make_pointee_range(PluginAdaptors); } + /// Iterator range for all plugins (in use or not, but always valid). + auto plugins() { return llvm::make_pointee_range(Plugins); } /// Return the user provided requirements. int64_t getRequirements() const { return Requirements.getRequirements(); } @@ -164,14 +135,14 @@ struct PluginManager { bool RTLsLoaded = false; llvm::SmallVector<__tgt_bin_desc *> DelayedBinDesc; - // List of all plugin adaptors, in use or not. - llvm::SmallVector> PluginAdaptors; + // List of all plugins, in use or not. + llvm::SmallVector> Plugins; - // Mapping of plugin adaptors to offsets in the device table. - llvm::DenseMap DeviceOffsets; + // Mapping of plugins to offsets in the device table. + llvm::DenseMap DeviceOffsets; - // Mapping of plugin adaptors to the number of used devices. - llvm::DenseMap DeviceUsed; + // Mapping of plugins to the number of used devices. + llvm::DenseMap DeviceUsed; // Set of all device images currently in use. llvm::DenseSet UsedImages; diff --git a/offload/include/device.h b/offload/include/device.h index 3d284ebfaecb5b..62a7219c880113 100644 --- a/offload/include/device.h +++ b/offload/include/device.h @@ -33,14 +33,17 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "PluginInterface.h" + +using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy; + // Forward declarations. -struct PluginAdaptorTy; struct __tgt_bin_desc; struct __tgt_target_table; struct DeviceTy { int32_t DeviceID; - PluginAdaptorTy *RTL; + GenericPluginTy *RTL; int32_t RTLDeviceID; /// The physical number of processors that may concurrently execute a team /// For cuda, this is number of SMs, for amdgcn, this is number of CUs. @@ -51,7 +54,7 @@ struct DeviceTy { /// Controlled via environment flag OMPX_FORCE_SYNC_REGIONS bool ForceSynchronousTargetRegions = false; - DeviceTy(PluginAdaptorTy *RTL, int32_t DeviceID, int32_t RTLDeviceID); + DeviceTy(GenericPluginTy *RTL, int32_t DeviceID, int32_t RTLDeviceID); // DeviceTy is not copyable DeviceTy(const DeviceTy &D) = delete; DeviceTy &operator=(const DeviceTy &D) = delete; diff --git a/offload/plugins-nextgen/CMakeLists.txt b/offload/plugins-nextgen/CMakeLists.txt index 629711fc69335d..959719b01b8590 100644 --- a/offload/plugins-nextgen/CMakeLists.txt +++ b/offload/plugins-nextgen/CMakeLists.txt @@ -14,7 +14,7 @@ set(common_dir ${CMAKE_CURRENT_SOURCE_DIR}/common) add_subdirectory(common) function(add_target_library target_name lib_name) - add_llvm_library(${target_name} SHARED + add_llvm_library(${target_name} STATIC LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} AggressiveInstCombine @@ -45,27 +45,17 @@ function(add_target_library target_name lib_name) ) llvm_update_compile_flags(${target_name}) + target_include_directories(${target_name} PUBLIC ${common_dir}/include) + if(OMPT_TARGET_DEFAULT AND LIBOMPTARGET_OMPT_SUPPORT) + target_include_directories(${target_name} PUBLIC ${common_dir}/OMPT) + endif() target_link_libraries(${target_name} PRIVATE PluginCommon ${llvm_libs} ${OPENMP_PTHREAD_LIB}) target_compile_definitions(${target_name} PRIVATE TARGET_NAME=${lib_name}) target_compile_definitions(${target_name} PRIVATE DEBUG_PREFIX="TARGET ${lib_name} RTL") - - if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") - # On FreeBSD, the 'environ' symbol is undefined at link time, but resolved by - # the dynamic linker at runtime. Therefore, allow the symbol to be undefined - # when creating a shared library. - target_link_libraries(${target_name} PRIVATE "-Wl,--allow-shlib-undefined") - else() - target_link_libraries(${target_name} PRIVATE "-Wl,-z,defs") - endif() - - if(LIBOMP_HAVE_VERSION_SCRIPT_FLAG) - target_link_libraries(${target_name} PRIVATE - "-Wl,--version-script=${common_dir}/../exports") - endif() - set_target_properties(${target_name} PROPERTIES CXX_VISIBILITY_PRESET protected) + set_target_properties(${target_name} PROPERTIES POSITION_INDEPENDENT_CODE ON) endfunction() foreach(plugin IN LISTS LIBOMPTARGET_PLUGINS_TO_BUILD) diff --git a/offload/plugins-nextgen/amdgpu/CMakeLists.txt b/offload/plugins-nextgen/amdgpu/CMakeLists.txt index 3b5f3a3ee6a86d..02da319ab1ee40 100644 --- a/offload/plugins-nextgen/amdgpu/CMakeLists.txt +++ b/offload/plugins-nextgen/amdgpu/CMakeLists.txt @@ -76,14 +76,3 @@ else() libomptarget_say("Not generating AMDGPU tests, no supported devices detected." " Use 'LIBOMPTARGET_FORCE_AMDGPU_TESTS' to override.") endif() - -# Install plugin under the lib destination folder. -install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "${OFFLOAD_INSTALL_LIBDIR}") -if(NOT DEFINED CMAKE_INSTALL_RPATH) - set_target_properties(omptarget.rtl.amdgpu PROPERTIES - INSTALL_RPATH "$ORIGIN") -endif() - -set_target_properties(omptarget.rtl.amdgpu PROPERTIES - BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.." - CXX_VISIBILITY_PRESET protected) diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index e4b7b92a4f91a7..f47ca091a94713 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -688,7 +688,7 @@ struct AMDGPUKernelTy : public GenericKernelTy { WGSizeName += "_wg_size"; GlobalTy HostConstWGSize(WGSizeName, sizeof(decltype(ConstWGSize)), &ConstWGSize); - GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler(); + GenericGlobalHandlerTy &GHandler = Device.Plugin.getGlobalHandler(); if (auto Err = GHandler.readGlobalFromImage(Device, AMDImage, HostConstWGSize)) { // In case it is not found, we simply stick with the defaults. @@ -2911,7 +2911,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { if (!AMDGPUKernel) return Plugin::error("Failed to allocate memory for AMDGPU kernel"); - new (AMDGPUKernel) AMDGPUKernelTy(Name, PluginTy::get().getGlobalHandler()); + new (AMDGPUKernel) AMDGPUKernelTy(Name, Plugin.getGlobalHandler()); return *AMDGPUKernel; } @@ -4274,10 +4274,6 @@ struct AMDGPUPluginTy final : public GenericPluginTy { UInt32Envar KernTrace("LIBOMPTARGET_KERNEL_TRACE", 0); llvm::omp::target::plugin::PrintKernelTrace = KernTrace.get(); -#ifdef OMPT_SUPPORT - ompt::connectLibrary(); -#endif - // Register event handler to detect memory errors on the devices. Status = hsa_amd_register_system_event_handler(eventHandler, nullptr); if (auto Err = Plugin::check( @@ -4366,6 +4362,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy { Triple::ArchType getTripleArch() const override { return Triple::amdgcn; } + const char *getName() const override { return GETNAME(TARGET_NAME); } + /// Get the ELF code for recognizing the compatible image binary. uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; } @@ -4685,8 +4683,6 @@ Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice, return Plugin::success(); } -GenericPluginTy *PluginTy::createPlugin() { return new AMDGPUPluginTy(); } - template static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) { hsa_status_t ResultCode = static_cast(Code); @@ -4779,17 +4775,22 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) { namespace llvm::omp::target::plugin { /// Enable/disable kernel profiling for the given device. -void setOmptQueueProfile(int DeviceId, int Enable) { - AMDGPUPluginTy &Plugin = PluginTy::get(); - static_cast(Plugin.getDevice(DeviceId)) - .setOmptQueueProfile(Enable); +void setOmptQueueProfile(void *Device, int Enable) { + reinterpret_cast(Device) + ->setOmptQueueProfile(Enable); } } // namespace llvm::omp::target::plugin /// Enable/disable kernel profiling for the given device. -void setGlobalOmptKernelProfile(int DeviceId, int Enable) { - llvm::omp::target::plugin::setOmptQueueProfile(DeviceId, Enable); +void setGlobalOmptKernelProfile(void *Device, int Enable) { + llvm::omp::target::plugin::setOmptQueueProfile(Device, Enable); } #endif + +extern "C" { +llvm::omp::target::plugin::GenericPluginTy *createPlugin_amdgpu() { + return new llvm::omp::target::plugin::AMDGPUPluginTy(); +} +} diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt index 820aae5415d53d..7a90ae3c6cc092 100644 --- a/offload/plugins-nextgen/common/CMakeLists.txt +++ b/offload/plugins-nextgen/common/CMakeLists.txt @@ -66,6 +66,4 @@ target_include_directories(PluginCommon PUBLIC ${LIBOMPTARGET_INCLUDE_DIR} ) -set_target_properties(PluginCommon PROPERTIES - POSITION_INDEPENDENT_CODE ON - CXX_VISIBILITY_PRESET protected) +set_target_properties(PluginCommon PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/offload/plugins-nextgen/common/OMPT/OmptDeviceTracing.h b/offload/plugins-nextgen/common/OMPT/OmptDeviceTracing.h index a57400ed047490..e9cfe8359df124 100644 --- a/offload/plugins-nextgen/common/OMPT/OmptDeviceTracing.h +++ b/offload/plugins-nextgen/common/OMPT/OmptDeviceTracing.h @@ -28,7 +28,7 @@ #define DEBUG_PREFIX "OMPT" extern void setOmptAsyncCopyProfile(bool Enable); -extern void setGlobalOmptKernelProfile(int DeviceId, int Enable); +extern void setGlobalOmptKernelProfile(void *Device, int Enable); extern uint64_t getSystemTimestampInNs(); namespace llvm { diff --git a/offload/plugins-nextgen/common/OMPT/OmptTracing.cpp b/offload/plugins-nextgen/common/OMPT/OmptTracing.cpp index cd33b443975018..6cd0f12fb08cfa 100644 --- a/offload/plugins-nextgen/common/OMPT/OmptTracing.cpp +++ b/offload/plugins-nextgen/common/OMPT/OmptTracing.cpp @@ -53,14 +53,6 @@ double llvm::omp::target::ompt::HostToDeviceOffset = .0; std::map llvm::omp::target::ompt::Devices; -std::atomic llvm::omp::target::ompt::TracingTypesEnabled{0}; - -bool llvm::omp::target::ompt::TracingActive = false; - -void llvm::omp::target::ompt::setTracingState(bool State) { - TracingActive = State; -} - std::shared_ptr llvm::omp::target::ompt::getParentLibrary() { static bool ParentLibraryAssigned = false; @@ -141,7 +133,7 @@ ompt_start_trace(ompt_device_t *Device, ompt_callback_buffer_request_t Request, setOmptAsyncCopyProfile(/*Enable=*/true); // Enable queue dispatch profiling if (DeviceId >= 0) - setGlobalOmptKernelProfile(DeviceId, /*Enable=*/1); + setGlobalOmptKernelProfile(Device, /*Enable=*/1); else REPORT("May not enable kernel profiling for invalid device id=%d\n", DeviceId); @@ -179,7 +171,7 @@ OMPT_API_ROUTINE int ompt_stop_trace(ompt_device_t *Device) { // Disable queue dispatch profiling int DeviceId = getDeviceId(Device); if (DeviceId >= 0) - setGlobalOmptKernelProfile(DeviceId, /*Enable=*/0); + setGlobalOmptKernelProfile(Device, /*Enable=*/0); else REPORT("May not disable kernel profiling for invalid device id=%d\n", DeviceId); diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 2664c17b5c7bf1..2dd7c68fdd2a6f 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -1122,6 +1122,9 @@ struct GenericPluginTy { /// Get the target triple of this plugin. virtual Triple::ArchType getTripleArch() const = 0; + /// Get the constant name identifier for this plugin. + virtual const char *getName() const = 0; + /// Allocate a structure using the internal allocator. template Ty *allocate() { return reinterpret_cast(Allocator.Allocate(sizeof(Ty), alignof(Ty))); @@ -1391,7 +1394,7 @@ namespace Plugin { /// Create a success error. This is the same as calling Error::success(), but /// it is recommended to use this one for consistency with Plugin::error() and /// Plugin::check(). -static Error success() { return Error::success(); } +static inline Error success() { return Error::success(); } /// Create a string error. template @@ -1411,95 +1414,6 @@ template static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args); } // namespace Plugin -/// Class for simplifying the getter operation of the plugin. Anywhere on the -/// code, the current plugin can be retrieved by Plugin::get(). The class also -/// declares functions to create plugin-specific object instances. The check(), -/// createPlugin(), createDevice() and createGlobalHandler() functions should be -/// defined by each plugin implementation. -class PluginTy { - // Reference to the plugin instance. - static GenericPluginTy *SpecificPlugin; - - PluginTy() { - if (auto Err = init()) - REPORT("Failed to initialize plugin: %s\n", - toString(std::move(Err)).data()); - } - - ~PluginTy() { - if (auto Err = deinit()) - REPORT("Failed to deinitialize plugin: %s\n", - toString(std::move(Err)).data()); - } - - PluginTy(const PluginTy &) = delete; - void operator=(const PluginTy &) = delete; - - /// Create and intialize the plugin instance. - static Error init() { - assert(!SpecificPlugin && "Plugin already created"); - - // Create the specific plugin. - SpecificPlugin = createPlugin(); - assert(SpecificPlugin && "Plugin was not created"); - - // Initialize the plugin. - return SpecificPlugin->init(); - } - - // Deinitialize and destroy the plugin instance. - static Error deinit() { - assert(SpecificPlugin && "Plugin no longer valid"); - - for (int32_t DevNo = 0, NumDev = SpecificPlugin->getNumDevices(); - DevNo < NumDev; ++DevNo) - if (auto Err = SpecificPlugin->deinitDevice(DevNo)) - return Err; - - // Deinitialize the plugin. - if (auto Err = SpecificPlugin->deinit()) - return Err; - - // Delete the plugin instance. - delete SpecificPlugin; - - // Invalidate the plugin reference. - SpecificPlugin = nullptr; - - return Plugin::success(); - } - -public: - /// Initialize the plugin if needed. The plugin could have been initialized by - /// a previous call to Plugin::get(). - static Error initIfNeeded() { - // Trigger the initialization if needed. - get(); - - return Error::success(); - } - - /// Get a reference (or create if it was not created) to the plugin instance. - static GenericPluginTy &get() { - // This static variable will initialize the underlying plugin instance in - // case there was no previous explicit initialization. The initialization is - // thread safe. - static PluginTy Plugin; - - assert(SpecificPlugin && "Plugin is not active"); - return *SpecificPlugin; - } - - /// Get a reference to the plugin with a specific plugin-specific type. - template static Ty &get() { return static_cast(get()); } - - /// Indicate whether the plugin is active. - static bool isActive() { return SpecificPlugin != nullptr; } - - /// Create a plugin instance. - static GenericPluginTy *createPlugin(); -}; - /// Auxiliary interface class for GenericDeviceResourceManagerTy. This class /// acts as a reference to a device resource, such as a stream, and requires /// some basic functions to be implemented. The derived class should define an diff --git a/offload/plugins-nextgen/common/include/Utils/ELF.h b/offload/plugins-nextgen/common/include/Utils/ELF.h index f87e0a5ed02b4c..dcfdb5bd7b0352 100644 --- a/offload/plugins-nextgen/common/include/Utils/ELF.h +++ b/offload/plugins-nextgen/common/include/Utils/ELF.h @@ -13,8 +13,6 @@ #ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_ELF_UTILS_H #define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_ELF_UTILS_H -#include "Shared/PluginAPI.h" - #include "llvm/Object/ELF.h" #include "llvm/Object/ELFObjectFile.h" diff --git a/offload/plugins-nextgen/common/src/JIT.cpp b/offload/plugins-nextgen/common/src/JIT.cpp index 9eb610cab4de66..9d58e6060646ba 100644 --- a/offload/plugins-nextgen/common/src/JIT.cpp +++ b/offload/plugins-nextgen/common/src/JIT.cpp @@ -56,28 +56,6 @@ bool isImageBitcode(const __tgt_device_image &Image) { return identify_magic(Binary) == file_magic::bitcode; } -std::once_flag InitFlag; - -void init(Triple TT) { - codegen::RegisterCodeGenFlags(); -#ifdef LIBOMPTARGET_JIT_NVPTX - if (TT.isNVPTX()) { - LLVMInitializeNVPTXTargetInfo(); - LLVMInitializeNVPTXTarget(); - LLVMInitializeNVPTXTargetMC(); - LLVMInitializeNVPTXAsmPrinter(); - } -#endif -#ifdef LIBOMPTARGET_JIT_AMDGPU - if (TT.isAMDGPU()) { - LLVMInitializeAMDGPUTargetInfo(); - LLVMInitializeAMDGPUTarget(); - LLVMInitializeAMDGPUTargetMC(); - LLVMInitializeAMDGPUAsmPrinter(); - } -#endif -} - Expected> createModuleFromMemoryBuffer(std::unique_ptr &MB, LLVMContext &Context) { @@ -148,7 +126,23 @@ createTargetMachine(Module &M, std::string CPU, unsigned OptLevel) { } // namespace JITEngine::JITEngine(Triple::ArchType TA) : TT(Triple::getArchTypeName(TA)) { - std::call_once(InitFlag, init, TT); + codegen::RegisterCodeGenFlags(); +#ifdef LIBOMPTARGET_JIT_NVPTX + if (TT.isNVPTX()) { + LLVMInitializeNVPTXTargetInfo(); + LLVMInitializeNVPTXTarget(); + LLVMInitializeNVPTXTargetMC(); + LLVMInitializeNVPTXAsmPrinter(); + } +#endif +#ifdef LIBOMPTARGET_JIT_AMDGPU + if (TT.isAMDGPU()) { + LLVMInitializeAMDGPUTargetInfo(); + LLVMInitializeAMDGPUTarget(); + LLVMInitializeAMDGPUTargetMC(); + LLVMInitializeAMDGPUAsmPrinter(); + } +#endif } void JITEngine::opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M, diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index cafc0084f4c00f..47bf292796237d 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -13,7 +13,6 @@ #include "Shared/APITypes.h" #include "Shared/Debug.h" #include "Shared/Environment.h" -#include "Shared/PluginAPI.h" #include "GlobalHandler.h" #include "JIT.h" @@ -64,8 +63,6 @@ struct OmptTimestampRAII { }; #endif -GenericPluginTy *PluginTy::SpecificPlugin = nullptr; - namespace llvm::omp::target::plugin { // Used for kernel tracing implementation int PrintKernelTrace = 0; @@ -1694,37 +1691,39 @@ Expected GenericPluginTy::checkELFImage(StringRef Image) const { } int32_t GenericPluginTy::is_valid_binary(__tgt_device_image *Image) { - if (!PluginTy::isActive()) - return false; - - StringRef Buffer(reinterpret_cast(Image->ImageStart), - target::getPtrDiff(Image->ImageEnd, Image->ImageStart)); - - auto HandleError = [&](Error Err) -> bool { - [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); - DP("Failure to check validity of image %p: %s", Image, ErrStr.c_str()); - return false; - }; - switch (identify_magic(Buffer)) { - case file_magic::elf: - case file_magic::elf_relocatable: - case file_magic::elf_executable: - case file_magic::elf_shared_object: - case file_magic::elf_core: { - auto MatchOrErr = checkELFImage(Buffer); - if (Error Err = MatchOrErr.takeError()) - return HandleError(std::move(Err)); - return *MatchOrErr; - } - case file_magic::bitcode: { - auto MatchOrErr = getJIT().checkBitcodeImage(Buffer); - if (Error Err = MatchOrErr.takeError()) - return HandleError(std::move(Err)); - return *MatchOrErr; - } - default: - return false; - } + auto T = logger::log(__func__, Image); + int32_t R = [&]() { + StringRef Buffer(reinterpret_cast(Image->ImageStart), + target::getPtrDiff(Image->ImageEnd, Image->ImageStart)); + + auto HandleError = [&](Error Err) -> bool { + [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); + DP("Failure to check validity of image %p: %s", Image, ErrStr.c_str()); + return false; + }; + switch (identify_magic(Buffer)) { + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: { + auto MatchOrErr = checkELFImage(Buffer); + if (Error Err = MatchOrErr.takeError()) + return HandleError(std::move(Err)); + return *MatchOrErr; + } + case file_magic::bitcode: { + auto MatchOrErr = getJIT().checkBitcodeImage(Buffer); + if (Error Err = MatchOrErr.takeError()) + return HandleError(std::move(Err)); + return *MatchOrErr; + } + default: + return false; + } + }(); + T.res(R); + return R; } void GenericPluginTy::check_invalid_image(__tgt_device_image *InvalidImage) { @@ -1737,50 +1736,86 @@ int32_t GenericPluginTy::supports_empty_images() { } int32_t GenericPluginTy::init_device(int32_t DeviceId) { - auto Err = initDevice(DeviceId); - if (Err) { - REPORT("Failure to initialize device %d: %s\n", DeviceId, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId); + auto R = [&]() { + auto Err = initDevice(DeviceId); + if (Err) { + REPORT("Failure to initialize device %d: %s\n", DeviceId, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } -int32_t GenericPluginTy::number_of_devices() { return getNumDevices(); } +int32_t GenericPluginTy::number_of_devices() { + auto T = logger::log(__func__); + auto R = [&]() { return getNumDevices(); }(); + T.res(R); + return R; +} int GenericPluginTy::number_of_team_procs(int DeviceId) { - return getDevice(DeviceId).getNumComputeUnits(); + auto T = logger::log(__func__, DeviceId); + auto R = [&]() { return getDevice(DeviceId).getNumComputeUnits(); }(); + T.res(R); + return R; } bool GenericPluginTy::has_apu_device(int32_t DeviceId) { - return getDevice(DeviceId).hasAPUDevice(); + auto T = logger::log(__func__, DeviceId); + auto R = [&]() { return getDevice(DeviceId).hasAPUDevice(); }(); + T.res(R); + return R; } bool GenericPluginTy::has_USM_capable_dGPU(int32_t DeviceId) { - return getDevice(DeviceId).hasDGpuWithUsmSupport(); + auto T = logger::log(__func__, DeviceId); + auto R = [&]() { return getDevice(DeviceId).hasDGpuWithUsmSupport(); }(); + T.res(R); + return R; } bool GenericPluginTy::supports_unified_memory(int32_t DeviceId) { - return getDevice(DeviceId).supportsUnifiedMemory(); + auto T = logger::log(__func__, DeviceId); + auto R = [&]() { return getDevice(DeviceId).supportsUnifiedMemory(); }(); + T.res(R); + return R; } bool GenericPluginTy::is_fine_grained_memory_enabled(int32_t DeviceId) { - return getDevice(DeviceId).IsFineGrainedMemoryEnabled(); + auto T = logger::log(__func__, DeviceId); + auto R = [&]() { return getDevice(DeviceId).IsFineGrainedMemoryEnabled(); }(); + T.res(R); + return R; } bool GenericPluginTy::is_system_supporting_managed_memory(int32_t DeviceId) { - return IsSystemSupportingManagedMemory(); + auto T = logger::log(__func__, DeviceId); + auto R = [&]() { return IsSystemSupportingManagedMemory(); }(); + T.res(R); + return R; } int64_t GenericPluginTy::init_requires(int64_t RequiresFlags) { - setRequiresFlag(RequiresFlags); - return OFFLOAD_SUCCESS; + auto T = logger::log(__func__, RequiresFlags); + auto R = [&]() { + setRequiresFlag(RequiresFlags); + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::is_data_exchangable(int32_t SrcDeviceId, int32_t DstDeviceId) { - return isDataExchangable(SrcDeviceId, DstDeviceId); + auto T = logger::log(__func__, SrcDeviceId, DstDeviceId); + auto R = [&]() { return isDataExchangable(SrcDeviceId, DstDeviceId); }(); + T.res(R); + return R; } int32_t GenericPluginTy::initialize_record_replay(int32_t DeviceId, @@ -1788,254 +1823,353 @@ int32_t GenericPluginTy::initialize_record_replay(int32_t DeviceId, void *VAddr, bool isRecord, bool SaveOutput, uint64_t &ReqPtrArgOffset) { - GenericDeviceTy &Device = getDevice(DeviceId); - RecordReplayTy::RRStatusTy Status = - isRecord ? RecordReplayTy::RRStatusTy::RRRecording - : RecordReplayTy::RRStatusTy::RRReplaying; - - if (auto Err = RecordReplay.init(&Device, MemorySize, VAddr, Status, - SaveOutput, ReqPtrArgOffset)) { - REPORT("WARNING RR did not intialize RR-properly with %lu bytes" - "(Error: %s)\n", - MemorySize, toString(std::move(Err)).data()); - RecordReplay.setStatus(RecordReplayTy::RRStatusTy::RRDeactivated); - - if (!isRecord) { - return OFFLOAD_FAIL; + auto T = logger::log(__func__, DeviceId, MemorySize, VAddr, isRecord, + SaveOutput, &ReqPtrArgOffset); + auto R = [&]() { + GenericDeviceTy &Device = getDevice(DeviceId); + RecordReplayTy::RRStatusTy Status = + isRecord ? RecordReplayTy::RRStatusTy::RRRecording + : RecordReplayTy::RRStatusTy::RRReplaying; + + if (auto Err = RecordReplay.init(&Device, MemorySize, VAddr, Status, + SaveOutput, ReqPtrArgOffset)) { + REPORT("WARNING RR did not intialize RR-properly with %lu bytes" + "(Error: %s)\n", + MemorySize, toString(std::move(Err)).data()); + RecordReplay.setStatus(RecordReplayTy::RRStatusTy::RRDeactivated); + + if (!isRecord) { + return OFFLOAD_FAIL; + } } - } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::load_binary(int32_t DeviceId, __tgt_device_image *TgtImage, __tgt_device_binary *Binary) { - GenericDeviceTy &Device = getDevice(DeviceId); - - auto ImageOrErr = Device.loadBinary(*this, TgtImage); - if (!ImageOrErr) { - auto Err = ImageOrErr.takeError(); - REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage, - DeviceId, toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, TgtImage, Binary); + auto R = [&]() { + GenericDeviceTy &Device = getDevice(DeviceId); + + auto ImageOrErr = Device.loadBinary(*this, TgtImage); + if (!ImageOrErr) { + auto Err = ImageOrErr.takeError(); + REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage, + DeviceId, toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - DeviceImageTy *Image = *ImageOrErr; - assert(Image != nullptr && "Invalid Image"); + DeviceImageTy *Image = *ImageOrErr; + assert(Image != nullptr && "Invalid Image"); - *Binary = __tgt_device_binary{reinterpret_cast(Image)}; + *Binary = __tgt_device_binary{reinterpret_cast(Image)}; - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } void *GenericPluginTy::data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr, int32_t Kind) { + auto T = logger::log(__func__, DeviceId, Size, HostPtr, Kind); + auto R = [&]() -> void * { #ifdef OMPT_SUPPORT - // If OMPT is enabled, collect start and end times for the allocation. - OmptTimestampRAII Ts; + // If OMPT is enabled, collect start and end times for the allocation. + OmptTimestampRAII Ts; #endif - auto AllocOrErr = - getDevice(DeviceId).dataAlloc(Size, HostPtr, (TargetAllocTy)Kind); - if (!AllocOrErr) { - auto Err = AllocOrErr.takeError(); - REPORT("Failure to allocate device memory: %s\n", - toString(std::move(Err)).data()); - return nullptr; - } - assert(*AllocOrErr && "Null pointer upon successful allocation"); + auto AllocOrErr = + getDevice(DeviceId).dataAlloc(Size, HostPtr, (TargetAllocTy)Kind); + if (!AllocOrErr) { + auto Err = AllocOrErr.takeError(); + REPORT("Failure to allocate device memory: %s\n", + toString(std::move(Err)).data()); + return nullptr; + } + assert(*AllocOrErr && "Null pointer upon successful allocation"); - // Method has no effect when the CUDA Plugin is used. - // This method can only be called if HostPtr is not null. - if (HostPtr && Kind == TARGET_ALLOC_SHARED) - __tgt_rtl_set_coarse_grain_mem_region(DeviceId, HostPtr, Size); + // Method has no effect when the CUDA Plugin is used. + // This method can only be called if HostPtr is not null. + if (HostPtr && Kind == TARGET_ALLOC_SHARED) + set_coarse_grain_mem_region(DeviceId, HostPtr, Size); - return *AllocOrErr; + return *AllocOrErr; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind) { + auto T = logger::log(__func__, DeviceId, TgtPtr, Kind); + auto R = [&]() { #ifdef OMPT_SUPPORT - // If OMPT is enabled, collect start and end times for the data delete. - OmptTimestampRAII Ts; + // If OMPT is enabled, collect start and end times for the data delete. + OmptTimestampRAII Ts; #endif - auto Err = getDevice(DeviceId).dataDelete(TgtPtr, (TargetAllocTy)Kind); - if (Err) { - REPORT("Failure to deallocate device pointer %p: %s\n", TgtPtr, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto Err = getDevice(DeviceId).dataDelete(TgtPtr, (TargetAllocTy)Kind); + if (Err) { + REPORT("Failure to deallocate device pointer %p: %s\n", TgtPtr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::data_lock(int32_t DeviceId, void *Ptr, int64_t Size, void **LockedPtr) { - auto LockedPtrOrErr = getDevice(DeviceId).dataLock(Ptr, Size); - if (!LockedPtrOrErr) { - auto Err = LockedPtrOrErr.takeError(); - REPORT("Failure to lock memory %p: %s\n", Ptr, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, Ptr, Size, LockedPtr); + auto R = [&]() { + auto LockedPtrOrErr = getDevice(DeviceId).dataLock(Ptr, Size); + if (!LockedPtrOrErr) { + auto Err = LockedPtrOrErr.takeError(); + REPORT("Failure to lock memory %p: %s\n", Ptr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - if (!(*LockedPtrOrErr)) { - REPORT("Failure to lock memory %p: obtained a null locked pointer\n", Ptr); - return OFFLOAD_FAIL; - } - *LockedPtr = *LockedPtrOrErr; + if (!(*LockedPtrOrErr)) { + REPORT("Failure to lock memory %p: obtained a null locked pointer\n", + Ptr); + return OFFLOAD_FAIL; + } + *LockedPtr = *LockedPtrOrErr; - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::data_unlock(int32_t DeviceId, void *Ptr) { - auto Err = getDevice(DeviceId).dataUnlock(Ptr); - if (Err) { - REPORT("Failure to unlock memory %p: %s\n", Ptr, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, Ptr); + auto R = [&]() { + auto Err = getDevice(DeviceId).dataUnlock(Ptr); + if (Err) { + REPORT("Failure to unlock memory %p: %s\n", Ptr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::data_notify_mapped(int32_t DeviceId, void *HstPtr, int64_t Size) { - auto Err = getDevice(DeviceId).notifyDataMapped(HstPtr, Size); - if (Err) { - REPORT("Failure to notify data mapped %p: %s\n", HstPtr, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, HstPtr, Size); + auto R = [&]() { + auto Err = getDevice(DeviceId).notifyDataMapped(HstPtr, Size); + if (Err) { + REPORT("Failure to notify data mapped %p: %s\n", HstPtr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::data_notify_unmapped(int32_t DeviceId, void *HstPtr) { - auto Err = getDevice(DeviceId).notifyDataUnmapped(HstPtr); - if (Err) { - REPORT("Failure to notify data unmapped %p: %s\n", HstPtr, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, HstPtr); + auto R = [&]() { + auto Err = getDevice(DeviceId).notifyDataUnmapped(HstPtr); + if (Err) { + REPORT("Failure to notify data unmapped %p: %s\n", HstPtr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size) { - return __tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size, - /*AsyncInfoPtr=*/nullptr); + auto T = logger::log(__func__, DeviceId, TgtPtr, HstPtr, Size); + auto R = [&]() { + return data_submit_async(DeviceId, TgtPtr, HstPtr, Size, + /*AsyncInfoPtr=*/nullptr); + }(); + T.res(R); + return R; } int32_t GenericPluginTy::data_submit_async(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size, __tgt_async_info *AsyncInfoPtr) { - auto Err = getDevice(DeviceId).dataSubmit(TgtPtr, HstPtr, Size, AsyncInfoPtr); - if (Err) { - REPORT("Failure to copy data from host to device. Pointers: host " - "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n", - DPxPTR(HstPtr), DPxPTR(TgtPtr), Size, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, TgtPtr, HstPtr, Size, + AsyncInfoPtr); + auto R = [&]() { + auto Err = + getDevice(DeviceId).dataSubmit(TgtPtr, HstPtr, Size, AsyncInfoPtr); + if (Err) { + REPORT("Failure to copy data from host to device. Pointers: host " + "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n", + DPxPTR(HstPtr), DPxPTR(TgtPtr), Size, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size) { - return __tgt_rtl_data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size, - /*AsyncInfoPtr=*/nullptr); + auto T = logger::log(__func__, DeviceId, HstPtr, TgtPtr, Size); + auto R = [&]() { + return data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size, + /*AsyncInfoPtr=*/nullptr); + }(); + T.res(R); + return R; } int32_t GenericPluginTy::data_retrieve_async(int32_t DeviceId, void *HstPtr, void *TgtPtr, int64_t Size, __tgt_async_info *AsyncInfoPtr) { - auto Err = - getDevice(DeviceId).dataRetrieve(HstPtr, TgtPtr, Size, AsyncInfoPtr); - if (Err) { - REPORT("Faliure to copy data from device to host. Pointers: host " - "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n", - DPxPTR(HstPtr), DPxPTR(TgtPtr), Size, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, HstPtr, TgtPtr, Size, + AsyncInfoPtr); + auto R = [&]() { + auto Err = + getDevice(DeviceId).dataRetrieve(HstPtr, TgtPtr, Size, AsyncInfoPtr); + if (Err) { + REPORT("Faliure to copy data from device to host. Pointers: host " + "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n", + DPxPTR(HstPtr), DPxPTR(TgtPtr), Size, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::data_exchange(int32_t SrcDeviceId, void *SrcPtr, int32_t DstDeviceId, void *DstPtr, int64_t Size) { - return __tgt_rtl_data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr, - Size, - /*AsyncInfoPtr=*/nullptr); + auto T = logger::log(__func__, SrcDeviceId, SrcPtr, DstDeviceId, + DstPtr, Size); + auto R = [&]() { + return data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr, Size, + /*AsyncInfoPtr=*/nullptr); + }(); + T.res(R); + return R; } int32_t GenericPluginTy::data_exchange_async(int32_t SrcDeviceId, void *SrcPtr, int DstDeviceId, void *DstPtr, int64_t Size, __tgt_async_info *AsyncInfo) { - GenericDeviceTy &SrcDevice = getDevice(SrcDeviceId); - GenericDeviceTy &DstDevice = getDevice(DstDeviceId); - auto Err = SrcDevice.dataExchange(SrcPtr, DstDevice, DstPtr, Size, AsyncInfo); - if (Err) { - REPORT("Failure to copy data from device (%d) to device (%d). Pointers: " - "host = " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n", - SrcDeviceId, DstDeviceId, DPxPTR(SrcPtr), DPxPTR(DstPtr), Size, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, SrcDeviceId, SrcPtr, DstDeviceId, + DstPtr, Size, AsyncInfo); + auto R = [&]() { + GenericDeviceTy &SrcDevice = getDevice(SrcDeviceId); + GenericDeviceTy &DstDevice = getDevice(DstDeviceId); + auto Err = + SrcDevice.dataExchange(SrcPtr, DstDevice, DstPtr, Size, AsyncInfo); + if (Err) { + REPORT("Failure to copy data from device (%d) to device (%d). Pointers: " + "host = " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n", + SrcDeviceId, DstDeviceId, DPxPTR(SrcPtr), DPxPTR(DstPtr), Size, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::launch_kernel_sync(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, KernelArgsTy *KernelArgs) { - __tgt_async_info *AsyncInfoPtr = nullptr; - return __tgt_rtl_launch_kernel(DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, - KernelArgs, AsyncInfoPtr); + auto T = logger::log(__func__, DeviceId, TgtEntryPtr, TgtArgs, + TgtOffsets, KernelArgs); + auto R = [&]() { + __tgt_async_info *AsyncInfoPtr = nullptr; + return launch_kernel(DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, KernelArgs, + AsyncInfoPtr); + }(); + T.res(R); + return R; } int32_t GenericPluginTy::launch_kernel(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, KernelArgsTy *KernelArgs, __tgt_async_info *AsyncInfoPtr) { - auto Err = getDevice(DeviceId).launchKernel(TgtEntryPtr, TgtArgs, TgtOffsets, - *KernelArgs, AsyncInfoPtr); - if (Err) { - REPORT("Failure to run target region " DPxMOD " in device %d: %s\n", - DPxPTR(TgtEntryPtr), DeviceId, toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, TgtEntryPtr, TgtArgs, + TgtOffsets, KernelArgs, AsyncInfoPtr); + auto R = [&]() { + auto Err = getDevice(DeviceId).launchKernel( + TgtEntryPtr, TgtArgs, TgtOffsets, *KernelArgs, AsyncInfoPtr); + if (Err) { + REPORT("Failure to run target region " DPxMOD " in device %d: %s\n", + DPxPTR(TgtEntryPtr), DeviceId, toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr) { - auto Err = getDevice(DeviceId).synchronize(AsyncInfoPtr); - if (Err) { - REPORT("Failure to synchronize stream %p: %s\n", AsyncInfoPtr->Queue, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, AsyncInfoPtr); + auto R = [&]() { + auto Err = getDevice(DeviceId).synchronize(AsyncInfoPtr); + if (Err) { + REPORT("Failure to synchronize stream %p: %s\n", AsyncInfoPtr->Queue, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::query_async(int32_t DeviceId, __tgt_async_info *AsyncInfoPtr) { - auto Err = getDevice(DeviceId).queryAsync(AsyncInfoPtr); - if (Err) { - REPORT("Failure to query stream %p: %s\n", AsyncInfoPtr->Queue, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, AsyncInfoPtr); + auto R = [&]() { + auto Err = getDevice(DeviceId).queryAsync(AsyncInfoPtr); + if (Err) { + REPORT("Failure to query stream %p: %s\n", AsyncInfoPtr->Queue, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } void GenericPluginTy::print_device_info(int32_t DeviceId) { @@ -2045,87 +2179,124 @@ void GenericPluginTy::print_device_info(int32_t DeviceId) { } int32_t GenericPluginTy::create_event(int32_t DeviceId, void **EventPtr) { - auto Err = getDevice(DeviceId).createEvent(EventPtr); - if (Err) { - REPORT("Failure to create event: %s\n", toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, EventPtr); + auto R = [&]() { + auto Err = getDevice(DeviceId).createEvent(EventPtr); + if (Err) { + REPORT("Failure to create event: %s\n", toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::record_event(int32_t DeviceId, void *EventPtr, __tgt_async_info *AsyncInfoPtr) { - auto Err = getDevice(DeviceId).recordEvent(EventPtr, AsyncInfoPtr); - if (Err) { - REPORT("Failure to record event %p: %s\n", EventPtr, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, EventPtr, AsyncInfoPtr); + auto R = [&]() { + auto Err = getDevice(DeviceId).recordEvent(EventPtr, AsyncInfoPtr); + if (Err) { + REPORT("Failure to record event %p: %s\n", EventPtr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::wait_event(int32_t DeviceId, void *EventPtr, __tgt_async_info *AsyncInfoPtr) { - auto Err = getDevice(DeviceId).waitEvent(EventPtr, AsyncInfoPtr); - if (Err) { - REPORT("Failure to wait event %p: %s\n", EventPtr, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, EventPtr, AsyncInfoPtr); + auto R = [&]() { + auto Err = getDevice(DeviceId).waitEvent(EventPtr, AsyncInfoPtr); + if (Err) { + REPORT("Failure to wait event %p: %s\n", EventPtr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::sync_event(int32_t DeviceId, void *EventPtr) { - auto Err = getDevice(DeviceId).syncEvent(EventPtr); - if (Err) { - REPORT("Failure to synchronize event %p: %s\n", EventPtr, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, EventPtr); + auto R = [&]() { + auto Err = getDevice(DeviceId).syncEvent(EventPtr); + if (Err) { + REPORT("Failure to synchronize event %p: %s\n", EventPtr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::destroy_event(int32_t DeviceId, void *EventPtr) { - auto Err = getDevice(DeviceId).destroyEvent(EventPtr); - if (Err) { - REPORT("Failure to destroy event %p: %s\n", EventPtr, - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, EventPtr); + auto R = [&]() { + auto Err = getDevice(DeviceId).destroyEvent(EventPtr); + if (Err) { + REPORT("Failure to destroy event %p: %s\n", EventPtr, + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::init_async_info(int32_t DeviceId, __tgt_async_info **AsyncInfoPtr) { - assert(AsyncInfoPtr && "Invalid async info"); - - auto Err = getDevice(DeviceId).initAsyncInfo(AsyncInfoPtr); - if (Err) { - REPORT("Failure to initialize async info at " DPxMOD " on device %d: %s\n", - DPxPTR(*AsyncInfoPtr), DeviceId, toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } - return OFFLOAD_SUCCESS; + auto T = logger::log(__func__, DeviceId, AsyncInfoPtr); + auto R = [&]() { + assert(AsyncInfoPtr && "Invalid async info"); + + auto Err = getDevice(DeviceId).initAsyncInfo(AsyncInfoPtr); + if (Err) { + REPORT("Failure to initialize async info at " DPxMOD + " on device %d: %s\n", + DPxPTR(*AsyncInfoPtr), DeviceId, toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::init_device_info(int32_t DeviceId, __tgt_device_info *DeviceInfo, const char **ErrStr) { - *ErrStr = ""; - - auto Err = getDevice(DeviceId).initDeviceInfo(DeviceInfo); - if (Err) { - REPORT("Failure to initialize device info at " DPxMOD " on device %d: %s\n", - DPxPTR(DeviceInfo), DeviceId, toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, DeviceInfo, ErrStr); + auto R = [&]() { + *ErrStr = ""; + + auto Err = getDevice(DeviceId).initDeviceInfo(DeviceInfo); + if (Err) { + REPORT("Failure to initialize device info at " DPxMOD + " on device %d: %s\n", + DPxPTR(DeviceInfo), DeviceId, toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } // Register mapped or allocated memory (with omp_target_alloc or omp_alloc) @@ -2136,22 +2307,31 @@ int32_t GenericPluginTy::init_device_info(int32_t DeviceId, // registered as coarse grain int GenericPluginTy::set_coarse_grain_mem_region(int32_t DeviceId, void *ptr, int64_t size) { - - auto Err = getDevice(DeviceId).setCoarseGrainMemory(ptr, size); - - if (Err) { - REPORT("Failure switching memory region to coarse grain mode (ptr: %p, " - "size: %ld)\n", - ptr, size); - return OFFLOAD_FAIL; - } - return OFFLOAD_SUCCESS; + auto T = logger::log(__func__, DeviceId, ptr, size); + auto R = [&]() { + auto Err = getDevice(DeviceId).setCoarseGrainMemory(ptr, size); + + if (Err) { + REPORT("Failure switching memory region to coarse grain mode (ptr: %p, " + "size: %ld)\n", + ptr, size); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::set_device_offset(int32_t DeviceIdOffset) { - setDeviceIdStartIndex(DeviceIdOffset); + auto T = logger::log(__func__, DeviceIdOffset); + auto R = [&]() { + setDeviceIdStartIndex(DeviceIdOffset); - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } // Request GPU driver to add all pages underlying memory [ptr,ptr+size[ to the @@ -2162,102 +2342,135 @@ int32_t GenericPluginTy::set_device_offset(int32_t DeviceIdOffset) { // registered as coarse grain int GenericPluginTy::prepopulate_page_table(int32_t DeviceId, void *ptr, int64_t size) { + auto T = logger::log(__func__, DeviceId, ptr, size); + auto R = [&]() { + auto Err = getDevice(DeviceId).prepopulatePageTable(ptr, size); + + if (Err) { + REPORT("Failure prepopulating GPU page table (ptr: %p, " + "size: %ld)\n", + ptr, size); + return OFFLOAD_FAIL; + } - auto Err = getDevice(DeviceId).prepopulatePageTable(ptr, size); - - if (Err) { - REPORT("Failure prepopulating GPU page table (ptr: %p, " - "size: %ld)\n", - ptr, size); - return OFFLOAD_FAIL; - } - - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } // Query if [ptr, ptr+size] belongs to coarse grain memory region int32_t GenericPluginTy::query_coarse_grain_mem_region(int32_t DeviceId, const void *ptr, int64_t size) { + auto T = logger::log(__func__, DeviceId, ptr, size); + auto R = [&]() { + auto QueryCoarseGrainReturnValue = + getDevice(DeviceId).queryCoarseGrainMemory(ptr, size); - auto QueryCoarseGrainReturnValue = - getDevice(DeviceId).queryCoarseGrainMemory(ptr, size); - - return QueryCoarseGrainReturnValue; + return QueryCoarseGrainReturnValue; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::get_global(__tgt_device_binary Binary, uint64_t Size, const char *Name, void **DevicePtr) { - assert(Binary.handle && "Invalid device binary handle"); - DeviceImageTy &Image = *reinterpret_cast(Binary.handle); + auto T = logger::log(__func__, Binary.handle, Size, Name, DevicePtr); + auto R = [&]() { + assert(Binary.handle && "Invalid device binary handle"); + DeviceImageTy &Image = *reinterpret_cast(Binary.handle); - GenericDeviceTy &Device = Image.getDevice(); + GenericDeviceTy &Device = Image.getDevice(); - GlobalTy DeviceGlobal(Name, Size); - GenericGlobalHandlerTy &GHandler = getGlobalHandler(); - if (auto Err = - GHandler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal)) { - REPORT("Failure to look up global address: %s\n", - toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + GlobalTy DeviceGlobal(Name, Size); + GenericGlobalHandlerTy &GHandler = getGlobalHandler(); + if (auto Err = + GHandler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal)) { + REPORT("Failure to look up global address: %s\n", + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - *DevicePtr = DeviceGlobal.getPtr(); - assert(DevicePtr && "Invalid device global's address"); + *DevicePtr = DeviceGlobal.getPtr(); + assert(DevicePtr && "Invalid device global's address"); - // Save the loaded globals if we are recording. - if (RecordReplay.isRecording()) - RecordReplay.addEntry(Name, Size, *DevicePtr); + // Save the loaded globals if we are recording. + if (RecordReplay.isRecording()) + RecordReplay.addEntry(Name, Size, *DevicePtr); - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::get_function(__tgt_device_binary Binary, const char *Name, void **KernelPtr) { - assert(Binary.handle && "Invalid device binary handle"); - DeviceImageTy &Image = *reinterpret_cast(Binary.handle); + auto T = logger::log(__func__, Binary.handle, Name, KernelPtr); + auto R = [&]() { + assert(Binary.handle && "Invalid device binary handle"); + DeviceImageTy &Image = *reinterpret_cast(Binary.handle); - GenericDeviceTy &Device = Image.getDevice(); + GenericDeviceTy &Device = Image.getDevice(); - auto KernelOrErr = Device.constructKernel(Name); - if (Error Err = KernelOrErr.takeError()) { - REPORT("Failure to look up kernel: %s\n", toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + auto KernelOrErr = Device.constructKernel(Name); + if (Error Err = KernelOrErr.takeError()) { + REPORT("Failure to look up kernel: %s\n", + toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - GenericKernelTy &Kernel = *KernelOrErr; - if (auto Err = Kernel.init(Device, Image)) { - REPORT("Failure to init kernel: %s\n", toString(std::move(Err)).data()); - return OFFLOAD_FAIL; - } + GenericKernelTy &Kernel = *KernelOrErr; + if (auto Err = Kernel.init(Device, Image)) { + REPORT("Failure to init kernel: %s\n", toString(std::move(Err)).data()); + return OFFLOAD_FAIL; + } - // Note that this is not the kernel's device address. - *KernelPtr = &Kernel; - return OFFLOAD_SUCCESS; + // Note that this is not the kernel's device address. + *KernelPtr = &Kernel; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::use_auto_zero_copy(int32_t DeviceId) { - return getDevice(DeviceId).useAutoZeroCopy(); + auto T = logger::log(__func__, DeviceId); + auto R = [&]() { return getDevice(DeviceId).useAutoZeroCopy(); }(); + T.res(R); + return R; } int32_t GenericPluginTy::enable_access_to_all_agents(int32_t DeviceId, void *ptr) { - // Not implemented yet. - return OFFLOAD_FAIL; + auto T = logger::log(__func__, DeviceId, ptr); + auto R = [&]() { + // Not implemented yet. + return OFFLOAD_FAIL; + }(); + T.res(R); + return R; } int32_t GenericPluginTy::zero_copy_sanity_checks_and_diag( int32_t DeviceId, bool isUnifiedSharedMemory, bool isAutoZeroCopy, bool isEagerMaps) { - auto Err = getDevice(DeviceId).zeroCopySanityChecksAndDiag( - isUnifiedSharedMemory, isAutoZeroCopy, isEagerMaps); - - if (Err) { - REPORT("Failure in zero-copy sanity checks\n"); - return OFFLOAD_FAIL; - } + auto T = logger::log(__func__, DeviceId, isUnifiedSharedMemory, + isAutoZeroCopy, isEagerMaps); + auto R = [&]() { + auto Err = getDevice(DeviceId).zeroCopySanityChecksAndDiag( + isUnifiedSharedMemory, isAutoZeroCopy, isEagerMaps); + + if (Err) { + REPORT("Failure in zero-copy sanity checks\n"); + return OFFLOAD_FAIL; + } - return OFFLOAD_SUCCESS; + return OFFLOAD_SUCCESS; + }(); + T.res(R); + return R; } bool llvm::omp::target::plugin::libomptargetSupportsRPC() { @@ -2267,285 +2480,3 @@ bool llvm::omp::target::plugin::libomptargetSupportsRPC() { return false; #endif } - -/// Exposed library API function, basically wrappers around the GenericDeviceTy -/// functionality with the same name. All non-async functions are redirected -/// to the async versions right away with a NULL AsyncInfoPtr. -#ifdef __cplusplus -extern "C" { -#endif - -int32_t __tgt_rtl_init_plugin() { - auto Err = PluginTy::initIfNeeded(); - if (Err) { - [[maybe_unused]] std::string ErrStr = toString(std::move(Err)); - DP("Failed to init plugin: %s", ErrStr.c_str()); - return OFFLOAD_FAIL; - } - - return OFFLOAD_SUCCESS; -} - -int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { - if (!PluginTy::isActive()) - return false; - - return PluginTy::get().is_valid_binary(Image); -} - -void __tgt_rtl_check_invalid_image(__tgt_device_image *InvalidImage) { - PluginTy::get().check_invalid_image(InvalidImage); -} - -int32_t __tgt_rtl_supports_empty_images() { - return PluginTy::get().supports_empty_images(); -} - -int32_t __tgt_rtl_init_device(int32_t DeviceId) { - return PluginTy::get().init_device(DeviceId); -} - -int32_t __tgt_rtl_number_of_devices() { - return PluginTy::get().number_of_devices(); -} - -int __tgt_rtl_number_of_team_procs(int DeviceId) { - return PluginTy::get().number_of_team_procs(DeviceId); -} - -bool __tgt_rtl_has_apu_device(int32_t DeviceId) { - return PluginTy::get().has_apu_device(DeviceId); -} - -bool __tgt_rtl_has_USM_capable_dGPU(int32_t DeviceId) { - return PluginTy::get().has_USM_capable_dGPU(DeviceId); -} - -bool __tgt_rtl_supports_unified_memory(int32_t DeviceId) { - return PluginTy::get().supports_unified_memory(DeviceId); -} - -bool __tgt_rtl_is_fine_grained_memory_enabled(int32_t DeviceId) { - return PluginTy::get().is_fine_grained_memory_enabled(DeviceId); -} - -bool __tgt_rtl_is_system_supporting_managed_memory(int32_t DeviceId) { - return PluginTy::get().is_system_supporting_managed_memory(DeviceId); -} - -int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) { - return PluginTy::get().init_requires(RequiresFlags); -} - -int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDeviceId, - int32_t DstDeviceId) { - return PluginTy::get().is_data_exchangable(SrcDeviceId, DstDeviceId); -} - -int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize, - void *VAddr, bool isRecord, - bool SaveOutput, - uint64_t &ReqPtrArgOffset) { - return PluginTy::get().initialize_record_replay( - DeviceId, MemorySize, VAddr, isRecord, SaveOutput, ReqPtrArgOffset); -} - -int32_t __tgt_rtl_load_binary(int32_t DeviceId, __tgt_device_image *TgtImage, - __tgt_device_binary *Binary) { - return PluginTy::get().load_binary(DeviceId, TgtImage, Binary); -} - -void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr, - int32_t Kind) { - return PluginTy::get().data_alloc(DeviceId, Size, HostPtr, Kind); -} - -int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind) { - return PluginTy::get().data_delete(DeviceId, TgtPtr, Kind); -} - -int32_t __tgt_rtl_data_lock(int32_t DeviceId, void *Ptr, int64_t Size, - void **LockedPtr) { - return PluginTy::get().data_lock(DeviceId, Ptr, Size, LockedPtr); -} - -int32_t __tgt_rtl_data_unlock(int32_t DeviceId, void *Ptr) { - return PluginTy::get().data_unlock(DeviceId, Ptr); -} - -int32_t __tgt_rtl_data_notify_mapped(int32_t DeviceId, void *HstPtr, - int64_t Size) { - return PluginTy::get().data_notify_mapped(DeviceId, HstPtr, Size); -} - -int32_t __tgt_rtl_data_notify_unmapped(int32_t DeviceId, void *HstPtr) { - return PluginTy::get().data_notify_unmapped(DeviceId, HstPtr); -} - -int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr, - int64_t Size) { - return PluginTy::get().data_submit(DeviceId, TgtPtr, HstPtr, Size); -} - -int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr, - void *HstPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { - return PluginTy::get().data_submit_async(DeviceId, TgtPtr, HstPtr, Size, - AsyncInfoPtr); -} - -int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, - int64_t Size) { - return PluginTy::get().data_retrieve(DeviceId, HstPtr, TgtPtr, Size); -} - -int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr, - void *TgtPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr) { - return PluginTy::get().data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size, - AsyncInfoPtr); -} - -int32_t __tgt_rtl_data_exchange(int32_t SrcDeviceId, void *SrcPtr, - int32_t DstDeviceId, void *DstPtr, - int64_t Size) { - return PluginTy::get().data_exchange(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr, - Size); -} - -int32_t __tgt_rtl_data_exchange_async(int32_t SrcDeviceId, void *SrcPtr, - int DstDeviceId, void *DstPtr, - int64_t Size, - __tgt_async_info *AsyncInfo) { - return PluginTy::get().data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, - DstPtr, Size, AsyncInfo); -} - -int32_t __tgt_rtl_launch_kernel_sync(int32_t DeviceId, void *TgtEntryPtr, - void **TgtArgs, ptrdiff_t *TgtOffsets, - KernelArgsTy *KernelArgs) { - return PluginTy::get().launch_kernel_sync(DeviceId, TgtEntryPtr, TgtArgs, - TgtOffsets, KernelArgs); -} -int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr, - void **TgtArgs, ptrdiff_t *TgtOffsets, - KernelArgsTy *KernelArgs, - __tgt_async_info *AsyncInfoPtr) { - return PluginTy::get().launch_kernel(DeviceId, TgtEntryPtr, TgtArgs, - TgtOffsets, KernelArgs, AsyncInfoPtr); -} - -int32_t __tgt_rtl_synchronize(int32_t DeviceId, - __tgt_async_info *AsyncInfoPtr) { - return PluginTy::get().synchronize(DeviceId, AsyncInfoPtr); -} - -int32_t __tgt_rtl_query_async(int32_t DeviceId, - __tgt_async_info *AsyncInfoPtr) { - return PluginTy::get().query_async(DeviceId, AsyncInfoPtr); -} - -void __tgt_rtl_print_device_info(int32_t DeviceId) { - PluginTy::get().print_device_info(DeviceId); -} - -int32_t __tgt_rtl_create_event(int32_t DeviceId, void **EventPtr) { - return PluginTy::get().create_event(DeviceId, EventPtr); -} - -int32_t __tgt_rtl_record_event(int32_t DeviceId, void *EventPtr, - __tgt_async_info *AsyncInfoPtr) { - return PluginTy::get().record_event(DeviceId, EventPtr, AsyncInfoPtr); -} - -int32_t __tgt_rtl_wait_event(int32_t DeviceId, void *EventPtr, - __tgt_async_info *AsyncInfoPtr) { - return PluginTy::get().wait_event(DeviceId, EventPtr, AsyncInfoPtr); -} - -int32_t __tgt_rtl_sync_event(int32_t DeviceId, void *EventPtr) { - return PluginTy::get().sync_event(DeviceId, EventPtr); -} - -int32_t __tgt_rtl_destroy_event(int32_t DeviceId, void *EventPtr) { - return PluginTy::get().destroy_event(DeviceId, EventPtr); -} - -void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) { - std::atomic &InfoLevel = getInfoLevelInternal(); - InfoLevel.store(NewInfoLevel); -} - -int32_t __tgt_rtl_init_async_info(int32_t DeviceId, - __tgt_async_info **AsyncInfoPtr) { - return PluginTy::get().init_async_info(DeviceId, AsyncInfoPtr); -} - -int32_t __tgt_rtl_init_device_info(int32_t DeviceId, - __tgt_device_info *DeviceInfo, - const char **ErrStr) { - return PluginTy::get().init_device_info(DeviceId, DeviceInfo, ErrStr); -} - -// Register mapped or allocated memory (with omp_target_alloc or omp_alloc) -// as coarse grain -// \arg DeviceId is the ID of the device for which the memory should be switched -// to coarse grain mode. \arg ptr is the base pointer of the region to be -// registered as coarse grain \arg size is the size of the memory region to be -// registered as coarse grain -int __tgt_rtl_set_coarse_grain_mem_region(int32_t DeviceId, void *ptr, - int64_t size) { - - return PluginTy::get().set_coarse_grain_mem_region(DeviceId, ptr, size); -} - -int32_t __tgt_rtl_set_device_offset(int32_t DeviceIdOffset) { - return PluginTy::get().set_device_offset(DeviceIdOffset); -} - -// Request GPU driver to add all pages underlying memory [ptr,ptr+size[ to the -// \arg DeviceId page table -// \arg DeviceId is the ID of the device for which the memory should be switched -// to coarse grain mode. \arg ptr is the base pointer of the region to be -// registered as coarse grain \arg size is the size of the memory region to be -// registered as coarse grain -int __tgt_rtl_prepopulate_page_table(int32_t DeviceId, void *ptr, - int64_t size) { - return PluginTy::get().prepopulate_page_table(DeviceId, ptr, size); -} - -// Query if [ptr, ptr+size] belongs to coarse grain memory region -int32_t __tgt_rtl_query_coarse_grain_mem_region(int32_t DeviceId, - const void *ptr, int64_t size) { - return PluginTy::get().query_coarse_grain_mem_region(DeviceId, ptr, size); -} - -int32_t __tgt_rtl_get_global(__tgt_device_binary Binary, uint64_t Size, - const char *Name, void **DevicePtr) { - return PluginTy::get().get_global(Binary, Size, Name, DevicePtr); -} - -int32_t __tgt_rtl_get_function(__tgt_device_binary Binary, const char *Name, - void **KernelPtr) { - return PluginTy::get().get_function(Binary, Name, KernelPtr); -} - -int32_t __tgt_rtl_use_auto_zero_copy(int32_t DeviceId) { - return PluginTy::get().use_auto_zero_copy(DeviceId); -} - -int32_t __tgt_rtl_enable_access_to_all_agents(int32_t DeviceId, void *ptr) { - return PluginTy::get().enable_access_to_all_agents(DeviceId, ptr); -} - -int32_t __tgt_rtl_zero_copy_sanity_checks_and_diag(int32_t DeviceId, - bool isUnifiedSharedMemory, - bool isAutoZeroCopy, - bool isEagerMaps) { - return PluginTy::get().zero_copy_sanity_checks_and_diag( - DeviceId, isUnifiedSharedMemory, isAutoZeroCopy, isEagerMaps); -} - -#ifdef __cplusplus -} -#endif diff --git a/offload/plugins-nextgen/common/src/trace.h b/offload/plugins-nextgen/common/src/trace.h index fdc1bc83031846..84b6c156294f79 100644 --- a/offload/plugins-nextgen/common/src/trace.h +++ b/offload/plugins-nextgen/common/src/trace.h @@ -13,7 +13,7 @@ namespace { -namespace detail { +namespace logger { // Plumbing for concatenating format strings template @@ -129,7 +129,7 @@ template struct log_t { std::tuple args; bool active; R result; - log_t(const char *func, Ts &&... args) + log_t(const char *func, Ts &&...args) : func(func), args(std::forward(args)...) { active = getInfoLevel() & OMP_INFOTYPE_AMD_API_TRACE; @@ -165,336 +165,11 @@ template struct log_t { }; template -log_t log(const char *func, Ts &&... ts) { +log_t log(const char *func, Ts &&...ts) { return log_t(func, std::forward(ts)...); } -} // namespace detail +} // namespace logger } // namespace -#ifdef __cplusplus -extern "C" { -#endif - -static void *__tgt_rtl_data_alloc_impl(int device_id, int64_t size, void *ptr, - int32_t kind); -void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *ptr, - int32_t Kind) { - auto t = detail::log(__func__, device_id, size, ptr); - void *r = __tgt_rtl_data_alloc_impl(device_id, size, ptr, Kind); - t.res(r); - return r; -} -#define __tgt_rtl_data_alloc(...) __tgt_rtl_data_alloc_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_data_delete_impl(int device_id, void *tgt_ptr, - int32_t Kind); -int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr, int32_t Kind) { - auto t = detail::log(__func__, device_id, tgt_ptr); - int32_t r = __tgt_rtl_data_delete_impl(device_id, tgt_ptr, Kind); - t.res(r); - return r; -} -#define __tgt_rtl_data_delete(...) __tgt_rtl_data_delete_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_data_retrieve_impl(int device_id, void *hst_ptr, - void *tgt_ptr, int64_t size); -int32_t __tgt_rtl_data_retrieve(int device_id, void *hst_ptr, void *tgt_ptr, - int64_t size) { - auto t = detail::log(__func__, device_id, hst_ptr, tgt_ptr, size); - int32_t r = __tgt_rtl_data_retrieve_impl(device_id, hst_ptr, tgt_ptr, size); - t.res(r); - return r; -} -#define __tgt_rtl_data_retrieve(...) __tgt_rtl_data_retrieve_impl(__VA_ARGS__) - -static int32_t -__tgt_rtl_data_retrieve_async_impl(int device_id, void *hst_ptr, void *tgt_ptr, - int64_t size, - __tgt_async_info *async_info_ptr); -int32_t __tgt_rtl_data_retrieve_async(int device_id, void *hst_ptr, - void *tgt_ptr, int64_t size, - __tgt_async_info *async_info_ptr) { - auto t = detail::log(__func__, device_id, hst_ptr, tgt_ptr, size, - async_info_ptr); - int32_t r = __tgt_rtl_data_retrieve_async_impl(device_id, hst_ptr, tgt_ptr, - size, async_info_ptr); - t.res(r); - return r; -} -#define __tgt_rtl_data_retrieve_async(...) \ - __tgt_rtl_data_retrieve_async_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_data_submit_impl(int device_id, void *tgt_ptr, - void *hst_ptr, int64_t size); -int32_t __tgt_rtl_data_submit(int device_id, void *tgt_ptr, void *hst_ptr, - int64_t size) { - auto t = detail::log(__func__, device_id, tgt_ptr, hst_ptr, size); - int32_t r = __tgt_rtl_data_submit_impl(device_id, tgt_ptr, hst_ptr, size); - t.res(r); - return r; -} -#define __tgt_rtl_data_submit(...) __tgt_rtl_data_submit_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_data_submit_async_impl(int32_t ID, void *TargetPtr, - void *HostPtr, int64_t Size, - __tgt_async_info *AsyncInfoPtr); -int32_t __tgt_rtl_data_submit_async(int32_t ID, void *TargetPtr, void *HostPtr, - int64_t Size, - __tgt_async_info *AsyncInfoPtr) { - auto t = detail::log(__func__, ID, TargetPtr, HostPtr, Size, - AsyncInfoPtr); - int32_t r = __tgt_rtl_data_submit_async_impl(ID, TargetPtr, HostPtr, Size, - AsyncInfoPtr); - t.res(r); - return r; -} -#define __tgt_rtl_data_submit_async(...) \ - __tgt_rtl_data_submit_async_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_init_device_impl(int device_id); -int32_t __tgt_rtl_init_device(int device_id) { - auto t = detail::log(__func__, device_id); - int32_t r = __tgt_rtl_init_device_impl(device_id); - t.res(r); - return r; -} -#define __tgt_rtl_init_device(...) __tgt_rtl_init_device_impl(__VA_ARGS__) - -static int64_t __tgt_rtl_init_requires_impl(int64_t RequiresFlags); -int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) { - auto t = detail::log(__func__, RequiresFlags); - int64_t r = __tgt_rtl_init_requires_impl(RequiresFlags); - t.res(r); - return r; -} -#define __tgt_rtl_init_requires(...) __tgt_rtl_init_requires_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_is_valid_binary_impl(__tgt_device_image *image); -int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) { - auto t = detail::log(__func__, image); - int32_t r = __tgt_rtl_is_valid_binary_impl(image); - t.res(r); - return r; -} -#define __tgt_rtl_is_valid_binary(...) \ - __tgt_rtl_is_valid_binary_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_load_binary_impl(int32_t device_id, - __tgt_device_image *Image, - __tgt_device_binary *Binary); -int32_t __tgt_rtl_load_binary(int32_t device_id, __tgt_device_image *Image, - __tgt_device_binary *Binary) { - auto t = detail::log(__func__, device_id, Image); - int32_t r = __tgt_rtl_load_binary_impl(device_id, Image, Binary); - t.res(r); - return r; -} -#define __tgt_rtl_load_binary(...) __tgt_rtl_load_binary_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_get_global_impl(__tgt_device_binary Binary, - uint64_t Size, const char *Name, - void **DevicePtr); -int32_t __tgt_rtl_get_global(__tgt_device_binary Binary, uint64_t Size, - const char *Name, void **DevicePtr) { - auto t = detail::log(__func__, Size, Name, DevicePtr); - int32_t r = __tgt_rtl_get_global_impl(Binary, Size, Name, DevicePtr); - t.res(r); - return r; -} -#define __tgt_rtl_get_global(...) __tgt_rtl_get_global_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_get_function_impl(__tgt_device_binary Binary, - const char *Name, void **DevicePtr); -int32_t __tgt_rtl_get_function(__tgt_device_binary Binary, const char *Name, - void **DevicePtr) { - auto t = detail::log(__func__, Name, DevicePtr); - int32_t r = __tgt_rtl_get_function_impl(Binary, Name, DevicePtr); - t.res(r); - return r; -} -#define __tgt_rtl_get_function(...) __tgt_rtl_get_function_impl(__VA_ARGS__) - -static int __tgt_rtl_number_of_devices_impl(); -int __tgt_rtl_number_of_devices() { - auto t = detail::log(__func__); - int r = __tgt_rtl_number_of_devices_impl(); - t.res(r); - return r; -} -#define __tgt_rtl_number_of_devices(...) \ - __tgt_rtl_number_of_devices_impl(__VA_ARGS__) - -static bool __tgt_rtl_is_apu_system_impl(int32_t device_id); -bool __tgt_rtl_has_apu_device(int32_t device_id) { - auto t = detail::log(__func__); - bool r = __tgt_rtl_is_apu_system_impl(device_id); - t.res(r); - return r; -} -#define __tgt_rtl_has_apu_device(...) __tgt_rtl_is_apu_system_impl(__VA_ARGS__) - -static bool __tgt_rtl_has_USM_capable_dGPU_impl(int32_t device_id); -bool __tgt_rtl_has_USM_capable_dGPU(int32_t device_id) { - auto t = detail::log(__func__); - bool r = __tgt_rtl_has_USM_capable_dGPU_impl(device_id); - t.res(r); - return r; -} -#define __tgt_rtl_has_USM_capable_dGPU(...) \ - __tgt_rtl_has_USM_capable_dGPU_impl(__VA_ARGS__) - -static bool __tgt_rtl_supports_unified_memory_impl(int32_t device_id); -bool __tgt_rtl_supports_unified_memory(int32_t device_id) { - auto t = detail::log(__func__); - bool r = __tgt_rtl_supports_unified_memory_impl(device_id); - t.res(r); - return r; -} -#define __tgt_rtl_supports_unified_memory(...) \ - __tgt_rtl_supports_unified_memory_impl(__VA_ARGS__) - -static bool __tgt_rtl_is_fine_grained_memory_enabled_impl(int32_t device_id); -bool __tgt_rtl_is_fine_grained_memory_enabled(int32_t device_id) { - auto t = detail::log(__func__); - bool r = __tgt_rtl_is_fine_grained_memory_enabled_impl(device_id); - t.res(r); - return r; -} -#define __tgt_rtl_is_fine_grained_memory_enabled(...) \ - __tgt_rtl_is_fine_grained_memory_enabled_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_launch_kernel_sync_impl(int32_t device_id, - void *tgt_entry_ptr, - void **tgt_args, - ptrdiff_t *tgt_offsets, - KernelArgsTy *KernelArgs); -int32_t __tgt_rtl_launch_kernel_sync(int32_t device_id, void *tgt_entry_ptr, - void **tgt_args, ptrdiff_t *tgt_offsets, - KernelArgsTy *KernelArgs) { - auto t = detail::log( - __func__, device_id, tgt_entry_ptr, tgt_args, tgt_offsets, - (int32_t)KernelArgs->NumArgs, (int32_t)KernelArgs->NumTeams[0], - (int32_t)KernelArgs->ThreadLimit[0], (uint64_t)KernelArgs->Tripcount); - int32_t r = __tgt_rtl_launch_kernel_sync_impl( - device_id, tgt_entry_ptr, tgt_args, tgt_offsets, KernelArgs); - t.res(r); - return r; -} -#define __tgt_rtl_launch_kernel_sync(...) \ - __tgt_rtl_launch_kernel_sync_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_launch_kernel_impl(int32_t device_id, - void *tgt_entry_ptr, - void **tgt_args, - ptrdiff_t *tgt_offsets, - KernelArgsTy *KernelArgs, - __tgt_async_info *AsyncInfo); -int32_t __tgt_rtl_launch_kernel(int32_t device_id, void *tgt_entry_ptr, - void **tgt_args, ptrdiff_t *tgt_offsets, - KernelArgsTy *KernelArgs, - __tgt_async_info *AsyncInfo) { - auto t = detail::log(__func__, device_id, tgt_entry_ptr, tgt_args, - tgt_offsets, (int32_t)KernelArgs->NumArgs, - (int32_t)KernelArgs->NumTeams[0], - (int32_t)KernelArgs->ThreadLimit[0], - (uint64_t)KernelArgs->Tripcount, AsyncInfo); - int32_t r = __tgt_rtl_launch_kernel_impl(device_id, tgt_entry_ptr, tgt_args, - tgt_offsets, KernelArgs, AsyncInfo); - t.res(r); - return r; -} -#define __tgt_rtl_launch_kernel(...) __tgt_rtl_launch_kernel_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_synchronize_impl(int32_t device_id, - __tgt_async_info *async_info_ptr); -int32_t __tgt_rtl_synchronize(int32_t device_id, - __tgt_async_info *async_info_ptr) { - auto t = detail::log(__func__, device_id, async_info_ptr); - int32_t r = __tgt_rtl_synchronize_impl(device_id, async_info_ptr); - t.res(r); - return r; -} -#define __tgt_rtl_synchronize(...) __tgt_rtl_synchronize_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_set_coarse_grain_mem_region_impl(int32_t DeviceId, - void *ptr, - int64_t size); -int32_t __tgt_rtl_set_coarse_grain_mem_region(int32_t DeviceId, void *ptr, - int64_t size) { - auto t = detail::log(__func__, DeviceId, ptr, size); - int32_t r = __tgt_rtl_set_coarse_grain_mem_region_impl(DeviceId, ptr, size); - t.res(r); - return r; -} -#define __tgt_rtl_set_coarse_grain_mem_region(...) \ - __tgt_rtl_set_coarse_grain_mem_region_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_query_coarse_grain_mem_region_impl(int32_t DeviceId, - const void *ptr, - int64_t size); -int32_t __tgt_rtl_query_coarse_grain_mem_region(int32_t DeviceId, - void *ptr, int64_t size) { - auto t = detail::log(__func__, DeviceId, ptr, size); - int32_t r = __tgt_rtl_query_coarse_grain_mem_region_impl(DeviceId, ptr, size); - t.res(r); - return r; -} -#define __tgt_rtl_query_coarse_grain_mem_region(...) \ - __tgt_rtl_query_coarse_grain_mem_region_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_prepopulate_page_table_impl(int32_t DeviceId, - void *ptr, int64_t size); -int32_t __tgt_rtl_prepopulate_page_table(int32_t DeviceId, void *ptr, - int64_t size) { - auto t = detail::log(__func__, DeviceId, ptr, size); - int32_t r = __tgt_rtl_prepopulate_page_table_impl(DeviceId, ptr, size); - t.res(r); - return r; -} -#define __tgt_rtl_prepopulate_page_table(...) \ - __tgt_rtl_prepopulate_page_table_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_use_auto_zero_copy_impl(int32_t DeviceId); -int32_t __tgt_rtl_use_auto_zero_copy(int32_t DeviceId) { - auto t = detail::log(__func__, DeviceId); - int32_t r = __tgt_rtl_use_auto_zero_copy_impl(DeviceId); - t.res(r); - return r; -} -#define __tgt_rtl_use_auto_zero_copy(...) \ - __tgt_rtl_use_auto_zero_copy_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_enable_access_to_all_agents_impl(int32_t DeviceId, - void *ptr); -int32_t __tgt_rtl_enable_access_to_all_agents(int32_t DeviceId, void *ptr) { - auto t = detail::log(__func__, DeviceId, ptr); - int32_t r = __tgt_rtl_enable_access_to_all_agents_impl(DeviceId, ptr); - t.res(r); - return r; -} -#define __tgt_rtl_enable_access_to_all_agents(...) \ - __tgt_rtl_enable_access_to_all_agents_impl(__VA_ARGS__) - -static int32_t __tgt_rtl_zero_copy_sanity_checks_and_diag_impl( - int32_t DeviceId, bool isUnifiedSharedMemory, bool isAutoZeroCopy, - bool isEagerMaps); -int32_t __tgt_rtl_zero_copy_sanity_checks_and_diag(int32_t DeviceId, - bool isUnifiedSharedMemory, - bool isAutoZeroCopy, - bool isEagerMaps) { - auto t = detail::log(__func__, DeviceId, isUnifiedSharedMemory, - isAutoZeroCopy, isEagerMaps); - int32_t r = __tgt_rtl_zero_copy_sanity_checks_and_diag_impl( - DeviceId, isUnifiedSharedMemory, isAutoZeroCopy, isEagerMaps); - t.res(r); - return r; -} -#define __tgt_rtl_zero_copy_sanity_checks_and_diag(...) \ - __tgt_rtl_zero_copy_sanity_checks_and_diag_impl(__VA_ARGS__) - -#ifdef __cplusplus -} -#endif - #endif diff --git a/offload/plugins-nextgen/cuda/CMakeLists.txt b/offload/plugins-nextgen/cuda/CMakeLists.txt index 2c837ab56ac177..7d7395e8050fdf 100644 --- a/offload/plugins-nextgen/cuda/CMakeLists.txt +++ b/offload/plugins-nextgen/cuda/CMakeLists.txt @@ -51,11 +51,3 @@ else() libomptarget_say("Not generating NVIDIA tests, no supported devices detected." " Use 'LIBOMPTARGET_FORCE_NVIDIA_TESTS' to override.") endif() - -# Install plugin under the lib destination folder. -install(TARGETS omptarget.rtl.cuda LIBRARY DESTINATION "${OFFLOAD_INSTALL_LIBDIR}") - -if(NOT DEFINED CMAKE_INSTALL_RPATH) -set_target_properties(omptarget.rtl.cuda PROPERTIES - INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/..") -endif() diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index 0ba4938ab1f2f0..03f12e7767df8b 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -32,21 +32,6 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/Program.h" -#ifdef OMPT_SUPPORT -void setOmptAsyncCopyProfile(bool Enable) { - // TODO -} - -void setGlobalOmptKernelProfile(int DeviceId, int Enable) { - // TODO -} - -uint64_t getSystemTimestampInNs() { - // TODO - return 0; -} -#endif - namespace llvm { namespace omp { namespace target { @@ -1357,10 +1342,6 @@ struct CUDAPluginTy final : public GenericPluginTy { return 0; } -#ifdef OMPT_SUPPORT - ompt::connectLibrary(); -#endif - if (Res == CUDA_ERROR_NO_DEVICE) { // Do not initialize if there are no devices. DP("There are no devices supporting CUDA.\n"); @@ -1405,6 +1386,8 @@ struct CUDAPluginTy final : public GenericPluginTy { return Triple::nvptx64; } + const char *getName() const override { return GETNAME(TARGET_NAME); } + /// Check whether the image is compatible with the available CUDA devices. Expected isELFCompatible(StringRef Image) const override { auto ElfOrErr = @@ -1528,8 +1511,6 @@ Error CUDADeviceTy::dataExchangeImpl(const void *SrcPtr, return Plugin::check(Res, "Error in cuMemcpyDtoDAsync: %s"); } -GenericPluginTy *PluginTy::createPlugin() { return new CUDAPluginTy(); } - template static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) { CUresult ResultCode = static_cast(Code); @@ -1549,3 +1530,9 @@ static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) { } // namespace target } // namespace omp } // namespace llvm + +extern "C" { +llvm::omp::target::plugin::GenericPluginTy *createPlugin_cuda() { + return new llvm::omp::target::plugin::CUDAPluginTy(); +} +} diff --git a/offload/plugins-nextgen/host/CMakeLists.txt b/offload/plugins-nextgen/host/CMakeLists.txt index 32a3b12585ce57..72b5681283fe28 100644 --- a/offload/plugins-nextgen/host/CMakeLists.txt +++ b/offload/plugins-nextgen/host/CMakeLists.txt @@ -31,16 +31,6 @@ else() target_include_directories(omptarget.rtl.host PRIVATE dynamic_ffi) endif() -# Install plugin under the lib destination folder. -install(TARGETS omptarget.rtl.host - LIBRARY DESTINATION "${OFFLOAD_INSTALL_LIBDIR}") -if(NOT DEFINED CMAKE_INSTALL_RPATH) - set_target_properties(omptarget.rtl.host PROPERTIES - INSTALL_RPATH "$ORIGIN" BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.." - ) -endif() -#>>>>>>> d6bf04f4760b0dff3c3d3ff9b560c04438cc25ac - target_include_directories(omptarget.rtl.host PRIVATE ${LIBOMPTARGET_INCLUDE_DIR}) diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index fb22e92e02a4eb..c569e05719a4d3 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ b/offload/plugins-nextgen/host/src/rtl.cpp @@ -55,21 +55,6 @@ #define LIBOMPTARGET_NEXTGEN_GENERIC_PLUGIN_TRIPLE "" #endif -#ifdef OMPT_SUPPORT -void setOmptAsyncCopyProfile(bool Enable) { - // TODO -} - -void setGlobalOmptKernelProfile(int DeviceId, int Enable) { - // TODO -} - -uint64_t getSystemTimestampInNs() { - // TODO - return 0; -} -#endif - namespace llvm { namespace omp { namespace target { @@ -410,10 +395,6 @@ struct GenELF64PluginTy final : public GenericPluginTy { /// Initialize the plugin and return the number of devices. Expected initImpl() override { -#ifdef OMPT_SUPPORT - ompt::connectLibrary(); -#endif - #ifdef USES_DYNAMIC_FFI if (auto Err = Plugin::check(ffi_init(), "Failed to initialize libffi")) return std::move(Err); @@ -470,9 +451,9 @@ struct GenELF64PluginTy final : public GenericPluginTy { return llvm::Triple::UnknownArch; #endif } -}; -GenericPluginTy *PluginTy::createPlugin() { return new GenELF64PluginTy(); } + const char *getName() const override { return GETNAME(TARGET_NAME); } +}; template static Error Plugin::check(int32_t Code, const char *ErrMsg, ArgsTy... Args) { @@ -487,3 +468,9 @@ static Error Plugin::check(int32_t Code, const char *ErrMsg, ArgsTy... Args) { } // namespace target } // namespace omp } // namespace llvm + +extern "C" { +llvm::omp::target::plugin::GenericPluginTy *createPlugin_host() { + return new llvm::omp::target::plugin::GenELF64PluginTy(); +} +} diff --git a/offload/src/CMakeLists.txt b/offload/src/CMakeLists.txt index cf3dc2163a8217..dbfdce454fcd3b 100644 --- a/offload/src/CMakeLists.txt +++ b/offload/src/CMakeLists.txt @@ -92,6 +92,10 @@ target_compile_definitions(omptarget PRIVATE DEBUG_PREFIX="omptarget" ) +foreach(plugin IN LISTS LIBOMPTARGET_PLUGINS_TO_BUILD) + target_link_libraries(omptarget PRIVATE omptarget.rtl.${plugin}) +endforeach() + target_compile_options(omptarget PUBLIC ${offload_compile_flags}) target_link_options(omptarget PUBLIC ${offload_link_flags}) diff --git a/offload/src/OffloadRTL.cpp b/offload/src/OffloadRTL.cpp index 56c378acebdf40..777221d5372ba7 100644 --- a/offload/src/OffloadRTL.cpp +++ b/offload/src/OffloadRTL.cpp @@ -50,6 +50,7 @@ void deinitRuntime() { if (RefCount == 1) { DP("Deinit offload library!\n"); + PM->deinit(); delete PM; PM = nullptr; } diff --git a/offload/src/OpenMP/API.cpp b/offload/src/OpenMP/API.cpp index 65557d904c087b..aa7abad06c0d7b 100644 --- a/offload/src/OpenMP/API.cpp +++ b/offload/src/OpenMP/API.cpp @@ -668,8 +668,6 @@ EXTERN int omp_is_coarse_grain_mem_region(void *ptr, size_t size) { FATAL_MESSAGE(omp_get_default_device(), "%s", toString(DeviceOrErr.takeError()).c_str()); - if (!DeviceOrErr->RTL->query_coarse_grain_mem_region) - return 0; return DeviceOrErr->RTL->query_coarse_grain_mem_region( omp_get_default_device(), ptr, size); } diff --git a/offload/src/OpenMP/InteropAPI.cpp b/offload/src/OpenMP/InteropAPI.cpp index 1a995cde7816e1..bdbc440c64a2c9 100644 --- a/offload/src/OpenMP/InteropAPI.cpp +++ b/offload/src/OpenMP/InteropAPI.cpp @@ -230,14 +230,14 @@ void __tgt_interop_init(ident_t *LocRef, int32_t Gtid, } DeviceTy &Device = *DeviceOrErr; - if (!Device.RTL || !Device.RTL->init_device_info || + if (!Device.RTL || Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, &(InteropPtr)->err_str)) { delete InteropPtr; InteropPtr = omp_interop_none; } if (InteropType == kmp_interop_type_tasksync) { - if (!Device.RTL || !Device.RTL->init_async_info || + if (!Device.RTL || Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { delete InteropPtr; InteropPtr = omp_interop_none; diff --git a/offload/src/OpenMP/Mapping.cpp b/offload/src/OpenMP/Mapping.cpp index a9dfccdba83792..98d7c58af3c4a2 100644 --- a/offload/src/OpenMP/Mapping.cpp +++ b/offload/src/OpenMP/Mapping.cpp @@ -272,8 +272,7 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer( // overriden by setting the env-var OMPX_DISABLE_USM_MAPS=1. // This is not done for APUs. if (Device.RTL->has_USM_capable_dGPU(Device.DeviceID) && HstPtrBegin && - (!Device.RTL->is_fine_grained_memory_enabled(Device.DeviceID)) && - Device.RTL->set_coarse_grain_mem_region) { + (!Device.RTL->is_fine_grained_memory_enabled(Device.DeviceID))) { Device.RTL->set_coarse_grain_mem_region(Device.DeviceID, HstPtrBegin, Size); INFO(OMP_INFOTYPE_MAPPING_CHANGED, Device.DeviceID, diff --git a/offload/src/PluginManager.cpp b/offload/src/PluginManager.cpp index 28a64dab3720cf..4ac80fdd79046e 100644 --- a/offload/src/PluginManager.cpp +++ b/offload/src/PluginManager.cpp @@ -25,85 +25,25 @@ using namespace llvm::sys; PluginManager *PM = nullptr; -Expected> -PluginAdaptorTy::create(const std::string &Name) { - DP("Attempting to load library '%s'...\n", Name.c_str()); - TIMESCOPE_WITH_NAME_AND_IDENT(Name, (const ident_t *)nullptr); - - std::string ErrMsg; - auto LibraryHandler = std::make_unique( - DynamicLibrary::getPermanentLibrary(Name.c_str(), &ErrMsg)); - - if (!LibraryHandler->isValid()) { - // Library does not exist or cannot be found. - return createStringError(inconvertibleErrorCode(), - "Unable to load library '%s': %s!\n", Name.c_str(), - ErrMsg.c_str()); - } - - DP("Successfully loaded library '%s'!\n", Name.c_str()); - auto PluginAdaptor = std::unique_ptr( - new PluginAdaptorTy(Name, std::move(LibraryHandler))); - if (auto Err = PluginAdaptor->init()) - return Err; - return std::move(PluginAdaptor); -} - -PluginAdaptorTy::PluginAdaptorTy(const std::string &Name, - std::unique_ptr DL) - : Name(Name), LibraryHandler(std::move(DL)) {} - -Error PluginAdaptorTy::init() { - -#define PLUGIN_API_HANDLE(NAME) \ - NAME = reinterpret_cast( \ - LibraryHandler->getAddressOfSymbol(GETNAME(__tgt_rtl_##NAME))); \ - if (!NAME) { \ - return createStringError(inconvertibleErrorCode(), \ - "Invalid plugin as necessary interface function " \ - "(%s) was not found.\n", \ - std::string(#NAME).c_str()); \ - } - -#include "Shared/PluginAPI.inc" -#undef PLUGIN_API_HANDLE - - // Remove plugin on failure to call optional init_plugin - int32_t Rc = init_plugin(); - if (Rc != OFFLOAD_SUCCESS) { - return createStringError(inconvertibleErrorCode(), - "Unable to initialize library '%s': %u!\n", - Name.c_str(), Rc); - } - - // No devices are supported by this RTL? - int32_t NumberOfPluginDevices = number_of_devices(); - if (!NumberOfPluginDevices) { - return createStringError(inconvertibleErrorCode(), - "No devices supported in this RTL\n"); - } - - DP("Registered '%s' with %d plugin visible devices!\n", Name.c_str(), - NumberOfPluginDevices); - return Error::success(); -} +// Every plugin exports this method to create an instance of the plugin type. +#define PLUGIN_TARGET(Name) extern "C" GenericPluginTy *createPlugin_##Name(); +#include "Shared/Targets.def" void PluginManager::init() { TIMESCOPE(); DP("Loading RTLs...\n"); - // Attempt to open all the plugins and, if they exist, check if the interface - // is correct and if they are supporting any devices. + // Attempt to create an instance of each supported plugin. #define PLUGIN_TARGET(Name) \ do { \ - auto PluginAdaptorOrErr = \ - PluginAdaptorTy::create("libomptarget.rtl." #Name ".so"); \ - if (!PluginAdaptorOrErr) { \ - [[maybe_unused]] std::string InfoMsg = \ - toString(PluginAdaptorOrErr.takeError()); \ - DP("%s", InfoMsg.c_str()); \ + auto Plugin = std::unique_ptr(createPlugin_##Name()); \ + if (auto Err = Plugin->init()) { \ + [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); \ + DP("Failed to init plugin: %s\n", InfoMsg.c_str()); \ } else { \ - PluginAdaptors.push_back(std::move(*PluginAdaptorOrErr)); \ + DP("Registered plugin %s with %d visible device(s)\n", \ + Plugin->getName(), Plugin->number_of_devices()); \ + Plugins.emplace_back(std::move(Plugin)); \ } \ } while (false); #include "Shared/Targets.def" @@ -111,15 +51,29 @@ void PluginManager::init() { DP("RTLs loaded!\n"); } -void PluginManager::initDevices(PluginAdaptorTy &RTL) { +void PluginManager::deinit() { + TIMESCOPE(); + DP("Unloading RTLs...\n"); + + for (auto &Plugin : Plugins) { + if (auto Err = Plugin->deinit()) { + [[maybe_unused]] std::string InfoMsg = toString(std::move(Err)); + DP("Failed to deinit plugin: %s\n", InfoMsg.c_str()); + } + Plugin.release(); + } + + DP("RTLs unloaded!\n"); +} + +void PluginManager::initDevices(GenericPluginTy &RTL) { // If this RTL has already been initialized. if (PM->DeviceOffsets.contains(&RTL)) return; TIMESCOPE(); // If this RTL is not already in use, initialize it. - assert(RTL.number_of_devices() > 0 && - "Tried to initialize useless plugin adaptor"); + assert(RTL.number_of_devices() > 0 && "Tried to initialize useless plugin!"); // Initialize the device information for the RTL we are about to use. auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor(); @@ -196,13 +150,12 @@ void PluginManager::initDevices(PluginAdaptorTy &RTL) { DeviceOffsets[&RTL] = DeviceOffset; DeviceUsed[&RTL] = NumberOfUserDevices; - DP("Plugin adaptor " DPxMOD " has index %d, exposes %d out of %d devices!\n", - DPxPTR(RTL.LibraryHandler.get()), DeviceOffset, NumberOfUserDevices, - RTL.number_of_devices()); + DP("Plugin has index %d, exposes %d out of %d devices!\n", DeviceOffset, + NumberOfUserDevices, RTL.number_of_devices()); } void PluginManager::initAllPlugins() { - for (auto &R : PluginAdaptors) + for (auto &R : Plugins) initDevices(*R); } @@ -256,19 +209,22 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { // Obtain the image and information that was previously extracted. __tgt_device_image *Img = &DI.getExecutableImage(); - PluginAdaptorTy *FoundRTL = nullptr; + GenericPluginTy *FoundRTL = nullptr; // Scan the RTLs that have associated images until we find one that supports // the current image. - for (auto &R : PM->pluginAdaptors()) { + for (auto &R : PM->plugins()) { + if (!R.number_of_devices()) + continue; + if (!R.is_valid_binary(Img)) { DP("Image " DPxMOD " is NOT compatible with RTL %s!\n", - DPxPTR(Img->ImageStart), R.Name.c_str()); + DPxPTR(Img->ImageStart), R.getName()); continue; } DP("Image " DPxMOD " is compatible with RTL %s!\n", - DPxPTR(Img->ImageStart), R.Name.c_str()); + DPxPTR(Img->ImageStart), R.getName()); PM->initDevices(R); @@ -287,7 +243,7 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin]; DP("Registering image " DPxMOD " with RTL %s!\n", DPxPTR(Img->ImageStart), - R.Name.c_str()); + R.getName()); registerImageIntoTranslationTable(TransTable, PM->DeviceOffsets[&R], PM->DeviceUsed[&R], Img); @@ -314,7 +270,7 @@ void PluginManager::registerLib(__tgt_bin_desc *Desc) { if (!FoundCompatibleImage) { for (DeviceImageTy &DI : PM->deviceImages()) { __tgt_device_image *Img = &DI.getExecutableImage(); - for (auto &R : PM->pluginAdaptors()) + for (auto &R : PM->plugins()) R.check_invalid_image(Img); } } @@ -341,11 +297,11 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) { // Obtain the image and information that was previously extracted. __tgt_device_image *Img = &DI.getExecutableImage(); - PluginAdaptorTy *FoundRTL = NULL; + GenericPluginTy *FoundRTL = NULL; // Scan the RTLs that have associated images until we find one that supports // the current image. We only need to scan RTLs that are already being used. - for (auto &R : PM->pluginAdaptors()) { + for (auto &R : PM->plugins()) { if (!DeviceOffsets.contains(&R)) continue; @@ -355,8 +311,7 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) { FoundRTL = &R; - DP("Unregistered image " DPxMOD " from RTL " DPxMOD "!\n", - DPxPTR(Img->ImageStart), DPxPTR(R.LibraryHandler.get())); + DP("Unregistered image " DPxMOD " from RTL\n", DPxPTR(Img->ImageStart)); break; } diff --git a/offload/src/device.cpp b/offload/src/device.cpp index 3c8f3c6ce2f454..201bcc9877d9d0 100644 --- a/offload/src/device.cpp +++ b/offload/src/device.cpp @@ -66,7 +66,7 @@ int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device, return OFFLOAD_SUCCESS; } -DeviceTy::DeviceTy(PluginAdaptorTy *RTL, int32_t DeviceID, int32_t RTLDeviceID) +DeviceTy::DeviceTy(GenericPluginTy *RTL, int32_t DeviceID, int32_t RTLDeviceID) : DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID), ForceSynchronousTargetRegions(false), MappingInfo(*this) {} @@ -92,7 +92,6 @@ llvm::Error DeviceTy::init() { return llvm::createStringError(llvm::inconvertibleErrorCode(), "Failed to initialize device %d\n", DeviceID); - assert(RTL->number_of_team_procs && "Need function pointer to entry point"); setTeamProcs(RTL->number_of_team_procs(RTLDeviceID)); // Enables recording kernels if set. @@ -242,7 +241,6 @@ int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, #else if (ForceSynchronousTargetRegions || !AsyncInfo) { #endif - assert(RTL->data_exchange && "RTL->data_exchange is nullptr"); return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size); } @@ -275,11 +273,11 @@ int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) { int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets, KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo) { - if (ForceSynchronousTargetRegions || !RTL->launch_kernel || #ifdef OMPT_SUPPORT - ompt::TracingActive || + if (ForceSynchronousTargetRegions || ompt::TracingActive) +#else + if (ForceSynchronousTargetRegions) #endif - !RTL->synchronize) return RTL->launch_kernel_sync(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, &KernelArgs); return RTL->launch_kernel(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets, @@ -347,27 +345,18 @@ bool DeviceTy::useAutoZeroCopy() { if (PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) return false; - if (RTL->use_auto_zero_copy) - return RTL->use_auto_zero_copy(RTLDeviceID); - return false; + return RTL->use_auto_zero_copy(RTLDeviceID); } -bool DeviceTy::checkIfAPU() { - if (RTL->has_apu_device) - return RTL->has_apu_device(RTLDeviceID); - return false; -} +bool DeviceTy::checkIfAPU() { return RTL->has_apu_device(RTLDeviceID); } bool DeviceTy::supportsUnifiedMemory() { - if (RTL->supports_unified_memory) - return RTL->supports_unified_memory(RTLDeviceID); - return false; + return RTL->supports_unified_memory(RTLDeviceID); } void DeviceTy::zeroCopySanityChecksAndDiag(bool isUnifiedSharedMemory, bool isAutoZeroCopy, bool isEagerMaps) { - if (RTL->zero_copy_sanity_checks_and_diag) - RTL->zero_copy_sanity_checks_and_diag(RTLDeviceID, isUnifiedSharedMemory, - isAutoZeroCopy, isEagerMaps); + RTL->zero_copy_sanity_checks_and_diag(RTLDeviceID, isUnifiedSharedMemory, + isAutoZeroCopy, isEagerMaps); } diff --git a/offload/src/interface.cpp b/offload/src/interface.cpp index 1af787ac5f53ce..cd1f4b17ef6c6b 100644 --- a/offload/src/interface.cpp +++ b/offload/src/interface.cpp @@ -482,8 +482,6 @@ EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) { OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); std::atomic &InfoLevel = getInfoLevelInternal(); InfoLevel.store(NewInfoLevel); - for (auto &R : PM->pluginAdaptors()) - R.set_info_flag(NewInfoLevel); } EXTERN int __tgt_print_device_info(int64_t DeviceId) { diff --git a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp index 761e04e4c7bbdb..1e9a6a84d80583 100644 --- a/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp +++ b/offload/tools/kernelreplay/llvm-omp-kernel-replay.cpp @@ -13,8 +13,6 @@ #include "omptarget.h" -#include "Shared/PluginAPI.h" - #include "llvm/Support/CommandLine.h" #include "llvm/Support/JSON.h" #include "llvm/Support/MemoryBuffer.h" diff --git a/offload/unittests/Plugins/NextgenPluginsTest.cpp b/offload/unittests/Plugins/NextgenPluginsTest.cpp index 635bd1637c9032..479b3f614aed2b 100644 --- a/offload/unittests/Plugins/NextgenPluginsTest.cpp +++ b/offload/unittests/Plugins/NextgenPluginsTest.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#include "Shared/PluginAPI.h" #include "omptarget.h" #include "gtest/gtest.h"