Skip to content

Commit

Permalink
merge internal development externally
Browse files Browse the repository at this point in the history
  • Loading branch information
searlmc1 committed May 11, 2024
2 parents 434d49e + 76c0736 commit ed15249
Show file tree
Hide file tree
Showing 29 changed files with 689 additions and 1,335 deletions.
2 changes: 1 addition & 1 deletion clang/test/Driver/linker-wrapper-image.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@

// OPENMP: define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
// OPENMP-NEXT: entry:
// OPENMP-NEXT: %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
// OPENMP-NEXT: call void @__tgt_register_lib(ptr @.omp_offloading.descriptor)
// OPENMP-NEXT: %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
// OPENMP-NEXT: ret void
// OPENMP-NEXT: }

Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,12 +232,13 @@ void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
// Construct function body
IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));

Builder.CreateCall(RegFuncC, BinDesc);

// Register the destructors with 'atexit'. This is expected by the CUDA
// runtime and ensures that we clean up before dynamic objects are destroyed.
// This needs to be done before the runtime is called and registers its own.
// This needs to be done after plugin initialization to ensure that it is
// called before the plugin runtime is destroyed.
Builder.CreateCall(AtExit, UnregFunc);

Builder.CreateCall(RegFuncC, BinDesc);
Builder.CreateRetVoid();

// Add this function to constructors.
Expand Down
61 changes: 16 additions & 45 deletions offload/include/PluginManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
#ifndef OMPTARGET_PLUGIN_MANAGER_H
#define OMPTARGET_PLUGIN_MANAGER_H

#include "PluginInterface.h"

#include "DeviceImage.h"
#include "ExclusiveAccess.h"
#include "Shared/APITypes.h"
#include "Shared/PluginAPI.h"
#include "Shared/Requirements.h"

#include "device.h"
Expand All @@ -34,38 +35,7 @@
#include <mutex>
#include <string>

struct PluginManager;

/// Plugin adaptors should be created via `PluginAdaptorTy::create` which will
/// invoke the constructor and call `PluginAdaptorTy::init`. Eventual errors are
/// reported back to the caller, otherwise a valid and initialized adaptor is
/// returned.
struct PluginAdaptorTy {
/// Try to create a plugin adaptor from a filename.
/// The result holds either an owning pointer to the new adaptor or an
/// llvm::Error describing why it could not be produced.
static llvm::Expected<std::unique_ptr<PluginAdaptorTy>>
create(const std::string &Name);

/// Name of the shared object file representing the plugin.
std::string Name;

/// Access to the shared object file representing the plugin.
std::unique_ptr<llvm::sys::DynamicLibrary> LibraryHandler;

// For every PLUGIN_API_HANDLE(NAME) expanded while the macro is defined, this
// declares two members:
//   * NAME_ty - an alias for the type of the declared symbol __tgt_rtl_NAME
//   * NAME    - a pointer to that type, initialized to nullptr
// Presumably "Shared/PluginAPI.inc" invokes the macro once per plugin API
// entry point - confirm against that file.
#define PLUGIN_API_HANDLE(NAME) \
using NAME##_ty = decltype(__tgt_rtl_##NAME); \
NAME##_ty *NAME = nullptr;

#include "Shared/PluginAPI.inc"
// Keep the helper macro from leaking past this struct.
#undef PLUGIN_API_HANDLE

/// Create a plugin adaptor for filename \p Name with a dynamic library \p DL.
PluginAdaptorTy(const std::string &Name,
std::unique_ptr<llvm::sys::DynamicLibrary> DL);

/// Initialize the plugin adaptor, this can fail in which case the adaptor is
/// useless.
llvm::Error init();
};
using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy;

/// Struct for the data required to handle plugins
struct PluginManager {
Expand All @@ -80,6 +50,8 @@ struct PluginManager {

void init();

void deinit();

// Register a shared library with all (compatible) RTLs.
void registerLib(__tgt_bin_desc *Desc);

Expand All @@ -92,10 +64,9 @@ struct PluginManager {
std::make_unique<DeviceImageTy>(TgtBinDesc, TgtDeviceImage));
}

/// Initialize as many devices as possible for this plugin adaptor. Devices
/// that fail to initialize are ignored. Returns the offset the devices were
/// registered at.
void initDevices(PluginAdaptorTy &RTL);
/// Initialize as many devices as possible for this plugin. Devices that fail
/// to initialize are ignored.
void initDevices(GenericPluginTy &RTL);

/// Return the device presented to the user as device \p DeviceNo if it is
/// initialized and ready. Otherwise return an error explaining the problem.
Expand Down Expand Up @@ -151,8 +122,8 @@ struct PluginManager {
// Initialize all plugins.
void initAllPlugins();

/// Iterator range for all plugin adaptors (in use or not, but always valid).
auto pluginAdaptors() { return llvm::make_pointee_range(PluginAdaptors); }
/// Iterator range for all plugins (in use or not, but always valid).
auto plugins() { return llvm::make_pointee_range(Plugins); }

/// Return the user provided requirements.
int64_t getRequirements() const { return Requirements.getRequirements(); }
Expand All @@ -164,14 +135,14 @@ struct PluginManager {
bool RTLsLoaded = false;
llvm::SmallVector<__tgt_bin_desc *> DelayedBinDesc;

// List of all plugin adaptors, in use or not.
llvm::SmallVector<std::unique_ptr<PluginAdaptorTy>> PluginAdaptors;
// List of all plugins, in use or not.
llvm::SmallVector<std::unique_ptr<GenericPluginTy>> Plugins;

// Mapping of plugin adaptors to offsets in the device table.
llvm::DenseMap<const PluginAdaptorTy *, int32_t> DeviceOffsets;
// Mapping of plugins to offsets in the device table.
llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceOffsets;

// Mapping of plugin adaptors to the number of used devices.
llvm::DenseMap<const PluginAdaptorTy *, int32_t> DeviceUsed;
// Mapping of plugins to the number of used devices.
llvm::DenseMap<const GenericPluginTy *, int32_t> DeviceUsed;

// Set of all device images currently in use.
llvm::DenseSet<const __tgt_device_image *> UsedImages;
Expand Down
9 changes: 6 additions & 3 deletions offload/include/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,17 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"

#include "PluginInterface.h"

using GenericPluginTy = llvm::omp::target::plugin::GenericPluginTy;

// Forward declarations.
struct PluginAdaptorTy;
struct __tgt_bin_desc;
struct __tgt_target_table;

struct DeviceTy {
int32_t DeviceID;
PluginAdaptorTy *RTL;
GenericPluginTy *RTL;
int32_t RTLDeviceID;
/// The physical number of processors that may concurrently execute a team
/// For cuda, this is number of SMs, for amdgcn, this is number of CUs.
Expand All @@ -51,7 +54,7 @@ struct DeviceTy {
/// Controlled via environment flag OMPX_FORCE_SYNC_REGIONS
bool ForceSynchronousTargetRegions = false;

DeviceTy(PluginAdaptorTy *RTL, int32_t DeviceID, int32_t RTLDeviceID);
DeviceTy(GenericPluginTy *RTL, int32_t DeviceID, int32_t RTLDeviceID);
// DeviceTy is not copyable
DeviceTy(const DeviceTy &D) = delete;
DeviceTy &operator=(const DeviceTy &D) = delete;
Expand Down
22 changes: 6 additions & 16 deletions offload/plugins-nextgen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
set(common_dir ${CMAKE_CURRENT_SOURCE_DIR}/common)
add_subdirectory(common)
function(add_target_library target_name lib_name)
add_llvm_library(${target_name} SHARED
add_llvm_library(${target_name} STATIC
LINK_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
AggressiveInstCombine
Expand Down Expand Up @@ -45,27 +45,17 @@ function(add_target_library target_name lib_name)
)

llvm_update_compile_flags(${target_name})
target_include_directories(${target_name} PUBLIC ${common_dir}/include)
if(OMPT_TARGET_DEFAULT AND LIBOMPTARGET_OMPT_SUPPORT)
target_include_directories(${target_name} PUBLIC ${common_dir}/OMPT)
endif()
target_link_libraries(${target_name} PRIVATE
PluginCommon ${llvm_libs} ${OPENMP_PTHREAD_LIB})

target_compile_definitions(${target_name} PRIVATE TARGET_NAME=${lib_name})
target_compile_definitions(${target_name} PRIVATE
DEBUG_PREFIX="TARGET ${lib_name} RTL")

if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
# On FreeBSD, the 'environ' symbol is undefined at link time, but resolved by
# the dynamic linker at runtime. Therefore, allow the symbol to be undefined
# when creating a shared library.
target_link_libraries(${target_name} PRIVATE "-Wl,--allow-shlib-undefined")
else()
target_link_libraries(${target_name} PRIVATE "-Wl,-z,defs")
endif()

if(LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
target_link_libraries(${target_name} PRIVATE
"-Wl,--version-script=${common_dir}/../exports")
endif()
set_target_properties(${target_name} PROPERTIES CXX_VISIBILITY_PRESET protected)
set_target_properties(${target_name} PROPERTIES POSITION_INDEPENDENT_CODE ON)
endfunction()

foreach(plugin IN LISTS LIBOMPTARGET_PLUGINS_TO_BUILD)
Expand Down
11 changes: 0 additions & 11 deletions offload/plugins-nextgen/amdgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,3 @@ else()
libomptarget_say("Not generating AMDGPU tests, no supported devices detected."
" Use 'LIBOMPTARGET_FORCE_AMDGPU_TESTS' to override.")
endif()

# Install plugin under the lib destination folder.
install(TARGETS omptarget.rtl.amdgpu LIBRARY DESTINATION "${OFFLOAD_INSTALL_LIBDIR}")
if(NOT DEFINED CMAKE_INSTALL_RPATH)
set_target_properties(omptarget.rtl.amdgpu PROPERTIES
INSTALL_RPATH "$ORIGIN")
endif()

set_target_properties(omptarget.rtl.amdgpu PROPERTIES
BUILD_RPATH "$ORIGIN:${CMAKE_CURRENT_BINARY_DIR}/.."
CXX_VISIBILITY_PRESET protected)
29 changes: 15 additions & 14 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
WGSizeName += "_wg_size";
GlobalTy HostConstWGSize(WGSizeName, sizeof(decltype(ConstWGSize)),
&ConstWGSize);
GenericGlobalHandlerTy &GHandler = PluginTy::get().getGlobalHandler();
GenericGlobalHandlerTy &GHandler = Device.Plugin.getGlobalHandler();
if (auto Err =
GHandler.readGlobalFromImage(Device, AMDImage, HostConstWGSize)) {
// In case it is not found, we simply stick with the defaults.
Expand Down Expand Up @@ -2911,7 +2911,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (!AMDGPUKernel)
return Plugin::error("Failed to allocate memory for AMDGPU kernel");

new (AMDGPUKernel) AMDGPUKernelTy(Name, PluginTy::get().getGlobalHandler());
new (AMDGPUKernel) AMDGPUKernelTy(Name, Plugin.getGlobalHandler());

return *AMDGPUKernel;
}
Expand Down Expand Up @@ -4274,10 +4274,6 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
UInt32Envar KernTrace("LIBOMPTARGET_KERNEL_TRACE", 0);
llvm::omp::target::plugin::PrintKernelTrace = KernTrace.get();

#ifdef OMPT_SUPPORT
ompt::connectLibrary();
#endif

// Register event handler to detect memory errors on the devices.
Status = hsa_amd_register_system_event_handler(eventHandler, nullptr);
if (auto Err = Plugin::check(
Expand Down Expand Up @@ -4366,6 +4362,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy {

Triple::ArchType getTripleArch() const override { return Triple::amdgcn; }

const char *getName() const override { return GETNAME(TARGET_NAME); }

/// Get the ELF code for recognizing the compatible image binary.
uint16_t getMagicElfBits() const override { return ELF::EM_AMDGPU; }

Expand Down Expand Up @@ -4685,8 +4683,6 @@ Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
return Plugin::success();
}

GenericPluginTy *PluginTy::createPlugin() { return new AMDGPUPluginTy(); }

template <typename... ArgsTy>
static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) {
hsa_status_t ResultCode = static_cast<hsa_status_t>(Code);
Expand Down Expand Up @@ -4779,17 +4775,22 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
namespace llvm::omp::target::plugin {

/// Enable/disable kernel profiling for the given device.
void setOmptQueueProfile(int DeviceId, int Enable) {
AMDGPUPluginTy &Plugin = PluginTy::get<AMDGPUPluginTy>();
static_cast<AMDGPUDeviceTy &>(Plugin.getDevice(DeviceId))
.setOmptQueueProfile(Enable);
/// Forward an enable/disable request to AMDGPUDeviceTy::setOmptQueueProfile
/// for the device addressed by \p Device.
///
/// \param Device Opaque pointer that is converted to an AMDGPUDeviceTy *
///               without any validation and dereferenced immediately, so it
///               must be non-null and must address an AMDGPUDeviceTy.
/// \param Enable Passed through unchanged to the device.
///
/// NOTE(review): the tracing entry points hand their `ompt_device_t *` straight
/// to this function - confirm that pointer really addresses an AMDGPUDeviceTy.
void setOmptQueueProfile(void *Device, int Enable) {
  // Converting `void *` back to an object pointer is what static_cast is for;
  // reinterpret_cast is not required for this conversion.
  static_cast<llvm::omp::target::plugin::AMDGPUDeviceTy *>(Device)
      ->setOmptQueueProfile(Enable);
}

} // namespace llvm::omp::target::plugin

/// Enable/disable kernel profiling for the given device.
void setGlobalOmptKernelProfile(int DeviceId, int Enable) {
llvm::omp::target::plugin::setOmptQueueProfile(DeviceId, Enable);
void setGlobalOmptKernelProfile(void *Device, int Enable) {
llvm::omp::target::plugin::setOmptQueueProfile(Device, Enable);
}

#endif

/// C-linkage factory for the AMDGPU plugin.
///
/// Heap-allocates a new AMDGPUPluginTy and returns it through a pointer to its
/// GenericPluginTy base. The function does not free the object itself;
/// presumably the caller takes ownership - confirm at the call site.
extern "C" llvm::omp::target::plugin::GenericPluginTy *createPlugin_amdgpu() {
  auto *Instance = new llvm::omp::target::plugin::AMDGPUPluginTy();
  return Instance;
}
4 changes: 1 addition & 3 deletions offload/plugins-nextgen/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,4 @@ target_include_directories(PluginCommon PUBLIC
${LIBOMPTARGET_INCLUDE_DIR}
)

set_target_properties(PluginCommon PROPERTIES
POSITION_INDEPENDENT_CODE ON
CXX_VISIBILITY_PRESET protected)
set_target_properties(PluginCommon PROPERTIES POSITION_INDEPENDENT_CODE ON)
2 changes: 1 addition & 1 deletion offload/plugins-nextgen/common/OMPT/OmptDeviceTracing.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#define DEBUG_PREFIX "OMPT"

extern void setOmptAsyncCopyProfile(bool Enable);
extern void setGlobalOmptKernelProfile(int DeviceId, int Enable);
extern void setGlobalOmptKernelProfile(void *Device, int Enable);
extern uint64_t getSystemTimestampInNs();

namespace llvm {
Expand Down
12 changes: 2 additions & 10 deletions offload/plugins-nextgen/common/OMPT/OmptTracing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,6 @@ double llvm::omp::target::ompt::HostToDeviceOffset = .0;

std::map<ompt_device_t *, int32_t> llvm::omp::target::ompt::Devices;

std::atomic<uint64_t> llvm::omp::target::ompt::TracingTypesEnabled{0};

bool llvm::omp::target::ompt::TracingActive = false;

void llvm::omp::target::ompt::setTracingState(bool State) {
TracingActive = State;
}

std::shared_ptr<llvm::sys::DynamicLibrary>
llvm::omp::target::ompt::getParentLibrary() {
static bool ParentLibraryAssigned = false;
Expand Down Expand Up @@ -141,7 +133,7 @@ ompt_start_trace(ompt_device_t *Device, ompt_callback_buffer_request_t Request,
setOmptAsyncCopyProfile(/*Enable=*/true);
// Enable queue dispatch profiling
if (DeviceId >= 0)
setGlobalOmptKernelProfile(DeviceId, /*Enable=*/1);
setGlobalOmptKernelProfile(Device, /*Enable=*/1);
else
REPORT("May not enable kernel profiling for invalid device id=%d\n",
DeviceId);
Expand Down Expand Up @@ -179,7 +171,7 @@ OMPT_API_ROUTINE int ompt_stop_trace(ompt_device_t *Device) {
// Disable queue dispatch profiling
int DeviceId = getDeviceId(Device);
if (DeviceId >= 0)
setGlobalOmptKernelProfile(DeviceId, /*Enable=*/0);
setGlobalOmptKernelProfile(Device, /*Enable=*/0);
else
REPORT("May not disable kernel profiling for invalid device id=%d\n",
DeviceId);
Expand Down
Loading

0 comments on commit ed15249

Please sign in to comment.