Skip to content

Commit

Permalink
[libomptarget] [OMPT] Fixed return address computation for OMPT event…
Browse files Browse the repository at this point in the history
…s. (llvm#80498)

Currently, __builtin_return_address is used to generate the return
address when the callback invoker is created. However, this may result
in the return address pointing to an internal runtime function. This is
not what a tool would typically want. A tool would want to know the
corresponding user code from where the runtime entry point is invoked.

This change adds a thread local variable that is assigned the return
address at the OpenMP runtime entry points. An RAII is used to manage
the modifications to the thread local variable. Whenever the return
address is required for OMPT events, it is read from the thread local
variable.
  • Loading branch information
dhruvachak authored Feb 8, 2024
1 parent 4520b47 commit 0d7f232
Show file tree
Hide file tree
Showing 11 changed files with 275 additions and 30 deletions.
38 changes: 37 additions & 1 deletion openmp/libomptarget/include/OpenMP/OMPT/Interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include "llvm/Support/ErrorHandling.h"

#define OMPT_IF_BUILT(stmt) stmt
#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level)

/// Callbacks for target regions require task_data representing the
/// encountering task.
Expand Down Expand Up @@ -211,6 +210,11 @@ class Interface {
/// Thread local state for target region and associated metadata
extern thread_local Interface RegionInterface;

/// Thread local variable holding the return address.
/// When using __builtin_return_address to set the return address,
/// allow 0 as the only argument to avoid unpredictable effects.
extern thread_local void *ReturnAddress;

template <typename FuncTy, typename ArgsTy, size_t... IndexSeq>
void InvokeInterfaceFunction(FuncTy Func, ArgsTy Args,
std::index_sequence<IndexSeq...>) {
Expand Down Expand Up @@ -249,10 +253,42 @@ template <typename CallbackPairTy, typename... ArgsTy>
InterfaceRAII(CallbackPairTy Callbacks, ArgsTy... Args)
-> InterfaceRAII<CallbackPairTy, ArgsTy...>;

/// Used to set and reset the thread-local return address. The RAII is expected
/// to be created at a runtime entry point when the return address should be
/// null. If so, the return address is set and \p IsSetter is set in the ctor.
/// The dtor resets the return address only if the corresponding object set it.
/// So if the RAII is called from a nested runtime function, the ctor/dtor will
/// do nothing since the thread local return address is already set.
class ReturnAddressSetterRAII {
public:
ReturnAddressSetterRAII(void *RA) : IsSetter(false) {
// Handle nested calls. If already set, do not set again since it
// must be in a nested call.
if (ReturnAddress == nullptr) {
// Store the return address to a thread local variable.
ReturnAddress = RA;
IsSetter = true;
}
}
~ReturnAddressSetterRAII() {
// Reset the return address if this object set it.
if (IsSetter)
ReturnAddress = nullptr;
}

private:
// Did this object set the thread-local return address?
bool IsSetter;
};

} // namespace ompt
} // namespace target
} // namespace omp
} // namespace llvm

// The getter returns the address stored in the thread local variable.
#define OMPT_GET_RETURN_ADDRESS llvm::omp::target::ompt::ReturnAddress

#else
#define OMPT_IF_BUILT(stmt)
#endif
Expand Down
28 changes: 20 additions & 8 deletions openmp/libomptarget/src/LegacyAPI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,21 @@
//
//===----------------------------------------------------------------------===//

#include "OpenMP/OMPT/Interface.h"
#include "omptarget.h"
#include "private.h"

#include "Shared/Profile.h"

#ifdef OMPT_SUPPORT
using namespace llvm::omp::target::ompt;
#endif

EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes) {
TIMESCOPE();
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -30,7 +36,7 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE();

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -39,6 +45,7 @@ EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes) {
TIMESCOPE();
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -47,6 +54,7 @@ EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes) {
TIMESCOPE();
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -56,7 +64,7 @@ EXTERN void __tgt_target_data_update_nowait(
int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList,
int32_t NoAliasDepNum, void *NoAliasDepList) {
TIMESCOPE();

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -68,7 +76,7 @@ EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE();

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -78,6 +86,7 @@ EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
int64_t *ArgSizes, int64_t *ArgTypes,
map_var_info_t *ArgNames, void **ArgMappers) {
TIMESCOPE_WITH_IDENT(Loc);
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, 0};
return __tgt_target_kernel(Loc, DeviceId, -1, -1, HostPtr, &KernelArgs);
Expand All @@ -87,6 +96,7 @@ EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes) {
TIMESCOPE();
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -96,7 +106,7 @@ EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
int64_t *ArgTypes, int32_t DepNum, void *DepList,
int32_t NoAliasDepNum, void *NoAliasDepList) {
TIMESCOPE();

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -107,7 +117,7 @@ EXTERN int __tgt_target_nowait_mapper(
map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList,
int32_t NoAliasDepNum, void *NoAliasDepList) {
TIMESCOPE_WITH_IDENT(Loc);

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, ArgNames, ArgMappers);
}
Expand All @@ -120,7 +130,7 @@ EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
void **ArgMappers, int32_t NumTeams,
int32_t ThreadLimit) {
TIMESCOPE_WITH_IDENT(Loc);

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, 0};
return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
Expand All @@ -132,6 +142,7 @@ EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
int64_t *ArgTypes, int32_t NumTeams,
int32_t ThreadLimit) {
TIMESCOPE();
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
Args, ArgSizes, ArgTypes, nullptr, nullptr,
NumTeams, ThreadLimit);
Expand All @@ -145,7 +156,7 @@ EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr,
void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE();

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
Args, ArgSizes, ArgTypes, nullptr, nullptr,
NumTeams, ThreadLimit);
Expand All @@ -158,7 +169,7 @@ EXTERN int __tgt_target_teams_nowait_mapper(
int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE_WITH_IDENT(Loc);

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase,
Args, ArgSizes, ArgTypes, ArgNames,
ArgMappers, NumTeams, ThreadLimit);
Expand All @@ -182,6 +193,7 @@ EXTERN int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId,
int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE_WITH_IDENT(Loc);
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
KernelArgs);
}
Loading

0 comments on commit 0d7f232

Please sign in to comment.