diff --git a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h index ed34cbed785bcf..13eca730a9295d 100644 --- a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h +++ b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h @@ -24,7 +24,6 @@ #include "llvm/Support/ErrorHandling.h" #define OMPT_IF_BUILT(stmt) stmt -#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level) /// Callbacks for target regions require task_data representing the /// encountering task. @@ -211,6 +210,11 @@ class Interface { /// Thread local state for target region and associated metadata extern thread_local Interface RegionInterface; +/// Thread local variable holding the return address. +/// When using __builtin_return_address to set the return address, +/// allow 0 as the only argument to avoid unpredictable effects. +extern thread_local void *ReturnAddress; + template void InvokeInterfaceFunction(FuncTy Func, ArgsTy Args, std::index_sequence) { @@ -249,10 +253,47 @@ template InterfaceRAII(CallbackPairTy Callbacks, ArgsTy... Args) -> InterfaceRAII; +/// Used to set and reset the thread-local return address. The RAII is expected +/// to be created at a runtime entry point when the return address should be +/// null. If so, the return address is set and \p IsSetter is set in the ctor. +/// The dtor resets the return address only if the corresponding object set it. +/// So if the RAII is called from a nested runtime function, the ctor/dtor will +/// do nothing since the thread local return address is already set. +class ReturnAddressSetterRAII { +public: + explicit ReturnAddressSetterRAII(void *RA) : IsSetter(false) { + // Handle nested calls. If already set, do not set again since it + // must be in a nested call. + if (ReturnAddress == nullptr) { + // Store the return address to a thread local variable. 
+ ReturnAddress = RA; + IsSetter = true; + } + } + ~ReturnAddressSetterRAII() { + // Reset the return address if this object set it. + if (IsSetter) + ReturnAddress = nullptr; + } + + // Non-copyable: a copied object would duplicate IsSetter and clear the + // thread-local return address while the original setter is still live. + ReturnAddressSetterRAII(const ReturnAddressSetterRAII &) = delete; + ReturnAddressSetterRAII &operator=(const ReturnAddressSetterRAII &) = delete; + +private: + // Did this object set the thread-local return address? + bool IsSetter; +}; + } // namespace ompt } // namespace target } // namespace omp } // namespace llvm + +// The getter returns the address stored in the thread local variable. +#define OMPT_GET_RETURN_ADDRESS llvm::omp::target::ompt::ReturnAddress + #else #define OMPT_IF_BUILT(stmt) #endif diff --git a/openmp/libomptarget/src/LegacyAPI.cpp b/openmp/libomptarget/src/LegacyAPI.cpp index d0f21a36513ada..91d5642e811280 100644 --- a/openmp/libomptarget/src/LegacyAPI.cpp +++ b/openmp/libomptarget/src/LegacyAPI.cpp @@ -10,15 +10,21 @@ // //===----------------------------------------------------------------------===// +#include "OpenMP/OMPT/Interface.h" #include "omptarget.h" #include "private.h" #include "Shared/Profile.h" +#ifdef OMPT_SUPPORT +using namespace llvm::omp::target::ompt; +#endif + EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, nullptr, nullptr); } @@ -30,7 +36,7 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum, int32_t NoAliasDepNum, void *NoAliasDepList) { TIMESCOPE(); - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, nullptr, nullptr); } @@ -39,6 +45,7 @@ EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); 
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, nullptr, nullptr); } @@ -47,6 +54,7 @@ EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, nullptr, nullptr); } @@ -56,7 +64,7 @@ EXTERN void __tgt_target_data_update_nowait( int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, void *NoAliasDepList) { TIMESCOPE(); - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, nullptr, nullptr); } @@ -68,7 +76,7 @@ EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum, int32_t NoAliasDepNum, void *NoAliasDepList) { TIMESCOPE(); - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); __tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, nullptr, nullptr); } @@ -78,6 +86,7 @@ EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers) { TIMESCOPE_WITH_IDENT(Loc); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, 0}; return __tgt_target_kernel(Loc, DeviceId, -1, -1, HostPtr, &KernelArgs); @@ -87,6 +96,7 @@ EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, 
nullptr, nullptr); } @@ -96,7 +106,7 @@ EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum, int64_t *ArgTypes, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, void *NoAliasDepList) { TIMESCOPE(); - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, nullptr, nullptr); } @@ -107,7 +117,7 @@ EXTERN int __tgt_target_nowait_mapper( map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, void *NoAliasDepList) { TIMESCOPE_WITH_IDENT(Loc); - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers); } @@ -120,7 +130,7 @@ EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId, void **ArgMappers, int32_t NumTeams, int32_t ThreadLimit) { TIMESCOPE_WITH_IDENT(Loc); - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, 0}; return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, @@ -132,6 +142,7 @@ EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum, int64_t *ArgTypes, int32_t NumTeams, int32_t ThreadLimit) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, nullptr, nullptr, NumTeams, ThreadLimit); @@ -145,7 +156,7 @@ EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr, void *DepList, int32_t NoAliasDepNum, void *NoAliasDepList) { TIMESCOPE(); - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, 
nullptr, nullptr, NumTeams, ThreadLimit); @@ -158,7 +169,7 @@ EXTERN int __tgt_target_teams_nowait_mapper( int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, void *NoAliasDepList) { TIMESCOPE_WITH_IDENT(Loc); - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, NumTeams, ThreadLimit); @@ -182,6 +193,7 @@ EXTERN int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId, int32_t NoAliasDepNum, void *NoAliasDepList) { TIMESCOPE_WITH_IDENT(Loc); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, KernelArgs); } diff --git a/openmp/libomptarget/src/OpenMP/API.cpp b/openmp/libomptarget/src/OpenMP/API.cpp index 1ab1877774f6e1..85fb08c00a9a74 100644 --- a/openmp/libomptarget/src/OpenMP/API.cpp +++ b/openmp/libomptarget/src/OpenMP/API.cpp @@ -16,6 +16,7 @@ #include "rtl.h" #include "OpenMP/InternalTypes.h" +#include "OpenMP/OMPT/Interface.h" #include "OpenMP/omp.h" #include "Shared/Profile.h" @@ -26,6 +27,10 @@ #include #include +#ifdef OMPT_SUPPORT +using namespace llvm::omp::target::ompt; +#endif + void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, const char *Name); void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind, @@ -59,6 +64,7 @@ int32_t __kmpc_omp_task_with_deps(ident_t *loc_ref, int32_t gtid, EXTERN int omp_get_num_devices(void) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); size_t NumDevices = PM->getNumDevices(); DP("Call to omp_get_num_devices returning %zd\n", NumDevices); @@ -68,6 +74,7 @@ EXTERN int omp_get_num_devices(void) { EXTERN int omp_get_device_num(void) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); int HostDevice = omp_get_initial_device(); DP("Call to omp_get_device_num 
returning %d\n", HostDevice); @@ -77,6 +84,7 @@ EXTERN int omp_get_device_num(void) { EXTERN int omp_get_initial_device(void) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); int HostDevice = omp_get_num_devices(); DP("Call to omp_get_initial_device returning %d\n", HostDevice); return HostDevice; @@ -85,52 +93,70 @@ EXTERN int omp_get_initial_device(void) { EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) { TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DeviceNum) + ";size=" + std::to_string(Size)); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } EXTERN void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEVICE, __func__); } EXTERN void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_HOST, __func__); } EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_SHARED, __func__); } EXTERN void omp_target_free(void *Ptr, int DeviceNum) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } EXTERN void llvm_omp_target_free_device(void *Ptr, int DeviceNum) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEVICE, __func__); } EXTERN void llvm_omp_target_free_host(void *Ptr, int DeviceNum) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return targetFreeExplicit(Ptr, DeviceNum, 
TARGET_ALLOC_HOST, __func__); } EXTERN void llvm_omp_target_free_shared(void *Ptre, int DeviceNum) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return targetFreeExplicit(Ptre, DeviceNum, TARGET_ALLOC_SHARED, __func__); } -EXTERN void *llvm_omp_target_dynamic_shared_alloc() { return nullptr; } -EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; } +EXTERN void *llvm_omp_target_dynamic_shared_alloc() { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); + return nullptr; +} + +EXTERN void *llvm_omp_get_dynamic_shared() { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); + return nullptr; +} EXTERN [[nodiscard]] void *llvm_omp_target_lock_mem(void *Ptr, size_t Size, int DeviceNum) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); return targetLockExplicit(Ptr, Size, DeviceNum, __func__); } EXTERN void llvm_omp_target_unlock_mem(void *Ptr, int DeviceNum) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); targetUnlockExplicit(Ptr, DeviceNum, __func__); } EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n", DeviceNum, DPxPTR(Ptr)); @@ -167,6 +193,7 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) + ";src_dev=" + std::to_string(SrcDevice) + ";size=" + std::to_string(Length)); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); DP("Call to omp_target_memcpy, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", @@ -248,6 +275,7 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, // The helper function that calls omp_target_memcpy or omp_target_memcpy_rect static int 
libomp_target_memcpy_async_task(int32_t Gtid, kmp_task_t *Task) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); if (Task == nullptr) return OFFLOAD_FAIL; @@ -279,6 +307,7 @@ static int libomp_target_memcpy_async_task(int32_t Gtid, kmp_task_t *Task) { } static int libomp_target_memset_async_task(int32_t Gtid, kmp_task_t *Task) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); if (!Task) return OFFLOAD_FAIL; @@ -307,6 +336,7 @@ template static inline int libomp_helper_task_creation(T *Args, int (*Fn)(int32_t, kmp_task_t *), int DepObjCount, omp_depend_t *DepObjList) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); // Create global thread ID int Gtid = __kmpc_global_thread_num(nullptr); @@ -340,6 +370,7 @@ libomp_helper_task_creation(T *Args, int (*Fn)(int32_t, kmp_task_t *), EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes, int DeviceNum) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n", DeviceNum, Ptr, NumBytes); @@ -382,6 +413,7 @@ EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes, EXTERN void *omp_target_memset_async(void *Ptr, int ByteVal, size_t NumBytes, int DeviceNum, int DepObjCount, omp_depend_t *DepObjList) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); DP("Call to omp_target_memset_async, device %d, device pointer %p, size %zu", DeviceNum, Ptr, NumBytes); @@ -408,6 +440,7 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) + ";src_dev=" + std::to_string(SrcDevice) + ";size=" + std::to_string(Length)); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); DP("Call to omp_target_memcpy_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src 
offset %zu, length %zu\n", @@ -436,6 +469,7 @@ omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize, const size_t *DstOffsets, const size_t *SrcOffsets, const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, int SrcDevice) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " @@ -498,6 +532,7 @@ EXTERN int omp_target_memcpy_rect_async( ";src_dev=" + std::to_string(SrcDevice) + ";size=" + std::to_string(ElementSize) + ";num_dims=" + std::to_string(NumDims)); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " @@ -534,6 +569,7 @@ EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr, size_t Size, size_t DeviceOffset, int DeviceNum) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD ", " "device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n", DPxPTR(HostPtr), DPxPTR(DevicePtr), Size, DeviceOffset, DeviceNum); @@ -561,6 +597,7 @@ EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr, EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) { TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", " "device_num %d\n", DPxPTR(HostPtr), DeviceNum); @@ -588,6 +625,7 @@ EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) { EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { 
TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); DP("Call to omp_get_mapped_ptr with ptr " DPxMOD ", device_num %d.\n", DPxPTR(Ptr), DeviceNum); diff --git a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp index 82934f10486c5a..66435d2a4fe64f 100644 --- a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp +++ b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp @@ -51,6 +51,8 @@ LibomptargetRtlFinalizer *LibraryFinalizer = nullptr; thread_local Interface llvm::omp::target::ompt::RegionInterface; +thread_local void *llvm::omp::target::ompt::ReturnAddress = nullptr; + bool llvm::omp::target::ompt::Initialized = false; ompt_get_callback_t llvm::omp::target::ompt::lookupCallbackByCode = nullptr; diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 67edc559e8ede9..5fe3f508b739cb 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -123,7 +123,7 @@ void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII( RegionInterface.getCallbacks(), DeviceID, HstPtr, &TargetPtr, Size, - /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));) + /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) TargetPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); return TargetPtr; @@ -134,7 +134,7 @@ int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) { OMPT_IF_BUILT(InterfaceRAII TargetDataDeleteRAII( RegionInterface.getCallbacks(), DeviceID, TgtAllocBegin, - /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));) + /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind); } @@ -152,7 +152,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, InterfaceRAII TargetDataSubmitRAII( RegionInterface.getCallbacks(), DeviceID, TgtPtrBegin, HstPtrBegin, Size, - /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));) + 
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size); @@ -174,7 +174,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, InterfaceRAII TargetDataRetrieveRAII( RegionInterface.getCallbacks(), DeviceID, HstPtrBegin, TgtPtrBegin, Size, - /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));) + /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) if (!RTL->data_retrieve_async || !RTL->synchronize) return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size); diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 49495ac266f1b3..8725e5eb55fc9e 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -108,7 +108,7 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, ? RegionInterface.getCallbacks() : RegionInterface.getCallbacks(); InterfaceRAII TargetDataRAII(CallbackFunctions, DeviceId, - OMPT_GET_RETURN_ADDRESS(0));) + OMPT_GET_RETURN_ADDRESS);) int Rc = OFFLOAD_SUCCESS; Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes, @@ -130,7 +130,7 @@ EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers) { - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); targetData(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataBegin, "Entering OpenMP data region with being_mapper", @@ -142,7 +142,7 @@ EXTERN void __tgt_target_data_begin_nowait_mapper( void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, void *NoAliasDepList) { - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); targetData( Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, 
targetDataBegin, @@ -158,7 +158,7 @@ EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers) { - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); targetData(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataEnd, "Exiting OpenMP data region with end_mapper", "end"); @@ -169,7 +169,7 @@ EXTERN void __tgt_target_data_end_nowait_mapper( void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, void *NoAliasDepList) { - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); targetData( Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataEnd, @@ -182,7 +182,7 @@ EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers) { - + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); targetData( Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataUpdate, @@ -195,6 +195,7 @@ EXTERN void __tgt_target_data_update_nowait_mapper( void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, void *NoAliasDepList) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); targetData( Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataUpdate, @@ -293,7 +294,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, /// RAII to establish tool anchors before and after target region OMPT_IF_BUILT(InterfaceRAII TargetRAII( RegionInterface.getCallbacks(), DeviceId, - /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));) + /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) int Rc = OFFLOAD_SUCCESS; Rc = target(Loc, 
*DeviceOrErr, HostPtr, *KernelArgs, AsyncInfo); @@ -322,6 +323,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int32_t ThreadLimit, void *HostPtr, KernelArgsTy *KernelArgs) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); if (KernelArgs->Flags.NoWait) return targetKernel( Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, KernelArgs); @@ -341,6 +343,7 @@ EXTERN int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize, void *VAddr, bool IsRecord, bool SaveOutput, uint64_t &ReqPtrArgOffset) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); auto DeviceOrErr = PM->getDevice(DeviceId); if (!DeviceOrErr) FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str()); @@ -376,6 +379,7 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, int32_t ThreadLimit, uint64_t LoopTripCount) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); if (checkDeviceAndCtors(DeviceId, Loc)) { DP("Not offloading to device %" PRId64 "\n", DeviceId); return OMP_TGT_FAIL; @@ -387,7 +391,7 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, /// RAII to establish tool anchors before and after target region OMPT_IF_BUILT(InterfaceRAII TargetRAII( RegionInterface.getCallbacks(), DeviceId, - /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));) + /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);) AsyncInfoTy AsyncInfo(*DeviceOrErr); int Rc = target_replay(Loc, *DeviceOrErr, HostPtr, DeviceMemory, @@ -442,6 +446,7 @@ EXTERN int __tgt_print_device_info(int64_t DeviceId) { } EXTERN void __tgt_target_nowait_query(void **AsyncHandle) { + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); if (!AsyncHandle || !*AsyncHandle) { FATAL_MESSAGE0( 1, "Receive an invalid async handle from the current OpenMP task. 
Is " diff --git a/openmp/libomptarget/test/ompt/callbacks.h b/openmp/libomptarget/test/ompt/callbacks.h index 083006f756def7..1f9b7c177b2860 100644 --- a/openmp/libomptarget/test/ompt/callbacks.h +++ b/openmp/libomptarget/test/ompt/callbacks.h @@ -1,4 +1,5 @@ #include +#include #include // Tool related code below @@ -37,8 +38,6 @@ static void on_ompt_callback_target_data_op( void *src_addr, int src_device_num, void *dest_addr, int dest_device_num, size_t bytes, const void *codeptr_ra) { assert(codeptr_ra != 0 && "Unexpected null codeptr"); - // Both src and dest must not be null - assert((src_addr != 0 || dest_addr != 0) && "Both src and dest addr null"); printf(" Callback DataOp: target_id=%lu host_op_id=%lu optype=%d src=%p " "src_device_num=%d " "dest=%p dest_device_num=%d bytes=%lu code=%p\n", @@ -80,8 +79,6 @@ static void on_ompt_callback_target_data_op_emi( void *dest_addr, int dest_device_num, size_t bytes, const void *codeptr_ra) { assert(codeptr_ra != 0 && "Unexpected null codeptr"); - // Both src and dest must not be null - assert((src_addr != 0 || dest_addr != 0) && "Both src and dest addr null"); if (endpoint == ompt_scope_begin) *host_op_id = next_op_id++; printf(" Callback DataOp EMI: endpoint=%d optype=%d target_task_data=%p " diff --git a/openmp/libomptarget/test/ompt/target_memcpy.c b/openmp/libomptarget/test/ompt/target_memcpy.c new file mode 100644 index 00000000000000..444f4b7bdbda3d --- /dev/null +++ b/openmp/libomptarget/test/ompt/target_memcpy.c @@ -0,0 +1,61 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +/* + * Verify that for the target OpenMP APIs, the return address is non-null and + * distinct. 
+ */ + +#include +#include + +#include "callbacks.h" +#include "register_non_emi.h" + +int main() { + int dev = omp_get_default_device(); + int host = omp_get_initial_device(); + + int host_var1 = 42; + int host_var2 = 0; + void *dev_ptr = NULL; + + // Allocate space on the device + dev_ptr = omp_target_alloc(sizeof(int), dev); + if (dev_ptr == NULL) + abort(); + + // H2D transfer + if (omp_target_memcpy(dev_ptr, &host_var1, sizeof(int), 0, 0, dev, host)) + abort(); + + // D2H transfer + if (omp_target_memcpy(&host_var2, dev_ptr, sizeof(int), 0, 0, host, dev)) + abort(); + + // Free the device location + omp_target_free(dev_ptr, dev); + + // Both host variables should have the same value. + return host_var1 != host_var2; +} + +// clang-format off +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE1:.*]] +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK-NOT: code=(nil) +/// CHECK-NOT: code=[[CODE1]] +/// CHECK: code=[[CODE2:.*]] +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK-NOT: code=(nil) +/// CHECK-NOT: code=[[CODE2]] +/// CHECK: code=[[CODE3:.*]] +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK-NOT: code=(nil) +/// CHECK-NOT: code=[[CODE3]] diff --git a/openmp/libomptarget/test/ompt/veccopy.c b/openmp/libomptarget/test/ompt/veccopy.c index 79cd918a60c574..80e71fd8a48cb4 100644 --- a/openmp/libomptarget/test/ompt/veccopy.c +++ b/openmp/libomptarget/test/ompt/veccopy.c @@ -54,29 +54,51 @@ int main() { return rc; } +// clang-format off /// CHECK: Callback Init: /// CHECK: Callback Load: -/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 device_num=[[DEVICE_NUM:[0-9]+]] +/// 
CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE1:.*]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: code=[[CODE1]] /// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 -/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 +/// CHECK: code=[[CODE1]] +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 device_num=[[DEVICE_NUM]] code=[[CODE1]] /// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 +/// CHECK-SAME: device_num=[[DEVICE_NUM]] +/// CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE2:.*]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] 
host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2 +/// CHECK: code=[[CODE2]] /// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4 -/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 +/// CHECK: code=[[CODE2]] +/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 device_num=[[DEVICE_NUM]] code=[[CODE2]] /// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_data.c b/openmp/libomptarget/test/ompt/veccopy_data.c index 540a7d64233455..cef1de316a7a14 100644 --- a/openmp/libomptarget/test/ompt/veccopy_data.c +++ b/openmp/libomptarget/test/ompt/veccopy_data.c @@ -82,46 +82,79 @@ int main() { /// CHECK: Callback Load: /// CHECK: Callback Target EMI: kind=2 endpoint=1 /// CHECK-NOT: device_num=-1 +/// CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE1:.*]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 /// CHECK-NOT: dest=(nil) +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: code=[[CODE1]] /// CHECK: Callback Target EMI: kind=2 endpoint=2 /// CHECK-NOT: device_num=-1 +/// CHECK: code=[[CODE1]] /// CHECK: Callback Target EMI: kind=1 endpoint=1 /// CHECK-NOT: device_num=-1 +/// CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE2:.*]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// 
CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 /// CHECK-NOT: dest=(nil) +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: code=[[CODE2]] /// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1 /// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1 /// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: code=[[CODE2]] /// CHECK: Callback Target EMI: kind=1 endpoint=2 /// CHECK-NOT: device_num=-1 +/// CHECK: code=[[CODE2]] /// CHECK: Callback Target EMI: kind=3 endpoint=1 /// CHECK-NOT: device_num=-1 +/// CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE3:.*]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: code=[[CODE3]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: code=[[CODE3]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: code=[[CODE3]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: code=[[CODE3]] /// CHECK: Callback Target EMI: kind=3 endpoint=2 /// CHECK-NOT: device_num=-1 +/// CHECK: code=[[CODE3]] /// CHECK: Callback Target EMI: kind=1 endpoint=1 /// CHECK-NOT: device_num=-1 +/// CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE4:.*]] /// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1 /// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1 /// CHECK: Callback Target EMI: kind=1 endpoint=2 /// CHECK-NOT: device_num=-1 +/// CHECK: code=[[CODE4]] /// CHECK: Callback Target EMI: kind=4 endpoint=1 /// CHECK-NOT: device_num=-1 +/// CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE5:.*]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: code=[[CODE5]] /// CHECK: Callback DataOp EMI: 
endpoint=2 optype=3 +/// CHECK: code=[[CODE5]] /// CHECK: Callback Target EMI: kind=4 endpoint=2 /// CHECK-NOT: device_num=-1 +/// CHECK: code=[[CODE5]] /// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_emi.c b/openmp/libomptarget/test/ompt/veccopy_emi.c index 37600a3482ba9d..b597d7be6aff66 100644 --- a/openmp/libomptarget/test/ompt/veccopy_emi.c +++ b/openmp/libomptarget/test/ompt/veccopy_emi.c @@ -58,47 +58,86 @@ int main() { /// CHECK: Callback Init: /// CHECK: Callback Load: /// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE1:.*]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 /// CHECK-NOT: dest=(nil) +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 /// CHECK-NOT: dest=(nil) +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: code=[[CODE1]] /// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1 /// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1 /// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: code=[[CODE1]] /// CHECK: Callback DataOp 
EMI: endpoint=2 optype=4 +/// CHECK: code=[[CODE1]] /// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: code=[[CODE1]] + /// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE2:.*]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 /// CHECK-NOT: dest=(nil) +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 /// CHECK-NOT: dest=(nil) +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: code=[[CODE2]] /// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=0 /// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=0 /// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: code=[[CODE2]] /// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: code=[[CODE2]] /// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK: code=[[CODE2]] /// CHECK: Callback Fini: