Skip to content

Commit

Permalink
[SYCL][XPTI] 'queue_id' metadata feature refactoring (#13070)
Browse files Browse the repository at this point in the history
- Better requirements/test cases showed gaps in previous implementation
that resulted in data inconsistencies
- Metadata is associated with a UID, and since UIDs are the same across
multiple instantiations of the same object, only invariant data should be
stored in the metadata object
- Adding mutable data resulted in data inconsistencies and the feature
refactoring addresses these issues

---------

Signed-off-by: Vasanth Tovinkere <vasanth.tovinkere@intel.com>
  • Loading branch information
tovinkere committed Mar 21, 2024
1 parent 2f9c0bb commit 9876e19
Show file tree
Hide file tree
Showing 13 changed files with 313 additions and 49 deletions.
4 changes: 4 additions & 0 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
xpti::addMetadata(TEvent, "memory_size", Count);
xpti::addMetadata(TEvent, "queue_id", MQueueID);
});
// Before we notify the subscribers, we publish the 'queue_id', which was
// previously a metadata entry, to TLS for use by callback handlers
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
// Notify XPTI about the memset submission
PrepareNotify.notify();
// Emit a begin/end scope for this call
Expand Down Expand Up @@ -159,6 +162,7 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
xpti::addMetadata(TEvent, "memory_size", Count);
xpti::addMetadata(TEvent, "queue_id", MQueueID);
});
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
// Notify XPTI about the memcpy submission
PrepareNotify.notify();
// Emit a begin/end scope for this call
Expand Down
13 changes: 10 additions & 3 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class queue_impl {
/// \param PropList is a list of properties to use for queue construction.
queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler,
const property_list &PropList)
: queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {};
: queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){};

/// Constructs a SYCL queue with an async_handler and property_list provided
/// from a device and a context.
Expand Down Expand Up @@ -176,13 +176,16 @@ class queue_impl {
// This section is the second part of the instrumentation that uses the
// tracepoint information and notifies
}

// We enable XPTI tracing events using the TLS mechanism; if the code
// location data is available, then the tracing data will be rich.
#if XPTI_ENABLE_INSTRUMENTATION
constexpr uint16_t NotificationTraceType =
static_cast<uint16_t>(xpti::trace_point_type_t::queue_create);
// Using the instance override constructor for use with queues as queues
// maintain instance IDs in the object
XPTIScope PrepareNotify((void *)this, NotificationTraceType,
SYCL_STREAM_NAME, "queue_create");
SYCL_STREAM_NAME, MQueueID, "queue_create");
// Cache the trace event, stream id and instance IDs for the destructor
if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
NotificationTraceType)) {
Expand All @@ -207,6 +210,8 @@ class queue_impl {
xpti::addMetadata(TEvent, "queue_handle",
reinterpret_cast<size_t>(getHandleRef()));
});
// Also publish to TLS
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
PrepareNotify.notify();
}
#endif
Expand Down Expand Up @@ -244,7 +249,7 @@ class queue_impl {
constexpr uint16_t NotificationTraceType =
static_cast<uint16_t>(xpti::trace_point_type_t::queue_create);
XPTIScope PrepareNotify((void *)this, NotificationTraceType,
SYCL_STREAM_NAME, "queue_create");
SYCL_STREAM_NAME, MQueueID, "queue_create");
if (xptiCheckTraceEnabled(PrepareNotify.streamID(),
NotificationTraceType)) {
// Cache the trace event, stream id and instance IDs for the destructor
Expand All @@ -269,6 +274,8 @@ class queue_impl {
if (!MHostQueue)
xpti::addMetadata(TEvent, "queue_handle", getHandleRef());
});
// Also publish to TLS before notification
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
PrepareNotify.notify();
}
#endif
Expand Down
66 changes: 47 additions & 19 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1005,7 +1005,10 @@ void AllocaCommandBase::emitInstrumentationData() {
xpti::addMetadata(TE, "sycl_device_name",
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(TE, "memory_object", reinterpret_cast<size_t>(MAddress));
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
}
#endif
}
Expand Down Expand Up @@ -1124,7 +1127,8 @@ void AllocaSubBufCommand::emitInstrumentationData() {
this->MRequirement.MAccessRange[0]);
xpti::addMetadata(TE, "access_range_end",
this->MRequirement.MAccessRange[1]);
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1202,8 +1206,10 @@ void ReleaseCommand::emitInstrumentationData() {
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(TE, "allocation_type",
commandToName(MAllocaCmd->getType()));
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1323,8 +1329,10 @@ void MapMemObject::emitInstrumentationData() {
xpti::addMetadata(TE, "sycl_device_name",
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(TE, "memory_object", reinterpret_cast<size_t>(MAddress));
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1386,8 +1394,10 @@ void UnMapMemObject::emitInstrumentationData() {
xpti::addMetadata(TE, "sycl_device_name",
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(TE, "memory_object", reinterpret_cast<size_t>(MAddress));
xpti::addMetadata(TE, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1489,8 +1499,10 @@ void MemCpyCommand::emitInstrumentationData() {
xpti::addMetadata(
CmdTraceEvent, "copy_to",
reinterpret_cast<size_t>(getSyclObjImpl(MQueue->get_device()).get()));
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1665,8 +1677,10 @@ void MemCpyCommandHost::emitInstrumentationData() {
xpti::addMetadata(
CmdTraceEvent, "copy_to",
reinterpret_cast<size_t>(getSyclObjImpl(MQueue->get_device()).get()));
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1756,8 +1770,10 @@ void EmptyCommand::emitInstrumentationData() {
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(CmdTraceEvent, "memory_object",
reinterpret_cast<size_t>(MAddress));
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -1828,8 +1844,10 @@ void UpdateHostRequirementCommand::emitInstrumentationData() {
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(CmdTraceEvent, "memory_object",
reinterpret_cast<size_t>(MAddress));
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());

// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
makeTraceEventEpilog();
}
#endif
Expand Down Expand Up @@ -2063,7 +2081,9 @@ void instrumentationFillCommonData(const std::string &KernelName,
xpti::addMetadata(CmdTraceEvent, "sym_column_no",
static_cast<int>(Column));
}
xpti::addMetadata(CmdTraceEvent, "queue_id", Queue->getQueueID());
// We no longer set the 'queue_id' in the metadata structure as it is a
// mutable value and multiple threads using the same queue created at the
// same location will overwrite the metadata values creating inconsistencies
}
}
#endif
Expand Down Expand Up @@ -2096,6 +2116,10 @@ std::pair<xpti_td *, uint64_t> emitKernelInstrumentationData(
FromSource, InstanceID, CmdTraceEvent);

if (CmdTraceEvent) {
// Stash the queue_id mutable metadata in TLS
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
Queue->getQueueID());

instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc,
KernelBundleImplPtr, SyclKernelName,
SyclKernel, Queue, CGArgs);
Expand Down Expand Up @@ -2139,6 +2163,8 @@ void ExecCGCommand::emitInstrumentationData() {
CmdTraceEvent);

if (CmdTraceEvent) {
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
MTraceEvent = static_cast<void *>(CmdTraceEvent);
if (MCommandGroup->getType() == detail::CG::Kernel) {
auto KernelCG =
Expand Down Expand Up @@ -3351,10 +3377,12 @@ void KernelFusionCommand::emitInstrumentationData() {
deviceToString(MQueue->get_device()));
xpti::addMetadata(CmdTraceEvent, "sycl_device_name",
getSyclObjImpl(MQueue->get_device())->getDeviceName());
xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID());
}

if (MFirstInstance) {
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
MQueue->getQueueID());
xptiNotifySubscribers(MStreamID, NotificationTraceType,
detail::GSYCLGraphEvent,
static_cast<xpti_td *>(MTraceEvent), MInstanceID,
Expand Down
47 changes: 46 additions & 1 deletion sycl/source/detail/xpti_registry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ extern uint8_t GMemAllocStreamID;
extern xpti::trace_event_data_t *GMemAllocEvent;
extern xpti::trace_event_data_t *GSYCLGraphEvent;

// Key under which the queue instance ID is stashed via
// xpti::framework::stash_tuple(). We pick a global constant bound to a string
// literal (static storage duration) so that the pointer in TLS never goes
// stale.
inline constexpr auto XPTI_QUEUE_INSTANCE_ID_KEY = "queue_id";

#define STR(x) #x
#define SYCL_VERSION_STR \
"sycl " STR(__LIBSYCL_MAJOR_VERSION) "." STR(__LIBSYCL_MINOR_VERSION)
Expand Down Expand Up @@ -165,6 +168,45 @@ class XPTIRegistry {
class XPTIScope {
public:
using TracePoint = xpti::framework::tracepoint_t;
/// @brief Scoped class for XPTI instrumentation using TLS data
/// @param CodePtr The address of the class/function to help differentiate
/// actions in case the code location information is not available
/// @param TraceType The type of trace event being created
/// @param StreamName The stream which will emit these notifications
/// @param InstanceID The instance ID associated with an object, otherwise 0
/// will auto-generate
/// @param UserData String value that provides metadata about the
/// instrumentation
XPTIScope(void *CodePtr, uint16_t TraceType, const char *StreamName,
uint64_t InstanceID, const char *UserData)
: MUserData(UserData), MStreamID(0), MInstanceID(InstanceID),
MScopedNotify(false), MTraceType(0) {
detail::tls_code_loc_t Tls;
auto TData = Tls.query();
// If TLS is not set, we can still genertate universal IDs with user data
// and CodePtr information
const char *FuncName = TData.functionName();
if (!TData.functionName() && !TData.fileName())
FuncName = UserData;
// Create a tracepoint object that has a lifetime of this class
MTP = new TracePoint(TData.fileName(), FuncName, TData.lineNumber(),
TData.columnNumber(), CodePtr);
if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::node_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::edge_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::queue_create)
MTP->parent_event(GSYCLGraphEvent);
// Now if tracing is enabled, create trace events and notify
if (xptiTraceEnabled() && MTP) {
MTP->stream(StreamName).trace_type((xpti::trace_point_type_t)TraceType);
MTraceEvent = const_cast<xpti::trace_event_data_t *>(MTP->trace_event());
MStreamID = MTP->stream_id();
// This constructor uses a manual override for the instance ID as some
// objects such as queues keep track of instance IDs
MTP->override_instance_id(MInstanceID);
}
}

/// @brief Scoped class for XPTI instrumentation using TLS data
/// @param CodePtr The address of the class/function to help differentiate
/// actions in case the code location information is not available
Expand All @@ -188,7 +230,8 @@ class XPTIScope {
TData.columnNumber(), CodePtr);
if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::node_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::edge_create)
TraceType == (uint16_t)xpti::trace_point_type_t::edge_create ||
TraceType == (uint16_t)xpti::trace_point_type_t::queue_create)
MTP->parent_event(GSYCLGraphEvent);
// Now if tracing is enabled, create trace events and notify
if (xptiTraceEnabled() && MTP) {
Expand Down Expand Up @@ -243,6 +286,8 @@ class XPTIScope {
MTraceType == (uint16_t)xpti::trace_point_type_t::graph_create ||
MTraceType == (uint16_t)xpti::trace_point_type_t::node_create ||
MTraceType == (uint16_t)xpti::trace_point_type_t::edge_create ||
MTraceType == (uint16_t)xpti::trace_point_type_t::queue_create ||
MTraceType == (uint16_t)xpti::trace_point_type_t::queue_destroy ||
MTraceType == (uint16_t)xpti::trace_point_type_t::diagnostics)
return;

Expand Down
7 changes: 7 additions & 0 deletions sycl/test-e2e/XPTI/Inputs/test_collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType,
xpti::trace_event_data_t *,
xpti::trace_event_data_t *Event, uint64_t,
const void *UserData) {
char *Key = 0;
uint64_t Value;
bool HaveKeyValue =
(xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS);
std::lock_guard Lock{GMutex};
auto Type = static_cast<xpti::trace_point_type_t>(TraceType);
switch (Type) {
Expand Down Expand Up @@ -99,6 +103,9 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType,
std::cout << "Unknown tracepoint\n";
}

if (HaveKeyValue) {
std::cout << " " << Key << " : " << Value << "\n";
}
xpti::metadata_t *Metadata = xptiQueryMetadata(Event);
for (auto &Item : *Metadata) {
std::cout << " " << xptiLookupString(Item.first) << " : "
Expand Down
Loading

0 comments on commit 9876e19

Please sign in to comment.