From 9876e19f4ff387b35b0c98c7d62e5f50e6de187d Mon Sep 17 00:00:00 2001 From: tovinkere Date: Wed, 20 Mar 2024 21:22:13 -0700 Subject: [PATCH] [SYCL][XPTI] 'queue_id' metadata feature refactoring (#13070) - Better requirements/test cases showed gaps in previous implementation that resulted in data inconsistencies - Metadata is associated with UID and since UIDs are the same multiple instantiations of the same object, only invariant data needs to be stored in the metadata object - Adding mutable data resulted in data inconsistencies and the feature refactoring addresses these issues --------- Signed-off-by: Vasanth Tovinkere --- sycl/source/detail/queue_impl.cpp | 4 ++ sycl/source/detail/queue_impl.hpp | 13 +++- sycl/source/detail/scheduler/commands.cpp | 66 +++++++++++++------ sycl/source/detail/xpti_registry.hpp | 47 ++++++++++++- sycl/test-e2e/XPTI/Inputs/test_collector.cpp | 7 ++ .../XPTI/basic_event_collection_linux.cpp | 22 +++++-- .../tools/sycl-trace/sycl_trace_collector.cpp | 9 +++ .../xptitest_subscriber/XPTISubscriber.cpp | 35 +++++----- xpti/include/xpti/xpti_trace_framework.h | 38 +++++++++++ xpti/include/xpti/xpti_trace_framework.hpp | 25 ++++++- xpti/src/xpti_proxy.cpp | 37 +++++++++++ xptifw/CMakeLists.txt | 2 +- xptifw/src/xpti_trace_framework.cpp | 57 +++++++++++++++- 13 files changed, 313 insertions(+), 49 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 64a1fb5e888ac..321cc48b29769 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -113,6 +113,9 @@ event queue_impl::memset(const std::shared_ptr &Self, xpti::addMetadata(TEvent, "memory_size", Count); xpti::addMetadata(TEvent, "queue_id", MQueueID); }); + // Before we notifiy the subscribers, we broadcast the 'queue_id', which was a + // metadata entry to TLS for use by callback handlers + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); // Notify XPTI about the memset submission PrepareNotify.notify(); // Emit a begin/end scope for this call @@ -159,6 +162,7 @@ event queue_impl::memcpy(const std::shared_ptr &Self, xpti::addMetadata(TEvent, "memory_size", Count); xpti::addMetadata(TEvent, "queue_id", MQueueID); }); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); // Notify XPTI about the memset submission PrepareNotify.notify(); // Emit a begin/end scope for this call diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 7109555b05ecc..890891644bbac 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -92,7 +92,7 @@ class queue_impl { /// \param PropList is a list of properties to use for queue construction. queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList) - : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList) {}; + : queue_impl(Device, getDefaultOrNew(Device), AsyncHandler, PropList){}; /// Constructs a SYCL queue with an async_handler and property_list provided /// form a device and a context. @@ -176,13 +176,16 @@ class queue_impl { // This section is the second part of the instrumentation that uses the // tracepoint information and notifies } + // We enable XPTI tracing events using the TLS mechanism; if the code // location data is available, then the tracing data will be rich. #if XPTI_ENABLE_INSTRUMENTATION constexpr uint16_t NotificationTraceType = static_cast(xpti::trace_point_type_t::queue_create); + // Using the instance override constructor for use with queues as queues + // maintain instance IDs in the object XPTIScope PrepareNotify((void *)this, NotificationTraceType, - SYCL_STREAM_NAME, "queue_create"); + SYCL_STREAM_NAME, MQueueID, "queue_create"); // Cache the trace event, stream id and instance IDs for the destructor if (xptiCheckTraceEnabled(PrepareNotify.streamID(), NotificationTraceType)) { @@ -207,6 +210,8 @@ class queue_impl { xpti::addMetadata(TEvent, "queue_handle", reinterpret_cast(getHandleRef())); }); + // Also publish to TLS + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); PrepareNotify.notify(); } #endif @@ -244,7 +249,7 @@ class queue_impl { constexpr uint16_t NotificationTraceType = static_cast(xpti::trace_point_type_t::queue_create); XPTIScope PrepareNotify((void *)this, NotificationTraceType, - SYCL_STREAM_NAME, "queue_create"); + SYCL_STREAM_NAME, MQueueID, "queue_create"); if (xptiCheckTraceEnabled(PrepareNotify.streamID(), NotificationTraceType)) { // Cache the trace event, stream id and instance IDs for the destructor @@ -269,6 +274,8 @@ class queue_impl { if (!MHostQueue) xpti::addMetadata(TEvent, "queue_handle", getHandleRef()); }); + // Also publish to TLS before notification + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); PrepareNotify.notify(); } #endif diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 8777b82db1f6b..efc553cdb97e2 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1005,7 +1005,10 @@ void AllocaCommandBase::emitInstrumentationData() { xpti::addMetadata(TE, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); } #endif } @@ -1124,7 +1127,8 @@ void AllocaSubBufCommand::emitInstrumentationData() { this->MRequirement.MAccessRange[0]); xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1202,8 +1206,10 @@ void ReleaseCommand::emitInstrumentationData() { getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "allocation_type", commandToName(MAllocaCmd->getType())); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1323,8 +1329,10 @@ void MapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1386,8 +1394,10 @@ void UnMapMemObject::emitInstrumentationData() { xpti::addMetadata(TE, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(TE, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1489,8 +1499,10 @@ void MemCpyCommand::emitInstrumentationData() { xpti::addMetadata( CmdTraceEvent, "copy_to", reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get())); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1665,8 +1677,10 @@ void MemCpyCommandHost::emitInstrumentationData() { xpti::addMetadata( CmdTraceEvent, "copy_to", reinterpret_cast(getSyclObjImpl(MQueue->get_device()).get())); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1756,8 +1770,10 @@ void EmptyCommand::emitInstrumentationData() { getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -1828,8 +1844,10 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { getSyclObjImpl(MQueue->get_device())->getDeviceName()); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); - + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); makeTraceEventEpilog(); } #endif @@ -2063,7 +2081,9 @@ void instrumentationFillCommonData(const std::string &KernelName, xpti::addMetadata(CmdTraceEvent, "sym_column_no", static_cast(Column)); } - xpti::addMetadata(CmdTraceEvent, "queue_id", Queue->getQueueID()); + // We no longer set the 'queue_id' in the metadata structure as it is a + // mutable value and multiple threads using the same queue created at the + // same location will overwrite the metadata values creating inconsistencies } } #endif @@ -2096,6 +2116,10 @@ std::pair emitKernelInstrumentationData( FromSource, InstanceID, CmdTraceEvent); if (CmdTraceEvent) { + // Stash the queue_id mutable metadata in TLS + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + Queue->getQueueID()); + instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, SyclKernel, Queue, CGArgs); @@ -2139,6 +2163,8 @@ void ExecCGCommand::emitInstrumentationData() { CmdTraceEvent); if (CmdTraceEvent) { + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CG::Kernel) { auto KernelCG = @@ -3351,10 +3377,12 @@ void KernelFusionCommand::emitInstrumentationData() { deviceToString(MQueue->get_device())); xpti::addMetadata(CmdTraceEvent, "sycl_device_name", getSyclObjImpl(MQueue->get_device())->getDeviceName()); - xpti::addMetadata(CmdTraceEvent, "queue_id", MQueue->getQueueID()); } - if (MFirstInstance) { + // Since we do NOT add queue_id value to metadata, we are stashing it to TLS + // as this data is mutable and the metadata is supposed to be invariant + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + MQueue->getQueueID()); xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, static_cast(MTraceEvent), MInstanceID, diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index be546e4e27905..681e2841c027b 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -42,6 +42,9 @@ extern uint8_t GMemAllocStreamID; extern xpti::trace_event_data_t *GMemAllocEvent; extern xpti::trace_event_data_t *GSYCLGraphEvent; +// We will pick a global constant so that the pointer in TLS never goes stale +inline constexpr auto XPTI_QUEUE_INSTANCE_ID_KEY = "queue_id"; + #define STR(x) #x #define SYCL_VERSION_STR \ "sycl " STR(__LIBSYCL_MAJOR_VERSION) "." STR(__LIBSYCL_MINOR_VERSION) @@ -165,6 +168,45 @@ class XPTIRegistry { class XPTIScope { public: using TracePoint = xpti::framework::tracepoint_t; + /// @brief Scoped class for XPTI instrumentation using TLS data + /// @param CodePtr The address of the class/function to help differentiate + /// actions in case the code location information is not available + /// @param TraceType The type of trace event being created + /// @param StreamName The stream which will emit these notifications + /// @param InstanceID The instance ID associated with an object, otherwise 0 + /// will auto-generate + /// @param UserData String value that provides metadata about the + /// instrumentation + XPTIScope(void *CodePtr, uint16_t TraceType, const char *StreamName, + uint64_t InstanceID, const char *UserData) + : MUserData(UserData), MStreamID(0), MInstanceID(InstanceID), + MScopedNotify(false), MTraceType(0) { + detail::tls_code_loc_t Tls; + auto TData = Tls.query(); + // If TLS is not set, we can still genertate universal IDs with user data + // and CodePtr information + const char *FuncName = TData.functionName(); + if (!TData.functionName() && !TData.fileName()) + FuncName = UserData; + // Create a tracepoint object that has a lifetime of this class + MTP = new TracePoint(TData.fileName(), FuncName, TData.lineNumber(), + TData.columnNumber(), CodePtr); + if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create || + TraceType == (uint16_t)xpti::trace_point_type_t::node_create || + TraceType == (uint16_t)xpti::trace_point_type_t::edge_create || + TraceType == (uint16_t)xpti::trace_point_type_t::queue_create) + MTP->parent_event(GSYCLGraphEvent); + // Now if tracing is enabled, create trace events and notify + if (xptiTraceEnabled() && MTP) { + MTP->stream(StreamName).trace_type((xpti::trace_point_type_t)TraceType); + MTraceEvent = const_cast(MTP->trace_event()); + MStreamID = MTP->stream_id(); + // This constructor uses a manual override for the instance ID as some + // objects such as queues keep track of instance IDs + MTP->override_instance_id(MInstanceID); + } + } + /// @brief Scoped class for XPTI instrumentation using TLS data /// @param CodePtr The address of the class/function to help differentiate /// actions in case the code location information is not available @@ -188,7 +230,8 @@ class XPTIScope { TData.columnNumber(), CodePtr); if (TraceType == (uint16_t)xpti::trace_point_type_t::graph_create || TraceType == (uint16_t)xpti::trace_point_type_t::node_create || - TraceType == (uint16_t)xpti::trace_point_type_t::edge_create) + TraceType == (uint16_t)xpti::trace_point_type_t::edge_create || + TraceType == (uint16_t)xpti::trace_point_type_t::queue_create) MTP->parent_event(GSYCLGraphEvent); // Now if tracing is enabled, create trace events and notify if (xptiTraceEnabled() && MTP) { @@ -243,6 +286,8 @@ class XPTIScope { MTraceType == (uint16_t)xpti::trace_point_type_t::graph_create || MTraceType == (uint16_t)xpti::trace_point_type_t::node_create || MTraceType == (uint16_t)xpti::trace_point_type_t::edge_create || + MTraceType == (uint16_t)xpti::trace_point_type_t::queue_create || + MTraceType == (uint16_t)xpti::trace_point_type_t::queue_destroy || MTraceType == (uint16_t)xpti::trace_point_type_t::diagnostics) return; diff --git a/sycl/test-e2e/XPTI/Inputs/test_collector.cpp b/sycl/test-e2e/XPTI/Inputs/test_collector.cpp index a7c00dffdf1cd..be75f61137ea3 100644 --- a/sycl/test-e2e/XPTI/Inputs/test_collector.cpp +++ b/sycl/test-e2e/XPTI/Inputs/test_collector.cpp @@ -62,6 +62,10 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType, xpti::trace_event_data_t *, xpti::trace_event_data_t *Event, uint64_t, const void *UserData) { + char *Key = 0; + uint64_t Value; + bool HaveKeyValue = + (xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS); std::lock_guard Lock{GMutex}; auto Type = static_cast(TraceType); switch (Type) { @@ -99,6 +103,9 @@ XPTI_CALLBACK_API void syclCallback(uint16_t TraceType, std::cout << "Unknown tracepoint\n"; } + if (HaveKeyValue) { + std::cout << " " << Key << " : " << Value << "\n"; + } xpti::metadata_t *Metadata = xptiQueryMetadata(Event); for (auto &Item : *Metadata) { std::cout << " " << xptiLookupString(Item.first) << " : " diff --git a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp index 61b53feed0622..5a895b67d0097 100644 --- a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp +++ b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp @@ -28,6 +28,7 @@ // CHECK-NEXT: PI Call Begin : piPlatformGetInfo // CHECK-NEXT: PI Call Begin : piKernelSetExecInfo // CHECK: Node create +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: sym_function_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -35,10 +36,14 @@ // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Node create -// CHECK-NEXT: kernel_name : virtual_node[{{.*}}] +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: kernel_name : virtual_node[{{.*}}] // CHECK-NEXT: Edge create -// CHECK-NEXT: event : {{.*}} +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: event : {{.*}} +// CHECK-DAG: kernel_name : virtual_node[{{.*}}] // CHECK-NEXT: Task begin +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: sym_function_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -51,6 +56,7 @@ // CHECK-NEXT: PI Call Begin : piKernelRelease // CHECK-NEXT: PI Call Begin : piProgramRelease // CHECK-NEXT: Signal +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: sym_function_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -58,6 +64,7 @@ // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Task end +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} // CHECK-DAG: sym_source_file_name : {{.*}} // CHECK-DAG: sym_function_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} @@ -65,27 +72,34 @@ // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Wait begin +// CHECK-DAG: queue_id : {{.*}} // CHECK-NEXT: PI Call Begin : piEventsWait // CHECK-NEXT: Wait end +// CHECK-DAG: queue_id : {{.*}} // CHECK-NEXT: Node create +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: memory_size : {{.*}} // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: Task begin +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: memory_size : {{.*}} // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: PI Call Begin : piextUSMEnqueueMemcpy // CHECK-NEXT: Task end +// CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: memory_size : {{.*}} // CHECK-DAG: dest_memory_ptr : {{.*}} // CHECK-DAG: src_memory_ptr : {{.*}} // CHECK-DAG: sycl_device : {{.*}} // CHECK-NEXT: PI Call Begin : piEventRelease // CHECK-NEXT: Wait begin -// CHECK: sycl_device_type : {{.*}} +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: sycl_device_type : {{.*}} // CHECK: PI Call Begin : piQueueFinish // CHECK-NEXT: Wait end -// CHECK: sycl_device_type : {{.*}} +// CHECK-DAG: queue_id : {{.*}} +// CHECK-DAG: sycl_device_type : {{.*}} diff --git a/sycl/tools/sycl-trace/sycl_trace_collector.cpp b/sycl/tools/sycl-trace/sycl_trace_collector.cpp index 55075c5437879..5cf5b3bc5f5b9 100644 --- a/sycl/tools/sycl-trace/sycl_trace_collector.cpp +++ b/sycl/tools/sycl-trace/sycl_trace_collector.cpp @@ -57,6 +57,11 @@ void TraceTaskExecutionSignals(xpti::trace_event_data_t * /*Parent*/, if (!Event) return; + char *Key = 0; + uint64_t Value; + bool HaveKeyValue = + (xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS); + std::cout << "[SYCL] Task " << (IsBegin ? "begin" : "end ") << " (event=" << Event << ",instanceID=" << InstanceID << ")" << std::endl; @@ -67,6 +72,10 @@ void TraceTaskExecutionSignals(xpti::trace_event_data_t * /*Parent*/, if (!IsBegin || !PrintSyclVerbose) return; + if (HaveKeyValue) { + std::cout << "\t " << Key << " : " << Value << std::endl; + } + xpti::metadata_t *Metadata = xptiQueryMetadata(Event); for (auto &Item : *Metadata) { std::cout << "\t " << xptiLookupString(Item.first) << " : " diff --git a/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp b/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp index fcbbb02126a62..2c79f76269c11 100644 --- a/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp +++ b/sycl/unittests/xpti_trace/xptitest_subscriber/XPTISubscriber.cpp @@ -35,6 +35,13 @@ XPTI_CALLBACK_API void testCallback(uint16_t TraceType, if (GAnalyzedTraceTypes.find(TraceType) == GAnalyzedTraceTypes.end()) return; + // Since "queue_id" is no longer a metadata item, we have to retrieve it from + // TLS using new XPTI API + char *Key = 0; + uint64_t Value; + bool HaveKeyValue = + (xptiGetStashedTuple(&Key, Value) == xpti::result_t::XPTI_RESULT_SUCCESS); + if (TraceType == xpti::trace_diagnostics) { std::string AggregatedData; if (Event && Event->reserved.payload && Event->reserved.payload->name && @@ -111,30 +118,22 @@ XPTI_CALLBACK_API void testCallback(uint16_t TraceType, } else if (TraceType == xpti::trace_task_begin) { if (Event) { std::string Message; - xpti::metadata_t *Metadata = xptiQueryMetadata(Event); - for (const auto &Item : *Metadata) { - std::string_view Key{xptiLookupString(Item.first)}; - if (Key == "queue_id") { - Message.append( - std::string("task_begin:") + Key.data() + std::string(":") + - std::to_string( - xpti::getMetadata(Item).second)); - } + // Since we have changed we send the "queue_id" information, we no longer + // have to check the metadata for the instance ID + if (HaveKeyValue) { + Message.append(std::string("task_begin:") + Key + std::string(":") + + std::to_string(Value)); } GReceivedNotifications.push_back(std::make_pair(TraceType, Message)); } } else if (TraceType == xpti::trace_task_end) { if (Event) { std::string Message; - xpti::metadata_t *Metadata = xptiQueryMetadata(Event); - for (const auto &Item : *Metadata) { - std::string_view Key{xptiLookupString(Item.first)}; - if (Key == "queue_id") { - Message.append( - std::string("task_end:") + Key.data() + std::string(":") + - std::to_string( - xpti::getMetadata(Item).second)); - } + // Since we have changed we send the "queue_id" information, we no longer + // have to check the metadata for the instance ID + if (HaveKeyValue) { + Message.append(std::string("task_end:") + Key + std::string(":") + + std::to_string(Value)); } GReceivedNotifications.push_back(std::make_pair(TraceType, Message)); } diff --git a/xpti/include/xpti/xpti_trace_framework.h b/xpti/include/xpti/xpti_trace_framework.h index 90a0e57047b2e..58fa9a117b6d5 100644 --- a/xpti/include/xpti/xpti_trace_framework.h +++ b/xpti/include/xpti/xpti_trace_framework.h @@ -90,6 +90,41 @@ XPTI_EXPORT_API uint64_t xptiGetUniversalId(); /// @param uid Unique 64 bit identifier. XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid); +/// @brief Returns stashed tuple +/// @details The XPTI Framework allows the notification mechanism to stash a +/// key-value tupe before a notification that can be accessed in the callback +/// handler fo the notification. This value is guranteed to be valid for the +/// duration of the notifiation. +/// @param key The Key of the stashed tuple is contained in this parameter after +/// the call +/// @param value The value that corresponds to key +/// @return The result code is XPTI_RESULT_SUCCESS when successful and +/// XPTI_RESULT_NOTFOUND if there is nothing stashed. Also returns error if +/// 'key' argument is invalid (XPTI_RESULT_INVALIDARG) +XPTI_EXPORT_API xpti::result_t xptiGetStashedTuple(char **key, uint64_t &value); + +/// @brief Stash a key-value tuple +/// @details Certain notifications in XPTI may want to provide mutable values +/// associated with Universal IDs that can be captured in the notification +/// handler. The framework currently allows one such tuple to be provided and +/// stashed. +/// @param key The Key of the tuple that is being stashed and needs to be +/// available for the duration of the notification call. +/// @param value The value that corresponds to key +/// @return The result code is XPTI_RESULT_SUCCESS when successful and +/// XPTI_RESULT_FAIL if key is invalid +XPTI_EXPORT_API xpti::result_t xptiStashTuple(const char *key, uint64_t value); + +/// @brief Un-Stash a key-value tuple or pop it from a stack, if one exists +/// @details Certain notifications in XPTI may want to provide mutable values +/// associated with Universal IDs that can be captured in the notification +/// handler. The framework currently allows such values to be provided and +/// stashed. This function pops the top of the stack tuple value when it is no +/// longer needed; Currently a stack depth of 1 is supported. +/// @return The result code is XPTI_RESULT_SUCCESS when successful and +/// XPTI_RESULT_FAIL if there are no tuples present +XPTI_EXPORT_API void xptiUnstashTuple(); + /// @brief Generates a unique ID /// @details When a tool is subscribing to the event stream and wants to /// generate task IDs that do not collide with unique IDs currently being @@ -498,6 +533,9 @@ typedef void (*xpti_finalize_t)(const char *); typedef uint64_t (*xpti_get_universal_id_t)(); typedef void (*xpti_set_universal_id_t)(uint64_t uid); typedef uint64_t (*xpti_get_unique_id_t)(); +typedef xpti::result_t (*xpti_stash_tuple_t)(const char *key, uint64_t value); +typedef xpti::result_t (*xpti_get_stashed_tuple_t)(char **key, uint64_t &value); +typedef void (*xpti_unstash_tuple_t)(); typedef xpti::string_id_t (*xpti_register_string_t)(const char *, char **); typedef const char *(*xpti_lookup_string_t)(xpti::string_id_t); typedef xpti::string_id_t (*xpti_register_object_t)(const char *, size_t, diff --git a/xpti/include/xpti/xpti_trace_framework.hpp b/xpti/include/xpti/xpti_trace_framework.hpp index c38a149fa401c..55f6c69760cdb 100644 --- a/xpti/include/xpti/xpti_trace_framework.hpp +++ b/xpti/include/xpti/xpti_trace_framework.hpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include "xpti/xpti_data_types.h" @@ -539,6 +541,24 @@ class scoped_notify { uint64_t m_instance; }; +// Scoped class that assists in stashing a tuple and clearing it when it is pout +// of scope +class stash_tuple { +public: + stash_tuple(const char *key, uint64_t value) : m_stashed(false) { + m_stashed = + (xptiStashTuple(key, value) == xpti::result_t::XPTI_RESULT_SUCCESS); + } + ~stash_tuple() { + if (m_stashed) { + xptiUnstashTuple(); + } + } + +private: + bool m_stashed; +}; + // --------------- Commented section of the code ------------- // // github.com/bombela/backward-cpp/blob/master/backward.hpp @@ -759,9 +779,12 @@ class tracepoint_t { // Method to extract the stream used by the current tracepoint type uint8_t stream_id() { return m_default_stream; } - // Method to extract the stream used by the current tracepoint type + // Method to extract the instance ID used by the current tracepoint type uint64_t instance_id() { return m_instID; } + // Method to override the instance ID generated by the xptiMakeEvent() call + void override_instance_id(uint64_t instance) { m_instID = instance; } + uint64_t universal_id() { if (m_payload && (m_payload->flags & diff --git a/xpti/src/xpti_proxy.cpp b/xpti/src/xpti_proxy.cpp index 2d17517ee3089..a09b970060033 100644 --- a/xpti/src/xpti_proxy.cpp +++ b/xpti/src/xpti_proxy.cpp @@ -43,6 +43,9 @@ enum functions_t { XPTI_FORCE_SET_TRACE_ENABLED, XPTI_CHECK_TRACE_ENABLED, XPTI_RELEASE_EVENT, + XPTI_STASH_TUPLE, + XPTI_GET_STASHED_TUPLE, + XPTI_UNSTASH_TUPLE, // All additional functions need to appear before // the XPTI_FW_API_COUNT enum XPTI_FW_API_COUNT ///< This enum must always be the last one in the list @@ -79,6 +82,9 @@ class ProxyLoader { {XPTI_TRACE_ENABLED, "xptiTraceEnabled"}, {XPTI_CHECK_TRACE_ENABLED, "xptiCheckTraceEnabled"}, {XPTI_FORCE_SET_TRACE_ENABLED, "xptiForceSetTraceEnabled"}, + {XPTI_STASH_TUPLE, "xptiStashTuple"}, + {XPTI_GET_STASHED_TUPLE, "xptiGetStashedTuple"}, + {XPTI_UNSTASH_TUPLE, "xptiUnstashTuple"}, {XPTI_RELEASE_EVENT, "xptiReleaseEvent"}}; public: @@ -250,6 +256,37 @@ XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid) { } } +XPTI_EXPORT_API xpti::result_t xptiStashTuple(const char *key, uint64_t value) { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = xpti::ProxyLoader::instance().functionByIndex(XPTI_STASH_TUPLE); + if (f) { + return (*reinterpret_cast(f))(key, value); + } + } + return xpti::result_t::XPTI_RESULT_FAIL; +} + +XPTI_EXPORT_API xpti::result_t xptiSetGetStashedTuple(char **key, + uint64_t &value) { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = + xpti::ProxyLoader::instance().functionByIndex(XPTI_GET_STASHED_TUPLE); + if (f) { + return (*reinterpret_cast(f))(key, value); + } + } + return xpti::result_t::XPTI_RESULT_FAIL; +} + +XPTI_EXPORT_API void xptiUnstashTuple() { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = xpti::ProxyLoader::instance().functionByIndex(XPTI_UNSTASH_TUPLE); + if (f) { + return (*reinterpret_cast(f))(); + } + } +} + XPTI_EXPORT_API uint64_t xptiGetUniqueId() { if (xpti::ProxyLoader::instance().noErrors()) { auto f = xpti::ProxyLoader::instance().functionByIndex(XPTI_GET_UNIQUE_ID); diff --git a/xptifw/CMakeLists.txt b/xptifw/CMakeLists.txt index ccdabf46c9810..4cbf597513772 100644 --- a/xptifw/CMakeLists.txt +++ b/xptifw/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.8) -set(XPTI_VERSION 0.4.1) +set(XPTI_VERSION 0.6.0) project (xptifw VERSION "${XPTI_VERSION}" LANGUAGES CXX) set(CMAKE_CXX_STANDARD 17) diff --git a/xptifw/src/xpti_trace_framework.cpp b/xptifw/src/xpti_trace_framework.cpp index 41ccaf6a7e27b..93d151094aba4 100644 --- a/xptifw/src/xpti_trace_framework.cpp +++ b/xptifw/src/xpti_trace_framework.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,9 @@ static_assert( std::is_trivially_destructible::value, "PlatformHelper is not trivial"); +// TLS variables to support stashing tupples and universal IDs +using stash_tuple_t = std::tuple; +static thread_local stash_tuple_t g_tls_stash_tuple = stash_tuple_t(nullptr, 0); static thread_local uint64_t g_tls_uid = xpti::invalid_uid; namespace xpti { @@ -359,12 +363,16 @@ class Tracepoints { // Protect simultaneous insert operations on the metadata tables { + xpti::result_t res; std::lock_guard HashLock(MMetadataMutex); if (Event->reserved.metadata.count(KeyID)) { - return xpti::result_t::XPTI_RESULT_DUPLICATE; + // One already existed, but we overwrote it + res = xpti::result_t::XPTI_RESULT_DUPLICATE; + } else { + res = xpti::result_t::XPTI_RESULT_SUCCESS; } Event->reserved.metadata[KeyID] = ValueID; - return xpti::result_t::XPTI_RESULT_SUCCESS; + return res; } } @@ -818,6 +826,38 @@ class Framework { void setUniversalID(uint64_t uid) noexcept { g_tls_uid = uid; } + xpti::result_t stashTuple(const char *key, uint64_t value) { + if (!key) + return xpti::result_t::XPTI_RESULT_FAIL; + + std::get<0>(g_tls_stash_tuple) = key; + std::get<1>(g_tls_stash_tuple) = value; + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + xpti::result_t getStashedTuple(char **key, uint64_t &value) { + if (!key) + return xpti::result_t::XPTI_RESULT_INVALIDARG; + + const char *tls_key = std::get<0>(g_tls_stash_tuple); + if (!tls_key) + return xpti::result_t::XPTI_RESULT_NOTFOUND; + + (*key) = const_cast(tls_key); + value = std::get<1>(g_tls_stash_tuple); + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + void unstashTuple() { + if (!std::get<0>(g_tls_stash_tuple)) + return; + + // std::get<0>(g_tls_stash_tuple) = nullptr; + // std::get<1>(g_tls_stash_tuple) = 0; + // We will use the actual unstash code when we implement a stack to allow + // multiple stashes/thread + } + bool checkTraceEnabled(uint16_t stream, uint16_t type) { if (MTraceEnabled) { return MNotifier.checkSubscribed(stream, type); @@ -1086,6 +1126,19 @@ XPTI_EXPORT_API void xptiSetUniversalId(uint64_t uid) { xpti::Framework::instance().setUniversalID(uid); } +XPTI_EXPORT_API xpti::result_t xptiStashTuple(const char *key, uint64_t value) { + return xpti::Framework::instance().stashTuple(key, value); +} + +XPTI_EXPORT_API xpti::result_t xptiGetStashedTuple(char **key, + uint64_t &value) { + return xpti::Framework::instance().getStashedTuple(key, value); +} + +XPTI_EXPORT_API void xptiUnstashTuple() { + xpti::Framework::instance().unstashTuple(); +} + XPTI_EXPORT_API uint16_t xptiRegisterUserDefinedTracePoint(const char *ToolName, uint8_t UserDefinedTP) { uint8_t ToolID = xpti::Framework::instance().registerVendor(ToolName);