diff --git a/sycl/include/sycl/detail/common.hpp b/sycl/include/sycl/detail/common.hpp index 1a867a72aff7c..af0d0e6ba0dd7 100644 --- a/sycl/include/sycl/detail/common.hpp +++ b/sycl/include/sycl/detail/common.hpp @@ -140,6 +140,9 @@ class __SYCL_EXPORT tls_code_loc_t { /// @return The code location information saved in the TLS slot. If not TLS /// entry has been set up, a default coe location is returned. const detail::code_location &query(); + /// @brief Returns true if the TLS slot was cleared when this object was + /// constructed. + bool isToplevel() const { return !MLocalScope; } private: // The flag that is used to determine if the object is in a local scope or in diff --git a/sycl/include/sycl/ext/oneapi/bindless_images.hpp b/sycl/include/sycl/ext/oneapi/bindless_images.hpp index 777ddecd887d5..1fa4623eb43fe 100644 --- a/sycl/include/sycl/ext/oneapi/bindless_images.hpp +++ b/sycl/include/sycl/ext/oneapi/bindless_images.hpp @@ -1369,7 +1369,7 @@ inline event queue::ext_oneapi_copy( detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.ext_oneapi_copy(Src, Dest, DestImgDesc); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1383,7 +1383,7 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, SrcExtent, Dest, DestOffset, DestImgDesc, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1396,7 +1396,7 @@ inline event queue::ext_oneapi_copy( CGH.depends_on(DepEvent); CGH.ext_oneapi_copy(Src, Dest, DestImgDesc); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1412,7 +1412,7 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, SrcExtent, Dest, DestOffset, DestImgDesc, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1425,7 +1425,7 @@ inline event queue::ext_oneapi_copy( CGH.depends_on(DepEvents); CGH.ext_oneapi_copy(Src, Dest, DestImgDesc); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1441,7 +1441,7 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, SrcExtent, Dest, DestOffset, DestImgDesc, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1451,7 +1451,7 @@ inline event queue::ext_oneapi_copy( detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.ext_oneapi_copy(Src, Dest, SrcImgDesc); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1466,7 +1466,7 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, DestExtent, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1479,7 +1479,7 @@ inline event queue::ext_oneapi_copy( CGH.depends_on(DepEvent); CGH.ext_oneapi_copy(Src, Dest, SrcImgDesc); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1496,7 +1496,7 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, DestExtent, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1509,7 +1509,7 @@ inline event queue::ext_oneapi_copy( CGH.depends_on(DepEvents); CGH.ext_oneapi_copy(Src, Dest, SrcImgDesc); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1526,7 +1526,7 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, SrcImgDesc, Dest, DestOffset, DestExtent, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1538,7 +1538,7 @@ inline event queue::ext_oneapi_copy( [&](handler &CGH) { CGH.ext_oneapi_copy(Src, Dest, DeviceImgDesc, DeviceRowPitch); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1553,7 +1553,7 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc, DeviceRowPitch, HostExtent, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1567,7 +1567,7 @@ inline event queue::ext_oneapi_copy( CGH.depends_on(DepEvent); CGH.ext_oneapi_copy(Src, Dest, DeviceImgDesc, DeviceRowPitch); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1581,7 +1581,7 @@ inline event queue::ext_oneapi_copy( CGH.depends_on(DepEvent); CGH.ext_oneapi_copy(Src, Dest, ImageDesc); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1595,7 +1595,7 @@ inline event queue::ext_oneapi_copy( CGH.depends_on(DepEvents); CGH.ext_oneapi_copy(Src, Dest, ImageDesc); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1606,7 +1606,7 @@ inline event queue::ext_oneapi_copy( detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit( [&](handler &CGH) { CGH.ext_oneapi_copy(Src, Dest, ImageDesc); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1622,7 +1622,7 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc, DeviceRowPitch, HostExtent, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1636,7 +1636,7 @@ inline event queue::ext_oneapi_copy( CGH.depends_on(DepEvents); CGH.ext_oneapi_copy(Src, Dest, DeviceImgDesc, DeviceRowPitch); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_copy( @@ -1652,7 +1652,7 @@ inline event queue::ext_oneapi_copy( CGH.ext_oneapi_copy(Src, SrcOffset, Dest, DestOffset, DeviceImgDesc, DeviceRowPitch, HostExtent, CopyExtent); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_wait_external_semaphore( @@ -1664,7 +1664,7 @@ inline event queue::ext_oneapi_wait_external_semaphore( CGH.depends_on(DepEvent); CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_wait_external_semaphore( @@ -1676,7 +1676,7 @@ inline event queue::ext_oneapi_wait_external_semaphore( CGH.depends_on(DepEvents); CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_wait_external_semaphore( @@ -1687,7 +1687,7 @@ inline event queue::ext_oneapi_wait_external_semaphore( [&](handler &CGH) { CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle, WaitValue); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_wait_external_semaphore( @@ -1699,7 +1699,7 @@ inline event queue::ext_oneapi_wait_external_semaphore( CGH.depends_on(DepEvent); CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle, WaitValue); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_wait_external_semaphore( @@ -1712,7 +1712,7 @@ inline event queue::ext_oneapi_wait_external_semaphore( CGH.depends_on(DepEvents); CGH.ext_oneapi_wait_external_semaphore(SemaphoreHandle, WaitValue); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_signal_external_semaphore( @@ -1723,7 +1723,7 @@ inline event queue::ext_oneapi_signal_external_semaphore( [&](handler &CGH) { CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_signal_external_semaphore( @@ -1735,7 +1735,7 @@ inline event queue::ext_oneapi_signal_external_semaphore( CGH.depends_on(DepEvent); CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_signal_external_semaphore( @@ -1747,7 +1747,7 @@ inline event queue::ext_oneapi_signal_external_semaphore( CGH.depends_on(DepEvents); CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_signal_external_semaphore( @@ -1758,7 +1758,7 @@ inline event queue::ext_oneapi_signal_external_semaphore( [&](handler &CGH) { CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle, SignalValue); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_signal_external_semaphore( @@ -1771,7 +1771,7 @@ inline event queue::ext_oneapi_signal_external_semaphore( CGH.depends_on(DepEvent); CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle, SignalValue); }, - CodeLoc); + TlsCodeLocCapture.query()); } inline event queue::ext_oneapi_signal_external_semaphore( @@ -1784,7 +1784,7 @@ inline event queue::ext_oneapi_signal_external_semaphore( CGH.depends_on(DepEvents); CGH.ext_oneapi_signal_external_semaphore(SemaphoreHandle, SignalValue); }, - CodeLoc); + TlsCodeLocCapture.query()); } } // namespace _V1 diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index a81a85c63b816..5234f19f962c5 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -590,7 +590,11 @@ class __SYCL_EXPORT handler { /// Saves the location of user's code passed in \p CodeLoc for future usage in /// finalize() method. - void saveCodeLoc(detail::code_location CodeLoc) { MCodeLoc = CodeLoc; } + /// TODO: remove the first version of this func (the one without the IsTopCodeLoc arg) + /// at the next ABI breaking window since removing it breaks ABI on windows. + void saveCodeLoc(detail::code_location CodeLoc); + void saveCodeLoc(detail::code_location CodeLoc, bool IsTopCodeLoc); + void copyCodeLoc(const handler &other); /// Constructs CG object of specific type, passes it to Scheduler and /// returns sycl::event object representing the command group. diff --git a/sycl/include/sycl/queue.hpp b/sycl/include/sycl/queue.hpp index 39f69046ad2aa..da157d12cde52 100644 --- a/sycl/include/sycl/queue.hpp +++ b/sycl/include/sycl/queue.hpp @@ -342,21 +342,25 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const detail::code_location &CodeLoc = detail::code_location::current()) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); #if __SYCL_USE_FALLBACK_ASSERT - auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert, - event &E) { + auto PostProcess = [this, &TlsCodeLocCapture]( + bool IsKernel, bool KernelUsesAssert, event &E) { if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) && KernelUsesAssert && !device_has(aspect::accelerator)) { // __devicelib_assert_fail isn't supported by Device-side Runtime // Linking against fallback impl of __devicelib_assert_fail is // performed by program manager class // Fallback assert isn't supported for FPGA - submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, CodeLoc); + submitAssertCapture(*this, E, /* SecondaryQueue = */ nullptr, + TlsCodeLocCapture.query()); } }; - return submit_impl_and_postprocess(CGF, CodeLoc, PostProcess); + return submit_impl_and_postprocess(CGF, TlsCodeLocCapture.query(), + PostProcess, + TlsCodeLocCapture.isToplevel()); #else - return submit_impl(CGF, CodeLoc); + return submit_impl(CGF, TlsCodeLocCapture.query(), + TlsCodeLocCapture.isToplevel()); #endif // __SYCL_USE_FALLBACK_ASSERT } @@ -377,7 +381,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const detail::code_location &CodeLoc = detail::code_location::current()) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); #if __SYCL_USE_FALLBACK_ASSERT - auto PostProcess = [this, &SecondaryQueue, &CodeLoc]( + auto PostProcess = [this, &SecondaryQueue, &TlsCodeLocCapture]( bool IsKernel, bool KernelUsesAssert, event &E) { if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) && KernelUsesAssert && !device_has(aspect::accelerator)) { @@ -387,14 +391,17 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { // Linking against fallback impl of __devicelib_assert_fail is // performed by program manager class // Fallback assert isn't supported for FPGA - submitAssertCapture(*this, E, &SecondaryQueue, CodeLoc); + submitAssertCapture(*this, E, &SecondaryQueue, + TlsCodeLocCapture.query()); } }; - return submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc, - PostProcess); + return submit_impl_and_postprocess(CGF, SecondaryQueue, + TlsCodeLocCapture.query(), PostProcess, + TlsCodeLocCapture.isToplevel()); #else - return submit_impl(CGF, SecondaryQueue, CodeLoc); + return submit_impl(CGF, SecondaryQueue, TlsCodeLocCapture.query(), + TlsCodeLocCapture.isToplevel()); #endif // __SYCL_USE_FALLBACK_ASSERT } @@ -429,7 +436,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { void wait( const detail::code_location &CodeLoc = detail::code_location::current()) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - wait_proxy(CodeLoc); + wait_proxy(TlsCodeLocCapture.query()); } /// Performs a blocking wait for the completion of all enqueued tasks in the @@ -443,7 +450,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { void wait_and_throw( const detail::code_location &CodeLoc = detail::code_location::current()) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - wait_and_throw_proxy(CodeLoc); + wait_and_throw_proxy(TlsCodeLocCapture.query()); } /// Proxy method for wait to forward the code location information to the @@ -486,7 +493,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const detail::code_location &CodeLoc = detail::code_location::current()) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return submit([&](handler &CGH) { CGH.fill(Ptr, Pattern, Count); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// Fills the specified memory with the specified pattern. @@ -507,7 +514,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.depends_on(DepEvent); CGH.fill(Ptr, Pattern, Count); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// Fills the specified memory with the specified pattern. @@ -530,7 +537,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.depends_on(DepEvents); CGH.fill(Ptr, Pattern, Count); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// Fills the memory pointed by a USM pointer with the value specified. @@ -734,7 +741,8 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { const void *Ptr, size_t Count, const detail::code_location &CodeLoc = detail::code_location::current()) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); - return submit([=](handler &CGH) { CGH.prefetch(Ptr, Count); }, CodeLoc); + return submit([=](handler &CGH) { CGH.prefetch(Ptr, Count); }, + TlsCodeLocCapture.query()); } /// Provides hints to the runtime library that data should be made available @@ -754,7 +762,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.depends_on(DepEvent); CGH.prefetch(Ptr, Count); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// Provides hints to the runtime library that data should be made available @@ -775,7 +783,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.depends_on(DepEvents); CGH.prefetch(Ptr, Count); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// Copies data from one 2D memory region to another, both pointed by @@ -1091,7 +1099,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.depends_on(DepEvents); return CGH.memcpy(Dest, Src, NumBytes, Offset); }, - CodeLoc); + TlsCodeLocCapture.query()); } constexpr bool IsDeviceImageScoped = PropertyListT::template has_property< @@ -1853,7 +1861,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { [&](handler &CGH) { CGH.ext_oneapi_wait_external_semaphore(extSemaphore); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// Instruct the queue with a non-blocking wait on an external semaphore. @@ -2032,7 +2040,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.template single_task( Properties, KernelFunc); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// single_task version with a kernel represented as a lambda. @@ -2075,7 +2083,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.template single_task( Properties, KernelFunc); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// single_task version with a kernel represented as a lambda. @@ -2122,7 +2130,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.template single_task( Properties, KernelFunc); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// single_task version with a kernel represented as a lambda. @@ -2363,7 +2371,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { [&](handler &CGH) { CGH.template parallel_for(Range, Properties, Rest...); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// parallel_for version with a kernel represented as a lambda + nd_range that @@ -2397,7 +2405,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.depends_on(DepEvent); CGH.template parallel_for(Range, Rest...); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// parallel_for version with a kernel represented as a lambda + nd_range that @@ -2419,7 +2427,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.depends_on(DepEvents); CGH.template parallel_for(Range, Rest...); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// Copies data from a memory region pointed to by a placeholder accessor to @@ -2678,13 +2686,20 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { /// A template-free version of submit. event submit_impl(std::function CGH, const detail::code_location &CodeLoc); + event submit_impl(std::function CGH, + const detail::code_location &CodeLoc, bool IsTopCodeLoc); /// A template-free version of submit. event submit_impl(std::function CGH, queue secondQueue, const detail::code_location &CodeLoc); + event submit_impl(std::function CGH, queue secondQueue, + const detail::code_location &CodeLoc, bool IsTopCodeLoc); /// A template-free version of submit_without_event. void submit_without_event_impl(std::function CGH, const detail::code_location &CodeLoc); + void submit_without_event_impl(std::function CGH, + const detail::code_location &CodeLoc, + bool IsTopCodeLoc); /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. @@ -2698,9 +2713,10 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { #if __SYCL_USE_FALLBACK_ASSERT // If post-processing is needed, fall back to the regular submit. // TODO: Revisit whether we can avoid this. - submit(CGF, CodeLoc); + submit(CGF, TlsCodeLocCapture.query()); #else - submit_without_event_impl(CGF, CodeLoc); + submit_without_event_impl(CGF, TlsCodeLocCapture.query(), + TlsCodeLocCapture.isToplevel()); #endif // __SYCL_USE_FALLBACK_ASSERT } @@ -2720,6 +2736,10 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { event submit_impl_and_postprocess(std::function CGH, const detail::code_location &CodeLoc, const SubmitPostProcessF &PostProcess); + event submit_impl_and_postprocess(std::function CGH, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess, + bool IsTopCodeLoc); /// A template-free version of submit. /// \param CGH command group function/handler /// \param secondQueue fallback queue @@ -2730,6 +2750,11 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { queue secondQueue, const detail::code_location &CodeLoc, const SubmitPostProcessF &PostProcess); + event submit_impl_and_postprocess(std::function CGH, + queue secondQueue, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess, + bool IsTopCodeLoc); /// parallel_for_impl with a kernel represented as a lambda + range that /// specifies global size only. @@ -2751,7 +2776,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { [&](handler &CGH) { CGH.template parallel_for(Range, Properties, Rest...); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// parallel_for_impl with a kernel represented as a lambda + range that @@ -2786,7 +2811,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.depends_on(DepEvent); CGH.template parallel_for(Range, Properties, Rest...); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// parallel_for_impl with a kernel represented as a lambda + range that @@ -2823,7 +2848,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase { CGH.depends_on(DepEvents); CGH.template parallel_for(Range, Properties, Rest...); }, - CodeLoc); + TlsCodeLocCapture.query()); } /// parallel_for_impl version with a kernel represented as a lambda + range diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 44ad02e4d595f..01aff56df520b 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -1172,7 +1172,7 @@ template void withAuxHandler(handler &CGH, FunctorTy Func) { handler AuxHandler(CGH.MQueue, CGH.eventNeeded()); if (!createSyclObjFromImpl(CGH.MQueue).is_in_order()) AuxHandler.depends_on(E); - AuxHandler.saveCodeLoc(CGH.MCodeLoc); + AuxHandler.copyCodeLoc(CGH); Func(AuxHandler); CGH.MLastEvent = AuxHandler.finalize(); return; diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index c4ae7d87f4403..1799bbedd4903 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -188,7 +188,8 @@ class CG { std::vector MEvents; }; - CG(CGType Type, StorageInitHelper D, detail::code_location loc = {}) + CG(CGType Type, StorageInitHelper D, detail::code_location loc = {}, + bool IsTopCodeLoc = true) : MType(Type), MData(std::move(D)) { // Capture the user code-location from Q.submit(), Q.parallel_for() // etc for later use; if code location information is not available, @@ -199,6 +200,7 @@ class CG { MFileName = loc.fileName(); MLine = loc.lineNumber(); MColumn = loc.columnNumber(); + MIsTopCodeLoc = IsTopCodeLoc; } CG(CG &&CommandGroup) = default; @@ -240,6 +242,7 @@ class CG { std::string MFunctionName, MFileName; // Storage for line and column of code location int32_t MLine, MColumn; + bool MIsTopCodeLoc; }; /// "Execute kernel" command group class. diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index acdf6baf50475..564e2ce148dd5 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -369,6 +370,15 @@ graph_impl::add(std::function CGF, const std::vector> &Dep) { (void)Args; sycl::handler Handler{shared_from_this()}; + + // save code location if one was set in TLS. + // idealy it would be nice to capture user's call code location + // by adding a parameter to the graph.add function, but this will + // break the API. At least capture code location from TLS, user + // can set it before calling graph.add + sycl::detail::tls_code_loc_t Tls; + Handler.saveCodeLoc(Tls.query(), Tls.isToplevel()); + CGF(Handler); if (Handler.getType() == sycl::detail::CGType::Barrier) { @@ -673,6 +683,23 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx, } ur_exp_command_buffer_sync_point_t NewSyncPoint; ur_exp_command_buffer_command_handle_t NewCommand = 0; + +#ifdef XPTI_ENABLE_INSTRUMENTATION + int32_t StreamID = xptiRegisterStream(sycl::detail::SYCL_STREAM_NAME); + sycl::detail::CGExecKernel *CGExec = + static_cast(Node->MCommandGroup.get()); + sycl::detail::code_location CodeLoc(CGExec->MFileName.c_str(), + CGExec->MFunctionName.c_str(), + CGExec->MLine, CGExec->MColumn); + auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData( + StreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc, + CGExec->MKernelName.c_str(), nullptr, CGExec->MNDRDesc, + CGExec->MKernelBundle, CGExec->MArgs); + if (CmdTraceEvent) + sycl::detail::emitInstrumentationGeneral( + StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); +#endif + ur_result_t Res = sycl::detail::enqueueImpCommandBufferKernel( Ctx, DeviceImpl, CommandBuffer, *static_cast((Node->MCommandGroup.get())), @@ -685,6 +712,12 @@ exec_graph_impl::enqueueNodeDirect(sycl::context Ctx, "Failed to add kernel to UR command-buffer"); } +#ifdef XPTI_ENABLE_INSTRUMENTATION + if (CmdTraceEvent) + sycl::detail::emitInstrumentationGeneral( + StreamID, InstanceID, CmdTraceEvent, xpti::trace_task_end, nullptr); +#endif + return NewSyncPoint; } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index 37a697a57bc2b..b34c22ad2777e 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -194,6 +194,9 @@ class handler_impl { /// The list of valid SYCL events that need to complete /// before barrier command can be executed std::vector MEventsWaitWithBarrier; + + /// True if MCodeLoc is sycl entry point code location + bool MIsTopCodeLoc = true; }; } // namespace detail diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 869529bb75162..0fb68f9c505b6 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -354,9 +354,10 @@ event queue_impl::submit_impl(const std::function &CGF, const std::shared_ptr &SecondaryQueue, bool CallerNeedsEvent, const detail::code_location &Loc, + bool IsTopCodeLoc, const SubmitPostProcessF *PostProcess) { handler Handler(Self, PrimaryQueue, SecondaryQueue, CallerNeedsEvent); - Handler.saveCodeLoc(Loc); + Handler.saveCodeLoc(Loc, IsTopCodeLoc); { NestedCallsTracker tracker; @@ -397,7 +398,8 @@ event queue_impl::submit_impl(const std::function &CGF, // finishes execution. event FlushEvent = submit_impl( [&](handler &ServiceCGH) { Stream->generateFlushCommand(ServiceCGH); }, - Self, PrimaryQueue, SecondaryQueue, /*CallerNeedsEvent*/ true, Loc, {}); + Self, PrimaryQueue, SecondaryQueue, /*CallerNeedsEvent*/ true, Loc, + IsTopCodeLoc, {}); EventImpl->attachEventToCompleteWeak(detail::getSyclObjImpl(FlushEvent)); registerStreamServiceEvent(detail::getSyclObjImpl(FlushEvent)); } @@ -414,7 +416,7 @@ event queue_impl::submitWithHandler(const std::shared_ptr &Self, CGH.depends_on(DepEvents); HandlerFunc(CGH); }, - Self, {}); + Self, /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true); } template diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 6bf9c338e6f3f..4b68aebab9013 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -335,16 +335,17 @@ class queue_impl { event submit(const std::function &CGF, const std::shared_ptr &Self, const std::shared_ptr &SecondQueue, - const detail::code_location &Loc, + const detail::code_location &Loc, bool IsTopCodeLoc, const SubmitPostProcessF *PostProcess = nullptr) { event ResEvent; try { ResEvent = submit_impl(CGF, Self, Self, SecondQueue, - /*CallerNeedsEvent=*/true, Loc, PostProcess); + /*CallerNeedsEvent=*/true, Loc, IsTopCodeLoc, + PostProcess); } catch (...) { - ResEvent = - SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, - /*CallerNeedsEvent=*/true, Loc, PostProcess); + ResEvent = SecondQueue->submit_impl(CGF, SecondQueue, Self, SecondQueue, + /*CallerNeedsEvent=*/true, Loc, + IsTopCodeLoc, PostProcess); } return discard_or_return(ResEvent); } @@ -359,19 +360,20 @@ class queue_impl { /// \return a SYCL event object for the submitted command group. event submit(const std::function &CGF, const std::shared_ptr &Self, - const detail::code_location &Loc, + const detail::code_location &Loc, bool IsTopCodeLoc, const SubmitPostProcessF *PostProcess = nullptr) { - auto ResEvent = submit_impl(CGF, Self, Self, nullptr, - /*CallerNeedsEvent=*/true, Loc, PostProcess); + auto ResEvent = + submit_impl(CGF, Self, Self, nullptr, + /*CallerNeedsEvent=*/true, Loc, IsTopCodeLoc, PostProcess); return discard_or_return(ResEvent); } void submit_without_event(const std::function &CGF, const std::shared_ptr &Self, - const detail::code_location &Loc, + const detail::code_location &Loc, bool IsTopCodeLoc, const SubmitPostProcessF *PostProcess = nullptr) { submit_impl(CGF, Self, Self, nullptr, /*CallerNeedsEvent=*/false, Loc, - PostProcess); + IsTopCodeLoc, PostProcess); } /// Performs a blocking wait for the completion of all enqueued tasks in the @@ -812,7 +814,7 @@ class queue_impl { const std::shared_ptr &PrimaryQueue, const std::shared_ptr &SecondaryQueue, bool CallerNeedsEvent, const detail::code_location &Loc, - const SubmitPostProcessF *PostProcess); + bool IsTopCodeLoc, const SubmitPostProcessF *PostProcess); /// Helper function for submitting a memory operation with a handler. /// \param Self is a shared_ptr to this queue. diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 133f55992f356..598b686b28e95 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1995,8 +1995,10 @@ void instrumentationAddExtraKernelMetadata( Program = SyclKernel->getProgramRef(); if (!SyclKernel->isCreatedFromSource()) EliminatedArgMask = SyclKernel->getKernelArgMask(); - } else { - assert(Queue && "Kernel submissions should have an associated queue"); + } else if (Queue) { + // NOTE: Queue can be null when kernel is directly enqueued to a command + // buffer + // by graph API, when a modifiable graph is finalized. std::tie(Kernel, KernelMutex, EliminatedArgMask, Program) = detail::ProgramManager::getInstance().getOrCreateKernel( Queue->getContextImplPtr(), Queue->getDeviceImplPtr(), KernelName); @@ -2020,6 +2022,7 @@ void instrumentationAddExtraKernelMetadata( } void instrumentationFillCommonData(const std::string &KernelName, + const std::string &FuncName, const std::string &FileName, uint64_t Line, uint64_t Column, const void *const Address, const QueueImplPtr &Queue, @@ -2036,8 +2039,9 @@ void instrumentationFillCommonData(const std::string &KernelName, xpti::payload_t Payload; if (!FileName.empty()) { // File name has a valid string - Payload = xpti::payload_t(KernelName.c_str(), FileName.c_str(), Line, - Column, Address); + Payload = xpti::payload_t(FuncName.empty() ? KernelName.c_str() + : FuncName.c_str(), + FileName.c_str(), Line, Column, Address); HasSourceInfo = true; } else if (Address) { // We have a valid function name and an address @@ -2081,8 +2085,9 @@ void instrumentationFillCommonData(const std::string &KernelName, #ifdef XPTI_ENABLE_INSTRUMENTATION std::pair emitKernelInstrumentationData( int32_t StreamID, const std::shared_ptr &SyclKernel, - const detail::code_location &CodeLoc, const std::string &SyclKernelName, - const QueueImplPtr &Queue, const NDRDescT &NDRDesc, + const detail::code_location &CodeLoc, bool IsTopCodeLoc, + const std::string &SyclKernelName, const QueueImplPtr &Queue, + const NDRDescT &NDRDesc, const std::shared_ptr &KernelBundleImplPtr, std::vector &CGArgs) { @@ -2101,13 +2106,25 @@ std::pair emitKernelInstrumentationData( std::string FileName = CodeLoc.fileName() ? CodeLoc.fileName() : std::string(); - instrumentationFillCommonData(KernelName, FileName, CodeLoc.lineNumber(), - CodeLoc.columnNumber(), Address, Queue, - FromSource, InstanceID, CmdTraceEvent); + + // If code location is above sycl layer, use function name from code + // location instead of kernel name in event payload + std::string FuncName = (!IsTopCodeLoc && CodeLoc.functionName()) + ? CodeLoc.functionName() + : std::string(); + + instrumentationFillCommonData(KernelName, FuncName, FileName, + CodeLoc.lineNumber(), CodeLoc.columnNumber(), + Address, Queue, FromSource, InstanceID, + CmdTraceEvent); if (CmdTraceEvent) { // Stash the queue_id mutable metadata in TLS - xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(Queue)); + // NOTE: Queue can be null when kernel is directly enqueued to a command + // buffer by graph API, when a modifiable graph is finalized. + if (Queue.get()) + xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, + getQueueID(Queue)); instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, KernelBundleImplPtr, SyclKernelName, SyclKernel, Queue, CGArgs); @@ -2144,8 +2161,14 @@ void ExecCGCommand::emitInstrumentationData() { break; } + // If code location is above sycl layer, use function name from code + // location instead of kernel name in event payload + std::string FuncName; + if (!MCommandGroup->MIsTopCodeLoc) + FuncName = MCommandGroup->MFunctionName; + xpti_td *CmdTraceEvent = nullptr; - instrumentationFillCommonData(KernelName, MCommandGroup->MFileName, + instrumentationFillCommonData(KernelName, FuncName, MCommandGroup->MFileName, MCommandGroup->MLine, MCommandGroup->MColumn, MAddress, MQueue, FromSource, MInstanceID, CmdTraceEvent); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 8324c1d13fb30..1aecf5ed4eabb 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -684,8 +684,9 @@ class ExecCGCommand : public Command { #ifdef XPTI_ENABLE_INSTRUMENTATION std::pair emitKernelInstrumentationData( int32_t StreamID, const std::shared_ptr &SyclKernel, - const detail::code_location &CodeLoc, const std::string &SyclKernelName, - const QueueImplPtr &Queue, const NDRDescT &NDRDesc, + const detail::code_location &CodeLoc, bool IsTopCodeLoc, + const std::string &SyclKernelName, const QueueImplPtr &Queue, + const NDRDescT &NDRDesc, const std::shared_ptr &KernelBundleImplPtr, std::vector &CGArgs); #endif diff --git a/sycl/source/enqueue_functions.cpp b/sycl/source/enqueue_functions.cpp index 4cfe1c46d8d47..73c4ebe249467 100644 --- a/sycl/source/enqueue_functions.cpp +++ b/sycl/source/enqueue_functions.cpp @@ -18,7 +18,7 @@ __SYCL_EXPORT void memcpy(queue Q, void *Dest, const void *Src, size_t NumBytes, sycl::detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); auto QueueImplPtr = sycl::detail::getSyclObjImpl(Q); QueueImplPtr->memcpy(QueueImplPtr, Dest, Src, NumBytes, {}, - /*CallerNeedsEvent=*/false, CodeLoc); + /*CallerNeedsEvent=*/false, TlsCodeLocCapture.query()); } __SYCL_EXPORT void memset(queue Q, void *Ptr, int Value, size_t NumBytes, diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index a7b66235b6653..851dfad4d69dc 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -266,8 +266,8 @@ event handler::finalize() { // uint32_t StreamID, uint64_t InstanceID, xpti_td* TraceEvent, int32_t StreamID = xptiRegisterStream(detail::SYCL_STREAM_NAME); auto [CmdTraceEvent, InstanceID] = emitKernelInstrumentationData( - StreamID, MKernel, MCodeLoc, MKernelName.c_str(), MQueue, - impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs); + StreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc, MKernelName.c_str(), + MQueue, impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs); auto EnqueueKernel = [&, CmdTraceEvent = CmdTraceEvent, InstanceID = InstanceID]() { #else @@ -515,6 +515,10 @@ event handler::finalize() { throw exception(make_error_code(errc::runtime), "Internal Error. Command group cannot be constructed."); + // Propagate MIsTopCodeLoc state to CommandGroup. + // Will be used for XPTI payload generation for CG's related events. + CommandGroup->MIsTopCodeLoc = impl->MIsTopCodeLoc; + // If there is a graph associated with the handler we are in the explicit // graph mode, so we store the CG instead of submitting it to the scheduler, // so it can be retrieved by the graph later. @@ -1981,5 +1985,18 @@ void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset, Dims}; } +void handler::saveCodeLoc(detail::code_location CodeLoc, bool IsTopCodeLoc) { + MCodeLoc = CodeLoc; + impl->MIsTopCodeLoc = IsTopCodeLoc; +} +void handler::saveCodeLoc(detail::code_location CodeLoc) { + MCodeLoc = CodeLoc; + impl->MIsTopCodeLoc = true; +} +void handler::copyCodeLoc(const handler &other) { + MCodeLoc = other.MCodeLoc; + impl->MIsTopCodeLoc = other.impl->MIsTopCodeLoc; +} + } // namespace _V1 } // namespace sycl diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 2a90779e7829b..4e955d1e1d674 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -122,14 +122,14 @@ event queue::memcpy(void *Dest, const void *Src, size_t Count, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return impl->memcpy(impl, Dest, Src, Count, {}, /*CallerNeedsEvent=*/true, - CodeLoc); + TlsCodeLocCapture.query()); } event queue::memcpy(void *Dest, const void *Src, size_t Count, event DepEvent, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return impl->memcpy(impl, Dest, Src, Count, {DepEvent}, - /*CallerNeedsEvent=*/true, CodeLoc); + /*CallerNeedsEvent=*/true, TlsCodeLocCapture.query()); } event queue::memcpy(void *Dest, const void *Src, size_t Count, @@ -137,7 +137,7 @@ event queue::memcpy(void *Dest, const void *Src, size_t Count, const detail::code_location &CodeLoc) { detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc); return impl->memcpy(impl, Dest, Src, Count, DepEvents, - /*CallerNeedsEvent=*/true, CodeLoc); + /*CallerNeedsEvent=*/true, TlsCodeLocCapture.query()); } event queue::mem_advise(const void *Ptr, size_t Length, int Advice, @@ -166,30 +166,59 @@ event queue::mem_advise(const void *Ptr, size_t Length, int Advice, event queue::submit_impl(std::function CGH, const detail::code_location &CodeLoc) { - return impl->submit(CGH, impl, CodeLoc); + return impl->submit(CGH, impl, CodeLoc, true); +} +event queue::submit_impl(std::function CGH, + const detail::code_location &CodeLoc, + bool IsTopCodeLoc) { + return impl->submit(CGH, impl, CodeLoc, IsTopCodeLoc); } event queue::submit_impl(std::function CGH, queue SecondQueue, const detail::code_location &CodeLoc) { - return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc); + return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, true); +} +event queue::submit_impl(std::function CGH, queue SecondQueue, + const detail::code_location &CodeLoc, + bool IsTopCodeLoc) { + return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, IsTopCodeLoc); } void queue::submit_without_event_impl(std::function CGH, const detail::code_location &CodeLoc) { - return impl->submit_without_event(CGH, impl, CodeLoc); + return impl->submit_without_event(CGH, impl, CodeLoc, true); +} +void queue::submit_without_event_impl(std::function CGH, + const detail::code_location &CodeLoc, + bool IsTopCodeLoc) { + return impl->submit_without_event(CGH, impl, CodeLoc, IsTopCodeLoc); } event queue::submit_impl_and_postprocess( std::function CGH, const detail::code_location &CodeLoc, const SubmitPostProcessF &PostProcess) { - return impl->submit(CGH, impl, CodeLoc, &PostProcess); + return impl->submit(CGH, impl, CodeLoc, true, &PostProcess); +} +event queue::submit_impl_and_postprocess(std::function CGH, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess, + bool IsTopCodeLoc) { + return impl->submit(CGH, impl, CodeLoc, IsTopCodeLoc, &PostProcess); } event queue::submit_impl_and_postprocess( std::function CGH, queue SecondQueue, const detail::code_location &CodeLoc, const SubmitPostProcessF &PostProcess) { - return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, &PostProcess); + return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, true, &PostProcess); +} +event queue::submit_impl_and_postprocess(std::function CGH, + queue SecondQueue, + const detail::code_location &CodeLoc, + const SubmitPostProcessF &PostProcess, + bool IsTopCodeLoc) { + return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc, IsTopCodeLoc, + &PostProcess); } void queue::wait_proxy(const detail::code_location &CodeLoc) { diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index ec6ec2096403f..ad4d709854759 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3120,7 +3120,9 @@ _ZN4sycl3_V15queue10mem_adviseEPKvmiRKNS0_6detail13code_locationE _ZN4sycl3_V15queue10mem_adviseEPKvmiRKSt6vectorINS0_5eventESaIS5_EERKNS0_6detail13code_locationE _ZN4sycl3_V15queue10wait_proxyERKNS0_6detail13code_locationE _ZN4sycl3_V15queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE +_ZN4sycl3_V15queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationEb _ZN4sycl3_V15queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationE +_ZN4sycl3_V15queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationEb _ZN4sycl3_V15queue15ext_oneapi_prodEv _ZN4sycl3_V15queue18throw_asynchronousEv _ZN4sycl3_V15queue20memcpyToDeviceGlobalEPvPKvbmmRKSt6vectorINS0_5eventESaIS6_EE @@ -3129,8 +3131,11 @@ _ZN4sycl3_V15queue22memcpyFromDeviceGlobalEPvPKvbmmRKSt6vectorINS0_5eventESaIS6_ _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25ext_oneapi_submit_barrierERKSt6vectorINS0_5eventESaIS3_EERKNS0_6detail13code_locationE _ZN4sycl3_V15queue25submit_without_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE +_ZN4sycl3_V15queue25submit_without_event_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationEb _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE +_ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEEb _ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEE +_ZN4sycl3_V15queue27submit_impl_and_postprocessESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationERKS2_IFvbbRNS0_5eventEEEb _ZN4sycl3_V15queue29ext_oneapi_set_external_eventERKNS0_5eventE _ZN4sycl3_V15queue6memcpyEPvPKvmNS0_5eventERKNS0_6detail13code_locationE _ZN4sycl3_V15queue6memcpyEPvPKvmRKNS0_6detail13code_locationE @@ -3466,6 +3471,9 @@ _ZN4sycl3_V17handler10mem_adviseEPKvmi _ZN4sycl3_V17handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmbb _ZN4sycl3_V17handler11SetHostTaskEOSt8functionIFvNS0_14interop_handleEEE _ZN4sycl3_V17handler11SetHostTaskEOSt8functionIFvvEE +_ZN4sycl3_V17handler11copyCodeLocERKS1_ +_ZN4sycl3_V17handler11saveCodeLocENS0_6detail13code_locationE +_ZN4sycl3_V17handler11saveCodeLocENS0_6detail13code_locationEb _ZN4sycl3_V17handler11storeRawArgEPKvm _ZN4sycl3_V17handler12addReductionERKSt10shared_ptrIKvE _ZN4sycl3_V17handler13getKernelNameEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 55ce460c64559..de9e671c5757e 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3729,6 +3729,7 @@ ?constructorNotification@detail@_V1@sycl@@YAXPEAX0W4target@access@23@W4mode@523@AEBUcode_location@123@@Z ?contains_specialization_constants@kernel_bundle_plain@detail@_V1@sycl@@QEBA_NXZ ?contextSetExtendedDeleter@pi@detail@_V1@sycl@@YAXAEBVcontext@34@P6AXPEAX@Z1@Z +?copyCodeLoc@handler@_V1@sycl@@AEAAXAEBV123@@Z ?cpu_selector_v@_V1@sycl@@YAHAEBVdevice@12@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?create_image@experimental@oneapi@ext@_V1@sycl@@YA?AUsampled_image_handle@12345@AEAVimage_mem@12345@AEBUbindless_image_sampler@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z @@ -4082,6 +4083,7 @@ ?isPathPresent@OSUtil@detail@_V1@sycl@@SA_NAEBV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@@Z ?isPlaceholder@AccessorBaseHost@detail@_V1@sycl@@QEBA_NXZ ?isStateExplicitKernelBundle@handler@_V1@sycl@@AEBA_NXZ +?isToplevel@tls_code_loc_t@detail@_V1@sycl@@QEBA_NXZ ?isValidModeForDestinationAccessor@handler@_V1@sycl@@CA_NW4mode@access@23@@Z ?isValidModeForSourceAccessor@handler@_V1@sycl@@CA_NW4mode@access@23@@Z ?isValidTargetForExplicitOp@handler@_V1@sycl@@CA_NW4target@access@23@@Z @@ -4201,6 +4203,7 @@ ?sampledImageConstructorNotification@image_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@AEBUimage_sampler@34@@Z ?sampledImageDestructorNotification@image_plain@detail@_V1@sycl@@IEAAXPEAX@Z ?saveCodeLoc@handler@_V1@sycl@@AEAAXUcode_location@detail@23@@Z +?saveCodeLoc@handler@_V1@sycl@@AEAAXUcode_location@detail@23@_N@Z ?select_device@detail@_V1@sycl@@YA?AVdevice@23@AEBV?$function@$$A6AHAEBVdevice@_V1@sycl@@@Z@std@@@Z ?select_device@detail@_V1@sycl@@YA?AVdevice@23@AEBV?$function@$$A6AHAEBVdevice@_V1@sycl@@@Z@std@@AEBVcontext@23@@Z ?select_device@device_selector@_V1@sycl@@UEBA?AVdevice@23@XZ @@ -4249,10 +4252,15 @@ ?storeRawArg@handler@_V1@sycl@@AEAAPEAXPEBX_K@Z ?stringifyErrorCode@detail@_V1@sycl@@YAPEBDH@Z ?submit_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@@Z +?submit_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@_N@Z ?submit_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@@Z +?submit_impl@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@_N@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z +?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z ?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@@Z +?submit_impl_and_postprocess@queue@_V1@sycl@@AEAA?AVevent@23@V?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@V123@AEBUcode_location@detail@23@AEBV?$function@$$A6AX_N0AEAVevent@_V1@sycl@@@Z@6@_N@Z ?submit_without_event_impl@queue@_V1@sycl@@AEAAXV?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@@Z +?submit_without_event_impl@queue@_V1@sycl@@AEAAXV?$function@$$A6AXAEAVhandler@_V1@sycl@@@Z@std@@AEBUcode_location@detail@23@_N@Z ?supportsUSMFill2D@handler@_V1@sycl@@AEAA_NXZ ?supportsUSMMemcpy2D@handler@_V1@sycl@@AEAA_NXZ ?supportsUSMMemset2D@handler@_V1@sycl@@AEAA_NXZ diff --git a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp index c244e7b60a0b3..2b07861ef28fd 100644 --- a/sycl/unittests/scheduler/HostTaskAndBarrier.cpp +++ b/sycl/unittests/scheduler/HostTaskAndBarrier.cpp @@ -63,16 +63,16 @@ class BarrierHandlingWithHostTask : public ::testing::Test { [&](handler &CGH) { CGH.host_task(BlockHostTask ? CustomHostLambda : [] {}); }, - QueueDevImpl, nullptr, {}); + QueueDevImpl, nullptr, {}, true); } else if (Type == TestCGType::KERNEL_TASK) { return QueueDevImpl->submit( [&](handler &CGH) { CGH.single_task>([] {}); }, - QueueDevImpl, nullptr, {}); + QueueDevImpl, nullptr, {}, true); } else // (Type == TestCGType::BARRIER) { return QueueDevImpl->submit( [&](handler &CGH) { CGH.ext_oneapi_barrier(); }, QueueDevImpl, - nullptr, {}); + nullptr, {}, true); } } @@ -80,7 +80,7 @@ class BarrierHandlingWithHostTask : public ::testing::Test { InsertBarrierWithWaitList(const std::vector &WaitList) { return QueueDevImpl->submit( [&](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); }, QueueDevImpl, - nullptr, {}); + nullptr, {}, true); } void BuildAndCheckInnerQueueState(std::vector &Events) { diff --git a/sycl/unittests/xpti_trace/NodeCreation.cpp b/sycl/unittests/xpti_trace/NodeCreation.cpp index 6b963bc8563af..6adba2fabff9d 100644 --- a/sycl/unittests/xpti_trace/NodeCreation.cpp +++ b/sycl/unittests/xpti_trace/NodeCreation.cpp @@ -15,6 +15,7 @@ #include #include +#include #include using ::testing::HasSubstr; @@ -23,6 +24,7 @@ XPTI_CALLBACK_API bool queryReceivedNotifications(uint16_t &TraceType, std::string &Message); XPTI_CALLBACK_API void resetReceivedNotifications(); XPTI_CALLBACK_API void addAnalyzedTraceType(uint16_t); +XPTI_CALLBACK_API void clearAnalyzedTraceTypes(); class NodeCreation : public ::testing::Test { protected: @@ -34,6 +36,7 @@ class NodeCreation : public ::testing::Test { void TearDown() { resetReceivedNotifications(); + clearAnalyzedTraceTypes(); xptiForceSetTraceEnabled(false); } @@ -89,6 +92,29 @@ TEST_F(NodeCreation, QueueParallelForWithNoGraphNode) { EXPECT_THAT(Message, HasSubstr("TestKernel")); } +TEST_F(NodeCreation, QueueParallelForWithUserCodeLoc) { + sycl::queue Q; + try { + sycl::buffer buf(sycl::range<1>(1)); + sycl::detail::tls_code_loc_t myLoc( + {"LOCAL_CODELOC_FILE", "LOCAL_CODELOC_NAME", 1, 1}); + Q.submit( + [&](handler &Cgh) { + sycl::accessor acc(buf, Cgh, sycl::read_write); + Cgh.parallel_for>(1, [=](sycl::id<1> idx) {}); + }, + TestCodeLocation); + } catch (sycl::exception &e) { + std::ignore = e; + } + Q.wait(); + uint16_t TraceType = 0; + std::string Message; + ASSERT_TRUE(queryReceivedNotifications(TraceType, Message)); + EXPECT_EQ(TraceType, xpti::trace_node_create); + EXPECT_THAT(Message, HasSubstr("LOCAL_CODELOC_NAME")); +} + TEST_F(NodeCreation, QueueMemcpyNode) { sycl::queue Q; @@ -118,4 +144,96 @@ TEST_F(NodeCreation, QueueMemsetNode) { ASSERT_TRUE(queryReceivedNotifications(TraceType, Message)); EXPECT_EQ(TraceType, xpti::trace_node_create); EXPECT_THAT(Message, HasSubstr("memory_transfer_node")); -} \ No newline at end of file +} + +TEST_F(NodeCreation, CommandGraphRecord) { + sycl::queue Q; + try { + sycl::ext::oneapi::experimental::command_graph cmdGraph(Q.get_context(), + Q.get_device()); + + cmdGraph.begin_recording(Q); + + { + sycl::detail::tls_code_loc_t myLoc( + {"LOCAL_CODELOC_FILE", "LOCAL_CODELOC_NAME", 1, 1}); + Q.submit([&](handler &Cgh) { + Cgh.parallel_for>(1, [=](sycl::id<1> idx) {}); + }); + } + + cmdGraph.end_recording(Q); + + addAnalyzedTraceType(xpti::trace_task_begin); + addAnalyzedTraceType(xpti::trace_task_end); + + auto exeGraph = cmdGraph.finalize(); + + // Notifications should have been generated during finalize + uint16_t TraceType = 0; + std::string Message; + ASSERT_TRUE(queryReceivedNotifications(TraceType, Message)); + EXPECT_EQ(TraceType, xpti::trace_node_create); + EXPECT_THAT(Message, HasSubstr("LOCAL_CODELOC_NAME")); + + ASSERT_TRUE(queryReceivedNotifications(TraceType, Message)); + EXPECT_EQ(TraceType, xpti::trace_task_begin); + + ASSERT_TRUE(queryReceivedNotifications(TraceType, Message)); + EXPECT_EQ(TraceType, xpti::trace_task_end); + + } catch (sycl::exception &e) { + FAIL() << "sycl::exception what=" << e.what(); + } +} + +TEST_F(NodeCreation, CommandGraphAddAPI) { + sycl::queue Q; + try { + sycl::ext::oneapi::experimental::command_graph cmdGraph(Q.get_context(), + Q.get_device()); + + auto doAddNode = [&](const sycl::detail::code_location &loc) { + sycl::detail::tls_code_loc_t codeLoc(loc); + return cmdGraph.add([&](handler &Cgh) { + Cgh.parallel_for>(1, [=](sycl::id<1> idx) {}); + }); + }; + + auto node1 = doAddNode({"LOCAL_CODELOC_FILE", "LOCAL_NODE_1", 1, 1}); + auto node2 = doAddNode({"LOCAL_CODELOC_FILE", "LOCAL_NODE_2", 2, 1}); + cmdGraph.make_edge(node1, node2); + + addAnalyzedTraceType(xpti::trace_task_begin); + addAnalyzedTraceType(xpti::trace_task_end); + + auto exeGraph = cmdGraph.finalize(); + + // Notifications should have get generated during finalize + // + uint16_t TraceType = 0; + std::string Message; + ASSERT_TRUE(queryReceivedNotifications(TraceType, Message)); + EXPECT_EQ(TraceType, xpti::trace_node_create); + EXPECT_THAT(Message, HasSubstr("LOCAL_NODE_1")); + + ASSERT_TRUE(queryReceivedNotifications(TraceType, Message)); + EXPECT_EQ(TraceType, xpti::trace_task_begin); + + ASSERT_TRUE(queryReceivedNotifications(TraceType, Message)); + EXPECT_EQ(TraceType, xpti::trace_task_end); + + ASSERT_TRUE(queryReceivedNotifications(TraceType, Message)); + EXPECT_EQ(TraceType, xpti::trace_node_create); + EXPECT_THAT(Message, HasSubstr("LOCAL_NODE_2")); + + ASSERT_TRUE(queryReceivedNotifications(TraceType, Message)); + EXPECT_EQ(TraceType, xpti::trace_task_begin); + + ASSERT_TRUE(queryReceivedNotifications(TraceType, Message)); + EXPECT_EQ(TraceType, xpti::trace_task_end); + + } catch (sycl::exception &e) { + FAIL() << "sycl::exception what=" << e.what(); + } +}