diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 9630143c9ee51..5794d6930016a 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -9,6 +9,7 @@ #pragma once #include "sycl/exception.hpp" +#include #include #include #include @@ -19,8 +20,10 @@ #include #include +#include #include #include +#include #include #include @@ -176,6 +179,42 @@ class KernelProgramCache { void setContextPtr(const ContextPtr &AContext) { MParentContext = AContext; } + // Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is + // set. + static inline void traceProgram(const std::string &Msg, + const ProgramCacheKeyT &CacheKey) { + if (!SYCLConfig::isTraceInMemCache()) + return; + + int ImageId = CacheKey.first.second; + std::stringstream DeviceList; + for (const auto &Device : CacheKey.second) + DeviceList << "0x" << std::setbase(16) + << reinterpret_cast(Device) << ","; + + std::string Identifier = "[Key:{imageId = " + std::to_string(ImageId) + + ",urDevice = " + DeviceList.str() + "}]: "; + + std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id() + << "][Program Cache]" << Identifier << Msg << std::endl; + } + + // Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is + // set. + static inline void traceKernel(const std::string &Msg, + const std::string &KernelName, + bool IsKernelFastCache = false) { + if (!SYCLConfig::isTraceInMemCache()) + return; + + std::string Identifier = + "[IsFastCache: " + std::to_string(IsKernelFastCache) + + "][Key:{Name = " + KernelName + "}]: "; + + std::cerr << "[In-Memory Cache][Thread Id:" << std::this_thread::get_id() + << "][Kernel Cache]" << Identifier << Msg << std::endl; + } + Locked acquireCachedPrograms() { return {MCachedPrograms, MProgramCacheMutex}; } @@ -195,7 +234,9 @@ class KernelProgramCache { CommonProgramKeyT CommonKey = std::make_pair(CacheKey.first.second, CacheKey.second); ProgCache.KeyMap.emplace(CommonKey, CacheKey); - } + traceProgram("Program inserted.", CacheKey); + } else + traceProgram("Program fetched.", CacheKey); return std::make_pair(It->second, DidInsert); } @@ -217,7 +258,9 @@ class KernelProgramCache { CommonProgramKeyT CommonKey = std::make_pair(CacheKey.first.second, CacheKey.second); ProgCache.KeyMap.emplace(CommonKey, CacheKey); - } + traceProgram("Program inserted.", CacheKey); + } else + traceProgram("Program fetched.", CacheKey); return DidInsert; } @@ -227,8 +270,11 @@ class KernelProgramCache { auto LockedCache = acquireKernelsPerProgramCache(); auto &Cache = LockedCache.get()[Program]; auto [It, DidInsert] = Cache.try_emplace(KernelName, nullptr); - if (DidInsert) + if (DidInsert) { It->second = std::make_shared(getAdapter()); + traceKernel("Kernel inserted.", KernelName); + } else + traceKernel("Kernel fetched.", KernelName); return std::make_pair(It->second, DidInsert); } @@ -237,6 +283,7 @@ class KernelProgramCache { std::unique_lock Lock(MKernelFastCacheMutex); auto It = MKernelFastCache.find(CacheKey); if (It != MKernelFastCache.end()) { + traceKernel("Kernel fetched.", std::get<3>(CacheKey), true); return It->second; } return std::make_tuple(nullptr, nullptr, nullptr, nullptr); @@ -247,6 +294,7 @@ class KernelProgramCache { std::unique_lock Lock(MKernelFastCacheMutex); // if no insertion took place, thus some other thread has already inserted // smth in the cache + traceKernel("Kernel inserted.", std::get<3>(CacheKey), true); MKernelFastCache.emplace(CacheKey, CacheVal); } diff --git a/sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp b/sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp new file mode 100644 index 0000000000000..447ec12200ade --- /dev/null +++ b/sycl/test-e2e/KernelAndProgram/trace_kernel_program_cache.cpp @@ -0,0 +1,59 @@ +// Tests tracing of in-memory kernel and program cache. + +// RUN: %{build} -o %t.out + +// There should be no tracing output when SYCL_CACHE_IN_MEM is not set +// or SYCL_CACHE_TRACE is set to 0. + +// RUN: env SYCL_CACHE_IN_MEM=0 %{run} %t.out 2> %t.trace1 +// RUN: FileCheck --allow-empty --input-file=%t.trace1 --implicit-check-not "In-Memory Cache" %s +// RUN: env SYCL_CACHE_TRACE=0 %{run} %t.out 2> %t.trace2 +// RUN: FileCheck --allow-empty --input-file=%t.trace2 --implicit-check-not "In-Memory Cache" %s + +// RUN: env SYCL_CACHE_TRACE=2 %{run} %t.out 2> %t.trace3 +// RUN: FileCheck %s --input-file=%t.trace3 --check-prefix=CHECK-CACHE-TRACE + +#include + +#include +#include + +using namespace sycl; + +constexpr specialization_id spec_id; + +int main() { + queue q; + + // Check program insertion into cache and kernel insertion into fast and + // regular kernel cache. + // CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Program Cache][Key:{{.*}}]: Program inserted. + // CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 0][Key:{Name = [[KERNELNAME1:.*]]]: Kernel inserted. + // CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 1][Key:{Name = [[KERNELNAME1]]]: Kernel inserted. + + // In the 2nd and 3rd invocation of this loop, the kernel should be fetched + // from fast kernel cache. + // CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 1][Key:{Name = [[KERNELNAME1]]]: Kernel fetched. + // CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 1][Key:{Name = [[KERNELNAME1]]]: Kernel fetched. + for (int i = 0; i < 3; i++) + q.single_task([] {}).wait(); + + auto *p = malloc_device(1, q); + + // Check program and kernel insertion into cache. There should be different + // programs for different iterations of this loop, because of the different + // specialization constants. + // CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Program Cache][Key:{{.*}}]: Program inserted. + // CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 0][Key:{Name = [[KERNELNAME2:.*]]]: Kernel inserted. + // CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Program Cache][Key:{{.*}}]: Program inserted. + // CHECK-CACHE-TRACE: [In-Memory Cache][Thread Id:{{.*}}][Kernel Cache][IsFastCache: 0][Key:{Name = [[KERNELNAME2]]]: Kernel inserted. + for (int i = 0; i < 2; ++i) + q.submit([&](handler &cgh) { + cgh.set_specialization_constant(i); + cgh.parallel_for(1, [=](auto, kernel_handler kh) { + *p = kh.get_specialization_constant(); + }); + }).wait(); + + free(p, q); +}