Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DeviceSanitizer] Memory overhead statistics #1869

Merged
merged 24 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions source/loader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,16 @@ if(UR_ENABLE_SANITIZER)
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_libdevice.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_options.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_options.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_quarantine.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_quarantine.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_report.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_report.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow_setup.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_shadow_setup.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_statistics.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_statistics.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_validator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_validator.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/common.hpp
Expand Down
1 change: 1 addition & 0 deletions source/loader/layers/sanitizer/asan_allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ struct AllocInfo {
StackTrace ReleaseStack;

void print();
/// Returns how many bytes of this allocation are not part of the user span,
/// i.e. AllocSize minus the distance between UserBegin and UserEnd.
size_t getRedzoneSize() {
    const auto UserSize = UserEnd - UserBegin;
    return AllocSize - UserSize;
}
};

using AllocationMap = std::map<uptr, std::shared_ptr<AllocInfo>>;
Expand Down
116 changes: 79 additions & 37 deletions source/loader/layers/sanitizer/asan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "asan_quarantine.hpp"
#include "asan_report.hpp"
#include "asan_shadow_setup.hpp"
#include "asan_statistics.hpp"
#include "asan_validator.hpp"
#include "stacktrace.hpp"
#include "ur_sanitizer_utils.hpp"
Expand Down Expand Up @@ -58,7 +59,7 @@ ur_result_t urEnqueueUSMSet(ur_queue_handle_t Queue, void *Ptr, char Value,
Queue, Ptr, 1, &Value, Size, NumEvents, EventWaitList, OutEvent);
}

ur_result_t enqueueMemSetShadow(ur_context_handle_t Context,
ur_result_t enqueueMemSetShadow(std::shared_ptr<ContextInfo> &ContextInfo,
std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_queue_handle_t Queue, uptr Ptr, uptr Size,
u8 Value) {
Expand Down Expand Up @@ -105,9 +106,10 @@ ur_result_t enqueueMemSetShadow(ur_context_handle_t Context,
}

assert(ShadowBegin <= ShadowEnd);

{
static const size_t PageSize =
GetVirtualMemGranularity(Context, DeviceInfo->Handle);
static const size_t PageSize = GetVirtualMemGranularity(
ContextInfo->Handle, DeviceInfo->Handle);

ur_physical_mem_properties_t Desc{
UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES, nullptr, 0};
Expand All @@ -118,8 +120,8 @@ ur_result_t enqueueMemSetShadow(ur_context_handle_t Context,
MappedPtr <= ShadowEnd; MappedPtr += PageSize) {
if (!PhysicalMem) {
auto URes = getContext()->urDdiTable.PhysicalMem.pfnCreate(
Context, DeviceInfo->Handle, PageSize, &Desc,
&PhysicalMem);
ContextInfo->Handle, DeviceInfo->Handle, PageSize,
&Desc, &PhysicalMem);
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error("urPhysicalMemCreate(): {}",
URes);
Expand All @@ -133,8 +135,8 @@ ur_result_t enqueueMemSetShadow(ur_context_handle_t Context,

// FIXME: There is no flag to tell whether the failure was caused by the VA already being mapped
auto URes = getContext()->urDdiTable.VirtualMem.pfnMap(
Context, (void *)MappedPtr, PageSize, PhysicalMem, 0,
UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE);
ContextInfo->Handle, (void *)MappedPtr, PageSize,
PhysicalMem, 0, UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE);
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.debug("urVirtualMemMap({}, {}): {}",
(void *)MappedPtr, PageSize,
Expand All @@ -145,6 +147,7 @@ ur_result_t enqueueMemSetShadow(ur_context_handle_t Context,
if (URes == UR_RESULT_SUCCESS) {
// Reset PhysicalMem to null since it's been mapped
PhysicalMem = nullptr;
ContextInfo->Stats.UpdateShadowMmaped(PageSize);

auto URes =
urEnqueueUSMSet(Queue, (void *)MappedPtr, 0, PageSize);
Expand Down Expand Up @@ -173,11 +176,20 @@ ur_result_t enqueueMemSetShadow(ur_context_handle_t Context,

} // namespace

SanitizerInterceptor::SanitizerInterceptor(logger::Logger &logger)
: logger(logger) {
if (Options(logger).MaxQuarantineSizeMB) {
/// Destroys the per-context sanitizer state: prints the collected statistics
/// when the PrintStats option is set, then releases the context handle.
ContextInfo::~ContextInfo() {
    // Print before the release below, while this object still holds Handle.
    const auto &SanitizerOptions = getContext()->interceptor->getOptions();
    if (SanitizerOptions.PrintStats) {
        Stats.Print(Handle);
    }

    // The status is only inspected by the assert, hence [[maybe_unused]].
    [[maybe_unused]] auto ReleaseStatus =
        getContext()->urDdiTable.Context.pfnRelease(Handle);
    assert(ReleaseStatus == UR_RESULT_SUCCESS);
}

SanitizerInterceptor::SanitizerInterceptor() {
if (getOptions().MaxQuarantineSizeMB) {
m_Quarantine = std::make_unique<Quarantine>(
static_cast<uint64_t>(Options(logger).MaxQuarantineSizeMB) * 1024 *
static_cast<uint64_t>(getOptions().MaxQuarantineSizeMB) * 1024 *
1024);
}
}
Expand Down Expand Up @@ -231,8 +243,8 @@ ur_result_t SanitizerInterceptor::allocateMemory(
Alignment = MinAlignment;
}

uptr RZLog = ComputeRZLog(Size, Options(logger).MinRZSize,
Options(logger).MaxRZSize);
uptr RZLog =
ComputeRZLog(Size, getOptions().MinRZSize, getOptions().MaxRZSize);
uptr RZSize = RZLog2Size(RZLog);
uptr RoundedSize = RoundUpTo(Size, Alignment);
uptr NeededSize = RoundedSize + RZSize * 2;
Expand All @@ -259,6 +271,9 @@ ur_result_t SanitizerInterceptor::allocateMemory(
return UR_RESULT_ERROR_INVALID_ARGUMENT;
}

// Update statistics
ContextInfo->Stats.UpdateUSMMalloced(NeededSize, NeededSize - Size);

uptr AllocBegin = reinterpret_cast<uptr>(Allocated);
[[maybe_unused]] uptr AllocEnd = AllocBegin + NeededSize;
uptr UserBegin = AllocBegin + RZSize;
Expand Down Expand Up @@ -313,7 +328,8 @@ ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context,
}

auto AllocInfoIt = *AllocInfoItOp;
auto &AllocInfo = AllocInfoIt->second;
// NOTE: AllocInfoIt will be erased later, so "AllocInfo" must be a copy of the shared_ptr here, not a reference into the map
auto AllocInfo = AllocInfoIt->second;

if (AllocInfo->Context != Context) {
if (AllocInfo->UserBegin == Addr) {
Expand Down Expand Up @@ -347,23 +363,35 @@ ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context,
// If quarantine is disabled, USM is freed immediately
if (!m_Quarantine) {
getContext()->logger.debug("Free: {}", (void *)AllocInfo->AllocBegin);

ContextInfo->Stats.UpdateUSMRealFreed(AllocInfo->AllocSize,
AllocInfo->getRedzoneSize());

std::scoped_lock<ur_shared_mutex> Guard(m_AllocationMapMutex);
m_AllocationMap.erase(AllocInfoIt);

return getContext()->urDdiTable.USM.pfnFree(
Context, (void *)(AllocInfo->AllocBegin));
}

// If quarantine is enabled, cache the allocation instead of freeing it.
// Entries returned by put() are the ones that get freed for real here.
auto ReleaseList = m_Quarantine->put(AllocInfo->Device, AllocInfoIt);
if (ReleaseList.size()) {
    std::scoped_lock<ur_shared_mutex> Guard(m_AllocationMapMutex);
    for (auto &It : ReleaseList) {
        // Keep our own shared_ptr: "It" is invalidated by the erase() below,
        // but the allocation info is still needed for the actual free.
        auto EvictedInfo = It->second;

        getContext()->logger.info("Quarantine Free: {}",
                                  (void *)EvictedInfo->AllocBegin);

        // Account for the entry that is really being freed, not for the
        // allocation that was just put into quarantine.
        ContextInfo->Stats.UpdateUSMRealFreed(EvictedInfo->AllocSize,
                                              EvictedInfo->getRedzoneSize());

        m_AllocationMap.erase(It);

        UR_CALL(getContext()->urDdiTable.USM.pfnFree(
            Context, (void *)(EvictedInfo->AllocBegin)));
    }
}
ContextInfo->Stats.UpdateUSMFreed(AllocInfo->AllocSize);

return UR_RESULT_SUCCESS;
}
Expand All @@ -385,8 +413,8 @@ ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
return UR_RESULT_ERROR_INVALID_QUEUE;
}

UR_CALL(
prepareLaunch(Context, DeviceInfo, InternalQueue, Kernel, LaunchInfo));
UR_CALL(prepareLaunch(ContextInfo, DeviceInfo, InternalQueue, Kernel,
LaunchInfo));

UR_CALL(updateShadowMemory(ContextInfo, DeviceInfo, InternalQueue));

Expand Down Expand Up @@ -449,8 +477,9 @@ ur_result_t DeviceInfo::allocShadowMemory(ur_context_handle_t Context) {
///
/// ref: https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm#mapping
ur_result_t SanitizerInterceptor::enqueueAllocInfo(
ur_context_handle_t Context, std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_queue_handle_t Queue, std::shared_ptr<AllocInfo> &AI) {
std::shared_ptr<ContextInfo> &ContextInfo,
std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue,
std::shared_ptr<AllocInfo> &AI) {
if (AI->IsReleased) {
int ShadowByte;
switch (AI->Type) {
Expand All @@ -470,13 +499,13 @@ ur_result_t SanitizerInterceptor::enqueueAllocInfo(
ShadowByte = 0xff;
assert(false && "Unknow AllocInfo Type");
}
UR_CALL(enqueueMemSetShadow(Context, DeviceInfo, Queue, AI->AllocBegin,
AI->AllocSize, ShadowByte));
UR_CALL(enqueueMemSetShadow(ContextInfo, DeviceInfo, Queue,
AI->AllocBegin, AI->AllocSize, ShadowByte));
return UR_RESULT_SUCCESS;
}

// Init zero
UR_CALL(enqueueMemSetShadow(Context, DeviceInfo, Queue, AI->AllocBegin,
UR_CALL(enqueueMemSetShadow(ContextInfo, DeviceInfo, Queue, AI->AllocBegin,
AI->AllocSize, 0));

uptr TailBegin = RoundUpTo(AI->UserEnd, ASAN_SHADOW_GRANULARITY);
Expand All @@ -486,8 +515,8 @@ ur_result_t SanitizerInterceptor::enqueueAllocInfo(
if (TailBegin != AI->UserEnd) {
auto Value =
AI->UserEnd - RoundDownTo(AI->UserEnd, ASAN_SHADOW_GRANULARITY);
UR_CALL(enqueueMemSetShadow(Context, DeviceInfo, Queue, AI->UserEnd, 1,
static_cast<u8>(Value)));
UR_CALL(enqueueMemSetShadow(ContextInfo, DeviceInfo, Queue, AI->UserEnd,
1, static_cast<u8>(Value)));
}

int ShadowByte;
Expand All @@ -513,11 +542,11 @@ ur_result_t SanitizerInterceptor::enqueueAllocInfo(
}

// Left red zone
UR_CALL(enqueueMemSetShadow(Context, DeviceInfo, Queue, AI->AllocBegin,
UR_CALL(enqueueMemSetShadow(ContextInfo, DeviceInfo, Queue, AI->AllocBegin,
AI->UserBegin - AI->AllocBegin, ShadowByte));

// Right red zone
UR_CALL(enqueueMemSetShadow(Context, DeviceInfo, Queue, TailBegin,
UR_CALL(enqueueMemSetShadow(ContextInfo, DeviceInfo, Queue, TailBegin,
TailEnd - TailBegin, ShadowByte));

return UR_RESULT_SUCCESS;
Expand All @@ -530,7 +559,7 @@ ur_result_t SanitizerInterceptor::updateShadowMemory(
std::scoped_lock<ur_shared_mutex> Guard(AllocInfos.Mutex);

for (auto &AI : AllocInfos.List) {
UR_CALL(enqueueAllocInfo(ContextInfo->Handle, DeviceInfo, Queue, AI));
UR_CALL(enqueueAllocInfo(ContextInfo, DeviceInfo, Queue, AI));
}
AllocInfos.List.clear();

Expand Down Expand Up @@ -693,23 +722,23 @@ SanitizerInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) {
}

ur_result_t SanitizerInterceptor::prepareLaunch(
ur_context_handle_t Context, std::shared_ptr<DeviceInfo> &DeviceInfo,
ur_queue_handle_t Queue, ur_kernel_handle_t Kernel,
USMLaunchInfo &LaunchInfo) {
std::shared_ptr<ContextInfo> &ContextInfo,
std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue,
ur_kernel_handle_t Kernel, USMLaunchInfo &LaunchInfo) {
auto Program = GetProgram(Kernel);

do {
auto KernelInfo = getKernelInfo(Kernel);

// Validate pointer arguments
if (Options(logger).DetectKernelArguments) {
if (getOptions().DetectKernelArguments) {
for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs) {
auto Ptr = PtrPair.first;
if (Ptr == nullptr) {
continue;
}
if (auto ValidateResult = ValidateUSMPointer(
Context, DeviceInfo->Handle, (uptr)Ptr)) {
ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) {
ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr,
ValidateResult, PtrPair.second);
exit(1);
Expand Down Expand Up @@ -767,7 +796,7 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
// We use "uint64_t" here because EnqueueWriteGlobal fails when the type is "uint32_t".
// EnqueueWriteGlobal is an asynchronous write, so the variable is "static" to
// extend its lifetime beyond this call.
static uint64_t Debug = Options(logger).Debug ? 1 : 0;
static uint64_t Debug = getOptions().Debug ? 1 : 0;
EnqueueWriteGlobal(kSPIR_AsanDebug, &Debug, sizeof(Debug));

// Write shadow memory offset for global memory
Expand Down Expand Up @@ -808,12 +837,12 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
LocalWorkSize[Dim];
}

auto EnqueueAllocateShadowMemory = [Context, &DeviceInfo,
auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle,
Device = DeviceInfo->Handle,
Queue](size_t Size, uptr &Ptr) {
void *Allocated = nullptr;
auto URes = getContext()->urDdiTable.USM.pfnDeviceAlloc(
Context, DeviceInfo->Handle, nullptr, nullptr, Size,
&Allocated);
Context, Device, nullptr, nullptr, Size, &Allocated);
if (URes != UR_RESULT_SUCCESS) {
return URes;
}
Expand All @@ -840,7 +869,7 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
LocalMemoryUsage, PrivateMemoryUsage);

// Write shadow memory offset for local memory
if (Options(logger).DetectLocals) {
if (getOptions().DetectLocals) {
// The CPU doesn't need this
if (DeviceInfo->Type == DeviceType::GPU_PVC ||
DeviceInfo->Type == DeviceType::GPU_DG2) {
Expand Down Expand Up @@ -871,6 +900,9 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
LaunchInfo.Data->LocalShadowOffset +
LocalShadowMemorySize - 1;

ContextInfo->Stats.UpdateShadowMalloced(
LocalShadowMemorySize);

getContext()->logger.info(
"ShadowMemory(Local, {} - {})",
(void *)LaunchInfo.Data->LocalShadowOffset,
Expand All @@ -880,7 +912,7 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
}

// Write shadow memory offset for private memory
if (Options(logger).DetectPrivates) {
if (getOptions().DetectPrivates) {
if (DeviceInfo->Type == DeviceType::CPU) {
LaunchInfo.Data->PrivateShadowOffset = DeviceInfo->ShadowOffset;
} else if (DeviceInfo->Type == DeviceType::GPU_PVC ||
Expand Down Expand Up @@ -908,6 +940,10 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
LaunchInfo.Data->PrivateShadowOffsetEnd =
LaunchInfo.Data->PrivateShadowOffset +
PrivateShadowMemorySize - 1;

ContextInfo->Stats.UpdateShadowMalloced(
PrivateShadowMemorySize);

getContext()->logger.info(
"ShadowMemory(Private, {} - {})",
(void *)LaunchInfo.Data->PrivateShadowOffset,
Expand Down Expand Up @@ -966,13 +1002,19 @@ USMLaunchInfo::~USMLaunchInfo() {
[[maybe_unused]] ur_result_t Result;
if (Data) {
auto Type = GetDeviceType(Context, Device);
auto ContextInfo = getContext()->interceptor->getContextInfo(Context);
if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
if (Data->PrivateShadowOffset) {
ContextInfo->Stats.UpdateShadowFreed(
Data->PrivateShadowOffsetEnd - Data->PrivateShadowOffset +
1);
Result = getContext()->urDdiTable.USM.pfnFree(
Context, (void *)Data->PrivateShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
}
if (Data->LocalShadowOffset) {
ContextInfo->Stats.UpdateShadowFreed(
Data->LocalShadowOffsetEnd - Data->LocalShadowOffset + 1);
Result = getContext()->urDdiTable.USM.pfnFree(
Context, (void *)Data->LocalShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
Expand Down
Loading
Loading