Skip to content

Commit

Permalink
[SYCL][DeviceSanitizer] Checking "sycl::free" related errors (#12882)
Browse files Browse the repository at this point in the history
UR: oneapi-src/unified-runtime#1402

This PR added supports for checking the following types of error in
"UR_LAYER_ASAN":

- bad-free: the memory address to be freed is not allocated by UR
- bad-context: the memory address to be freed uses a wrong "context"
- double-free: the memory address to be freed is already freed 
- use-after-free: the freed memory is used in kernel

I added the environment variable "UR_LAYER_ASAN_OPTIONS" to have
additional control over "UR_LAYER_ASAN", which is similar to
"ASAN_OPTIONS" in
[ASan](https://github.com/google/sanitizers/wiki/AddressSanitizerFlags).
Currently, it supports:

- "quarantine_size_mb" (default = 0)
- Size (in MB) of quarantine per device. The pointers passed to
urUSMFree are not freed immediately, but saved into QuarantineCache (per
device cache), and when the cached chunk size (only counts the size of
USM buffer, not shadow memory) is more than "quarantine_size_mb", the
first enqueued chunk will be freed (aka., FIFO). Lower value may reduce
memory usage but increase the chance of false negatives
- This option must be enabled for checking "double-free" and
"use-after-free"
- "debug" (default = 0)
- Print extra debug messages in kernel ("__AsanDebug” in
“libdevice/sanitizer_utils.cpp”), which is helpful for DeviceSanitizer
developers.

For example, to enable "use-after-free" with 5MB quarantine cache and
debug message in kernel, you need to
```bash
UR_LAYER_ASAN_OPTIONS="quarantine_size_mb:5;debug:1" ./sycl_app
```

---------

Co-authored-by: Maosu Zhao <maosu.zhao@intel.com>
Co-authored-by: Aaron Greig <aaron.greig@codeplay.com>
  • Loading branch information
3 people committed Apr 19, 2024
1 parent a145848 commit 4723efc
Show file tree
Hide file tree
Showing 30 changed files with 848 additions and 475 deletions.
155 changes: 155 additions & 0 deletions libdevice/include/asan_libdevice.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
//===---- asan_libdevice.hpp - Structure and declaration for sanitizer ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include <cinttypes>

// NOTE This file should be sync with
// unified-runtime/source/loader/layers/sanitizer/device_sanitizer_report.hpp

enum class DeviceSanitizerErrorType : int32_t {
UNKNOWN,
OUT_OF_BOUNDS,
MISALIGNED,
USE_AFTER_FREE,
OUT_OF_SHADOW_BOUNDS,
UNKNOWN_DEVICE,
NULL_POINTER,
};

enum class DeviceSanitizerMemoryType : int32_t {
UNKNOWN,
USM_DEVICE,
USM_HOST,
USM_SHARED,
LOCAL,
PRIVATE,
MEM_BUFFER,
DEVICE_GLOBAL,
};

struct DeviceSanitizerReport {
int Flag = 0;

char File[256 + 1] = {};
char Func[256 + 1] = {};

int32_t Line = 0;

uint64_t GID0 = 0;
uint64_t GID1 = 0;
uint64_t GID2 = 0;

uint64_t LID0 = 0;
uint64_t LID1 = 0;
uint64_t LID2 = 0;

uintptr_t Address = 0;
bool IsWrite = false;
uint32_t AccessSize = 0;
DeviceSanitizerMemoryType MemoryType = DeviceSanitizerMemoryType::UNKNOWN;
DeviceSanitizerErrorType ErrorType = DeviceSanitizerErrorType::UNKNOWN;

bool IsRecover = false;
};

struct LocalArgsInfo {
uint32_t ArgIndex = 0;
uint64_t Size = 0;
uint64_t SizeWithRedZone = 0;
};

struct LaunchInfo {
uintptr_t PrivateShadowOffset =
0; // don't move this field, we use it in AddressSanitizerPass

uintptr_t LocalShadowOffset = 0;
uintptr_t LocalShadowOffsetEnd = 0;
DeviceSanitizerReport SanitizerReport;

uint32_t NumLocalArgs = 0;
LocalArgsInfo *LocalArgs = nullptr; // ordered by ArgIndex
};

constexpr unsigned ASAN_SHADOW_SCALE = 3;
constexpr unsigned ASAN_SHADOW_GRANULARITY = 1ULL << ASAN_SHADOW_SCALE;

// Based on the observation, only the last 24 bits of the address of the private
// variable have changed, we use 31 bits(2G) to be safe.
constexpr std::size_t ASAN_PRIVATE_SIZE = 0x7fffffffULL + 1;

// These magic values are written to shadow for better error
// reporting.
constexpr int kUsmDeviceRedzoneMagic = (char)0x81;
constexpr int kUsmHostRedzoneMagic = (char)0x82;
constexpr int kUsmSharedRedzoneMagic = (char)0x83;
constexpr int kMemBufferRedzoneMagic = (char)0x84;
constexpr int kDeviceGlobalRedzoneMagic = (char)0x85;
constexpr int kNullPointerRedzoneMagic = (char)0x86;

constexpr int kUsmDeviceDeallocatedMagic = (char)0x91;
constexpr int kUsmHostDeallocatedMagic = (char)0x92;
constexpr int kUsmSharedDeallocatedMagic = (char)0x93;
constexpr int kMemBufferDeallocatedMagic = (char)0x93;

constexpr int kSharedLocalRedzoneMagic = (char)0xa1;

// Same with host ASan stack
const int kPrivateLeftRedzoneMagic = (char)0xf1;
const int kPrivateMidRedzoneMagic = (char)0xf2;
const int kPrivateRightRedzoneMagic = (char)0xf3;

constexpr auto kSPIR_AsanShadowMemoryGlobalStart =
"__AsanShadowMemoryGlobalStart";
constexpr auto kSPIR_AsanShadowMemoryGlobalEnd = "__AsanShadowMemoryGlobalEnd";

constexpr auto kSPIR_DeviceType = "__DeviceType";
constexpr auto kSPIR_AsanDebug = "__AsanDebug";

constexpr auto kSPIR_AsanDeviceGlobalCount = "__AsanDeviceGlobalCount";
constexpr auto kSPIR_AsanDeviceGlobalMetadata = "__AsanDeviceGlobalMetadata";

inline const char *ToString(DeviceSanitizerMemoryType MemoryType) {
switch (MemoryType) {
case DeviceSanitizerMemoryType::USM_DEVICE:
return "Device USM";
case DeviceSanitizerMemoryType::USM_HOST:
return "Host USM";
case DeviceSanitizerMemoryType::USM_SHARED:
return "Shared USM";
case DeviceSanitizerMemoryType::LOCAL:
return "Local Memory";
case DeviceSanitizerMemoryType::PRIVATE:
return "Private Memory";
case DeviceSanitizerMemoryType::MEM_BUFFER:
return "Memory Buffer";
case DeviceSanitizerMemoryType::DEVICE_GLOBAL:
return "Device Global";
default:
return "Unknown Memory";
}
}

inline const char *ToString(DeviceSanitizerErrorType ErrorType) {
switch (ErrorType) {
case DeviceSanitizerErrorType::OUT_OF_BOUNDS:
return "out-of-bounds-access";
case DeviceSanitizerErrorType::MISALIGNED:
return "misaligned-access";
case DeviceSanitizerErrorType::USE_AFTER_FREE:
return "use-after-free";
case DeviceSanitizerErrorType::OUT_OF_SHADOW_BOUNDS:
return "out-of-shadow-bounds-access";
case DeviceSanitizerErrorType::UNKNOWN_DEVICE:
return "unknown-device";
case DeviceSanitizerErrorType::NULL_POINTER:
return "null-pointer-access";
default:
return "unknown-error";
}
}
56 changes: 0 additions & 56 deletions libdevice/include/device-sanitizer-report.hpp

This file was deleted.

2 changes: 1 addition & 1 deletion libdevice/include/sanitizer_device_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ class
T val;
};

enum DeviceType : uintptr_t { UNKNOWN, CPU, GPU_PVC, GPU_DG2 };
enum DeviceType : uint64_t { UNKNOWN, CPU, GPU_PVC, GPU_DG2 };
Loading

0 comments on commit 4723efc

Please sign in to comment.