Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functionality to automatically detect NaNs in buffers/images before/after kernel enqueue #306

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions intercept/src/controls.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ CLI_CONTROL( bool, AppendPid, false, "If s
CLI_CONTROL( bool, KernelNameHashTracking, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will append the program and build option hashes to the kernel name in logs and reports." )
CLI_CONTROL( cl_uint, LongKernelNameCutoff, UINT_MAX, "If an OpenCL application uses kernels with very long names, the Intercept Layer for OpenCL Applications can substitute a \"short\" kernel identifier for a \"long\" kernel name in logs and reports. This control defines how long a kernel name must be (in characters) before it is replaced by a \"short\" kernel identifier." )
CLI_CONTROL( bool, DemangleKernelNames, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will track kernel names that are demangled according to C++ ABI rules. This setting requires compiler support for demangling and may not be available in all configurations." )
CLI_CONTROL( bool, DetectNaNs, false, "If set, the Intercept Layer for OpenCL Applications will detect NaNs appearing in fp buffers and fp images and log the first kernel (name and enqueue counter) which caused these NaNs." )

CLI_CONTROL_SEPARATOR( Reporting Controls: )
CLI_CONTROL( bool, ReportToStderr, false, "If set to a nonzero value, the Intercept Layer for OpenCL Applications will emit reports to stderr." )
Expand Down
4 changes: 3 additions & 1 deletion intercept/src/dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4817,8 +4817,9 @@ CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueNDRangeKernel)(

INCREMENT_ENQUEUE_COUNTER();
DUMP_BUFFERS_BEFORE_ENQUEUE( kernel, command_queue );
DUMP_REPLAYABLE_KERNEL( kernel, command_queue, work_dim, global_work_offset, global_work_size, local_work_size );
DUMP_IMAGES_BEFORE_ENQUEUE( kernel, command_queue );
DUMP_REPLAYABLE_KERNEL( kernel, command_queue, work_dim, global_work_offset, global_work_size, local_work_size );
CHECK_FOR_NANS( "Before", kernel, command_queue, work_dim, global_work_size );
CHECK_AUBCAPTURE_START_KERNEL(
kernel,
work_dim,
Expand Down Expand Up @@ -4926,6 +4927,7 @@ CL_API_ENTRY cl_int CL_API_CALL CLIRN(clEnqueueNDRangeKernel)(

DUMP_BUFFERS_AFTER_ENQUEUE( kernel, command_queue );
DUMP_IMAGES_AFTER_ENQUEUE( kernel, command_queue );
CHECK_FOR_NANS( "After", kernel, command_queue, work_dim, global_work_size );
FINISH_OR_FLUSH_AFTER_ENQUEUE( command_queue );
CHECK_AUBCAPTURE_STOP( command_queue );

Expand Down
240 changes: 236 additions & 4 deletions intercept/src/intercept.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <stdarg.h>
#include <sstream>
#include <time.h> // strdate
#include <cmath>

#include "common.h"
#include "demangle.h"
Expand Down Expand Up @@ -7455,13 +7456,18 @@ void CLIntercept::dumpKernelInfo(
for ( cl_uint idx = 0; idx != numArgs; ++idx )
{
size_t argNameSize = 0;
dispatch().clGetKernelArgInfo(kernel, idx, CL_KERNEL_ARG_TYPE_NAME, 0, nullptr, &argNameSize);
int error = dispatch().clGetKernelArgInfo(kernel, idx, CL_KERNEL_ARG_TYPE_NAME, 0, nullptr, &argNameSize);
if ( error != CL_SUCCESS || argNameSize == 0 )
{
log( "Note: Kernel Argument info not available for replaying.\n" );
return;
}

std::string argName(argNameSize, ' ');
int error = dispatch().clGetKernelArgInfo(kernel, idx, CL_KERNEL_ARG_TYPE_NAME, argNameSize, &argName, nullptr);
if ( error == CL_KERNEL_ARG_INFO_NOT_AVAILABLE )
error = dispatch().clGetKernelArgInfo(kernel, idx, CL_KERNEL_ARG_TYPE_NAME, argNameSize, &argName[0], nullptr);
if ( error != CL_SUCCESS )
{
log("Note: Kernel Argument info not available for replaying.\n");
log( "Note: Kernel Argument info not available for replaying.\n" );
return;
}
outputArgTypes << argName << '\n';
Expand Down Expand Up @@ -7519,6 +7525,232 @@ void CLIntercept::dumpArgumentsForKernel(
}
}

void CLIntercept::detectNaNs(
std::string when,
cl_kernel kernel,
const uint64_t enqueueCounter,
cl_command_queue command_queue,
size_t work_dim,
size_t const* gws )
{
std::lock_guard<std::mutex> lock(m_Mutex);

cl_platform_id platform = getPlatform(kernel);

std::vector<char> transferBuf;
std::string fileNamePrefix = "";
OS().GetDumpDirectoryName( sc_DumpDirectoryName, fileNamePrefix );
fileNamePrefix += "/NaN_checker_" + std::to_string(m_ProcessId) + ".txt";

std::ofstream output{fileNamePrefix, std::ios_base::app};

// for all buffers & images
// See if we have NaNs
CKernelArgMemMap& kernelArgMemMap = m_KernelArgMap[ kernel ];
CKernelArgMemMap::iterator idx = kernelArgMemMap.begin();
while( idx != kernelArgMemMap.end() )
{
cl_uint arg_index = (*idx).first;
void* allocation = (void*)(*idx).second;
cl_mem memobj = (cl_mem)allocation;
++idx;

// check if we have a buffer
if( ( m_USMAllocInfoMap.find( allocation ) != m_USMAllocInfoMap.end() ) ||
( m_SVMAllocInfoMap.find( allocation ) != m_SVMAllocInfoMap.end() ) ||
( m_BufferInfoMap.find( memobj ) != m_BufferInfoMap.end() ) )
{
void* bufferPtr = nullptr;
size_t bufferSize = 0;

// check if the type is a floating point number
size_t argNameSize = 0;
dispatch().clGetKernelArgInfo(kernel, arg_index, CL_KERNEL_ARG_TYPE_NAME, 0, nullptr, &argNameSize);

std::string argType(' ', static_cast<int>(argNameSize));
dispatch().clGetKernelArgInfo(kernel, arg_index, CL_KERNEL_ARG_TYPE_NAME, argNameSize, &argType[0], nullptr);

if( argType.find("float") != std::string::npos &&
argType.find("double") != std::string::npos )
continue;

if( m_USMAllocInfoMap.find( allocation ) != m_USMAllocInfoMap.end() )
{
size_t size = m_USMAllocInfoMap[ allocation ];

if( dispatchX(platform).clEnqueueMemcpyINTEL == NULL )
{
getExtensionFunctionAddress(
platform,
"clEnqueueMemcpyINTEL" );
}
if( transferBuf.size() < size )
{
transferBuf.resize(size);
}

const auto& dispatchX = this->dispatchX(platform);
if( dispatchX.clEnqueueMemcpyINTEL &&
transferBuf.size() >= size )
{
cl_int error = dispatchX.clEnqueueMemcpyINTEL(
command_queue,
CL_TRUE,
transferBuf.data(),
allocation,
size,
0,
nullptr,
nullptr );

if( error == CL_SUCCESS )
{
bufferPtr = transferBuf.data();
bufferSize = size;
}
}
}
else if( m_SVMAllocInfoMap.find( allocation ) != m_SVMAllocInfoMap.end() )
{
size_t size = m_SVMAllocInfoMap[ allocation ];
cl_int error = dispatch().clEnqueueSVMMap(
command_queue,
CL_TRUE,
CL_MAP_READ,
allocation,
size,
0,
nullptr,
nullptr );
if( error == CL_SUCCESS )
{
bufferPtr = allocation;
bufferSize = size;
dispatch().clEnqueueSVMUnmap(
command_queue,
allocation,
0,
nullptr,
nullptr );
}
}
else if( m_BufferInfoMap.find( memobj ) != m_BufferInfoMap.end() )
{
size_t size = m_BufferInfoMap[ memobj ];

cl_int error = CL_SUCCESS;
bufferPtr = dispatch().clEnqueueMapBuffer(
command_queue,
memobj,
CL_TRUE,
CL_MAP_READ,
0,
size,
0,
nullptr,
nullptr,
&error );
if( error == CL_SUCCESS )
{
bufferSize = size;
dispatch().clEnqueueUnmapMemObject(
command_queue,
memobj,
bufferPtr,
0,
nullptr,
nullptr );
}
}
if (bufferPtr == nullptr)
continue;

bool foundNaN = false;
if( argType.find("float") != std::string::npos )
{
for( unsigned idx = 0; idx < bufferSize / sizeof(float); idx += sizeof(float) )
{
if( std::isnan( reinterpret_cast<float*>(bufferPtr)[idx] ))
{
foundNaN = true;
break;
}
}
} else
{
for( unsigned idx = 0; idx < bufferSize / sizeof(double); idx += sizeof(double) )
{
if( std::isnan( reinterpret_cast<double*>(bufferPtr)[idx] ))
{
foundNaN = true;
break;
}
}
}
if( foundNaN )
{
std::string tmp = when +
" kernel: " + getShortKernelName( kernel ) +
", EnqueueCtr: " + std::to_string( enqueueCounter ) +
", arg_index: " + std::to_string( arg_index ) +
", data type: " + argType.c_str() +
", has a NaN.\n";
output << tmp;
}
continue;
}
// Not a buffer, should be an image
if( m_ImageInfoMap.find( memobj ) != m_ImageInfoMap.end() )
{
const SImageInfo& info = m_ImageInfoMap[ memobj ];
if( info.Format.image_channel_data_type == CL_FLOAT )
{
size_t size =
info.Region[0] *
info.Region[1] *
info.Region[2] *
info.ElementSize;
std::vector<char> readImageData(size);

if( readImageData.data() )
{
size_t origin[3] = { 0, 0, 0 };
cl_int error = dispatch().clEnqueueReadImage(
command_queue,
memobj,
CL_TRUE,
origin,
info.Region,
0,
0,
readImageData.data(),
0,
nullptr,
nullptr );

if( error == CL_SUCCESS )
{
for( unsigned idx = 0; idx < size / sizeof( float ); idx += sizeof( float ) )
{
if( std::isnan( reinterpret_cast<float*>(readImageData.data())[idx] ))
{
std::string tmp = when +
" kernel: " + getShortKernelName( kernel ) +
", EnqueueCtr: " + std::to_string( enqueueCounter ) +
", arg_index: " + std::to_string( arg_index ) +
", data type: CL_FLOAT" +
", has a NaN.\n";
output << tmp;
break;
}
}
}
}
}
}
}
}

///////////////////////////////////////////////////////////////////////////////
//
void CLIntercept::dumpBuffersForKernel(
Expand Down
Loading