Skip to content

Commit

Permalink
Merge branch 'main' into fp16_basic_fpmath
Browse files: browse the repository at this point in the history
  • Loading branch information
shajder committed Jun 12, 2023
2 parents 3c266f4 + 1011f8e commit 5dae748
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 40 deletions.
1 change: 0 additions & 1 deletion presubmit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ cmake .. -G Ninja \
-DBUILD_WSI_XLIB_SUPPORT=OFF \
-DBUILD_WSI_XCB_SUPPORT=OFF \
-DBUILD_WSI_WAYLAND_SUPPORT=OFF \
-DUSE_GAS=OFF \
-C helper.cmake ..
cmake --build . -j2

Expand Down
69 changes: 39 additions & 30 deletions test_conformance/basic/test_async_copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>


#include <vector>

#include "procs.h"
#include "harness/conversions.h"
Expand Down Expand Up @@ -86,8 +85,7 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
clKernelWrapper kernel;
clMemWrapper streams[ 2 ];
size_t threads[ 1 ], localThreads[ 1 ];
void *inBuffer, *outBuffer;
MTdata d;
MTdataHolder d(gRandomSeed);
char vecNameString[64]; vecNameString[0] = 0;
if (vecSize == 1)
sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
Expand All @@ -109,9 +107,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
char programSource[4096]; programSource[0]=0;
char *programPtr;

sprintf(programSource, kernelCode,
vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
std::string extStr = "";
if (vecType == kDouble)
extStr = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
else if (vecType == kHalf)
extStr = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable";

sprintf(programSource, kernelCode, extStr.c_str(), vecNameString,
vecNameString, vecNameString, vecNameString,
get_explicit_type_name(vecType), vecNameString, vecNameString);
//log_info("program: %s\n", programSource);
programPtr = programSource;

Expand Down Expand Up @@ -150,9 +154,10 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize;
size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;

inBuffer = (void*)malloc(globalBufferSize);
outBuffer = (void*)malloc(globalBufferSize);
memset(outBuffer, 0, globalBufferSize);
std::vector<unsigned char> inBuffer(globalBufferSize);
std::vector<unsigned char> outBuffer(globalBufferSize);

outBuffer.assign(globalBufferSize, 0);

cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
Expand All @@ -164,13 +169,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
threads[0] = globalWorkgroupSize;
localThreads[0] = localWorkgroupSize;

d = init_genrand( gRandomSeed );
generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
free_mtdata(d); d = NULL;
generate_random_data(vecType,
globalBufferSize / get_explicit_type_size(vecType), d,
&inBuffer.front());

streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
&inBuffer.front(), &error);
test_error( error, "Unable to create input buffer" );
streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
&outBuffer.front(), &error);
test_error( error, "Unable to create output buffer" );

error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
Expand All @@ -189,16 +196,18 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
test_error( error, "Unable to queue kernel" );

// Read
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, globalBufferSize,
&outBuffer.front(), 0, NULL, NULL);
test_error( error, "Unable to read results" );

// Verify
int failuresPrinted = 0;
if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 )
if (memcmp(&inBuffer.front(), &outBuffer.front(), globalBufferSize) != 0)
{
size_t typeSize = get_explicit_type_size(vecType)* vecSize;
unsigned char * inchar = (unsigned char*)inBuffer;
unsigned char * outchar = (unsigned char*)outBuffer;
unsigned char *inchar = static_cast<unsigned char *>(&inBuffer.front());
unsigned char *outchar =
static_cast<unsigned char *>(&outBuffer.front());
for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) {
if (memcmp( ((char *)inchar)+i, ((char *)outchar)+i, typeSize) != 0 )
{
Expand Down Expand Up @@ -226,26 +235,29 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
}
}

free(inBuffer);
free(outBuffer);

return failuresPrinted ? -1 : 0;
}

int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) {
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
const std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort,
kInt, kUInt, kLong, kULong,
kFloat, kHalf, kDouble };
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
unsigned int size, typeIndex;

int errors = 0;

for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
{
if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
continue;
bool fp16Support = is_extension_available(deviceID, "cl_khr_fp16");
bool fp64Support = is_extension_available(deviceID, "cl_khr_fp64");

for (typeIndex = 0; typeIndex < vecType.size(); typeIndex++)
{
if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
continue;
else if (vecType[typeIndex] == kDouble && !fp64Support)
continue;
else if (vecType[typeIndex] == kHalf && !fp16Support)
continue;

for( size = 0; vecSizes[ size ] != 0; size++ )
{
Expand All @@ -259,9 +271,6 @@ int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_qu
return 0;
}




int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel );
Expand Down
4 changes: 2 additions & 2 deletions test_conformance/basic/test_async_strided_copy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,9 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
if (memcmp(&inBuffer.at(i), &outBuffer.at(i), typeSize) != 0)
{
unsigned char *inchar =
static_cast<unsigned char *>(inBuffer.data());
static_cast<unsigned char *>(&inBuffer.at(i));
unsigned char *outchar =
static_cast<unsigned char *>(outBuffer.data());
static_cast<unsigned char *>(&outBuffer.at(i));
char values[4096];
values[0] = 0;

Expand Down
2 changes: 0 additions & 2 deletions test_conformance/non_uniform_work_group/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ set(${MODULE_NAME}_SOURCES
tools.cpp
)

set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")

include(../CMakeCommon.txt)

# end of file #
Original file line number Diff line number Diff line change
Expand Up @@ -448,13 +448,8 @@ void TestNonUniformWorkGroup::verifyData (DataContainerAttrib * reference, DataC
}

void TestNonUniformWorkGroup::calculateExpectedValues () {
size_t nonRemainderGlobalSize[MAX_DIMS];
size_t numberOfPossibleRegions[MAX_DIMS];

nonRemainderGlobalSize[0] = _globalSize[0] - (_globalSize[0] % _enqueuedLocalSize[0]);
nonRemainderGlobalSize[1] = _globalSize[1] - (_globalSize[1] % _enqueuedLocalSize[1]);
nonRemainderGlobalSize[2] = _globalSize[2] - (_globalSize[2] % _enqueuedLocalSize[2]);

numberOfPossibleRegions[0] = (_globalSize[0]>1)?2:1;
numberOfPossibleRegions[1] = (_globalSize[1]>1)?2:1;
numberOfPossibleRegions[2] = (_globalSize[2]>1)?2:1;
Expand Down Expand Up @@ -502,6 +497,11 @@ size_t TestNonUniformWorkGroup::getMaxLocalWorkgroupSize (const cl_device_id &de
if (TestNonUniformWorkGroup::_maxLocalWorkgroupSize == 0) {
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(TestNonUniformWorkGroup::_maxLocalWorkgroupSize), &TestNonUniformWorkGroup::_maxLocalWorkgroupSize, NULL);
if (err)
{
log_error("clGetDeviceInfo failed\n");
return 0;
}
}

return TestNonUniformWorkGroup::_maxLocalWorkgroupSize;
Expand Down
2 changes: 2 additions & 0 deletions test_conformance/relationals/test_comparisons_fp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
// limitations under the License.
//

#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
Expand Down

0 comments on commit 5dae748

Please sign in to comment.