diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index b0cda09fd..8a0567c34 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -14,11 +14,14 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/typeWrappers.h" #include #include #include #include +#include + #if ! defined( _WIN32) #if defined(__APPLE__) #include @@ -66,6 +69,16 @@ static void printUsage( void ); #define BUFFER_SIZE (1024*1024) #define KPAGESIZE 4096 +#define test_error_count(errCode, msg) \ + { \ + auto errCodeResult = errCode; \ + if (errCodeResult != CL_SUCCESS) \ + { \ + gFailCount++; \ + print_error(errCodeResult, msg); \ + return errCode; \ + } \ + } // When we indicate non wimpy mode, the types that are 32 bits value will // test their entire range and 64 bits test will test the 32 bit @@ -74,12 +87,6 @@ static void printUsage( void ); static bool s_wimpy_mode = false; static int s_wimpy_reduction_factor = 256; -// Tests are broken into the major test which is based on the -// src and cmp type and their corresponding vector types and -// sub tests which is for each individual test. The following -// tracks the subtests -int s_test_cnt = 0; - //----------------------------------------- // Static helper functions //----------------------------------------- @@ -237,6 +244,9 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont if (srctype == kdouble) strcpy( extension, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" ); + if (srctype == khalf) + strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); + // create type name and testname switch( vec_len ) { @@ -288,25 +298,14 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont return program; } - #define VECTOR_SIZE_COUNT 6 static int doTest(cl_command_queue queue, cl_context context, Type stype, Type cmptype, cl_device_id device) { int err = CL_SUCCESS; - int s_test_fail = 0; - MTdataHolder d; + MTdataHolder d(gRandomSeed); const size_t element_count[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; - cl_mem src1 = NULL; - cl_mem src2 = NULL; - cl_mem cmp = NULL; - cl_mem dest = NULL; - void *ref = NULL; - void *sref = NULL; - void *src1_host = NULL; - void *src2_host = NULL; - void *cmp_host = NULL; - void *dest_host = NULL; + clMemWrapper src1, src2, cmp, dest; cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; size_t block_elements = BUFFER_SIZE / type_size[stype]; @@ -315,16 +314,22 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c // It is more efficient to create the tests all at once since we // use the same test data on each of the vector sizes - int vecsize; - cl_program programs[VECTOR_SIZE_COUNT]; - cl_kernel kernels[VECTOR_SIZE_COUNT]; + clProgramWrapper programs[VECTOR_SIZE_COUNT]; + clKernelWrapper kernels[VECTOR_SIZE_COUNT]; - if(stype == kdouble && ! is_extension_available( device, "cl_khr_fp64" )) + if (stype == kdouble && !is_extension_available(device, "cl_khr_fp64")) { log_info("Skipping double because cl_khr_fp64 extension is not supported.\n"); return 0; } + if (stype == khalf && !is_extension_available(device, "cl_khr_fp16")) + { + log_info( + "Skipping half because cl_khr_fp16 extension is not supported.\n"); + return 0; + } + if (gIsEmbedded) { if (( stype == klong || stype == kulong ) && ! is_extension_available( device, "cles_khr_int64" )) @@ -340,54 +345,41 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c } } - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) - { - programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, cmptype, element_count[vecsize] ); - if (!programs[vecsize] || !kernels[vecsize]) { - ++s_test_fail; - ++s_test_cnt; - return -1; - } - } - - ref = malloc( BUFFER_SIZE ); - if( NULL == ref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } - sref = malloc( BUFFER_SIZE ); - if( NULL == sref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } src1 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate src1 buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate src1 buffer\n"); src2 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate src2 buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate src2 buffer\n"); cmp = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate cmp buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate cmp buffer\n"); dest = clCreateBuffer( context, CL_MEM_WRITE_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate dest buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate dest buffer\n"); - src1_host = malloc(BUFFER_SIZE); - if (NULL == src1_host) - { - log_error("Error: could not allocate src1_host buffer\n"); - goto exit; - } - src2_host = malloc(BUFFER_SIZE); - if (NULL == src2_host) - { - log_error("Error: could not allocate src2_host buffer\n"); - goto exit; - } - cmp_host = malloc(BUFFER_SIZE); - if (NULL == cmp_host) + for (int vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) { - log_error("Error: could not allocate cmp_host buffer\n"); - goto exit; - } - dest_host = malloc(BUFFER_SIZE); - if (NULL == dest_host) - { - log_error("Error: could not allocate dest_host buffer\n"); - goto exit; + programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, + cmptype, element_count[vecsize]); + if (!programs[vecsize] || !kernels[vecsize]) + { + return -1; + } + + err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); } + std::vector ref(BUFFER_SIZE); + std::vector sref(BUFFER_SIZE); + std::vector src1_host(BUFFER_SIZE); + std::vector src2_host(BUFFER_SIZE); + std::vector cmp_host(BUFFER_SIZE); + std::vector dest_host(BUFFER_SIZE); + // We block the test as we are running over the range of compare values // "block the test" means "break the test into blocks" if( type_size[stype] == 4 ) @@ -396,111 +388,63 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c cmp_stride = block_elements * step * (0xffffffffffffffffULL / 0x100000000ULL + 1); log_info("Testing..."); - d = MTdataHolder(gRandomSeed); uint64_t i; + for (i=0; i < blocks; i+=step) { - void *s1 = clEnqueueMapBuffer( queue, src1, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map src1" ); goto exit; } - // Setup the input data to change for each block - initSrcBuffer( s1, stype, d); - - void *s2 = clEnqueueMapBuffer( queue, src2, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map src2" ); goto exit; } - // Setup the input data to change for each block - initSrcBuffer( s2, stype, d); - - void *s3 = clEnqueueMapBuffer( queue, cmp, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map cmp" ); goto exit; } - // Setup the input data to change for each block - initCmpBuffer(s3, cmptype, i * cmp_stride, block_elements); - - if( (err = clEnqueueUnmapMemObject( queue, src1, s1, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap src1\n" ); ++s_test_fail; goto exit; } - if( (err = clEnqueueUnmapMemObject( queue, src2, s2, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap src2\n" ); ++s_test_fail; goto exit; } - if( (err = clEnqueueUnmapMemObject( queue, cmp, s3, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap cmp\n" ); ++s_test_fail; goto exit; } - - // Create the reference result - err = clEnqueueReadBuffer(queue, src1, CL_TRUE, 0, BUFFER_SIZE, - src1_host, 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from src1 to src1_host failed\n"); - ++s_test_fail; - goto exit; - } - err = clEnqueueReadBuffer(queue, src2, CL_TRUE, 0, BUFFER_SIZE, - src2_host, 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from src2 to src2_host failed\n"); - ++s_test_fail; - goto exit; - } - err = clEnqueueReadBuffer(queue, cmp, CL_TRUE, 0, BUFFER_SIZE, cmp_host, - 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from cmp to cmp_host failed\n"); - ++s_test_fail; - goto exit; - } + initSrcBuffer(src1_host.data(), stype, d); + initSrcBuffer(src2_host.data(), stype, d); + initCmpBuffer(cmp_host.data(), cmptype, i * cmp_stride, block_elements); + + err = clEnqueueWriteBuffer(queue, src1, CL_FALSE, 0, BUFFER_SIZE, + src1_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write src1"); + + err = clEnqueueWriteBuffer(queue, src2, CL_FALSE, 0, BUFFER_SIZE, + src2_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write src2"); + + err = clEnqueueWriteBuffer(queue, cmp, CL_FALSE, 0, BUFFER_SIZE, + cmp_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write cmp"); Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0] : vrefSelects[stype][1]; - (*sfunc)(ref, src1_host, src2_host, cmp_host, block_elements); + (*sfunc)(ref.data(), src1_host.data(), src2_host.data(), + cmp_host.data(), block_elements); sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0] : refSelects[stype][1]; - (*sfunc)(sref, src1_host, src2_host, cmp_host, block_elements); + (*sfunc)(sref.data(), src1_host.data(), src2_host.data(), + cmp_host.data(), block_elements); - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) + for (int vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) { size_t vector_size = element_count[vecsize] * type_size[stype]; size_t vector_count = (BUFFER_SIZE + vector_size - 1) / vector_size; - if((err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - - // Wipe destination - void *d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map dest" ); ++s_test_fail; goto exit; } - memset( d, -1, BUFFER_SIZE ); - if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) ){ log_error( "Error: Could not unmap dest" ); ++s_test_fail; goto exit; } + const cl_int pattern = -1; + err = clEnqueueFillBuffer(queue, dest, &pattern, sizeof(cl_int), 0, + BUFFER_SIZE, 0, nullptr, nullptr); + test_error_count(err, "clEnqueueFillBuffer failed"); + err = clEnqueueNDRangeKernel(queue, kernels[vecsize], 1, NULL, &vector_count, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) { - log_error("clEnqueueNDRangeKernel failed errcode:%d\n", err); - ++s_test_fail; - goto exit; - } + test_error_count(err, "clEnqueueNDRangeKernel failed errcode\n"); err = clEnqueueReadBuffer(queue, dest, CL_TRUE, 0, BUFFER_SIZE, - dest_host, 0, NULL, NULL); - if (err) - { - log_error( - "Error: Reading buffer from dest to dest_host failed\n"); - ++s_test_fail; - goto exit; - } + dest_host.data(), 0, NULL, NULL); + test_error_count( + err, "Error: Reading buffer from dest to dest_host failed\n"); - if ((*checkResults[stype])(dest_host, vecsize == 0 ? sref : ref, + if ((*checkResults[stype])(dest_host.data(), + vecsize == 0 ? sref.data() : ref.data(), block_elements, element_count[vecsize]) != 0) { log_error("vec_size:%d indx: 0x%16.16llx\n", (int)element_count[vecsize], i); - ++s_test_fail; - goto exit; + return TEST_FAIL; } } // for vecsize } // for i @@ -510,28 +454,6 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c else log_info(" Wimpy Passed\n\n"); -exit: - if( src1 ) clReleaseMemObject( src1 ); - if( src2 ) clReleaseMemObject( src2 ); - if( cmp ) clReleaseMemObject( cmp ); - if( dest) clReleaseMemObject( dest ); - if( ref ) free(ref ); - if( sref ) free(sref ); - if (src1_host) free(src1_host); - if (src2_host) free(src2_host); - if (cmp_host) free(cmp_host); - if (dest_host) free(dest_host); - - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; vecsize++) { - clReleaseKernel(kernels[vecsize]); - clReleaseProgram(programs[vecsize]); - } - ++s_test_cnt; - if (s_test_fail) - { - err = TEST_FAIL; - gFailCount++; - } return err; } @@ -567,6 +489,16 @@ int test_select_short_short(cl_device_id deviceID, cl_context context, cl_comman { return doTest(queue, context, kshort, kshort, deviceID); } +int test_select_half_ushort(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, khalf, kushort, deviceID); +} +int test_select_half_short(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, khalf, kshort, deviceID); +} int test_select_uint_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return doTest(queue, context, kuint, kuint, deviceID); @@ -617,26 +549,17 @@ int test_select_double_long(cl_device_id deviceID, cl_context context, cl_comman } test_definition test_list[] = { - ADD_TEST( select_uchar_uchar ), - ADD_TEST( select_uchar_char ), - ADD_TEST( select_char_uchar ), - ADD_TEST( select_char_char ), - ADD_TEST( select_ushort_ushort ), - ADD_TEST( select_ushort_short ), - ADD_TEST( select_short_ushort ), - ADD_TEST( select_short_short ), - ADD_TEST( select_uint_uint ), - ADD_TEST( select_uint_int ), - ADD_TEST( select_int_uint ), - ADD_TEST( select_int_int ), - ADD_TEST( select_float_uint ), - ADD_TEST( select_float_int ), - ADD_TEST( select_ulong_ulong ), - ADD_TEST( select_ulong_long ), - ADD_TEST( select_long_ulong ), - ADD_TEST( select_long_long ), - ADD_TEST( select_double_ulong ), - ADD_TEST( select_double_long ), + ADD_TEST(select_uchar_uchar), ADD_TEST(select_uchar_char), + ADD_TEST(select_char_uchar), ADD_TEST(select_char_char), + ADD_TEST(select_ushort_ushort), ADD_TEST(select_ushort_short), + ADD_TEST(select_short_ushort), ADD_TEST(select_short_short), + ADD_TEST(select_half_ushort), ADD_TEST(select_half_short), + ADD_TEST(select_uint_uint), ADD_TEST(select_uint_int), + ADD_TEST(select_int_uint), ADD_TEST(select_int_int), + ADD_TEST(select_float_uint), ADD_TEST(select_float_int), + ADD_TEST(select_ulong_ulong), ADD_TEST(select_ulong_long), + ADD_TEST(select_long_ulong), ADD_TEST(select_long_long), + ADD_TEST(select_double_ulong), ADD_TEST(select_double_long), }; const int test_num = ARRAY_SIZE( test_list ); diff --git a/test_conformance/select/test_select.h b/test_conformance/select/test_select.h index c51ae13c2..5cd786022 100644 --- a/test_conformance/select/test_select.h +++ b/test_conformance/select/test_select.h @@ -28,18 +28,20 @@ #endif // Defines the set of types we support (no support for double) -typedef enum { +typedef enum +{ kuchar = 0, kchar = 1, kushort = 2, kshort = 3, - kuint = 4, - kint = 5, - kfloat = 6, - kulong = 7, - klong = 8, - kdouble = 9, - kTypeCount // always goes last + khalf = 4, + kuint = 5, + kint = 6, + kfloat = 7, + kulong = 8, + klong = 9, + kdouble = 10, + kTypeCount // always goes last } Type; @@ -56,7 +58,8 @@ extern const size_t type_size[kTypeCount]; extern const Type ctype[kTypeCount][2]; // Reference functions for the primitive (non vector) type -typedef void (*Select)(void *dest, void *src1, void *src2, void *cmp, size_t c); +typedef void (*Select)(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t c); extern Select refSelects[kTypeCount][2]; // Reference functions for the primtive type but uses the vector @@ -64,7 +67,8 @@ extern Select refSelects[kTypeCount][2]; extern Select vrefSelects[kTypeCount][2]; // Check functions for each output type -typedef size_t (*CheckResults)(void *out1, void *out2, size_t count, size_t vectorSize); +typedef size_t (*CheckResults)(const void *const out1, const void *const out2, + size_t count, size_t vectorSize); extern CheckResults checkResults[kTypeCount]; // Helpful macros diff --git a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp index f9641e993..b85f54a76 100644 --- a/test_conformance/select/util_select.cpp +++ b/test_conformance/select/util_select.cpp @@ -13,7 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" #include "harness/errorHelpers.h" #include @@ -25,29 +24,28 @@ //----------------------------------------- -const char *type_name[kTypeCount] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "ulong", "long", "double" }; +const char *type_name[kTypeCount] = { "uchar", "char", "ushort", "short", + "half", "uint", "int", "float", + "ulong", "long", "double" }; const size_t type_size[kTypeCount] = { - sizeof(cl_uchar), sizeof(cl_char), - sizeof(cl_ushort), sizeof(cl_short), - sizeof(cl_uint), sizeof(cl_int), - sizeof(cl_float), sizeof(cl_ulong), sizeof(cl_long), sizeof( cl_double ) }; + sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short), + sizeof(cl_half), sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), + sizeof(cl_ulong), sizeof(cl_long), sizeof(cl_double) +}; const Type ctype[kTypeCount][2] = { - { kuchar, kchar }, // uchar - { kuchar, kchar }, // char - { kushort, kshort}, // ushort - { kushort, kshort}, // short - { kuint, kint }, // uint - { kuint, kint }, // int - { kuint, kint }, // float - { kulong, klong }, // ulong - { kulong, klong }, // long - { kulong, klong } // double + { kuchar, kchar }, // uchar + { kuchar, kchar }, // char + { kushort, kshort }, // ushort + { kushort, kshort }, // short + { kushort, kshort }, // half + { kuint, kint }, // uint + { kuint, kint }, // int + { kuint, kint }, // float + { kulong, klong }, // ulong + { kulong, klong }, // long + { kulong, klong } // double }; @@ -55,510 +53,594 @@ const Type ctype[kTypeCount][2] = { // Reference functions //----------------------------------------- -void refselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y, *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_char*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) { d[i] = m[i] ? y[i] : x[i]; } } -void refselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y; - cl_char *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_char*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) { d[i] = m[i] ? y[i] : x[i]; } } -void refselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_short *d, *x, *y, *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_short*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ushort *d, *x, *y; - cl_short *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_short*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_int *d, *x, *y, *m; - d = (cl_int*)dest; - x = (cl_int*)src1; - y = (cl_int*)src2; - m = (cl_int*)cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ +void refselect_1u32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*)dest; - x = (cl_uint*)src1; - y = (cl_uint*)src2; - m = (cl_int*)cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y, *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y; - cl_uchar *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_uchar*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y, *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_uchar*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y; - cl_ushort *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_ushort*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y, *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_ushort*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_uint *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_uint*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y, *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y, *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_hhi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; + for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_hhu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_int *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_int*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; + for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_ffi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ffu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_uint *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ddi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_long *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ddu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void vrefselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y, *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_char*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80) ? y[i] : x[i]; } -void vrefselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y; - cl_char *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_char*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80) ? y[i] : x[i]; } -void vrefselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y, *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_short*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; } -void vrefselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y; - cl_short *m; - d = (cl_ushort*) dest; - x = (cl_ushort*)src1; - y = (cl_ushort*)src2; - m = (cl_short*)cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; } -void vrefselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y, *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_int*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ +void vrefselect_1u32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_int*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y, *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_char *d, *x, *y; - cl_uchar *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_uchar*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80U) ? y[i] : x[i]; } -void vrefselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uchar *d, *x, *y, *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_uchar*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80U) ? y[i] : x[i]; } -void vrefselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y; - cl_ushort *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_ushort*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; } -void vrefselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y, *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_ushort*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; } -void vrefselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_uint *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_uint*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; } -void vrefselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y, *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; } -void vrefselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i]; } -void vrefselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y, *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i]; } -void vrefselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_hhi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; + for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; +} + +void vrefselect_hhu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; + for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; +} + +void vrefselect_ffi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_int*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ffu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_uint *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; } -void vrefselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ddi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ddu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_ulong *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i]; } // Define refSelects -Select refSelects[kTypeCount][2] = { - { refselect_1u8u, refselect_1u8 }, // cl_uchar - { refselect_1i8u, refselect_1i8 }, // char +Select refSelects[kTypeCount][2] = { + { refselect_1u8u, refselect_1u8 }, // cl_uchar + { refselect_1i8u, refselect_1i8 }, // char { refselect_1u16u, refselect_1u16 }, // ushort { refselect_1i16u, refselect_1i16 }, // short + { refselect_hhu, refselect_hhi }, // half { refselect_1u32u, refselect_1u32 }, // uint { refselect_1i32u, refselect_1i32 }, // int - { refselect_ffu, refselect_ffi }, // float + { refselect_ffu, refselect_ffi }, // float { refselect_1u64u, refselect_1u64 }, // ulong { refselect_1i64u, refselect_1i64 }, // long - { refselect_ddu, refselect_ddi } // double + { refselect_ddu, refselect_ddi } // double }; // Define vrefSelects (vector refSelects) -Select vrefSelects[kTypeCount][2] = { - { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar - { vrefselect_1i8u, vrefselect_1i8 }, // char +Select vrefSelects[kTypeCount][2] = { + { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar + { vrefselect_1i8u, vrefselect_1i8 }, // char { vrefselect_1u16u, vrefselect_1u16 }, // ushort { vrefselect_1i16u, vrefselect_1i16 }, // short + { vrefselect_hhu, vrefselect_hhi }, // half { vrefselect_1u32u, vrefselect_1u32 }, // uint { vrefselect_1i32u, vrefselect_1i32 }, // int - { vrefselect_ffu, vrefselect_ffi }, // float + { vrefselect_ffu, vrefselect_ffi }, // float { vrefselect_1u64u, vrefselect_1u64 }, // ulong { vrefselect_1i64u, vrefselect_1i64 }, // long - { vrefselect_ddu, vrefselect_ddi } // double + { vrefselect_ddu, vrefselect_ddi } // double }; //----------------------------------------- // Check functions //----------------------------------------- -size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) { - const cl_uchar *t = (const cl_uchar *) test; - const cl_uchar *c = (const cl_uchar *) correct; +size_t check_uchar(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uchar *const t = (const cl_uchar *)test; + const cl_uchar *const c = (const cl_uchar *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -576,9 +658,11 @@ size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { - const cl_char *t = (const cl_char *) test; - const cl_char *c = (const cl_char *) correct; +size_t check_char(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_char *const t = (const cl_char *)test; + const cl_char *const c = (const cl_char *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -597,9 +681,11 @@ size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) { - const cl_ushort *t = (const cl_ushort *) test; - const cl_ushort *c = (const cl_ushort *) correct; +size_t check_ushort(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ushort *const t = (const cl_ushort *)test; + const cl_ushort *const c = (const cl_ushort *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -618,9 +704,11 @@ size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_short(void *test, void *correct, size_t count, size_t vector_size) { - const cl_short *t = (const cl_short *) test; - const cl_short *c = (const cl_short *) correct; +size_t check_short(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_short *const t = (const cl_short *)test; + const cl_short *const c = (const cl_short *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -639,9 +727,11 @@ size_t check_short(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { - const cl_uint *t = (const cl_uint *) test; - const cl_uint *c = (const cl_uint *) correct; +size_t check_uint(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uint *const t = (const cl_uint *)test; + const cl_uint *const c = (const cl_uint *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -660,9 +750,11 @@ size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { - const cl_int *t = (const cl_int *) test; - const cl_int *c = (const cl_int *) correct; +size_t check_int(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_int *const t = (const cl_int *)test; + const cl_int *const c = (const cl_int *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -682,9 +774,11 @@ size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) { - const cl_ulong *t = (const cl_ulong *) test; - const cl_ulong *c = (const cl_ulong *) correct; +size_t check_ulong(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ulong *const t = (const cl_ulong *)test; + const cl_ulong *const c = (const cl_ulong *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -703,9 +797,11 @@ size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { - const cl_long *t = (const cl_long *) test; - const cl_long *c = (const cl_long *) correct; +size_t check_long(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_long *const t = (const cl_long *)test; + const cl_long *const c = (const cl_long *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -724,9 +820,36 @@ size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_float( void *test, void *correct, size_t count, size_t vector_size ) { - const cl_uint *t = (const cl_uint *) test; - const cl_uint *c = (const cl_uint *) correct; +size_t check_half(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ushort *const t = (const cl_ushort *)test; + const cl_ushort *const c = (const cl_ushort *)correct; + size_t i; + + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) /* Allow nans to be binary different */ + if ((t[i] != c[i]) + && !(isnan(((cl_half *)correct)[i]) + && isnan(((cl_half *)test)[i]))) + { + log_error("\n(check_half) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%4.4x vs 0x%4.4x\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } + + return 0; +} + +size_t check_float(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uint *const t = (const cl_uint *)test; + const cl_uint *const c = (const cl_uint *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -746,9 +869,11 @@ size_t check_float( void *test, void *correct, size_t count, size_t vector_size return 0; } -size_t check_double( void *test, void *correct, size_t count, size_t vector_size ) { - const cl_ulong *t = (const cl_ulong *) test; - const cl_ulong *c = (const cl_ulong *) correct; +size_t check_double(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ulong *const t = (const cl_ulong *)test; + const cl_ulong *const c = (const cl_ulong *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -770,5 +895,7 @@ size_t check_double( void *test, void *correct, size_t count, size_t vector_size } CheckResults checkResults[kTypeCount] = { - check_uchar, check_char, check_ushort, check_short, check_uint, - check_int, check_float, check_ulong, check_long, check_double }; + check_uchar, check_char, check_ushort, check_short, + check_half, check_uint, check_int, check_float, + check_ulong, check_long, check_double +};