diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index a22db19584..9b2bfe9246 100644 --- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -35,21 +35,29 @@ cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool te } bool get_image_dimensions(image_descriptor *imageInfo, size_t &width, - size_t &height, size_t &depth) + size_t &height, size_t &depth, int &num_dims) { width = imageInfo->width; height = 1; depth = 1; switch (imageInfo->type) { - case CL_MEM_OBJECT_IMAGE1D: break; - case CL_MEM_OBJECT_IMAGE1D_ARRAY: height = imageInfo->arraySize; break; - case CL_MEM_OBJECT_IMAGE2D: height = imageInfo->height; break; + case CL_MEM_OBJECT_IMAGE1D: num_dims = 1; break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + num_dims = 2; + height = imageInfo->arraySize; + break; + case CL_MEM_OBJECT_IMAGE2D: + num_dims = 2; + height = imageInfo->height; + break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: + num_dims = 3; height = imageInfo->height; depth = imageInfo->arraySize; break; case CL_MEM_OBJECT_IMAGE3D: + num_dims = 3; height = imageInfo->height; depth = imageInfo->depth; break; @@ -60,6 +68,13 @@ bool get_image_dimensions(image_descriptor *imageInfo, size_t &width, return 0; } +bool get_image_dimensions(image_descriptor *imageInfo, size_t &width, + size_t &height, size_t &depth) +{ + int ignoreMe; + return get_image_dimensions(imageInfo, width, height, depth, ignoreMe); +} + static bool InitFloatCoordsCommon(image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, @@ -210,6 +225,22 @@ cl_mem create_image_of_type(cl_context context, cl_mem_flags mem_flags, cl_mem image; switch (imageInfo->type) { + case CL_MEM_OBJECT_IMAGE1D: + image = create_image_1d(context, mem_flags, imageInfo->format, + imageInfo->width, row_pitch, host_ptr, NULL, + error); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image = create_image_1d_array( + context, mem_flags, imageInfo->format, imageInfo->width, + imageInfo->arraySize, row_pitch, slice_pitch, host_ptr, error); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image = create_image_2d_array(context, mem_flags, imageInfo->format, + imageInfo->width, imageInfo->height, + imageInfo->arraySize, row_pitch, + slice_pitch, host_ptr, error); + break; case CL_MEM_OBJECT_IMAGE3D: image = create_image_3d(context, mem_flags, imageInfo->format, imageInfo->width, imageInfo->height, @@ -231,10 +262,17 @@ static size_t get_image_num_pixels(image_descriptor *imageInfo, size_t width, size_t image_size; switch (imageInfo->type) { + case CL_MEM_OBJECT_IMAGE1D: image_size = width; break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image_size = width * array_size; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image_size = width * height * array_size; + break; case CL_MEM_OBJECT_IMAGE3D: image_size = width * height * depth; break; default: - log_error("Implementation is incomplete, only 3D images are " - "supported so far"); + log_error("Implementation is incomplete, 2D images are " + "not yet supported here"); return 0; } return image_size; @@ -245,16 +283,20 @@ int test_read_image(cl_context context, cl_command_queue queue, image_sampler_data *imageSampler, bool useFloatCoords, ExplicitType outputType, MTdata d) { + bool image_type_3D = ((imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) + || (imageInfo->type == CL_MEM_OBJECT_IMAGE3D)); + int error; - size_t threads[3]; static int initHalf = 0; + int num_dimensions; size_t image_size = get_image_num_pixels(imageInfo, imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->arraySize); test_assert_error(0 != image_size, "Invalid image size"); size_t width_size, height_size, depth_size; - if (get_image_dimensions(imageInfo, width_size, height_size, depth_size)) + if (get_image_dimensions(imageInfo, width_size, height_size, depth_size, + num_dimensions)) { log_error("ERROR: invalid image dimensions"); return CL_INVALID_VALUE; @@ -433,10 +475,9 @@ int test_read_image(cl_context context, cl_command_queue queue, else { int nextLevelOffset = 0; - for (int i = 0; i < imageInfo->num_mip_levels; i++) { - origin[3] = i; + origin[num_dimensions] = i; error = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, ((char *)imageValues + nextLevelOffset), 0, NULL, NULL); @@ -452,9 +493,16 @@ int test_read_image(cl_context context, cl_command_queue queue, nextLevelOffset += region[0] * region[1] * region[2] * get_pixel_size(imageInfo->format); // Subsequent mip level dimensions keep halving + // Regions for unnecessary dimensions are already 1. region[0] = region[0] >> 1 ? region[0] >> 1 : 1; - region[1] = region[1] >> 1 ? region[1] >> 1 : 1; - region[2] = region[2] >> 1 ? region[2] >> 1 : 1; + if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + region[1] = region[1] >> 1 ? region[1] >> 1 : 1; + } + if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + region[2] = region[2] >> 1 ? region[2] >> 1 : 1; + } } } } @@ -463,14 +511,20 @@ int test_read_image(cl_context context, cl_command_queue queue, clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_float) * image_size, xOffsetValues, &error); test_error(error, "Unable to create x offset buffer"); - yOffsets = - clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * image_size, yOffsetValues, &error); - test_error(error, "Unable to create y offset buffer"); - zOffsets = - clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * image_size, zOffsetValues, &error); - test_error(error, "Unable to create y offset buffer"); + if (num_dimensions > 1) + { + yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(cl_float) * image_size, yOffsetValues, + &error); + test_error(error, "Unable to create y offset buffer"); + } + if (num_dimensions > 2) + { + zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(cl_float) * image_size, zOffsetValues, + &error); + test_error(error, "Unable to create z offset buffer"); + } results = clCreateBuffer( context, CL_MEM_READ_WRITE, get_explicit_type_size(outputType) * 4 * image_size, NULL, &error); @@ -492,10 +546,16 @@ int test_read_image(cl_context context, cl_command_queue queue, } error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &xOffsets); test_error(error, "Unable to set kernel arguments"); - error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &yOffsets); - test_error(error, "Unable to set kernel arguments"); - error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &zOffsets); - test_error(error, "Unable to set kernel arguments"); + if (num_dimensions > 1) + { + error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &yOffsets); + test_error(error, "Unable to set kernel arguments"); + } + if (num_dimensions > 2) + { + error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &zOffsets); + test_error(error, "Unable to set kernel arguments"); + } error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &results); test_error(error, "Unable to set kernel arguments"); @@ -576,14 +636,20 @@ int test_read_image(cl_context context, cl_command_queue queue, sizeof(cl_float) * image_size, xOffsetValues, 0, NULL, NULL); test_error(error, "Unable to write x offsets"); - error = clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0, - sizeof(cl_float) * image_size, - yOffsetValues, 0, NULL, NULL); - test_error(error, "Unable to write y offsets"); - error = clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0, - sizeof(cl_float) * image_size, - zOffsetValues, 0, NULL, NULL); - test_error(error, "Unable to write z offsets"); + if (num_dimensions > 1) + { + error = clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0, + sizeof(cl_float) * image_size, + yOffsetValues, 0, NULL, NULL); + test_error(error, "Unable to write y offsets"); + } + if (num_dimensions > 2) + { + error = clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0, + sizeof(cl_float) * image_size, + zOffsetValues, 0, NULL, NULL); + test_error(error, "Unable to write z offsets"); + } memset(resultValues, 0xff, resultValuesSize); @@ -591,13 +657,12 @@ int test_read_image(cl_context context, cl_command_queue queue, resultValues, 0, NULL, NULL); // Figure out thread dimensions - threads[0] = (size_t)width_lod; - threads[1] = (size_t)height_lod; - threads[2] = (size_t)depth_lod; + size_t threads[] = { (size_t)width_lod, (size_t)height_lod, + (size_t)depth_lod }; // Run the kernel - error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads, - NULL, 0, NULL, NULL); + error = clEnqueueNDRangeKernel(queue, kernel, num_dimensions, NULL, + threads, NULL, 0, NULL, NULL); test_error(error, "Unable to run kernel"); // Get results @@ -610,17 +675,15 @@ int test_read_image(cl_context context, cl_command_queue queue, // Validate results element by element char *imagePtr = (char *)imageValues + nextLevelOffset; - /* - * FLOAT output type - */ - if (is_sRGBA_order(imageInfo->format->image_channel_order) + if (((imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) + && (imageInfo->format->image_channel_order == CL_DEPTH)) && (outputType == kFloat)) { // Validate float results float *resultPtr = (float *)(char *)resultValues; float expected[4], error = 0.0f; float maxErr = get_max_relative_error( - imageInfo->format, imageSampler, 1 /*3D*/, + imageInfo->format, imageSampler, image_type_3D, CL_FILTER_LINEAR == imageSampler->filter_mode); for (size_t z = 0, j = 0; z < depth_lod; z++) @@ -676,6 +739,263 @@ int test_read_image(cl_context context, cl_command_queue queue, imageSampler, expected, 0, &hasDenormals, lod); + float err1 = ABS_ERROR(resultPtr[0], + expected[0]); + // Clamp to the minimum absolute error + // for the format + if (err1 > 0 + && err1 < formatAbsoluteError) + { + err1 = 0.0f; + } + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); + + if (!(err1 <= maxErr1)) + { + // Try flushing the denormals + if (hasDenormals) + { + // If implementation decide to + // flush subnormals to zero, max + // error needs to be adjusted + maxErr1 += 4 * FLT_MIN; + + maxPixel = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, + imageSampler, expected, + 0, NULL, lod); + + err1 = ABS_ERROR(resultPtr[0], + expected[0]); + } + } + + found_pixel = (err1 <= maxErr1); + } // norm_offset_z + } // norm_offset_y + } // norm_offset_x + + // Step 2: If we did not find a match, then print + // out debugging info. + if (!found_pixel) + { + // For the normalized case on a GPU we put in + // offsets to the X and Y to see if we land on + // the right pixel. This addresses the + // significant inaccuracy in GPU normalization + // in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; + norm_offset_x <= offset + && !checkOnlyOnePixel; + norm_offset_x += NORM_OFFSET) + { + for (float norm_offset_y = -offset; + norm_offset_y <= offset + && !checkOnlyOnePixel; + norm_offset_y += NORM_OFFSET) + { + for (float norm_offset_z = -offset; + norm_offset_z <= offset + && !checkOnlyOnePixel; + norm_offset_z += NORM_OFFSET) + { + + int hasDenormals = 0; + FloatPixel maxPixel = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, imageSampler, + expected, 0, &hasDenormals, + lod); + + float err1 = ABS_ERROR(resultPtr[0], + expected[0]); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], + FLT_MIN); + + + if (!(err1 <= maxErr1)) + { + // Try flushing the denormals + if (hasDenormals) + { + maxErr1 += 4 * FLT_MIN; + + maxPixel = + sample_image_pixel_float( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + imageSampler, + expected, 0, NULL, + lod); + + err1 = + ABS_ERROR(resultPtr[0], + expected[0]); + } + } + + if (!(err1 <= maxErr1)) + { + log_error( + "FAILED norm_offsets: %g , " + "%g , %g:\n", + norm_offset_x, + norm_offset_y, + norm_offset_z); + + float tempOut[4]; + shouldReturn |= + determine_validation_error_offset< + float>( + imagePtr, imageInfo, + imageSampler, resultPtr, + expected, error, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, j, + numTries, numClamped, + true, lod); + log_error("Step by step:\n"); + FloatPixel temp = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + yOffsetValues[j], + zOffsetValues[j], + norm_offset_x, + norm_offset_y, + norm_offset_z, + imageSampler, tempOut, + 1 /*verbose*/, + &hasDenormals, lod); + log_error( + "\tulps: %2.2f (max " + "allowed: %2.2f)\n\n", + Ulp_Error(resultPtr[0], + expected[0]), + Ulp_Error( + MAKE_HEX_FLOAT( + 0x1.000002p0f, + 0x1000002L, -24) + + maxErr, + MAKE_HEX_FLOAT( + 0x1.000002p0f, + 0x1000002L, -24))); + } + else + { + log_error( + "Test error: we should " + "have detected this " + "passing above.\n"); + } + } // norm_offset_z + } // norm_offset_y + } // norm_offset_x + if (shouldReturn) return 1; + } // if (!found_pixel) + + resultPtr += 1; + } + } + } + } + /* + * FLOAT output type + */ + else if (is_sRGBA_order(imageInfo->format->image_channel_order) + && (outputType == kFloat)) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error = 0.0f; + float maxErr = get_max_relative_error( + imageInfo->format, imageSampler, image_type_3D, + CL_FILTER_LINEAR == imageSampler->filter_mode); + + for (size_t z = 0, j = 0; z < depth_lod; z++) + { + for (size_t y = 0; y < height_lod; y++) + { + for (size_t x = 0; x < width_lod; x++, j++) + { + // Step 1: go through and see if the results verify + // for the pixel For the normalized case on a GPU we + // put in offsets to the X, Y and Z to see if we + // land on the right pixel. This addresses the + // significant inaccuracy in GPU normalization in + // OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords + || imageSampler->filter_mode + != CL_FILTER_NEAREST + || NORM_OFFSET == 0 +#if defined(__APPLE__) + // Apple requires its CPU implementation to do + // correctly rounded address arithmetic in all + // modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; + norm_offset_x <= offset && !found_pixel; + norm_offset_x += NORM_OFFSET) + { + for (float norm_offset_y = -offset; + norm_offset_y <= offset && !found_pixel; + norm_offset_y += NORM_OFFSET) + { + for (float norm_offset_z = -offset; + norm_offset_z <= NORM_OFFSET + && !found_pixel; + norm_offset_z += NORM_OFFSET) + { + + int hasDenormals = 0; + FloatPixel maxPixel = + sample_image_pixel_float_offset( + imagePtr, imageInfo, + xOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D ? zOffsetValues[j] + : 0.0f, + norm_offset_x, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D ? norm_offset_z + : 0.0f, + imageSampler, expected, 0, + &hasDenormals, lod); + float err1 = ABS_ERROR(sRGBmap(resultPtr[0]), sRGBmap(expected[0])); @@ -728,11 +1048,19 @@ int test_read_image(cl_context context, cl_command_queue queue, sample_image_pixel_float_offset( imagePtr, imageInfo, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, norm_offset_x, - norm_offset_y, - norm_offset_z, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, imageSampler, expected, 0, NULL, lod); @@ -784,19 +1112,41 @@ int test_read_image(cl_context context, cl_command_queue queue, && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + // If we are not on a GPU, or we are + // not normalized, then only test + // with offsets (0.0, 0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords + || gDeviceType + != CL_DEVICE_TYPE_GPU + || NORM_OFFSET == 0) + { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } int hasDenormals = 0; FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, norm_offset_x, - norm_offset_y, - norm_offset_z, imageSampler, - expected, 0, &hasDenormals, - lod); + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, + imageSampler, expected, 0, + &hasDenormals, lod); float err1 = ABS_ERROR(sRGBmap(resultPtr[0]), @@ -829,8 +1179,14 @@ int test_read_image(cl_context context, cl_command_queue queue, sample_image_pixel_float( imagePtr, imageInfo, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues + [j] + : 0.0f, + image_type_3D + ? zOffsetValues + [j] + : 0.0f, imageSampler, expected, 0, NULL, lod); @@ -870,23 +1226,39 @@ int test_read_image(cl_context context, cl_command_queue queue, imageSampler, resultPtr, expected, error, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, norm_offset_x, - norm_offset_y, - norm_offset_z, j, - numTries, numClamped, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, + j, numTries, numClamped, true, lod); log_error("Step by step:\n"); FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, norm_offset_x, - norm_offset_y, - norm_offset_z, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); @@ -938,7 +1310,7 @@ int test_read_image(cl_context context, cl_command_queue queue, float *resultPtr = (float *)(char *)resultValues; float expected[4], error = 0.0f; float maxErr = get_max_relative_error( - imageInfo->format, imageSampler, 1 /*3D*/, + imageInfo->format, imageSampler, image_type_3D, CL_FILTER_LINEAR == imageSampler->filter_mode); for (size_t z = 0, j = 0; z < depth_lod; z++) @@ -988,9 +1360,17 @@ int test_read_image(cl_context context, cl_command_queue queue, sample_image_pixel_float_offset( imagePtr, imageInfo, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], norm_offset_x, - norm_offset_y, norm_offset_z, + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D ? zOffsetValues[j] + : 0.0f, + norm_offset_x, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D ? norm_offset_z + : 0.0f, imageSampler, expected, 0, &hasDenormals, lod); @@ -1053,11 +1433,19 @@ int test_read_image(cl_context context, cl_command_queue queue, sample_image_pixel_float_offset( imagePtr, imageInfo, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, norm_offset_x, - norm_offset_y, - norm_offset_z, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, imageSampler, expected, 0, NULL, lod); @@ -1106,19 +1494,41 @@ int test_read_image(cl_context context, cl_command_queue queue, && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + // If we are not on a GPU, or we are + // not normalized, then only test + // with offsets (0.0, 0.0) E.g., + // test one pixel. + if (!imageSampler->normalized_coords + || gDeviceType + != CL_DEVICE_TYPE_GPU + || NORM_OFFSET == 0) + { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } int hasDenormals = 0; FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, norm_offset_x, - norm_offset_y, - norm_offset_z, imageSampler, - expected, 0, &hasDenormals, - lod); + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, + imageSampler, expected, 0, + &hasDenormals, lod); float err1 = ABS_ERROR(resultPtr[0], expected[0]); @@ -1159,8 +1569,14 @@ int test_read_image(cl_context context, cl_command_queue queue, sample_image_pixel_float( imagePtr, imageInfo, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues + [j] + : 0.0f, + image_type_3D + ? zOffsetValues + [j] + : 0.0f, imageSampler, expected, 0, NULL, lod); @@ -1200,23 +1616,39 @@ int test_read_image(cl_context context, cl_command_queue queue, imageSampler, resultPtr, expected, error, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, norm_offset_x, - norm_offset_y, - norm_offset_z, j, - numTries, numClamped, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, + j, numTries, numClamped, true, lod); log_error("Step by step:\n"); FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, norm_offset_x, - norm_offset_y, - norm_offset_z, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); @@ -1315,9 +1747,17 @@ int test_read_image(cl_context context, cl_command_queue queue, sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[j], yOffsetValues[j], - zOffsetValues[j], norm_offset_x, - norm_offset_y, norm_offset_z, + xOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D ? zOffsetValues[j] + : 0.0f, + norm_offset_x, + (num_dimensions > 1) ? norm_offset_y + : 0.0f, + image_type_3D ? norm_offset_z + : 0.0f, imageSampler, expected, lod); error = errMax( @@ -1382,9 +1822,17 @@ int test_read_image(cl_context context, cl_command_queue queue, unsigned int>( imagePtr, imageInfo, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], norm_offset_x, - norm_offset_y, norm_offset_z, + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D ? zOffsetValues[j] + : 0.0f, + norm_offset_x, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D ? norm_offset_z + : 0.0f, imageSampler, expected, lod); error = errMax( @@ -1416,12 +1864,20 @@ int test_read_image(cl_context context, cl_command_queue queue, imageSampler, resultPtr, expected, error, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, norm_offset_x, - norm_offset_y, - norm_offset_z, j, - numTries, numClamped, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, + j, numTries, numClamped, false, lod); } else @@ -1498,9 +1954,17 @@ int test_read_image(cl_context context, cl_command_queue queue, sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[j], yOffsetValues[j], - zOffsetValues[j], norm_offset_x, - norm_offset_y, norm_offset_z, + xOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D ? zOffsetValues[j] + : 0.0f, + norm_offset_x, + (num_dimensions > 1) ? norm_offset_y + : 0.0f, + image_type_3D ? norm_offset_z + : 0.0f, imageSampler, expected, lod); error = errMax( @@ -1565,9 +2029,17 @@ int test_read_image(cl_context context, cl_command_queue queue, sample_image_pixel_offset( imagePtr, imageInfo, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], norm_offset_x, - norm_offset_y, norm_offset_z, + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D ? zOffsetValues[j] + : 0.0f, + norm_offset_x, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D ? norm_offset_z + : 0.0f, imageSampler, expected, lod); error = errMax( @@ -1598,12 +2070,20 @@ int test_read_image(cl_context context, cl_command_queue queue, imageSampler, resultPtr, expected, error, xOffsetValues[j], - yOffsetValues[j], - zOffsetValues[j], + (num_dimensions > 1) + ? yOffsetValues[j] + : 0.0f, + image_type_3D + ? zOffsetValues[j] + : 0.0f, norm_offset_x, - norm_offset_y, - norm_offset_z, j, - numTries, numClamped, + (num_dimensions > 1) + ? norm_offset_y + : 0.0f, + image_type_3D + ? norm_offset_z + : 0.0f, + j, numTries, numClamped, false, lod); } else @@ -1626,8 +2106,9 @@ int test_read_image(cl_context context, cl_command_queue queue, } } { - nextLevelOffset += width_lod * height_lod * depth_lod - * get_pixel_size(imageInfo->format); + nextLevelOffset += + image_lod_size * get_pixel_size(imageInfo->format); + // Any unnecessary dimensions will already be 1. width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1; if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) { diff --git a/test_conformance/images/kernel_read_write/test_common.h b/test_conformance/images/kernel_read_write/test_common.h index fc95bee22a..2a644dae51 100644 --- a/test_conformance/images/kernel_read_write/test_common.h +++ b/test_conformance/images/kernel_read_write/test_common.h @@ -53,6 +53,9 @@ int determine_validation_error_offset( float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod) { + bool image_type_3D = ((imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) + || (imageInfo->type == CL_MEM_OBJECT_IMAGE3D)); + bool image_type_1D = (imageInfo->type == CL_MEM_OBJECT_IMAGE1D); int actualX, actualY, actualZ; int found = debug_find_pixel_in_image(imagePtr, imageInfo, resultPtr, &actualX, &actualY, &actualZ, lod); @@ -66,16 +69,18 @@ int determine_validation_error_offset( return TEST_FAIL; } - clamped = get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset, - zAddressOffset, imageWidth, imageHeight, - imageDepth, imageSampler, imageInfo, - clampedX, clampedY, clampedZ); + clamped = get_integer_coords_offset( + x, !image_type_1D ? y : 0.0f, image_type_3D ? z : 0.0f, xAddressOffset, + !image_type_1D ? yAddressOffset : 0.0f, + image_type_3D ? zAddressOffset : 0.0f, imageWidth, imageHeight, + imageDepth, imageSampler, imageInfo, clampedX, clampedY, clampedZ); if (found) { // Is it a clamping bug? - if (clamped && clampedX == actualX && clampedY == actualY - && clampedZ == actualZ) + if (clamped && clampedX == actualX + && (clampedY == actualY || image_type_1D) + && (clampedZ == actualZ || !image_type_3D)) { if ((--numClamped) == 0) { @@ -102,6 +107,16 @@ int determine_validation_error_offset( } log_error("ERROR: TEST FAILED: Read is erroneously clamping " "coordinates!\n"); + + if (imageSampler->filter_mode != CL_FILTER_LINEAR) + { + log_error( + "\tValue really found in image at %d,%d,%d (%s)\n", + actualX, actualY, actualZ, + (found > 1) ? "NOT unique!!" : "unique"); + } + log_error("\n"); + return -1; } clampingErr = true; diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp index cab1fa8e3a..e428c12e03 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp @@ -58,965 +58,6 @@ const char *float1DKernelSource = static const char *samplerKernelArg = " sampler_t imageSampler,"; -template int determine_validation_error_1D( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, - T *resultPtr, T * expected, float error, - float x, float xAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod ) -{ - int actualX, actualY; - int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod ); - bool clampingErr = false, clamped = false, otherClampingBug = false; - int clampedX, ignoreMe; - - clamped = get_integer_coords_offset( x, 0.0f, 0.0f, xAddressOffset, 0.0f, 0.0f, imageInfo->width, 0, 0, imageSampler, imageInfo, clampedX, ignoreMe, ignoreMe ); - - if( found ) - { - // Is it a clamping bug? - if( clamped && clampedX == actualX ) - { - if( (--numClamped) == 0 ) - { - log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld!\n", imageInfo->width ); - if( printAsFloat ) - { - log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n", - (int)j, x, x, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", - (int)j, x, x, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - return 1; - } - clampingErr = true; - otherClampingBug = true; - } - } - if( clamped && !otherClampingBug ) - { - // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously - if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 ) - { - if( (--numClamped) == 0 ) - { - log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld!\n", imageInfo->width ); - if( printAsFloat ) - { - log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n", - (int)j, x, x, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", - (int)j, x, x, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - return 1; - } - clampingErr = true; - } - } - if( !clampingErr ) - { - if( printAsFloat ) - { - log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g), error of %g\n", - (int)j, x, x, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", - (int)j, x, x, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - log_error( "img size %ld (pitch %ld)", imageInfo->width, imageInfo->rowPitch ); - if( clamped ) - { - log_error( " which would clamp to %d\n", clampedX ); - } - if( printAsFloat && gExtraValidateInfo) - { - log_error( "Nearby values:\n" ); - log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 ); - { - float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ]; - read_image_pixel_float( imagePtr, imageInfo, clampedX - 2, 0, 0, top ); - read_image_pixel_float( imagePtr, imageInfo, clampedX - 1, 0, 0, real ); - read_image_pixel_float( imagePtr, imageInfo, clampedX, 0, 0, bot ); - read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, 0, 0, bot2 ); - log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] ); - log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] ); - log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] ); - log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] ); - } - } - - if( imageSampler->filter_mode != CL_FILTER_LINEAR ) - { - if( found ) - log_error( "\tValue really found in image at %d (%s)\n", actualX, ( found > 1 ) ? "NOT unique!!" : "unique" ); - else - log_error( "\tValue not actually found in image\n" ); - } - log_error( "\n" ); - - numClamped = -1; // We force the clamped counter to never work - if( ( --numTries ) == 0 ) - { - return 1; - } - } - return 0; -} - -static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float xfract, int normalized_coords, MTdata d, int lod) -{ - size_t i = 0; - size_t width_lod = imageInfo->width; - - if(gTestMipmaps) - width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; - - if( gDisableOffsets ) - { - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) (xfract + (double) x); - } - } - else - { - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d ))); - } - } - - if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) - { - i = 0; - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0); - } - } - - if( normalized_coords ) - { - i = 0; - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) width_lod); - } - } -} - -int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel kernel, - image_descriptor *imageInfo, image_sampler_data *imageSampler, - bool useFloatCoords, ExplicitType outputType, MTdata d ) -{ - int error; - static int initHalf = 0; - - size_t threads[2]; - cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; - clMemWrapper xOffsets, results; - clSamplerWrapper actualSampler; - BufferOwningPtr maxImageUseHostPtrBackingStore; - - // The DataBuffer template class really does use delete[], not free -- IRO - BufferOwningPtr xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width)); - - // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here - BufferOwningPtr imageValues; - generate_random_image_data( imageInfo, imageValues, d ); - - if( gDebugTrace ) - { - log_info( " - Creating 1D image %d ...\n", (int)imageInfo->width ); - if(gTestMipmaps) - log_info(" - and %d mip levels\n", (int)imageInfo->num_mip_levels); - } - // Construct testing sources - clProtectedImage protImage; - clMemWrapper unprotImage; - cl_mem image; - - if(gtestTypesToRun & kReadTests) - { - image_read_write_flags = CL_MEM_READ_ONLY; - } - else - { - image_read_write_flags = CL_MEM_READ_WRITE; - } - - if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) - { - // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) - // Do not use protected images for max image size test since it rounds the row size to a page size - if (gTestMaxImages) { - generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d ); - unprotImage = create_image_1d( context, - image_read_write_flags | CL_MEM_USE_HOST_PTR, - imageInfo->format, - imageInfo->width, ( gEnablePitch ? imageInfo->rowPitch : 0 ), - maxImageUseHostPtrBackingStore, NULL, &error ); - } else { - error = protImage.Create( context, - image_read_write_flags, - imageInfo->format, imageInfo->width ); - } - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 1D image of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) ); - return error; - } - - if (gTestMaxImages) - image = (cl_mem)unprotImage; - else - image = (cl_mem)protImage; - } - else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR ) - { - // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in - unprotImage = create_image_1d( context, - image_read_write_flags | CL_MEM_COPY_HOST_PTR, - imageInfo->format, - imageInfo->width, ( gEnablePitch ? imageInfo->rowPitch : 0 ), - imageValues, NULL, &error ); - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 1D image of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) ); - return error; - } - image = unprotImage; - } - else // Either CL_MEM_ALLOC_HOST_PTR or none - { - // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise - // it works just as if no flag is specified, so we just do the same thing either way - if(gTestMipmaps) - { - cl_image_desc image_desc = {0}; - image_desc.image_type = CL_MEM_OBJECT_IMAGE1D; - image_desc.image_width = imageInfo->width; - image_desc.num_mip_levels = imageInfo->num_mip_levels; - - unprotImage = clCreateImage( context, - image_read_write_flags, - imageInfo->format, &image_desc, NULL, &error); - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create %d level mipmapped 1D image of size %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); - return error; - } - } - else - { - unprotImage = create_image_1d( context, - image_read_write_flags | gMemFlagsToUse, - imageInfo->format, - imageInfo->width, ( gEnablePitch ? imageInfo->rowPitch : 0 ), - imageValues, NULL, &error ); - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 1D image of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) ); - return error; - } - } - image = unprotImage; - } - - if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR ) - { - if( gDebugTrace ) - log_info( " - Writing image...\n" ); - - size_t origin[ 3 ] = { 0, 0, 0 }; - size_t region[ 3 ] = { imageInfo->width, 1, 1 }; - - if(gTestMipmaps) - { - int nextLevelOffset = 0; - - for (int i =0; i < imageInfo->num_mip_levels; i++) - { origin[1] = i; - error = clEnqueueWriteImage(queue, image, CL_TRUE, - origin, region, /*gEnablePitch ? imageInfo->rowPitch :*/ 0, /*gEnablePitch ? imageInfo->slicePitch :*/ 0, - ((char*)imageValues + nextLevelOffset), 0, NULL, NULL); - if (error != CL_SUCCESS) - { - log_error( "ERROR: Unable to write to %d level mipmapped 3D image of size %d x %d x %d\n", (int)imageInfo->num_mip_levels,(int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth ); - return error; - } - nextLevelOffset += region[0]*get_pixel_size(imageInfo->format); - //Subsequent mip level dimensions keep halving - region[0] = region[0] >> 1 ? region[0] >> 1 : 1; - } - } - else - { - error = clEnqueueWriteImage(queue, image, CL_TRUE, - origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0, - imageValues, 0, NULL, NULL); - if (error != CL_SUCCESS) - { - log_error( "ERROR: Unable to write to 1D image of size %d\n", (int)imageInfo->width ); - return error; - } - } - } - - if( gDebugTrace ) - log_info( " - Creating kernel arguments...\n" ); - - xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width, - xOffsetValues, &error); - test_error( error, "Unable to create x offset buffer" ); - results = clCreateBuffer(context, CL_MEM_READ_WRITE, - get_explicit_type_size(outputType) * 4 - * imageInfo->width, - NULL, &error); - test_error( error, "Unable to create result buffer" ); - - // Create sampler to use - actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error); - test_error(error, "Unable to create image sampler"); - - // Set arguments - int idx = 0; - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image ); - test_error( error, "Unable to set kernel arguments" ); - if( !gUseKernelSamplers ) - { - error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); - test_error( error, "Unable to set kernel arguments" ); - } - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); - test_error( error, "Unable to set kernel arguments" ); - - // A cast of troublesome offsets. The first one has to be zero. - const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 }; - int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] ); - int numTries = MAX_TRIES, numClamped = MAX_CLAMPED; - int loopCount = 2 * float_offset_count; - if( ! useFloatCoords ) - loopCount = 1; - if (gTestMaxImages) { - loopCount = 1; - log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]); - } - - // Get the maximum absolute error for this format - double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); - if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError); - - if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) { - initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue ); - if (initHalf) { - log_info("Half rounding mode successfully detected.\n"); - } - } - - size_t width_lod = imageInfo->width; - size_t nextLevelOffset = 0; - for(int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) - { - float lod_float = (float)lod; - size_t resultValuesSize = width_lod * get_explicit_type_size( outputType ) * 4; - BufferOwningPtr resultValues(malloc(resultValuesSize)); - if (gTestMipmaps) { - //Set the lod kernel arg - if(gDebugTrace) - log_info(" - Working at mip level %d\n", lod); - error = clSetKernelArg( kernel, idx, sizeof( float ), &lod_float); - test_error( error, "Unable to set kernel arguments" ); - } - for( int q = 0; q < loopCount; q++ ) - { - float offset = float_offsets[ q % float_offset_count ]; - - // Init the coordinates - InitFloatCoords( imageInfo, imageSampler, xOffsetValues, - q>=float_offset_count ? -offset: offset, - imageSampler->normalized_coords, d , lod); - - error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * width_lod, xOffsetValues, 0, NULL, NULL ); - test_error( error, "Unable to write x offsets" ); - - // Get results - memset( resultValues, 0xff, resultValuesSize ); - clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); - - // Run the kernel - threads[0] = (size_t)width_lod; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); - test_error( error, "Unable to run kernel" ); - - if( gDebugTrace ) - log_info( " reading results, %ld kbytes\n", (unsigned long)( width_lod * get_explicit_type_size( outputType ) * 4 / 1024 ) ); - - error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); - test_error( error, "Unable to read results from kernel" ); - if( gDebugTrace ) - log_info( " results read\n" ); - - // Validate results element by element - char *imagePtr = imageValues + nextLevelOffset; - /* - * FLOAT output type - */ - if(is_sRGBA_order(imageInfo->format->image_channel_order) && ( outputType == kFloat )) - { - // Validate float results - float *resultPtr = (float *)(char *)resultValues; - float expected[4], error=0.0f; - float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); - { - for( size_t x = 0, j = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords - || imageSampler->filter_mode != CL_FILTER_NEAREST - || NORM_OFFSET == 0 -#if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly - // rounded address arithmetic in all modes - || !(gDeviceType & CL_DEVICE_TYPE_GPU) -#endif - ) - offset = 0.0f; // Loop only once - - for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { - - // Try sampling the pixel, without flushing denormals. - int containsDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, 0, &containsDenormals, lod ); - - float err1 = ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - float err2 = ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - float err3 = ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - float err4 = ABS_ERROR(resultPtr[3], expected[3]); - - float maxErr = 0.5; - - // Check if the result matches. - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || - ! (err3 <= maxErr) || ! (err4 <= maxErr) ) - { - //try flushing the denormals, if there is a failure. - if( containsDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - err2 = ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - err3 = ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - err4 = ABS_ERROR(resultPtr[3], expected[3]); - } - } - - // If the final result DOES match, then we've found a valid result and we're done with this pixel. - found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr); - }//norm_offset_x - - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - checkOnlyOnePixel = 1; - } - - int containsDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, 0, &containsDenormals, lod ); - - float err1 = ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - float err2 = ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - float err3 = ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - float err4 = - ABS_ERROR(resultPtr[3], expected[3]); - - float maxErr = 0.6; - - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || - ! (err3 <= maxErr) || ! (err4 <= maxErr) ) - { - //try flushing the denormals, if there is a failure. - if( containsDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - err2 = ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - err3 = ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || - ! (err3 <= maxErr) || ! (err4 <= maxErr) ) - { - log_error("FAILED norm_offsets: %g:\n", norm_offset_x); - - float tempOut[4]; - shouldReturn |= determine_validation_error_1D( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[ j ], norm_offset_x, j, numTries, numClamped, true, lod ); - - log_error( "Step by step:\n" ); - FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod ); - log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", - Ulp_Error( resultPtr[0], expected[0] ), - Ulp_Error( resultPtr[1], expected[1] ), - Ulp_Error( resultPtr[2], expected[2] ), - Ulp_Error( resultPtr[3], expected[3] ), - Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); - - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - else if( outputType == kFloat ) - { - // Validate float results - float *resultPtr = (float *)(char *)resultValues; - float expected[4], error=0.0f; - float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); - { - for( size_t x = 0, j = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords - || imageSampler->filter_mode != CL_FILTER_NEAREST - || NORM_OFFSET == 0 -#if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly - // rounded address arithmetic in all modes - || !(gDeviceType & CL_DEVICE_TYPE_GPU) -#endif - ) - offset = 0.0f; // Loop only once - - for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { - - // Try sampling the pixel, without flushing denormals. - int containsDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, 0, &containsDenormals, lod ); - - float err1 = ABS_ERROR(resultPtr[0], expected[0]); - float err2 = ABS_ERROR(resultPtr[1], expected[1]); - float err3 = ABS_ERROR(resultPtr[2], expected[2]); - float err4 = ABS_ERROR(resultPtr[3], expected[3]); - // Clamp to the minimum absolute error for the format - if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } - if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } - if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = - std::max(maxErr * maxPixel.p[0], FLT_MIN); - float maxErr2 = - std::max(maxErr * maxPixel.p[1], FLT_MIN); - float maxErr3 = - std::max(maxErr * maxPixel.p[2], FLT_MIN); - float maxErr4 = - std::max(maxErr * maxPixel.p[3], FLT_MIN); - - // Check if the result matches. - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || - ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) - { - //try flushing the denormals, if there is a failure. - if( containsDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr1 += 4 * FLT_MIN; - maxErr2 += 4 * FLT_MIN; - maxErr3 += 4 * FLT_MIN; - maxErr4 += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(resultPtr[0], expected[0]); - err2 = ABS_ERROR(resultPtr[1], expected[1]); - err3 = ABS_ERROR(resultPtr[2], expected[2]); - err4 = ABS_ERROR(resultPtr[3], expected[3]); - } - } - - // If the final result DOES match, then we've found a valid result and we're done with this pixel. - found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4); - }//norm_offset_x - - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - checkOnlyOnePixel = 1; - } - - int containsDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, 0, &containsDenormals, lod ); - - float err1 = - ABS_ERROR(resultPtr[0], expected[0]); - float err2 = - ABS_ERROR(resultPtr[1], expected[1]); - float err3 = - ABS_ERROR(resultPtr[2], expected[2]); - float err4 = - ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = - std::max(maxErr * maxPixel.p[0], FLT_MIN); - float maxErr2 = - std::max(maxErr * maxPixel.p[1], FLT_MIN); - float maxErr3 = - std::max(maxErr * maxPixel.p[2], FLT_MIN); - float maxErr4 = - std::max(maxErr * maxPixel.p[3], FLT_MIN); - - - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || - ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) - { - //try flushing the denormals, if there is a failure. - if( containsDenormals ) - { - maxErr1 += 4 * FLT_MIN; - maxErr2 += 4 * FLT_MIN; - maxErr3 += 4 * FLT_MIN; - maxErr4 += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(resultPtr[0], - expected[0]); - err2 = ABS_ERROR(resultPtr[1], - expected[1]); - err3 = ABS_ERROR(resultPtr[2], - expected[2]); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || - ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) - { - log_error("FAILED norm_offsets: %g:\n", norm_offset_x); - - float tempOut[4]; - shouldReturn |= determine_validation_error_1D( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[ j ], norm_offset_x, j, numTries, numClamped, true, lod ); - - log_error( "Step by step:\n" ); - FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod ); - log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", - Ulp_Error( resultPtr[0], expected[0] ), - Ulp_Error( resultPtr[1], expected[1] ), - Ulp_Error( resultPtr[2], expected[2] ), - Ulp_Error( resultPtr[3], expected[3] ), - Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); - - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - /* - * UINT output type - */ - else if( outputType == kUInt ) - { - // Validate unsigned integer results - unsigned int *resultPtr = (unsigned int *)(char *)resultValues; - unsigned int expected[4]; - float error; - for( size_t x = 0, j = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - checkOnlyOnePixel = 1; - } - - if ( gTestMipmaps ) - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, lod ); - else - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected ); - - error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if (error <= MAX_ERR) - found_pixel = 1; - }//norm_offset_x - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - checkOnlyOnePixel = 1; - } - - if ( gTestMipmaps ) - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, lod ); - else - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected ); - - - - error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if( error > MAX_ERR ) - { - log_error("FAILED norm_offsets: %g:\n", norm_offset_x); - - shouldReturn |= determine_validation_error_1D( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], norm_offset_x, j, numTries, numClamped, false, lod ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - /* - * INT output type - */ - else - { - // Validate integer results - int *resultPtr = (int *)(char *)resultValues; - int expected[4]; - float error; - for( size_t x = 0, j = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - checkOnlyOnePixel = 1; - } - - if ( gTestMipmaps ) - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, lod); - else - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected ); - - error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if (error <= MAX_ERR) - found_pixel = 1; - }//norm_offset_x - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - checkOnlyOnePixel = 1; - } - - if ( gTestMipmaps ) - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected, lod); - else - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, - imageSampler, expected ); - - - error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if( error > MAX_ERR ) - { - log_error("FAILED norm_offsets: %g:\n", norm_offset_x); - - shouldReturn |= determine_validation_error_1D( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], norm_offset_x, j, numTries, numClamped, false, lod ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - { - nextLevelOffset += width_lod * get_pixel_size(imageInfo->format); - width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1; - } - } - - return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; -} - int test_read_image_set_1D(cl_device_id device, cl_context context, cl_command_queue queue, const cl_image_format *format, @@ -1111,7 +152,9 @@ int test_read_image_set_1D(cl_device_id device, cl_context context, if( gDebugTrace ) log_info( " at size %d\n", (int)imageInfo.width ); - int retCode = test_read_image_1D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, + imageSampler, floatCoords, outputType, seed); if( retCode ) return retCode; } @@ -1133,7 +176,9 @@ int test_read_image_set_1D(cl_device_id device, cl_context context, imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), seed); if( gDebugTrace ) log_info( " at max size %d\n", (int)sizes[ idx ][ 0 ] ); - int retCode = test_read_image_1D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, + imageSampler, floatCoords, outputType, seed); if( retCode ) return retCode; } @@ -1155,7 +200,9 @@ int test_read_image_set_1D(cl_device_id device, cl_context context, log_info(" at size %d, starting round ramp at %" PRIu64 " for range %" PRIu64 "\n", (int)imageInfo.width, gRoundingStartValue, typeRange); - int retCode = test_read_image_1D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, + imageSampler, floatCoords, outputType, seed); if( retCode ) return retCode; @@ -1194,7 +241,9 @@ int test_read_image_set_1D(cl_device_id device, cl_context context, if( gDebugTrace ) log_info( " at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth ); - int retCode = test_read_image_1D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, + imageSampler, floatCoords, outputType, seed); if( retCode ) return retCode; } diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp index d55d1b09b7..4bdb3a9cbe 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp @@ -65,1064 +65,6 @@ const char *floatKernelSource1DArray = static const char *samplerKernelArg = " sampler_t imageSampler,"; -template int determine_validation_error_1D_arr( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, - T *resultPtr, T * expected, float error, - float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod ) -{ - int actualX, actualY; - int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod ); - bool clampingErr = false, clamped = false, otherClampingBug = false; - int clampedX, clampedY, ignoreMe; - - // FIXME: I do not believe this is correct for 1D or 2D image arrays; - // it will report spurious validation failure reasons since - // the clamping for such image objects is different than 1D-3D - // image objects. - clamped = get_integer_coords_offset( x, y, 0.0f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->arraySize, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe ); - - if( found ) - { - // Is it a clamping bug? - if( clamped && clampedX == actualX && clampedY == actualY ) - { - if( (--numClamped) == 0 ) - { - log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", imageInfo->width, imageInfo->arraySize ); - if( printAsFloat ) - { - log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n", - (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", - (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - return 1; - } - clampingErr = true; - otherClampingBug = true; - } - } - if( clamped && !otherClampingBug ) - { - // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously - if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 ) - { - if( (--numClamped) == 0 ) - { - log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", imageInfo->width, imageInfo->arraySize ); - if( printAsFloat ) - { - log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n", - (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", - (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - return 1; - } - clampingErr = true; - } - } - if( !clampingErr ) - { - if( printAsFloat ) - { - log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g), error of %g\n", - (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", - (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - log_error( "img size %ld,%ld (pitch %ld)", imageInfo->width, imageInfo->arraySize, imageInfo->rowPitch ); - if( clamped ) - { - log_error( " which would clamp to %d,%d\n", clampedX, clampedY ); - } - if( printAsFloat && gExtraValidateInfo) - { - log_error( "Nearby values:\n" ); - log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 ); - for( int yOff = -2; yOff <= 1; yOff++ ) - { - float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ]; - read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top ); - read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real ); - read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot ); - read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 ); - log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] ); - log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] ); - log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] ); - log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] ); - } - - if( clampedY < 1 ) - { - log_error( "Nearby values:\n" ); - log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 ); - for( int yOff = (int)imageInfo->arraySize - 2; yOff <= (int)imageInfo->arraySize + 1; yOff++ ) - { - float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ]; - read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top ); - read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real ); - read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot ); - read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 ); - log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] ); - log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] ); - log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] ); - log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] ); - } - } - } - - if( imageSampler->filter_mode != CL_FILTER_LINEAR ) - { - if( found ) - log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" ); - else - log_error( "\tValue not actually found in image\n" ); - } - log_error( "\n" ); - - numClamped = -1; // We force the clamped counter to never work - if( ( --numTries ) == 0 ) - { - return 1; - } - } - return 0; -} - -static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d , int lod) -{ - size_t i = 0; - size_t width_lod = imageInfo->width; - - if(gTestMipmaps) - width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; - - if( gDisableOffsets ) - { - for( size_t y = 0; y < imageInfo->arraySize; y++ ) - { - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) (xfract + (double) x); - yOffsets[ i ] = (float) (yfract + (double) y); - } - } - } - else - { - for( size_t y = 0; y < imageInfo->arraySize; y++ ) - { - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d ))); - yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d ))); - } - } - } - - if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) - { - i = 0; - for( size_t y = 0; y < imageInfo->arraySize; y++ ) - { - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double)width_lod - 1.0); - yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)imageInfo->arraySize - 1.0); - } - } - } - - if( normalized_coords ) - { - i = 0; - for( size_t y = 0; y < imageInfo->arraySize; y++ ) - { - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) width_lod); - } - } - } -} - -int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_kernel kernel, - image_descriptor *imageInfo, image_sampler_data *imageSampler, - bool useFloatCoords, ExplicitType outputType, MTdata d ) -{ - int error; - static int initHalf = 0; - - size_t threads[2]; - cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; - clMemWrapper xOffsets, yOffsets, results; - clSamplerWrapper actualSampler; - BufferOwningPtr maxImageUseHostPtrBackingStore; - - // The DataBuffer template class really does use delete[], not free -- IRO - BufferOwningPtr xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->arraySize)); - BufferOwningPtr yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->arraySize)); - - if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) - if( DetectFloatToHalfRoundingMode(queue) ) - return 1; - - // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here - BufferOwningPtr imageValues; - generate_random_image_data( imageInfo, imageValues, d ); - - if( gDebugTrace ) - { - log_info( " - Creating 1D image array %d by %d...\n", (int)imageInfo->width, (int)imageInfo->arraySize ); - if(gTestMipmaps) - log_info(" - and %d mip levels\n", (int)imageInfo->num_mip_levels); - } - - // Construct testing sources - clProtectedImage protImage; - clMemWrapper unprotImage; - cl_mem image; - - if(gtestTypesToRun & kReadTests) - { - image_read_write_flags = CL_MEM_READ_ONLY; - } - else - { - image_read_write_flags = CL_MEM_READ_WRITE; - } - - if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) - { - // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) - // Do not use protected images for max image size test since it rounds the row size to a page size - if (gTestMaxImages) { - generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d ); - - unprotImage = create_image_1d_array(context, - image_read_write_flags | CL_MEM_USE_HOST_PTR, - imageInfo->format, - imageInfo->width, imageInfo->arraySize, - ( gEnablePitch ? imageInfo->rowPitch : 0 ), - ( gEnablePitch ? imageInfo->slicePitch : 0), - maxImageUseHostPtrBackingStore, &error); - } else { - error = protImage.Create( context, CL_MEM_OBJECT_IMAGE1D_ARRAY, - image_read_write_flags, - imageInfo->format, - imageInfo->width, 1, 1, imageInfo->arraySize ); - } - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 1D image array of size %d x %d pitch %d (%s)\n", - (int)imageInfo->width, (int)imageInfo->arraySize, - (int)imageInfo->rowPitch, IGetErrorString( error ) ); - return error; - } - - if (gTestMaxImages) - image = (cl_mem)unprotImage; - else - image = (cl_mem)protImage; - } - else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR ) - { - // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in - unprotImage = create_image_1d_array(context, - image_read_write_flags | CL_MEM_COPY_HOST_PTR, - imageInfo->format, - imageInfo->width, imageInfo->arraySize, - ( gEnablePitch ? imageInfo->rowPitch : 0 ), - ( gEnablePitch ? imageInfo->slicePitch : 0), - imageValues, &error); - - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 1D image array of size %d x %d pitch %d (%s)\n", - (int)imageInfo->width, (int)imageInfo->arraySize, - (int)imageInfo->rowPitch, IGetErrorString( error ) ); - return error; - } - image = unprotImage; - } - else // Either CL_MEM_ALLOC_HOST_PTR or none - { - // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise - // it works just as if no flag is specified, so we just do the same thing either way - if(gTestMipmaps) - { - cl_image_desc image_desc = {0}; - image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; - image_desc.image_width = imageInfo->width; - image_desc.image_array_size = imageInfo->arraySize; - image_desc.num_mip_levels = imageInfo->num_mip_levels; - - unprotImage = clCreateImage( context, - image_read_write_flags, - imageInfo->format, &image_desc, NULL, &error); - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create %d level mipmapped 1D image array of size %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); - return error; - } - } - else - { - unprotImage = create_image_1d_array(context, - image_read_write_flags | gMemFlagsToUse, - imageInfo->format, - imageInfo->width, imageInfo->arraySize, - ( gEnablePitch ? imageInfo->rowPitch : 0 ), - ( gEnablePitch ? imageInfo->slicePitch : 0), - imageValues, &error); - - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 1D image array of size %d x %d pitch %d (%s)\n", - (int)imageInfo->width, (int)imageInfo->arraySize, - (int)imageInfo->rowPitch, IGetErrorString( error ) ); - return error; - } - } - image = unprotImage; - } - - if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR ) - { - if( gDebugTrace ) - log_info( " - Writing image...\n" ); - - size_t origin[ 3 ] = { 0, 0, 0 }; - size_t region[ 3 ] = { imageInfo->width, imageInfo->arraySize, 1 }; - - if(gTestMipmaps) - { - int nextLevelOffset = 0; - - for (int i =0; i < imageInfo->num_mip_levels; i++) - { origin[2] = i; - error = clEnqueueWriteImage(queue, image, CL_TRUE, - origin, region, /*gEnablePitch ? imageInfo->rowPitch :*/ 0, /*gEnablePitch ? imageInfo->slicePitch :*/ 0, - ((char*)imageValues + nextLevelOffset), 0, NULL, NULL); - if (error != CL_SUCCESS) - { - log_error( "ERROR: Unable to write to %d level mipmapped 3D image of size %d x %d x %d\n", (int)imageInfo->num_mip_levels,(int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth ); - return error; - } - nextLevelOffset += region[0]*region[1]*get_pixel_size(imageInfo->format); - //Subsequent mip level dimensions keep halving - region[0] = region[0] >> 1 ? region[0] >> 1 : 1; - } - } - else - { - error = clEnqueueWriteImage(queue, image, CL_TRUE, - origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0, - imageValues, 0, NULL, NULL); - if (error != CL_SUCCESS) - { - log_error( "ERROR: Unable to write to %d level 1D image array of size %d x %d\n", - (int)imageInfo->num_mip_levels, - (int)imageInfo->width, (int)imageInfo->arraySize ); - return error; - } - } - } - - if( gDebugTrace ) - log_info( " - Creating kernel arguments...\n" ); - - xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->arraySize, - xOffsetValues, &error); - test_error( error, "Unable to create x offset buffer" ); - - yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->arraySize, - yOffsetValues, &error); - test_error( error, "Unable to create y offset buffer" ); - - results = clCreateBuffer(context, CL_MEM_READ_WRITE, - get_explicit_type_size(outputType) * 4 - * imageInfo->width * imageInfo->arraySize, - NULL, &error); - test_error( error, "Unable to create result buffer" ); - - // Create sampler to use - actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error); - test_error(error, "Unable to create image sampler"); - - // Set arguments - int idx = 0; - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image ); - test_error( error, "Unable to set kernel arguments" ); - if( !gUseKernelSamplers ) - { - error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); - test_error( error, "Unable to set kernel arguments" ); - } - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); - test_error( error, "Unable to set kernel arguments" ); - - // A cast of troublesome offsets. The first one has to be zero. - const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 }; - int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] ); - int numTries = MAX_TRIES, numClamped = MAX_CLAMPED; - int loopCount = 2 * float_offset_count; - if( ! useFloatCoords ) - loopCount = 1; - if (gTestMaxImages) { - loopCount = 1; - log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]); - } - - // Get the maximum absolute error for this format - if(gtestTypesToRun & kReadWriteTests) - { - loopCount = 1; - } - - // Get the maximum absolute error for this format - double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); - if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError); - - if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) { - initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue ); - if (initHalf) { - log_info("Half rounding mode successfully detected.\n"); - } - } - - size_t width_lod = imageInfo->width; - size_t nextLevelOffset = 0; - char * imagePtr; - for(int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) - { - size_t resultValuesSize = width_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4; - BufferOwningPtr resultValues(malloc(resultValuesSize)); - float lod_float = (float)lod; - if (gTestMipmaps) { - //Set the lod kernel arg - if(gDebugTrace) - log_info(" - Working at mip level %d\n", lod); - error = clSetKernelArg( kernel, idx, sizeof( float ), &lod_float); - test_error( error, "Unable to set kernel arguments" ); - } - - for( int q = 0; q < loopCount; q++ ) - { - float offset = float_offsets[ q % float_offset_count ]; - - // Init the coordinates - InitFloatCoords(imageInfo, imageSampler, xOffsetValues, yOffsetValues, - q>=float_offset_count ? -offset: offset, - q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d, lod ); - - error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->arraySize * imageInfo->width, xOffsetValues, 0, NULL, NULL ); - test_error( error, "Unable to write x offsets" ); - error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->arraySize * imageInfo->width, yOffsetValues, 0, NULL, NULL ); - test_error( error, "Unable to write y offsets" ); - - // Get results - memset( resultValues, 0xff, resultValuesSize ); - clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); - - // Run the kernel - threads[0] = (size_t)width_lod; - threads[1] = (size_t)imageInfo->arraySize; - error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); - test_error( error, "Unable to run kernel" ); - - if( gDebugTrace ) - log_info( " reading results, %ld kbytes\n", (unsigned long)( width_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4 / 1024 ) ); - - error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); - test_error( error, "Unable to read results from kernel" ); - if( gDebugTrace ) - log_info( " results read\n" ); - - // Validate results element by element - imagePtr = (char*)imageValues + nextLevelOffset; - /* - * FLOAT output type, order= sRGB - */ - if(is_sRGBA_order(imageInfo->format->image_channel_order) && ( outputType == kFloat )) - { - // Validate float results - float *resultPtr = (float *)(char *)resultValues; - float expected[4], error=0.0f; - float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); - for( size_t y = 0, j = 0; y < imageInfo->arraySize; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords - || imageSampler->filter_mode != CL_FILTER_NEAREST - || NORM_OFFSET == 0 -#if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly - // rounded address arithmetic in all modes - || !(gDeviceType & CL_DEVICE_TYPE_GPU) -#endif - ) - offset = 0.0f; // Loop only once - - for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) { - - - // Try sampling the pixel, without flushing denormals. - int containsDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, 0, &containsDenormals, lod ); - - float err1 = ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - float err2 = ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - float err3 = ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr = 0.5; - - // Check if the result matches. - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || - ! (err3 <= maxErr) || ! (err4 <= maxErr) ) - { - //try flushing the denormals, if there is a failure. - if( containsDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - err2 = ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - err3 = ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - err4 = ABS_ERROR(resultPtr[3], expected[3]); - } - } - - // If the final result DOES match, then we've found a valid result and we're done with this pixel. - found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr); - }//norm_offset_x - }//norm_offset_y - - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - checkOnlyOnePixel = 1; - } - - int containsDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, 0, &containsDenormals, lod ); - - float err1 = ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - float err2 = ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - float err3 = ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - float err4 = - ABS_ERROR(resultPtr[3], expected[3]); - - float maxErr = 0.6; - - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || - ! (err3 <= maxErr) || ! (err4 <= maxErr) ) - { - //try flushing the denormals, if there is a failure. - if( containsDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - err2 = ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - err3 = ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || - ! (err3 <= maxErr) || ! (err4 <= maxErr) ) - { - log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); - - float tempOut[4]; - shouldReturn |= determine_validation_error_1D_arr( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod ); - - log_error( "Step by step:\n" ); - FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod ); - log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", - Ulp_Error( resultPtr[0], expected[0] ), - Ulp_Error( resultPtr[1], expected[1] ), - Ulp_Error( resultPtr[2], expected[2] ), - Ulp_Error( resultPtr[3], expected[3] ), - Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); - - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - - }//norm_offset_x - }//norm_offset_y - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - /* - * FLOAT output type - */ - else if( outputType == kFloat ) - { - // Validate float results - float *resultPtr = (float *)(char *)resultValues; - float expected[4], error=0.0f; - float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); - for( size_t y = 0, j = 0; y < imageInfo->arraySize; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords - || imageSampler->filter_mode != CL_FILTER_NEAREST - || NORM_OFFSET == 0 -#if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly - // rounded address arithmetic in all modes - || !(gDeviceType & CL_DEVICE_TYPE_GPU) -#endif - ) - offset = 0.0f; // Loop only once - - for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) { - - - // Try sampling the pixel, without flushing denormals. - int containsDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, 0, &containsDenormals, lod ); - - float err1 = ABS_ERROR(resultPtr[0], expected[0]); - float err2 = ABS_ERROR(resultPtr[1], expected[1]); - float err3 = ABS_ERROR(resultPtr[2], expected[2]); - float err4 = ABS_ERROR(resultPtr[3], expected[3]); - // Clamp to the minimum absolute error for the format - if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } - if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } - if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = - std::max(maxErr * maxPixel.p[0], FLT_MIN); - float maxErr2 = - std::max(maxErr * maxPixel.p[1], FLT_MIN); - float maxErr3 = - std::max(maxErr * maxPixel.p[2], FLT_MIN); - float maxErr4 = - std::max(maxErr * maxPixel.p[3], FLT_MIN); - - // Check if the result matches. - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || - ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) - { - //try flushing the denormals, if there is a failure. - if( containsDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr1 += 4 * FLT_MIN; - maxErr2 += 4 * FLT_MIN; - maxErr3 += 4 * FLT_MIN; - maxErr4 += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(resultPtr[0], expected[0]); - err2 = ABS_ERROR(resultPtr[1], expected[1]); - err3 = ABS_ERROR(resultPtr[2], expected[2]); - err4 = ABS_ERROR(resultPtr[3], expected[3]); - } - } - - // If the final result DOES match, then we've found a valid result and we're done with this pixel. - found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4); - }//norm_offset_x - }//norm_offset_y - - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - checkOnlyOnePixel = 1; - } - - int containsDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, 0, &containsDenormals, lod ); - - float err1 = - ABS_ERROR(resultPtr[0], expected[0]); - float err2 = - ABS_ERROR(resultPtr[1], expected[1]); - float err3 = - ABS_ERROR(resultPtr[2], expected[2]); - float err4 = - ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = - std::max(maxErr * maxPixel.p[0], FLT_MIN); - float maxErr2 = - std::max(maxErr * maxPixel.p[1], FLT_MIN); - float maxErr3 = - std::max(maxErr * maxPixel.p[2], FLT_MIN); - float maxErr4 = - std::max(maxErr * maxPixel.p[3], FLT_MIN); - - - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || - ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) - { - //try flushing the denormals, if there is a failure. - if( containsDenormals ) - { - maxErr1 += 4 * FLT_MIN; - maxErr2 += 4 * FLT_MIN; - maxErr3 += 4 * FLT_MIN; - maxErr4 += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(resultPtr[0], - expected[0]); - err2 = ABS_ERROR(resultPtr[1], - expected[1]); - err3 = ABS_ERROR(resultPtr[2], - expected[2]); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || - ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) - { - log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); - - float tempOut[4]; - shouldReturn |= determine_validation_error_1D_arr( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod ); - - log_error( "Step by step:\n" ); - FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod ); - log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", - Ulp_Error( resultPtr[0], expected[0] ), - Ulp_Error( resultPtr[1], expected[1] ), - Ulp_Error( resultPtr[2], expected[2] ), - Ulp_Error( resultPtr[3], expected[3] ), - Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); - - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - - }//norm_offset_x - }//norm_offset_y - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - /* - * UINT output type - */ - else if( outputType == kUInt ) - { - // Validate unsigned integer results - unsigned int *resultPtr = (unsigned int *)(char *)resultValues; - unsigned int expected[4]; - float error; - for( size_t y = 0, j = 0; y < imageInfo->arraySize; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - checkOnlyOnePixel = 1; - } - - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, lod ); - - - error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if (error <= MAX_ERR) - found_pixel = 1; - }//norm_offset_x - }//norm_offset_y - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - checkOnlyOnePixel = 1; - } - - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, lod ); - - - error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if( error > MAX_ERR ) - { - log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); - - shouldReturn |= determine_validation_error_1D_arr( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_x - }//norm_offset_y - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - /* - * INT output type - */ - else - { - // Validate integer results - int *resultPtr = (int *)(char *)resultValues; - int expected[4]; - float error; - for( size_t y = 0, j = 0; y < imageInfo->arraySize; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - checkOnlyOnePixel = 1; - } - - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, lod ); - - - error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if (error <= MAX_ERR) - found_pixel = 1; - }//norm_offset_x - }//norm_offset_y - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - checkOnlyOnePixel = 1; - } - - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, - imageSampler, expected, lod ); - - - error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if( error > MAX_ERR ) - { - log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); - - shouldReturn |= determine_validation_error_1D_arr( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_x - }//norm_offset_y - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - } - { - nextLevelOffset += width_lod * imageInfo->arraySize * get_pixel_size(imageInfo->format); - width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1; - } - } - - return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; -} - int test_read_image_set_1D_array(cl_device_id device, cl_context context, cl_command_queue queue, const cl_image_format *format, @@ -1213,7 +155,9 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context, if( gDebugTrace ) log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize ); - int retCode = test_read_image_1D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = test_read_image(context, queue, kernel, + &imageInfo, imageSampler, + floatCoords, outputType, seed); if( retCode ) return retCode; } @@ -1237,7 +181,9 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context, imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), seed); if( gDebugTrace ) log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] ); - int retCode = test_read_image_1D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, + imageSampler, floatCoords, outputType, seed); if( retCode ) return retCode; } @@ -1266,7 +212,9 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context, " for range %" PRIu64 "\n", (int)imageInfo.width, (int)imageInfo.arraySize, gRoundingStartValue, typeRange); - int retCode = test_read_image_1D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, + imageSampler, floatCoords, outputType, seed); if( retCode ) return retCode; @@ -1307,7 +255,9 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context, if( gDebugTrace ) log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize ); - int retCode = test_read_image_1D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, + imageSampler, floatCoords, outputType, seed); if( retCode ) return retCode; } diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp index 72f1238d3d..5a47876032 100644 --- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp @@ -85,1260 +85,6 @@ const char *float2DArrayUnnormalizedCoordKernelSource = static const char *samplerKernelArg = " sampler_t imageSampler,"; -template int determine_validation_error_offset_2D_array( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, - T *resultPtr, T * expected, float error, - float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod ) -{ - int actualX, actualY, actualZ; - int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, &actualZ, lod ); - bool clampingErr = false, clamped = false, otherClampingBug = false; - int clampedX, clampedY, clampedZ; - - size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, imageDepth = imageInfo->arraySize; - - clamped = get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, imageWidth, imageHeight, imageDepth, imageSampler, imageInfo, clampedX, clampedY, clampedZ ); - - if( found ) - { - // Is it a clamping bug? - if( clamped && clampedX == actualX && clampedY == actualY && clampedZ == actualZ ) - { - if( (--numClamped) == 0 ) - { - log_error( "\nERROR: TEST FAILED! Read is erroneously clamping coordinates!\n" ); - if( printAsFloat ) - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" - " Expected (%g,%g,%g,%g)\n" - " Observed (%g,%g,%g,%g)\n" - " error of %g\n", - j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" - " Expected (%x,%x,%x,%x)\n" - " Observed (%x,%x,%x,%x)\n", - j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - - if( imageSampler->filter_mode != CL_FILTER_LINEAR ) - { - if( found ) - log_error( "\tValue really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" : "unique" ); - else - log_error( "\tValue not actually found in image\n" ); - } - log_error( "\n" ); - - return -1; - } - clampingErr = true; - otherClampingBug = true; - } - } - if( clamped && !otherClampingBug ) - { - // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously - if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 ) - { - if( (--numClamped) == 0 ) - { - log_error( "\nERROR: TEST FAILED: Clamping is erroneously returning border color!\n" ); - if( printAsFloat ) - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" - " Expected (%g,%g,%g,%g)\n" - " Observed (%g,%g,%g,%g)\n" - " error of %g\n", - j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" - " Expected (%x,%x,%x,%x)\n" - " Observed (%x,%x,%x,%x)\n", - j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - return -1; - } - clampingErr = true; - } - } - if( !clampingErr ) - { - if( true ) // gExtraValidateInfo ) - { - if( printAsFloat ) - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" - " Expected (%g,%g,%g,%g)\n" - " Observed (%g,%g,%g,%g)\n" - " error of %g\n", - j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], - (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); - } - else - { - log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" - " Expected (%x,%x,%x,%x)\n" - " Observed (%x,%x,%x,%x)\n", - j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], - (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); - } - log_error( "Integer coords resolve to %d,%d,%d, image size = %d,%d,%d\n", clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, (int)imageDepth ); - - if( printAsFloat && gExtraValidateInfo ) - { - log_error( "\nNearby values:\n" ); - for( int zOff = -1; zOff <= 1; zOff++ ) - { - for( int yOff = -1; yOff <= 1; yOff++ ) - { - float top[ 4 ], real[ 4 ], bot[ 4 ]; - read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 , clampedY + yOff, clampedZ + zOff, top ); - read_image_pixel_float( imagePtr, imageInfo, clampedX ,clampedY + yOff, clampedZ + zOff, real ); - read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, clampedZ + zOff, bot ); - log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] ); - log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] ); - log_error( " (%g,%g,%g,%g)\n",bot[0], bot[1], bot[2], bot[3] ); - } - } - } - if( imageSampler->filter_mode != CL_FILTER_LINEAR ) - { - if( found ) - log_error( "Value really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" : "unique" ); - else - log_error( "Value not actually found in image\n" ); - } - log_error( "\n" ); - } - - numClamped = -1; // We force the clamped counter to never work - if( ( --numTries ) == 0 ) - return -1; - } - return 0; -} - -static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float *zOffsets, float xfract, float yfract, float zfract, int normalized_coords, MTdata d , int lod) -{ - size_t i = 0; - size_t width_lod = imageInfo->width; - size_t height_lod = imageInfo->height; - if(gTestMipmaps) - { - width_lod = ( imageInfo->width >> lod) ?( imageInfo->width >> lod) : 1; - height_lod = ( imageInfo->height >> lod) ?( imageInfo->height >> lod) : 1; - - } - if( gDisableOffsets ) - { - for( size_t z = 0; z < imageInfo->arraySize; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) (xfract + (double) x); - yOffsets[ i ] = (float) (yfract + (double) y); - zOffsets[ i ] = (float) (zfract + (double) z); - } - } - } - } - else - { - for( size_t z = 0; z < imageInfo->arraySize; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d ))); - yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d ))); - zOffsets[ i ] = (float) (zfract + (double) ((int) z + random_in_range( -10, 10, d ))); - } - } - } - } - - if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) - { - i = 0; - for( size_t z = 0; z < imageInfo->arraySize; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0); - yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double) height_lod - 1.0); - zOffsets[ i ] = (float) CLAMP( (double) zOffsets[ i ], 0.0, (double) imageInfo->arraySize - 1.0); - } - } - } - } - - if( normalized_coords ) - { - i = 0; - for( size_t z = 0; z < imageInfo->arraySize; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, i++ ) - { - xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) width_lod); - yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) height_lod); - } - } - } - } -} - -int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_kernel kernel, - image_descriptor *imageInfo, image_sampler_data *imageSampler, - bool useFloatCoords, ExplicitType outputType, MTdata d ) -{ - int error; - size_t threads[3]; - static int initHalf = 0; - cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; - - clMemWrapper xOffsets, yOffsets, zOffsets, results; - clSamplerWrapper actualSampler; - BufferOwningPtr maxImageUseHostPtrBackingStore; - - // Create offset data - BufferOwningPtr xOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize)); - BufferOwningPtr yOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize)); - BufferOwningPtr zOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize)); - - BufferOwningPtr imageValues; - generate_random_image_data( imageInfo, imageValues, d ); - - // Construct testing sources - clProtectedImage protImage; - clMemWrapper unprotImage; - cl_mem image; - - if(gtestTypesToRun & kReadTests) - { - image_read_write_flags = CL_MEM_READ_ONLY; - } - else - { - image_read_write_flags = CL_MEM_READ_WRITE; - } - - if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) - { - // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) - // Do not use protected images for max image size test since it rounds the row size to a page size - if (gTestMaxImages) { - generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d ); - unprotImage = create_image_2d_array( context, - image_read_write_flags | CL_MEM_USE_HOST_PTR, - imageInfo->format, - imageInfo->width, imageInfo->height, - imageInfo->arraySize, - ( gEnablePitch ? imageInfo->rowPitch : 0 ), - ( gEnablePitch ? imageInfo->slicePitch : 0 ), - maxImageUseHostPtrBackingStore, &error ); - } else { - error = protImage.Create( context, CL_MEM_OBJECT_IMAGE2D_ARRAY, - image_read_write_flags, - imageInfo->format, imageInfo->width, imageInfo->height, 1, imageInfo->arraySize ); - } - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); - return error; - } - if (gTestMaxImages) - image = (cl_mem)unprotImage; - else - image = (cl_mem)protImage; - } - else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR ) - { - // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in - unprotImage = create_image_2d_array( context, - image_read_write_flags | CL_MEM_COPY_HOST_PTR, - imageInfo->format, - imageInfo->width, - imageInfo->height, - imageInfo->arraySize, - ( gEnablePitch ? imageInfo->rowPitch : 0 ), - ( gEnablePitch ? imageInfo->slicePitch : 0 ), - imageValues, &error ); - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); - return error; - } - image = unprotImage; - } - else // Either CL_MEM_ALLOC_HOST_PTR or none - { - if ( gTestMipmaps ) - { - cl_image_desc image_desc = {0}; - image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; - image_desc.image_width = imageInfo->width; - image_desc.image_height = imageInfo->height; - image_desc.image_array_size = imageInfo->arraySize; - //image_desc.image_rowPitch = imageInfo->rowPitch; - //image_desc.image_slicePitch = imageInfo->slicePitch; - image_desc.num_mip_levels = imageInfo->num_mip_levels; - - unprotImage = clCreateImage( context, - image_read_write_flags, - imageInfo->format, &image_desc, NULL, &error); - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create %d level mipmapped 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); - return error; - } - } - else - { - // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise - // it works just as if no flag is specified, so we just do the same thing either way - unprotImage = create_image_2d_array( context, - image_read_write_flags | gMemFlagsToUse, - imageInfo->format, - imageInfo->width, imageInfo->height, imageInfo->arraySize, - ( gEnablePitch ? imageInfo->rowPitch : 0 ), - ( gEnablePitch ? imageInfo->slicePitch : 0 ), - imageValues, &error ); - if( error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); - return error; - } - } - image = unprotImage; - } - - if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR ) - { - if( gDebugTrace ) - log_info( " - Writing image...\n" ); - - size_t origin[ 4 ] = { 0, 0, 0, 0 }; - size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->arraySize }; - size_t tmpNextLevelOffset = 0; - - if( gTestMipmaps ) - { - for(int level = 0; level < imageInfo->num_mip_levels; level++) - { - origin[3] = level; - error = clEnqueueWriteImage(queue, image, CL_TRUE, - origin, region, /*gEnablePitch ? imageInfo->rowPitch :*/ 0, /*gEnablePitch ? imageInfo->slicePitch :*/ 0, - imageValues + tmpNextLevelOffset, 0, NULL, NULL); - if (error != CL_SUCCESS) - { - log_error( "ERROR: Unable to write to level %d of 2D image array of size %d x %d x %d\n", (int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize ); - return error; - } - - tmpNextLevelOffset += region[0]*region[1]*region[2]*get_pixel_size(imageInfo->format); - region[0] = ( region[0] >> 1 ) ? ( region[0] >> 1 ) : 1; - region[1] = ( region[1] >> 1 ) ? ( region[1] >> 1 ) : 1; - } - } - else - { - error = clEnqueueWriteImage(queue, image, CL_TRUE, - origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, - imageValues, 0, NULL, NULL); - if (error != CL_SUCCESS) - { - log_error( "ERROR: Unable to write to 2D image array of size %d x %d x %d\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize ); - return error; - } - } - } - - xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->arraySize, - xOffsetValues, &error); - test_error( error, "Unable to create x offset buffer" ); - yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->arraySize, - yOffsetValues, &error); - test_error( error, "Unable to create y offset buffer" ); - zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->arraySize, - zOffsetValues, &error); - test_error( error, "Unable to create y offset buffer" ); - results = - clCreateBuffer(context, CL_MEM_READ_WRITE, - get_explicit_type_size(outputType) * 4 * imageInfo->width - * imageInfo->height * imageInfo->arraySize, - NULL, &error); - test_error( error, "Unable to create result buffer" ); - - // Create sampler to use - actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error); - test_error(error, "Unable to create image sampler"); - - // Set arguments - int idx = 0; - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image ); - test_error( error, "Unable to set kernel arguments" ); - if( !gUseKernelSamplers ) - { - error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); - test_error( error, "Unable to set kernel arguments" ); - } - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &zOffsets ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); - test_error( error, "Unable to set kernel arguments" ); - - const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 }; - int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] ); - int numTries = MAX_TRIES, numClamped = MAX_CLAMPED; - int loopCount = 2 * float_offset_count; - if( ! useFloatCoords ) - loopCount = 1; - if (gTestMaxImages) { - loopCount = 1; - log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]); - } - - // Get the maximum absolute error for this format - double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); - if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError); - - if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) { - initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue ); - if (initHalf) { - log_info("Half rounding mode successfully detected.\n"); - } - } - size_t nextLevelOffset = 0; - size_t width_lod = imageInfo->width, height_lod = imageInfo->height; - for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++) - { - size_t resultValuesSize = width_lod * height_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4; - BufferOwningPtr resultValues(malloc( resultValuesSize )); - float lod_float = (float)lod; - if( gTestMipmaps ) - { - if (gDebugTrace) log_info(" - Working at mip level %zu\n", lod); - error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float); - } - for( int q = 0; q < loopCount; q++ ) - { - float offset = float_offsets[ q % float_offset_count ]; - - // Init the coordinates - InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues, zOffsetValues, - q>=float_offset_count ? -offset: offset, - q>=float_offset_count ? offset: -offset, - q>=float_offset_count ? -offset: offset, - imageSampler->normalized_coords, d, lod ); - - error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, xOffsetValues, 0, NULL, NULL ); - test_error( error, "Unable to write x offsets" ); - error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, yOffsetValues, 0, NULL, NULL ); - test_error( error, "Unable to write y offsets" ); - error = clEnqueueWriteBuffer( queue, zOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, zOffsetValues, 0, NULL, NULL ); - test_error( error, "Unable to write z offsets" ); - - - memset( resultValues, 0xff, resultValuesSize ); - clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); - - // Figure out thread dimensions - threads[0] = (size_t)width_lod; - threads[1] = (size_t)height_lod; - threads[2] = (size_t)imageInfo->arraySize; - - // Run the kernel - error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL ); - test_error( error, "Unable to run kernel" ); - - // Get results - error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); - test_error( error, "Unable to read results from kernel" ); - if( gDebugTrace ) - log_info( " results read\n" ); - - // Validate results element by element - char *imagePtr = (char *)imageValues + nextLevelOffset; - - if((imageInfo->format->image_channel_order == CL_DEPTH) && (outputType == kFloat) ) - { - // Validate float results - float *resultPtr = (float *)(char *)resultValues; - float expected[4], error=0.0f; - float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); - - for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords - || imageSampler->filter_mode != CL_FILTER_NEAREST - || NORM_OFFSET == 0 -#if defined( __APPLE__ ) - // Apple requires its CPU implementation to do - // correctly rounded address arithmetic in all modes - || !(gDeviceType & CL_DEVICE_TYPE_GPU) -#endif - ) - offset = 0.0f; // Loop only once - - for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) { - - int hasDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, &hasDenormals, lod ); - - float err1 = - ABS_ERROR(resultPtr[0], expected[0]); - // Clamp to the minimum absolute error for the format - if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - float maxErr1 = std::max( - maxErr * maxPixel.p[0], FLT_MIN); - - if( ! (err1 <= maxErr1) ) - { - // Try flushing the denormals - if( hasDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr1 += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(resultPtr[0], - expected[0]); - } - } - - found_pixel = (err1 <= maxErr1); - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - int hasDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, &hasDenormals, lod ); - - float err1 = ABS_ERROR(resultPtr[0], - expected[0]); - float maxErr1 = std::max( - maxErr * maxPixel.p[0], FLT_MIN); - - - if( ! (err1 <= maxErr1) ) - { - // Try flushing the denormals - if( hasDenormals ) - { - maxErr1 += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(resultPtr[0], - expected[0]); - } - } - - if( ! (err1 <= maxErr1) ) - { - log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); - - float tempOut[4]; - shouldReturn |= determine_validation_error_offset_2D_array( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], - norm_offset_x, norm_offset_y, norm_offset_z, j, - numTries, numClamped, true, lod ); - log_error( "Step by step:\n" ); - FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); - log_error( "\tulps: %2.2f (max allowed: %2.2f)\n\n", - Ulp_Error( resultPtr[0], expected[0] ), - Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 1; - } - } - } - } - /* - * FLOAT output type, order=CL_sRGBA, CL_sRGB, CL_sRGBx, CL_BGRA - */ - else if(is_sRGBA_order(imageInfo->format->image_channel_order) && (outputType == kFloat) ) - { - // Validate float results - float *resultPtr = (float *)(char *)resultValues; - float expected[4], error=0.0f; - float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); - - for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords - || imageSampler->filter_mode != CL_FILTER_NEAREST - || NORM_OFFSET == 0 -#if defined( __APPLE__ ) - // Apple requires its CPU implementation to do - // correctly rounded address arithmetic in all modes - || !(gDeviceType & CL_DEVICE_TYPE_GPU) -#endif - ) - offset = 0.0f; // Loop only once - - for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) { - - int hasDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, &hasDenormals, lod ); - - float err1 = - ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - float err2 = - ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - float err3 = - ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - float err4 = - ABS_ERROR(resultPtr[3], expected[3]); - float maxErr = 0.5; - - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! (err4 <= maxErr) ) - { - // Try flushing the denormals - if( hasDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, NULL, lod ); - - err1 = - ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - err2 = - ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - err3 = - ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - - found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr); - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - int hasDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, &hasDenormals, lod ); - - float err1 = - ABS_ERROR(sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - float err2 = - ABS_ERROR(sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - float err3 = - ABS_ERROR(sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - float err4 = ABS_ERROR(resultPtr[3], - expected[3]); - float maxErr = 0.6; - - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! (err4 <= maxErr) ) - { - // Try flushing the denormals - if( hasDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR( - sRGBmap(resultPtr[0]), - sRGBmap(expected[0])); - err2 = ABS_ERROR( - sRGBmap(resultPtr[1]), - sRGBmap(expected[1])); - err3 = ABS_ERROR( - sRGBmap(resultPtr[2]), - sRGBmap(expected[2])); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - - if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! (err4 <= maxErr) ) - { - log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); - - float tempOut[4]; - shouldReturn |= determine_validation_error_offset_2D_array( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], - norm_offset_x, norm_offset_y, norm_offset_z, j, - numTries, numClamped, true, lod ); - log_error( "Step by step:\n" ); - FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); - log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", - Ulp_Error( resultPtr[0], expected[0] ), - Ulp_Error( resultPtr[1], expected[1] ), - Ulp_Error( resultPtr[2], expected[2] ), - Ulp_Error( resultPtr[3], expected[3] ), - Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - } - /* - * FLOAT output type - */ - else if( outputType == kFloat ) - { - // Validate float results - float *resultPtr = (float *)(char *)resultValues; - float expected[4], error=0.0f; - float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); - - for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords - || imageSampler->filter_mode != CL_FILTER_NEAREST - || NORM_OFFSET == 0 -#if defined( __APPLE__ ) - // Apple requires its CPU implementation to do - // correctly rounded address arithmetic in all modes - || !(gDeviceType & CL_DEVICE_TYPE_GPU) -#endif - ) - offset = 0.0f; // Loop only once - - for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) { - - int hasDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, &hasDenormals, lod ); - - float err1 = - ABS_ERROR(resultPtr[0], expected[0]); - float err2 = - ABS_ERROR(resultPtr[1], expected[1]); - float err3 = - ABS_ERROR(resultPtr[2], expected[2]); - float err4 = - ABS_ERROR(resultPtr[3], expected[3]); - // Clamp to the minimum absolute error for the format - if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } - if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } - if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = std::max( - maxErr * maxPixel.p[0], FLT_MIN); - float maxErr2 = std::max( - maxErr * maxPixel.p[1], FLT_MIN); - float maxErr3 = std::max( - maxErr * maxPixel.p[2], FLT_MIN); - float maxErr4 = std::max( - maxErr * maxPixel.p[3], FLT_MIN); - - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) - { - // Try flushing the denormals - if( hasDenormals ) - { - // If implementation decide to flush subnormals to zero, - // max error needs to be adjusted - maxErr1 += 4 * FLT_MIN; - maxErr2 += 4 * FLT_MIN; - maxErr3 += 4 * FLT_MIN; - maxErr4 += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(resultPtr[0], - expected[0]); - err2 = ABS_ERROR(resultPtr[1], - expected[1]); - err3 = ABS_ERROR(resultPtr[2], - expected[2]); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - - found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4); - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - int hasDenormals = 0; - FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0, &hasDenormals, lod ); - - float err1 = ABS_ERROR(resultPtr[0], - expected[0]); - float err2 = ABS_ERROR(resultPtr[1], - expected[1]); - float err3 = ABS_ERROR(resultPtr[2], - expected[2]); - float err4 = ABS_ERROR(resultPtr[3], - expected[3]); - float maxErr1 = std::max( - maxErr * maxPixel.p[0], FLT_MIN); - float maxErr2 = std::max( - maxErr * maxPixel.p[1], FLT_MIN); - float maxErr3 = std::max( - maxErr * maxPixel.p[2], FLT_MIN); - float maxErr4 = std::max( - maxErr * maxPixel.p[3], FLT_MIN); - - - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) - { - // Try flushing the denormals - if( hasDenormals ) - { - maxErr1 += 4 * FLT_MIN; - maxErr2 += 4 * FLT_MIN; - maxErr3 += 4 * FLT_MIN; - maxErr4 += 4 * FLT_MIN; - - maxPixel = sample_image_pixel_float( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - imageSampler, expected, 0, NULL, lod ); - - err1 = ABS_ERROR(resultPtr[0], - expected[0]); - err2 = ABS_ERROR(resultPtr[1], - expected[1]); - err3 = ABS_ERROR(resultPtr[2], - expected[2]); - err4 = ABS_ERROR(resultPtr[3], - expected[3]); - } - } - - if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) - { - log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); - - float tempOut[4]; - shouldReturn |= determine_validation_error_offset_2D_array( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], - norm_offset_x, norm_offset_y, norm_offset_z, j, - numTries, numClamped, true, lod ); - log_error( "Step by step:\n" ); - FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); - log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", - Ulp_Error( resultPtr[0], expected[0] ), - Ulp_Error( resultPtr[1], expected[1] ), - Ulp_Error( resultPtr[2], expected[2] ), - Ulp_Error( resultPtr[3], expected[3] ), - Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - } - /* - * UINT output type - */ - else if( outputType == kUInt ) - { - // Validate unsigned integer results - unsigned int *resultPtr = (unsigned int *)(char *)resultValues; - unsigned int expected[4]; - float error; - for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - norm_offset_z = 0.0f; - checkOnlyOnePixel = 1; - } - - if(gTestMipmaps) - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, lod ); - else - sample_image_pixel_offset( imageValues, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected ); - - error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if (error < MAX_ERR) - found_pixel = 1; - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType - & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - norm_offset_z = 0.0f; - checkOnlyOnePixel = 1; - } - - if(gTestMipmaps) - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, lod ); - else - sample_image_pixel_offset( imageValues, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected ); - - error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if( error > MAX_ERR ) - { - log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); - shouldReturn |= determine_validation_error_offset_2D_array( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], - norm_offset_x, norm_offset_y, norm_offset_z, - j, numTries, numClamped, false, lod ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - } - else - /* - * INT output type - */ - { - // Validate integer results - int *resultPtr = (int *)(char *)resultValues; - int expected[4]; - float error; - for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ ) - { - for( size_t y = 0; y < height_lod; y++ ) - { - for( size_t x = 0; x < width_lod; x++, j++ ) - { - // Step 1: go through and see if the results verify for the pixel - // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - int checkOnlyOnePixel = 0; - int found_pixel = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - norm_offset_z = 0.0f; - checkOnlyOnePixel = 1; - } - - if(gTestMipmaps) - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, lod ); - else - sample_image_pixel_offset( imageValues, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected ); - - error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if (error < MAX_ERR) - found_pixel = 1; - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - - // Step 2: If we did not find a match, then print out debugging info. - if (!found_pixel) { - // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the - // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. - checkOnlyOnePixel = 0; - int shouldReturn = 0; - for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { - for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { - for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { - - // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) - // E.g., test one pixel. - if (!imageSampler->normalized_coords - || !(gDeviceType - & CL_DEVICE_TYPE_GPU) - || NORM_OFFSET == 0) - { - norm_offset_x = 0.0f; - norm_offset_y = 0.0f; - norm_offset_z = 0.0f; - checkOnlyOnePixel = 1; - } - - if(gTestMipmaps) - sample_image_pixel_offset( imagePtr, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, lod ); - else - sample_image_pixel_offset( imageValues, imageInfo, - xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], - norm_offset_x, norm_offset_y, norm_offset_z, - imageSampler, expected, 0 ); - - error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), - errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); - - if( error > MAX_ERR ) - { - log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); - shouldReturn |= determine_validation_error_offset_2D_array( imagePtr, imageInfo, imageSampler, resultPtr, - expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], - norm_offset_x, norm_offset_y, norm_offset_z, - j, numTries, numClamped, false, lod ); - } else { - log_error("Test error: we should have detected this passing above.\n"); - } - }//norm_offset_z - }//norm_offset_y - }//norm_offset_x - if( shouldReturn ) - return 1; - } // if (!found_pixel) - - resultPtr += 4; - } - } - } - } - } - { - nextLevelOffset += width_lod * height_lod * imageInfo->arraySize * get_pixel_size(imageInfo->format); - width_lod = ( width_lod >> 1 ) ? ( width_lod >> 1 ) : 1; - height_lod = ( height_lod >> 1 ) ? ( height_lod >> 1 ) : 1; - } - } - - return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; -} - int test_read_image_set_2D_array(cl_device_id device, cl_context context, cl_command_queue queue, const cl_image_format *format, @@ -1453,7 +199,9 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context, if( gDebugTrace ) log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize ); - int retCode = test_read_image_2D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = test_read_image( + context, queue, kernel, &imageInfo, imageSampler, + floatCoords, outputType, seed); if( retCode ) return retCode; } @@ -1500,7 +248,9 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context, log_info("Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ]); if( gDebugTrace ) log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); - int retCode = test_read_image_2D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, + imageSampler, floatCoords, outputType, seed); if( retCode ) return retCode; } @@ -1514,7 +264,9 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context, imageInfo.rowPitch = imageInfo.width * pixelSize; imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; - int retCode = test_read_image_2D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, imageSampler, + floatCoords, outputType, seed); if( retCode ) return retCode; } @@ -1565,7 +317,9 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context, if ( gTestMipmaps ) log_info(" and %d mip levels\n", (int) imageInfo.num_mip_levels); } - int retCode = test_read_image_2D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + int retCode = + test_read_image(context, queue, kernel, &imageInfo, + imageSampler, floatCoords, outputType, seed); if( retCode ) return retCode; }