From b818605f9613dd3f6bc6e1396d466468bf8a1577 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Wed, 15 Nov 2023 19:24:12 +0000 Subject: [PATCH] Fixes from review --- samples/core/multi-device/main.c | 118 +++++++++++++++++++++-------- samples/core/multi-device/main.cpp | 26 +++++-- 2 files changed, 108 insertions(+), 36 deletions(-) diff --git a/samples/core/multi-device/main.c b/samples/core/multi-device/main.c index 28e7b28e..6d2802bf 100644 --- a/samples/core/multi-device/main.c +++ b/samples/core/multi-device/main.c @@ -163,6 +163,20 @@ void host_convolution(const cl_float* in, cl_float* out, const cl_float* mask, } } +cl_int opencl_version_contains(const cl_device_id dev, + const char* version_fragment) +{ + char version[64]; + cl_int error = CL_SUCCESS; + OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_VERSION, sizeof(version), + &version, NULL), + error, fail); + char* found_version = strstr(version, version_fragment); + return (found_version != NULL); +fail: + return error; +} + int main(int argc, char* argv[]) { cl_int error = CL_SUCCESS; @@ -199,7 +213,7 @@ int main(int argc, char* argv[]) OCLERROR_PAR(dev = cl_util_get_device(dev_opts.triplet.plat_index, dev_opts.triplet.dev_index, dev_opts.triplet.dev_type, &error), - error, dev); + error, end); if (!diag_opts.quiet) { @@ -212,24 +226,58 @@ int main(int argc, char* argv[]) fflush(stdout); } -#if CL_HPP_TARGET_OPENCL_VERSION < 120 - fprintf(stderr, - "Error: OpenCL subdevices not supported before version 1.2 "); - exit(EXIT_FAILURE); -#endif + if (opencl_version_contains(dev, "1.1")) + { + fprintf(stderr, + "Error: OpenCL sub-devices not supported before version 1.2 "); + exit(EXIT_FAILURE); + } - // Create subdevices, each with half of the compute units available. + // Check if device supports fission. + cl_device_partition_property* dev_props = NULL; + size_t props_size = 0; + OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL, + &props_size), + error, end); + if (props_size == 0) + { + fprintf(stderr, "Error: device does not support fission"); + exit(EXIT_FAILURE); + } + + // Check if the "partition equally" type is supported. + MEM_CHECK(dev_props = (cl_device_partition_property*)malloc(sizeof(char) + * props_size), + error, end); + OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES, + sizeof(char) * props_size, dev_props, NULL), + error, props); + size_t prop = 0; + for (; prop < props_size; ++prop) + { + if (dev_props[prop] == CL_DEVICE_PARTITION_EQUALLY) + { + break; + } + } + if (prop == props_size) + { + fprintf(stderr, "Error: device does not partition equally"); + exit(EXIT_FAILURE); + } + + // Create sub-devices, each with half of the compute units available. cl_uint max_compute_units = 0; OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &max_compute_units, NULL), - error, dev); + error, props); cl_device_partition_property subdevices_properties[] = { (cl_device_partition_property)CL_DEVICE_PARTITION_EQUALLY, (cl_device_partition_property)(max_compute_units / 2), 0 }; - // Initialize subdevices array with one device and then reallocate for - // MacOS and Windows not to complain about NULL subdevices array. + // Initialize sub-devices array with one device and then reallocate for + // MacOS and Windows not to complain about NULL sub-devices array. cl_uint subdev_count = 1; cl_device_id* subdevices = (cl_device_id*)malloc(subdev_count * sizeof(cl_device_id)); @@ -237,7 +285,7 @@ int main(int argc, char* argv[]) OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties, max_compute_units, subdevices, &subdev_count), - error, dev); + error, props); if (subdev_count < 2) { @@ -249,11 +297,11 @@ int main(int argc, char* argv[]) (cl_device_id*)realloc(subdevices, subdev_count * sizeof(cl_device_id)); OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties, subdev_count, subdevices, NULL), - error, subdevs); + error, subdev1); OCLERROR_PAR(context = clCreateContext(NULL, subdev_count, subdevices, NULL, NULL, &error), - error, subdevs); + error, subdev1); // Read kernel file. const char* kernel_location = "./convolution.cl"; @@ -280,11 +328,14 @@ int main(int argc, char* argv[]) // it's only necessary to add the -cl-std option for 2.0 and 3.0 OpenCL // versions. char compiler_options[1023] = ""; -#if CL_HPP_TARGET_OPENCL_VERSION >= 300 - strcat(compiler_options, "-cl-std=CL3.0 "); -#elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - strcat(compiler_options, "-cl-std=CL2.0 "); -#endif + if (opencl_version_contains(dev, "3.")) + { + strcat(compiler_options, "-cl-std=CL3.0 "); + } + else if (opencl_version_contains(dev, "2.")) + { + strcat(compiler_options, "-cl-std=CL2.0 "); + } OCLERROR_RET( clBuildProgram(program, 2, subdevices, compiler_options, NULL, NULL), @@ -356,7 +407,7 @@ int main(int argc, char* argv[]) mask_dim * mask_dim, -1000, 1000); // Create device buffers, from which we will create the subbuffers for the - // subdevices. + // sub-devices. const size_t grid_midpoint = y_dim / 2; const size_t pad_grid_midpoint = pad_y_dim / 2; @@ -391,7 +442,7 @@ int main(int argc, char* argv[]) fflush(stdout); } - // Set up subdevices for kernel execution. + // Set up sub-devices for kernel execution. const size_t half_input_bytes = sizeof(cl_float) * pad_x_dim * (pad_grid_midpoint + 1); const size_t input_offset = @@ -414,7 +465,7 @@ int main(int argc, char* argv[]) error, bufmask); // Initialize queues for command execution on each device. -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if defined(CL_VERSION_2_0) || defined(CL_VERSION_3_0) cl_command_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0 }; OCLERROR_PAR(sub_queues[subdevice] = clCreateCommandQueueWithProperties( @@ -507,7 +558,8 @@ int main(int argc, char* argv[]) } GET_CURRENT_TIMER(host_start) - host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim, (cl_uint)y_dim); + host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim, + (cl_uint)y_dim); GET_CURRENT_TIMER(host_end) size_t host_time; TIMER_DIFFERENCE(host_time, host_start, host_end) @@ -588,7 +640,7 @@ int main(int argc, char* argv[]) event1: OCLERROR_RET(clReleaseEvent(events[0]), end_error, subbufout); subbufout: - if (subdevice == 1) + if (subdevice >= 1) { OCLERROR_RET(clReleaseMemObject(sub_output_grids[1]), end_error, subbufout0); @@ -596,7 +648,7 @@ int main(int argc, char* argv[]) subbufout0: OCLERROR_PAR(clReleaseMemObject(sub_output_grids[0]), end_error, subbufin); subbufin: - if (subdevice == 1) + if (subdevice >= 1) { OCLERROR_RET(clReleaseMemObject(sub_input_grids[1]), end_error, subbufin0); @@ -604,15 +656,15 @@ int main(int argc, char* argv[]) subbufin0: OCLERROR_RET(clReleaseMemObject(sub_input_grids[0]), end_error, subqueue); subqueue: - if (subdevice == 1) + if (subdevice >= 1) { OCLERROR_RET(clReleaseCommandQueue(sub_queues[1]), end_error, subqueue0); } subqueue0: - OCLERROR_RET(clReleaseCommandQueue(sub_queues[1]), end_error, conv); + OCLERROR_RET(clReleaseCommandQueue(sub_queues[0]), end_error, conv); conv: - if (subdevice == 1) + if (subdevice >= 1) { OCLERROR_RET(clReleaseKernel(convolutions[1]), end_error, conv0); } @@ -631,15 +683,19 @@ int main(int argc, char* argv[]) hinput: free(h_input_grid); prg: - OCLERROR_RET(clReleaseProgram(program), end_error, subdevs); + OCLERROR_RET(clReleaseProgram(program), end_error, ker); ker: free(kernel); contx: - OCLERROR_RET(clReleaseContext(context), end_error, end); + OCLERROR_RET(clReleaseContext(context), end_error, subdev1); +subdev1: + OCLERROR_RET(clReleaseDevice(subdevices[1]), end_error, subdev0); +subdev0: + OCLERROR_RET(clReleaseDevice(subdevices[0]), end_error, subdevs); subdevs: free(subdevices); -dev: - OCLERROR_RET(clReleaseDevice(dev), end_error, end); +props: + free(dev_props); end: if (error) cl_util_print_error(error); return error; diff --git a/samples/core/multi-device/main.cpp b/samples/core/multi-device/main.cpp index 6a9cf399..35379f94 100644 --- a/samples/core/multi-device/main.cpp +++ b/samples/core/multi-device/main.cpp @@ -140,6 +140,25 @@ int main(int argc, char* argv[]) exit(EXIT_FAILURE); #endif + // Check if device supports fission. + std::vector dev_props = + dev.getInfo(); + if (dev_props.size() == 0) + { + std::cerr << "Error: device does not support fission" << std::endl; + exit(EXIT_FAILURE); + } + + // Check if the "partition equally" type is supported. + if (std::find(dev_props.begin(), dev_props.end(), + CL_DEVICE_PARTITION_EQUALLY) + == dev_props.end()) + { + std::cerr << "Error: device does not partition equally" + << std::endl; + exit(EXIT_FAILURE); + } + // Create subdevices, each with half of the compute units available. cl_uint max_compute_units = dev.getInfo(); cl_device_partition_property subdevices_properties[] = { @@ -316,10 +335,7 @@ int main(int argc, char* argv[]) std::cout.flush(); } - auto convolution = - cl::KernelFunctor( - program, "convolution_3x3") - .getKernel(); + auto convolution = cl::Kernel(program, "convolution_3x3"); cl::CommandQueue queue(context, subdevice, cl::QueueProperties::Profiling); @@ -362,7 +378,7 @@ int main(int argc, char* argv[]) std::cout.flush(); } - convolutions.push_back(convolution.clone()); + convolutions.push_back(convolution); sub_queues.push_back(queue); sub_input_grids.push_back(sub_input_grid); sub_output_grids.push_back(sub_output_grid);