Skip to content

Commit

Permalink
Fixes from review
Browse files Browse the repository at this point in the history
  • Loading branch information
Beanavil committed Nov 16, 2023
1 parent 419fe40 commit 11c0765
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 27 deletions.
106 changes: 83 additions & 23 deletions samples/core/multi-device/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,22 @@ void host_convolution(const cl_float* in, cl_float* out, const cl_float* mask,
}
}

/*
 * Checks whether the OpenCL version number reported by a device contains a
 * given text fragment.
 *
 * The CL_DEVICE_VERSION string has the layout
 * "OpenCL <major.minor> <vendor-specific information>". Only the
 * <major.minor> token is searched, so digits in the vendor-specific part
 * (e.g. a driver version such as "12.1.1") cannot produce a false match.
 * If the string does not start with "OpenCL ", the whole string is searched.
 *
 * dev               Device whose CL_DEVICE_VERSION string is queried.
 * version_fragment  Text to look for, e.g. "1.1", "2." or "3.".
 *
 * Returns 1 when the fragment is found and 0 when it is not. If the
 * clGetDeviceInfo query fails, the value held in `error` is returned instead.
 * NOTE(review): that value is presumably a non-zero OpenCL error code, which
 * callers that test the result for truth will read as a match -- confirm
 * this is the intended behaviour.
 */
cl_int opencl_version_contains(const cl_device_id dev,
                               const char* version_fragment)
{
    static const char prefix[] = "OpenCL ";
    const size_t prefix_len = sizeof(prefix) - 1;
    char version[64];
    const char* number = version;
    const char* number_end = NULL;
    const char* found_version = NULL;
    cl_int error = CL_SUCCESS;

    OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_VERSION, sizeof(version),
                                 version, NULL),
                 error, fail);

    // Locate the <major.minor> token: it follows the "OpenCL " prefix and
    // ends at the next space (or at the end of the string).
    if (strncmp(version, prefix, prefix_len) == 0)
    {
        number = version + prefix_len;
        number_end = strchr(number, ' ');
    }
    if (number_end == NULL)
    {
        number_end = number + strlen(number);
    }

    // strstr yields the first occurrence; if that one does not fit entirely
    // inside the version token, no later occurrence can either.
    found_version = strstr(number, version_fragment);
    if (found_version != NULL
        && found_version + strlen(version_fragment) > number_end)
    {
        found_version = NULL;
    }

    // Passing a NULL pointer for %s is undefined behaviour, so substitute a
    // placeholder when nothing was found.
    printf("Version detected %s from version %s and fragment %s\n",
           found_version != NULL ? found_version : "(null)", version,
           version_fragment);
    return (found_version != NULL);
fail:
    return error;
}

int main(int argc, char* argv[])
{
cl_int error = CL_SUCCESS;
Expand Down Expand Up @@ -212,32 +228,66 @@ int main(int argc, char* argv[])
fflush(stdout);
}

#if CL_HPP_TARGET_OPENCL_VERSION < 120
fprintf(stderr,
"Error: OpenCL subdevices not supported before version 1.2 ");
exit(EXIT_FAILURE);
#endif
if (opencl_version_contains(dev, "1.1"))
{
fprintf(stderr,
"Error: OpenCL sub-devices not supported before version 1.2 ");
exit(EXIT_FAILURE);
}

// Create subdevices, each with half of the compute units available.
// Check if device supports fission.
cl_device_partition_property* dev_props = NULL;
size_t props_size = 0;
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL,
&props_size),
error, dev);
if (props_size == 0)
{
fprintf(stderr, "Error: device does not support fission");
exit(EXIT_FAILURE);
}

// Check if the "partition equally" type is supported.
MEM_CHECK(dev_props = (cl_device_partition_property*)malloc(sizeof(char)
* props_size),
error, dev);
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_PARTITION_PROPERTIES,
sizeof(char) * props_size, dev_props, NULL),
error, props);
size_t i = 0;
for (; i < props_size; ++i)
{
if (dev_props[i] == CL_DEVICE_PARTITION_EQUALLY)
{
break;
}
}
if (i == props_size)
{
fprintf(stderr, "Error: device does not partition equally");
exit(EXIT_FAILURE);
}

// Create sub-devices, each with half of the compute units available.
cl_uint max_compute_units = 0;
OCLERROR_RET(clGetDeviceInfo(dev, CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(cl_uint), &max_compute_units, NULL),
error, dev);
error, props);
cl_device_partition_property subdevices_properties[] = {
(cl_device_partition_property)CL_DEVICE_PARTITION_EQUALLY,
(cl_device_partition_property)(max_compute_units / 2), 0
};

// Initialize subdevices array with one device and then reallocate for
// MacOS and Windows not to complain about NULL subdevices array.
// Initialize sub-devices array with one device and then reallocate for
// MacOS and Windows not to complain about NULL sub-devices array.
cl_uint subdev_count = 1;
cl_device_id* subdevices =
(cl_device_id*)malloc(subdev_count * sizeof(cl_device_id));

OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties,
max_compute_units, subdevices,
&subdev_count),
error, dev);
error, props);

if (subdev_count < 2)
{
Expand All @@ -249,11 +299,11 @@ int main(int argc, char* argv[])
(cl_device_id*)realloc(subdevices, subdev_count * sizeof(cl_device_id));
OCLERROR_RET(clCreateSubDevices(dev, subdevices_properties, subdev_count,
subdevices, NULL),
error, subdevs);
error, subdev1);

OCLERROR_PAR(context = clCreateContext(NULL, subdev_count, subdevices, NULL,
NULL, &error),
error, subdevs);
error, subdev1);

// Read kernel file.
const char* kernel_location = "./convolution.cl";
Expand All @@ -280,11 +330,14 @@ int main(int argc, char* argv[])
// it's only necessary to add the -cl-std option for 2.0 and 3.0 OpenCL
// versions.
char compiler_options[1023] = "";
#if CL_HPP_TARGET_OPENCL_VERSION >= 300
strcat(compiler_options, "-cl-std=CL3.0 ");
#elif CL_HPP_TARGET_OPENCL_VERSION >= 200
strcat(compiler_options, "-cl-std=CL2.0 ");
#endif
if (opencl_version_contains(dev, "3."))
{
strcat(compiler_options, "-cl-std=CL3.0 ");
}
else if (opencl_version_contains(dev, "2."))
{
strcat(compiler_options, "-cl-std=CL2.0 ");
}

OCLERROR_RET(
clBuildProgram(program, 2, subdevices, compiler_options, NULL, NULL),
Expand Down Expand Up @@ -356,7 +409,7 @@ int main(int argc, char* argv[])
mask_dim * mask_dim, -1000, 1000);

// Create device buffers, from which we will create the subbuffers for the
// subdevices.
// sub-devices.
const size_t grid_midpoint = y_dim / 2;
const size_t pad_grid_midpoint = pad_y_dim / 2;

Expand Down Expand Up @@ -391,7 +444,7 @@ int main(int argc, char* argv[])
fflush(stdout);
}

// Set up subdevices for kernel execution.
// Set up sub-devices for kernel execution.
const size_t half_input_bytes =
sizeof(cl_float) * pad_x_dim * (pad_grid_midpoint + 1);
const size_t input_offset =
Expand All @@ -414,7 +467,7 @@ int main(int argc, char* argv[])
error, bufmask);

// Initialize queues for command execution on each device.
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
#if defined(CL_VERSION_2_0) || defined(CL_VERSION_3_0)
cl_command_queue_properties props[] = { CL_QUEUE_PROPERTIES,
CL_QUEUE_PROFILING_ENABLE, 0 };
OCLERROR_PAR(sub_queues[subdevice] = clCreateCommandQueueWithProperties(
Expand Down Expand Up @@ -507,7 +560,8 @@ int main(int argc, char* argv[])
}

GET_CURRENT_TIMER(host_start)
host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim, (cl_uint)y_dim);
host_convolution(h_input_grid, h_output_grid, h_mask, (cl_uint)x_dim,
(cl_uint)y_dim);
GET_CURRENT_TIMER(host_end)
size_t host_time;
TIMER_DIFFERENCE(host_time, host_start, host_end)
Expand Down Expand Up @@ -631,13 +685,19 @@ int main(int argc, char* argv[])
hinput:
free(h_input_grid);
prg:
OCLERROR_RET(clReleaseProgram(program), end_error, subdevs);
OCLERROR_RET(clReleaseProgram(program), end_error, ker);
ker:
free(kernel);
contx:
OCLERROR_RET(clReleaseContext(context), end_error, end);
OCLERROR_RET(clReleaseContext(context), end_error, subdev1);
subdev1:
OCLERROR_RET(clReleaseDevice(subdevices[1]), end_error, subdev0);
subdev0:
OCLERROR_RET(clReleaseDevice(subdevices[0]), end_error, subdevs);
subdevs:
free(subdevices);
props:
free(dev_props);
dev:
OCLERROR_RET(clReleaseDevice(dev), end_error, end);
end:
Expand Down
24 changes: 20 additions & 4 deletions samples/core/multi-device/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,25 @@ int main(int argc, char* argv[])
exit(EXIT_FAILURE);
#endif

// Check if device supports fission.
std::vector<cl_device_partition_property> dev_props =
dev.getInfo<CL_DEVICE_PARTITION_PROPERTIES>();
if (dev_props.size() == 0)
{
std::cerr << "Error: device does not support fission" << std::endl;
exit(EXIT_FAILURE);
}

// Check if the "partition equally" type is supported.
if (std::find(dev_props.begin(), dev_props.end(),
CL_DEVICE_PARTITION_EQUALLY)
== dev_props.end())
{
std::cerr << "Error: device does not partition equally"
<< std::endl;
exit(EXIT_FAILURE);
}

// Create subdevices, each with half of the compute units available.
cl_uint max_compute_units = dev.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
cl_device_partition_property subdevices_properties[] = {
Expand Down Expand Up @@ -316,10 +335,7 @@ int main(int argc, char* argv[])
std::cout.flush();
}

auto convolution =
cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl_uint2>(
program, "convolution_3x3")
.getKernel();
auto convolution = cl::Kernel(program, "convolution_3x3");

cl::CommandQueue queue(context, subdevice,
cl::QueueProperties::Profiling);
Expand Down

0 comments on commit 11c0765

Please sign in to comment.