From f510201a092363b66969888df49c68721ca2c4fb Mon Sep 17 00:00:00 2001 From: Mikolaj-Filar-Mobica <123570530+Mikolaj-Filar-Mobica@users.noreply.github.com> Date: Tue, 16 May 2023 16:45:34 +0200 Subject: [PATCH] Add Blur cpp example and improve image error handling (#72) * Add Blur cpp example and improve image error handling * Use clang formatting and include math.h * update submodule , fix event passing and minor comment fix --------- Co-authored-by: PRZEMYSLAW_WISNIEWSKI --- lib/include/CL/SDK/Image.hpp | 2 +- lib/src/SDK/Image.cpp | 14 +- samples/core/blur/CMakeLists.txt | 7 + samples/core/blur/blur.cpp | 747 +++++++++++++++++++++++++++++++ samples/core/blur/blur.hpp | 106 +++++ samples/core/blur/main.cpp | 166 +++++++ 6 files changed, 1040 insertions(+), 2 deletions(-) create mode 100644 samples/core/blur/blur.cpp create mode 100644 samples/core/blur/blur.hpp create mode 100644 samples/core/blur/main.cpp diff --git a/lib/include/CL/SDK/Image.hpp b/lib/include/CL/SDK/Image.hpp index 1da5dc01..6e241a4e 100644 --- a/lib/include/CL/SDK/Image.hpp +++ b/lib/include/CL/SDK/Image.hpp @@ -10,7 +10,7 @@ namespace cl { namespace sdk { struct Image { - int width, height, pixel_size; + int width = 0, height = 0, pixel_size = 1; cl::vector pixels; }; diff --git a/lib/src/SDK/Image.cpp b/lib/src/SDK/Image.cpp index c71c6447..c17a3d14 100644 --- a/lib/src/SDK/Image.cpp +++ b/lib/src/SDK/Image.cpp @@ -32,6 +32,18 @@ namespace sdk { Image im; unsigned char* data = stbi_load(file_name, &im.width, &im.height, &im.pixel_size, 0); + + if (data == nullptr) + { + std::string err_msg{ "Not possible to read file" }; + const char* load_msg = stbi_failure_reason(); + + if (load_msg) err_msg += std::string(": ") + load_msg; + + cl::util::detail::errHandler(CL_INVALID_ARG_VALUE, &err, + err_msg.c_str()); + } + im.pixels.insert(im.pixels.end(), data, data + im.width * im.height * im.pixel_size); @@ -42,7 +54,7 @@ namespace sdk { cl::util::detail::errHandler(CL_INVALID_ARG_VALUE, &err, "File 
read error!"); - if (error != NULL) *error = err; + if (error != nullptr) *error = err; return im; } diff --git a/samples/core/blur/CMakeLists.txt b/samples/core/blur/CMakeLists.txt index 9ed4fb16..437f755c 100644 --- a/samples/core/blur/CMakeLists.txt +++ b/samples/core/blur/CMakeLists.txt @@ -18,3 +18,10 @@ add_sample( VERSION 300 SOURCES main.c KERNELS blur.cl) + +add_sample( + TEST + TARGET blurcpp + VERSION 300 + SOURCES main.cpp blur.cpp blur.hpp + KERNELS blur.cl) \ No newline at end of file diff --git a/samples/core/blur/blur.cpp b/samples/core/blur/blur.cpp new file mode 100644 index 00000000..c19eca91 --- /dev/null +++ b/samples/core/blur/blur.cpp @@ -0,0 +1,747 @@ +/* + * Copyright (c) 2023 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "blur.hpp" + +#include +#include +#include + +// STL includes +#include +#include +#include +#include + +// TCLAP includes +#include + +// Default image +#include "default_image.h" + +std::unique_ptr> valid_op_constraint; + +// Add option to CLI parsing SDK utility +template <> auto cl::sdk::parse() +{ + return std::make_tuple( + std::make_shared>( + "i", "in", "Input image file", false, "", "name"), + std::make_shared>( + "o", "out", "Output image file", false, "out.png", "name"), + std::make_shared>("s", "size", + "Size of blur kernel", false, + (float)1.0, "positive float"), + std::make_shared>( + "b", "blur", "Operation of blur to perform: box or gauss", false, + "box")); +} + +template <> +BlurCppExample::BlurOptions cl::sdk::comprehend( + std::shared_ptr> in_arg, + std::shared_ptr> out_arg, + std::shared_ptr> size_arg, + std::shared_ptr> op_arg) +{ + return BlurCppExample::BlurOptions{ in_arg->getValue(), out_arg->getValue(), + size_arg->getValue(), + op_arg->getValue() }; +} + +void BlurCppExample::single_pass_box_blur() +{ + std::cout << "Single-pass blur" << std::endl; + step++; + + auto size = static_cast(blur_opts.size); + auto blur = + cl::KernelFunctor(program, "blur_box"); + // blur + auto start = std::chrono::high_resolution_clock::now(); + std::vector passes; + + auto event = blur(cl::EnqueueArgs{ queue, cl::NDRange{ width, height } }, + input_image_buf, output_image_buf, size); + + passes.push_back(event); + + event.wait(); + + auto end = std::chrono::high_resolution_clock::now(); + + cl::enqueueReadImage(output_image_buf, CL_BLOCKING, origin, image_size, 0, + 0, output_image.pixels.data()); + + if (verbose) print_timings(end - start, passes); + + // write output file + finalize_blur(); +} + +void BlurCppExample::dual_pass_box_blur() +{ + std::cout << "Dual-pass blur" << std::endl; + step++; + + auto size = static_cast(blur_opts.size); + auto blur = + cl::KernelFunctor(program, "blur_box"); + + // blur + auto start = 
std::chrono::high_resolution_clock::now(); + std::vector passes; + + passes.push_back( + blur(cl::EnqueueArgs{ queue, cl::NDRange{ width, height } }, + input_image_buf, temp_image_buf, size)); + + passes.push_back( + blur(cl::EnqueueArgs{ queue, cl::NDRange{ width, height } }, + temp_image_buf, output_image_buf, size)); + + cl::WaitForEvents(passes); + + auto end = std::chrono::high_resolution_clock::now(); + + cl::enqueueReadImage(output_image_buf, CL_BLOCKING, origin, image_size, 0, + 0, output_image.pixels.data()); + + if (verbose) print_timings(end - start, passes); + + // write output file + finalize_blur(); +} + +void BlurCppExample::dual_pass_local_memory_exchange_box_blur() +{ + std::cout << "Dual-pass local memory exchange blur" << std::endl; + step++; + + auto size = static_cast(blur_opts.size); + + cl::Kernel blur1(program, "blur_box_horizontal_exchange"); + cl::Kernel blur2(program, "blur_box_vertical_exchange"); + + auto wgs1 = blur1.getWorkGroupInfo(device); + auto psm1 = + blur1.getWorkGroupInfo( + device); + auto wgs2 = blur2.getWorkGroupInfo(device); + auto psm2 = + blur2.getWorkGroupInfo( + device); + + // Further constrain (reduce) WGS based on shared mem size on device + auto loc_mem = device.getInfo(); + + if (loc_mem >= ((psm1 > psm2 ? 
psm1 : psm2) + 2 * size) * sizeof(cl_uchar4)) + { + while (loc_mem < (wgs1 + 2 * size) * sizeof(cl_uchar4)) wgs1 -= psm1; + while (loc_mem < (wgs2 + 2 * size) * sizeof(cl_uchar4)) wgs2 -= psm2; + } + else + { + cl::util::detail::errHandler( + CL_OUT_OF_RESOURCES, nullptr, + "Not enough local memory to serve a single sub-group."); + } + + blur1.setArg(0, input_image_buf); + blur1.setArg(1, temp_image_buf); + blur1.setArg(2, size); + blur1.setArg(3, sizeof(cl_uchar4) * (wgs1 + 2 * size), nullptr); + + blur2.setArg(0, temp_image_buf); + blur2.setArg(1, output_image_buf); + blur2.setArg(2, size); + blur2.setArg(3, sizeof(cl_uchar4) * (wgs2 + 2 * size), nullptr); + + // blur + auto start = std::chrono::high_resolution_clock::now(); + std::vector passes; + cl::Event event; + + cl::NDRange wgsf{ wgs1, 1 }; + cl::NDRange work_size1{ (input_image.width + wgs1 - 1) / wgs1 * wgs1, + (cl::size_type)input_image.height }; + queue.enqueueNDRangeKernel(blur1, origin, work_size1, wgsf, nullptr, + &event); + passes.push_back(event); + + cl::NDRange wgss{ 1, wgs2 }; + cl::NDRange work_size2{ (cl::size_type)input_image.width, + (input_image.height + wgs2 - 1) / wgs2 * wgs2 }; + queue.enqueueNDRangeKernel(blur2, origin, work_size2, wgss, nullptr, + &event); + passes.push_back(event); + + cl::WaitForEvents(passes); + + auto end = std::chrono::high_resolution_clock::now(); + + cl::enqueueReadImage(output_image_buf, CL_BLOCKING, origin, image_size, 0, + 0, output_image.pixels.data()); + + if (verbose) print_timings(end - start, passes); + + // write output file + finalize_blur(); +} + +void BlurCppExample::dual_pass_subgroup_exchange_box_blur() +{ + step++; + + auto size = static_cast(blur_opts.size); + // create kernels + auto blur1 = cl::KernelFunctor( + program, "blur_box_horizontal_subgroup_exchange"); + auto blur2 = cl::KernelFunctor( + program, "blur_box_vertical_subgroup_exchange"); + + auto wgs1 = + blur1.getKernel() + .getWorkGroupInfo( + device); + auto wgs2 = + 
blur2.getKernel() + .getWorkGroupInfo( + device); + + // blur + std::vector passes; + auto start = std::chrono::high_resolution_clock::now(); + + cl::NDRange work_size1{ (width + wgs1 - 1) / wgs1 * wgs1, height }; + cl::NDRange wgsf{ wgs1, 1 }; + passes.push_back(blur1(cl::EnqueueArgs{ queue, work_size1, wgsf }, + input_image_buf, temp_image_buf, size)); + + cl::NDRange work_size2{ width, (height + wgs2 - 1) / wgs2 * wgs2 }; + cl::NDRange wgss{ 1, wgs2 }; + passes.push_back(blur2(cl::EnqueueArgs{ queue, work_size2, wgss }, + temp_image_buf, output_image_buf, size)); + + cl::WaitForEvents(passes); + + auto end = std::chrono::high_resolution_clock::now(); + + cl::enqueueReadImage(output_image_buf, CL_BLOCKING, origin, image_size, 0, + 0, output_image.pixels.data()); + + if (verbose) print_timings(end - start, passes); + + // write output file + finalize_blur(); +} + +void BlurCppExample::dual_pass_kernel_blur() +{ + step++; + + auto size = gauss_size; + auto& kern = gauss_kernel_buf; + + // create kernels + auto blur1 = cl::KernelFunctor( + program, "blur_kernel_horizontal"); + auto blur2 = cl::KernelFunctor( + program, "blur_kernel_vertical"); + + // blur + auto start = std::chrono::high_resolution_clock::now(); + std::vector passes; + + passes.push_back( + blur1(cl::EnqueueArgs{ queue, cl::NDRange{ width, height } }, + input_image_buf, temp_image_buf, size, kern)); + + passes.push_back( + blur2(cl::EnqueueArgs{ queue, cl::NDRange{ width, height } }, + temp_image_buf, output_image_buf, size, kern)); + + cl::WaitForEvents(passes); + + auto end = std::chrono::high_resolution_clock::now(); + + cl::enqueueReadImage(output_image_buf, CL_BLOCKING, origin, image_size, 0, + 0, output_image.pixels.data()); + + if (verbose) print_timings(end - start, passes); + + // write output file + finalize_blur(); +} + +void BlurCppExample::dual_pass_local_memory_exchange_kernel_blur() +{ + step++; + + auto size = gauss_size; + auto& kern = gauss_kernel_buf; + + // create kernels + auto 
blur1 = cl::KernelFunctor( + program, "blur_kernel_horizontal_exchange"); + auto blur2 = cl::KernelFunctor( + program, "blur_kernel_vertical_exchange"); + + // (register) constraints + auto wgs1 = + blur1.getKernel().getWorkGroupInfo(device); + auto psm1 = + blur1.getKernel() + .getWorkGroupInfo( + device); + auto wgs2 = + blur2.getKernel().getWorkGroupInfo(device); + auto psm2 = + blur2.getKernel() + .getWorkGroupInfo( + device); + auto loc_mem = device.getInfo(); + + if (loc_mem >= ((psm1 > psm2 ? psm1 : psm2) + 2 * size) * sizeof(cl_uchar4)) + { + while (loc_mem < (wgs1 + 2 * size) * sizeof(cl_uchar4)) wgs1 -= psm1; + while (loc_mem < (wgs2 + 2 * size) * sizeof(cl_uchar4)) wgs2 -= psm2; + } + else + { + cl::util::detail::errHandler( + CL_OUT_OF_RESOURCES, nullptr, + "Not enough local memory to serve a single sub-group."); + } + + // blur + auto start = std::chrono::high_resolution_clock::now(); + std::vector passes; + cl::Event event; + + cl::NDRange work_size1{ (width + wgs1 - 1) / wgs1 * wgs1, height }; + cl::NDRange wgsf{ wgs1, 1 }; + auto local1 = cl::Local(sizeof(cl_uchar4) * (wgs1 + 2 * size)); + + passes.push_back(blur1(cl::EnqueueArgs{ queue, work_size1, wgsf }, + input_image_buf, temp_image_buf, size, kern, + local1)); + + cl::NDRange work_size2{ width, (height + wgs2 - 1) / wgs2 * wgs2 }; + cl::NDRange wgss{ 1, wgs2 }; + auto local2 = cl::Local(sizeof(cl_uchar4) * (wgs2 + 2 * size)); + + passes.push_back(blur2(cl::EnqueueArgs{ queue, work_size2, wgss }, + temp_image_buf, output_image_buf, size, kern, + local2)); + + cl::WaitForEvents(passes); + + auto end = std::chrono::high_resolution_clock::now(); + + cl::enqueueReadImage(output_image_buf, CL_BLOCKING, origin, image_size, 0, + 0, output_image.pixels.data()); + + if (verbose) print_timings(end - start, passes); + + // write output file + finalize_blur(); +} + +void BlurCppExample::dual_pass_subgroup_exchange_kernel_blur() +{ + step++; + + auto size = gauss_size; + auto& kern = gauss_kernel_buf; + + // 
create kernels + auto blur1 = cl::KernelFunctor( + program, "blur_kernel_horizontal_subgroup_exchange"); + auto blur2 = cl::KernelFunctor( + program, "blur_kernel_vertical_subgroup_exchange"); + + // query preferred subgroup size of kernel on device + auto wgs1 = + blur1.getKernel() + .getWorkGroupInfo( + device); + auto wgs2 = + blur2.getKernel() + .getWorkGroupInfo( + device); + // blur + std::vector passes; + auto start = std::chrono::high_resolution_clock::now(); + + cl::NDRange work_size1{ (width + wgs1 - 1) / wgs1 * wgs1, height }; + cl::NDRange wgsf{ wgs1, 1 }; + passes.push_back(blur1(cl::EnqueueArgs{ queue, work_size1, wgsf }, + input_image_buf, temp_image_buf, size, kern)); + + cl::NDRange work_size2{ width, (height + wgs2 - 1) / wgs2 * wgs2 }; + cl::NDRange wgss{ 1, wgs2 }; + passes.push_back(blur2(cl::EnqueueArgs{ queue, work_size2, wgss }, + temp_image_buf, output_image_buf, size, kern)); + + cl::WaitForEvents(passes); + + auto end = std::chrono::high_resolution_clock::now(); + + cl::enqueueReadImage(output_image_buf, CL_BLOCKING, origin, image_size, 0, + 0, output_image.pixels.data()); + + if (verbose) print_timings(end - start, passes); + + // write output file + finalize_blur(); +} + +void BlurCppExample::load_device() +{ + // Create context + context = cl::sdk::get_context(dev_opts.triplet); + + device = context.getInfo().at(0); + queue = cl::CommandQueue(context, device, cl::QueueProperties::Profiling); + cl::CommandQueue::setDefault(queue); + + cl::Platform platform{ + device.getInfo() + }; // https://github.com/KhronosGroup/OpenCL-CLHPP/issues/150 + + if (!diag_opts.quiet) + { + std::cout << "Selected platform: " + << platform.getInfo() << "\n" + << "Selected device: " << device.getInfo() + << "\n" + << std::endl; + } +} + +void BlurCppExample::read_input_image() +{ + /// If file not provided in command line, create a default one. 
+ if (blur_opts.in.empty()) + { + std::string fname("andrew_svk_7oJ4D_ewB7c_unsplash.png"); + + std::cout << "No file given, use standard image " << fname << std::endl; + + const char* fcont = (const char*)andrew_svk_7oJ4D_ewB7c_unsplash_png; + const size_t fsize = andrew_svk_7oJ4D_ewB7c_unsplash_png_size; + + std::fstream f(fname, std::ios::out | std::ios::binary); + if (!f.is_open()) + { + throw std::runtime_error{ std::string{ + "Cannot create a default image: open " + "andrew_svk_7oJ4D_ewB7c_unsplash_png" } }; + } + f.write(fcont, fsize); + f.close(); + + blur_opts.in = fname; + } + + input_image = cl::sdk::read_image(blur_opts.in.c_str(), nullptr); + image_size = { (cl::size_type)input_image.width, + (cl::size_type)input_image.height }; + width = input_image.width; + height = input_image.height; +} + +void BlurCppExample::prepare_output_image() +{ + output_image.width = input_image.width; + output_image.height = input_image.height; + output_image.pixel_size = input_image.pixel_size; + output_image.pixels.clear(); + output_image.pixels.reserve(sizeof(unsigned char) * output_image.width + * output_image.height + * output_image.pixel_size); +} + +std::tuple BlurCppExample::query_capabilities() +{ + // 1) query image support + if (!device.getInfo()) + { + cl::util::detail::errHandler(CL_INVALID_DEVICE, nullptr, + "No image support on device!"); + } + + // 2) query if the image format is supported and change image if not + format = set_image_format(); + + // 3) query if device have local memory + bool use_local_mem = + (device.getInfo() == CL_LOCAL); + + // 4) query if device allow subgroup shuffle operations + bool use_subgroup_exchange = + cl::util::supports_extension(device, "cl_khr_subgroup_shuffle"); + bool use_subgroup_exchange_relative = cl::util::supports_extension( + device, "cl_khr_subgroup_shuffle_relative"); + + return std::make_tuple(use_local_mem, use_subgroup_exchange, + use_subgroup_exchange_relative); +} + +void 
BlurCppExample::create_image_buffers() +{ + input_image_buf = cl::Image2D( + context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY), + format, (cl::size_type)input_image.width, + (cl::size_type)input_image.height); + + output_image_buf = cl::Image2D( + context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY), + format, (cl::size_type)input_image.width, + (cl::size_type)input_image.height); + + temp_image_buf = cl::Image2D( + context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY), + format, (cl::size_type)input_image.width, + (cl::size_type)input_image.height); + + cl::enqueueWriteImage(input_image_buf, CL_NON_BLOCKING, origin, image_size, + 0, 0, input_image.pixels.data()); +} + +void BlurCppExample::build_program(std::string options) +{ + // Open kernel stream if not already opened. + if (!kernel_stream.is_open()) + { + const char* kernel_location = "./blur.cl"; + + kernel_stream = std::ifstream(kernel_location); + + if (!kernel_stream.is_open()) + { + throw std::runtime_error{ + std::string{ "Cannot open kernel source: " } + kernel_location + }; + } + } + + // Scroll to the top + kernel_stream.clear(); + kernel_stream.seekg(0, std::ios::beg); + + // Compile kernel + program = cl::Program( + context, + std::string{ std::istreambuf_iterator{ kernel_stream }, + std::istreambuf_iterator{} }); + + program.build(device, options.c_str()); +} + +void BlurCppExample::create_gaussian_kernel() +{ + // create gaussian convolution kernel + create_gaussian_kernel_(blur_opts.size, &gauss_kernel, &gauss_size); + + gauss_kernel_buf = + cl::Buffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(float) * (2 * gauss_size + 1), gauss_kernel); +} + +// note that the kernel is not normalized and has size of 2*(*size)+1 +// elements +void BlurCppExample::create_gaussian_kernel_(float radius, float** const kernel, + int* const size) +{ + radius = fabsf(radius); + *size = (int)ceilf(3 * radius); + int span = 2 * (*size) + 1; + + *kernel = new 
float[span]; + + for (int i = 0; i <= *size; ++i) + { + float gx = gaussian((float)i, radius); + (*kernel)[*size + i] = gx; + (*kernel)[*size - i] = gx; + } +} + +float BlurCppExample::gaussian(float x, float radius) +{ + const float pi = 3.141592653589793238462f; + return expf(-x * x / (2 * radius * radius)) / (sqrtf(2 * pi) * radius); +} + +void BlurCppExample::parse_command_line(int argc, char* argv[]) +{ + auto opts = cl::sdk::parse_cli( + argc, argv); + diag_opts = std::get<0>(opts); + dev_opts = std::get<1>(opts); + blur_opts = std::get<2>(opts); + + verbose = diag_opts.verbose; + filename = blur_opts.out; + step = 0; + + if (blur_opts.op.empty()) + std::cout << "No blur option passed: box and gauss will be performed." + << std::endl; +} + +void BlurCppExample::print_timings(std::chrono::duration host_duration, + std::vector& events) +{ + std::chrono::duration device_duration(0); + + for (cl::Event event : events) + { + device_duration += + cl::util::get_duration(event); + } + + std::cout << "Execution time as seen by host: " + << std::chrono::duration_cast( + host_duration) + .count() + << " us, by device: " + << std::chrono::duration_cast( + device_duration) + .count() + << std::endl; +} + +bool BlurCppExample::option_active(const std::string option) +{ + // If no option is selected, all options are assumed to be enabled. 
+ if (blur_opts.op.empty()) return true; + + return (std::any_of(blur_opts.op.begin(), blur_opts.op.end(), + [option](std::string op) { + return op.find(option) != std::string::npos; + })); +} + +void BlurCppExample::show_format(cl::ImageFormat* format) +{ + if (verbose) + { + std::map channel_order = { + { CL_R, "CL_R" }, { CL_RGB, "CL_RBG" }, { CL_RGBA, "CL_RBGA" } + }; + + std::map channel_type = { + { CL_SIGNED_INT8, "CL_SIGNED_INT8" }, + { CL_SIGNED_INT16, "CL_SIGNED_INT16" }, + { CL_SIGNED_INT32, "CL_SIGNED_INT32" }, + { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" }, + { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" }, + { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" }, + }; + + std::cout << "Format: " << channel_order[format->image_channel_order] + << ", " << channel_type[format->image_channel_data_type] + << std::endl + << std::endl; + } +} + +cl::ImageFormat BlurCppExample::set_image_format() +{ + // this format is always supported + cl::ImageFormat res = cl::ImageFormat(CL_RGBA, CL_UNSIGNED_INT8); + + if ((input_image.pixel_size == 1) || (input_image.pixel_size == 3)) + { + cl::vector formats; + + context.getSupportedImageFormats(CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE2D, &formats); + for (auto& format : formats) + { + if (((input_image.pixel_size == 3) + && (format.image_channel_order == CL_RGB) + && (format.image_channel_data_type == CL_UNSIGNED_INT8)) + || ((input_image.pixel_size == 1) + && (format.image_channel_order == CL_R) + && (format.image_channel_data_type == CL_UNSIGNED_INT8))) + { + show_format(&format); + return format; + } + } + + // if not found, default to 4 channels of uint8_t + if (verbose) + std::cout << "Converting picture into supported format... 
"; + + const size_t pixels = input_image.width * input_image.height; + const size_t new_size = sizeof(unsigned char) * pixels * 4; + + input_image.pixels.reserve(new_size); + output_image.pixels.reserve(new_size); + + // change picture + const size_t pixel_size = input_image.pixel_size; + auto input_pixels = input_image.pixels.data(); + for (size_t i = pixels - 1; i != 0; --i) + { + memcpy(input_pixels + 4 * i, input_pixels + pixel_size * i, + pixel_size); + memset(input_pixels + 4 * i + pixel_size, 0, 4 - pixel_size); + } + memset(input_pixels + pixel_size, 0, 4 - pixel_size); + input_image.pixel_size = 4; + // store initial pixel_size in output_image.pixel_size + if (verbose) std::cout << "done." << std::endl; + } + else if (input_image.pixel_size != 4) + { + cl::util::detail::errHandler(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + nullptr, + "Not possible to write PNG file!"); + } + + show_format(&res); + + return res; +} + +void BlurCppExample::finalize_blur() +{ + // restore image type if needed + if (input_image.pixel_size != output_image.pixel_size) + { + const auto pixels = input_image.width * input_image.height, + pixel_size = output_image.pixel_size; + for (size_t i = 1; i < pixels; ++i) + memcpy(output_image.pixels.data() + pixel_size * i, + output_image.pixels.data() + 4 * i, pixel_size); + } + + std::string name = std::to_string((unsigned int)step) + filename; + + cl::sdk::write_image(name.c_str(), output_image); + + std::cout << "Image " << name << " written." 
<< std::endl << std::endl; +} diff --git a/samples/core/blur/blur.hpp b/samples/core/blur/blur.hpp new file mode 100644 index 00000000..160dbed9 --- /dev/null +++ b/samples/core/blur/blur.hpp @@ -0,0 +1,106 @@ +#pragma once + +#include + +#include +#include + +// STL includes +#include + +class BlurCppExample { +public: + BlurCppExample(int argc, char* argv[]) + : gauss_kernel(nullptr), origin({ 0, 0 }) + { + parse_command_line(argc, argv); + } + + ~BlurCppExample() { delete[] gauss_kernel; } + + void single_pass_box_blur(); + + void dual_pass_box_blur(); + + void dual_pass_local_memory_exchange_box_blur(); + + void dual_pass_subgroup_exchange_box_blur(); + + void dual_pass_kernel_blur(); + + void dual_pass_local_memory_exchange_kernel_blur(); + + void dual_pass_subgroup_exchange_kernel_blur(); + + void load_device(); + + void read_input_image(); + + void prepare_output_image(); + + // Query device and runtime capabilities + std::tuple query_capabilities(); + + void create_image_buffers(); + + void build_program(std::string kernel_op); + + void create_gaussian_kernel(); + + // Returns true if an option is passed with "-b option" on the command line + // or if no option is passed. 
+ bool option_active(std::string option); + + // Sample-specific option + struct BlurOptions + { + std::string in; + std::string out; + float size; + std::vector + op; // This is a vector because MultiArg method is used + }; + +private: + cl::Device device; + cl::Context context; + cl::CommandQueue queue; + std::ifstream kernel_stream; + cl::Program program; + + cl::sdk::Image input_image; + cl::Image2D input_image_buf; + cl::sdk::Image output_image; + cl::Image2D output_image_buf; + cl::Image2D temp_image_buf; + cl::ImageFormat format; + std::array origin; + std::array image_size; + cl::size_type width; + cl::size_type height; + std::string filename; + + bool verbose; + cl_uint step; + + cl::sdk::options::Diagnostic diag_opts; + cl::sdk::options::SingleDevice dev_opts; + BlurOptions blur_opts; + + cl::Buffer gauss_kernel_buf; + float* gauss_kernel; + int gauss_size; + + void parse_command_line(int argc, char* argv[]); + void show_format(cl::ImageFormat* format); + cl::ImageFormat set_image_format(); + void finalize_blur(); + + // note that the kernel is not normalized and has size of 2*(*size)+1 + // elements + static void create_gaussian_kernel_(float radius, float** const kernel, + int* const size); + static float gaussian(float x, float radius); + static void print_timings(std::chrono::duration host_duration, + std::vector& events); +}; diff --git a/samples/core/blur/main.cpp b/samples/core/blur/main.cpp new file mode 100644 index 00000000..1face850 --- /dev/null +++ b/samples/core/blur/main.cpp @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2023 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "blur.hpp" + +// STL includes +#include +#include + +int main(int argc, char* argv[]) +{ + try + { + // Parse command line arguments and store the parameters in blur class. + // You can pass '-b box' or '-b gauss' to select conversion type. + // You can pass both options with "-b box -b gauss" or don't pass + // anything. If you don't pass a parameter both conversions will be + // performed. + BlurCppExample blur(argc, argv); + + // Create a context and load the device used for blur operations. + blur.load_device(); + + // Read input image. If not specified on the command line, the default + // is generated. If this function fails, ensure that the default file or + // the one specified on the command line is available. The default image + // is placed in default_image.h. + blur.read_input_image(); + + // Prepare output image. It will have the same dimensions as the input + // image. + blur.prepare_output_image(); + + // Query device and runtime capabilities + bool use_local_mem, use_subgroup_exchange, + use_subgroup_exchange_relative; + std::tie(use_local_mem, use_subgroup_exchange, + use_subgroup_exchange_relative) = blur.query_capabilities(); + + // Create image buffers used for operation. In this example input, + // output and temporary image buffers are used. Temporary buffer is used + // when 2 blur operations in a row are performed. Result of the first + // operation is stored in temporary buffer and temporary buffer is used + // as input for 2nd operation. 
+ blur.create_image_buffers(); + + // Create kernel and build program for selected device and blur.cl file + // without any options. If this function fails, ensure that the blur.cl + // file is available in place of execution. + blur.build_program(""); + + // The box blur operation will be performed if you pass "-b box" or + // don't select any option. + if (blur.option_active("box")) + { + // Basic blur operation using a kernel functor. Using kernel + // functors is more convenient than creating a kernel class object + // and setting the arguments one by one. + blur.single_pass_box_blur(); + + // Dual pass demonstrates the use of the same kernel functor twice. + // The result of the first operation is stored in a temporary buffer + // and used for the second operation. + blur.dual_pass_box_blur(); + + // Comparison with other examples shows the classic approach to + // working with local memory. Kernels for blur operations are + // created separately, and their parameters are set with setArg + // functions, similar to the C library API. + if (use_local_mem) blur.dual_pass_local_memory_exchange_box_blur(); + + // This example uses a program built with parameters. In the blur.cl + // file you can find 'USE_SUBGROUP_EXCHANGE_RELATIVE' C-like + // definition switch for blur_box_horizontal_subgroup_exchange + // function. In this case, 2 blur kernel functors are used. + if (use_subgroup_exchange_relative) + { + std::cout << "Dual-pass subgroup relative exchange blur" + << std::endl; + + blur.build_program("-D USE_SUBGROUP_EXCHANGE_RELATIVE "); + blur.dual_pass_subgroup_exchange_box_blur(); + } + + // Same as the previous one, but with a different build switch. See + // the blur.cl file for more info about the switch. 
+ if (use_subgroup_exchange) + { + std::cout << "Dual-pass subgroup exchange blur" << std::endl; + + blur.build_program("-D USE_SUBGROUP_EXCHANGE "); + blur.dual_pass_subgroup_exchange_box_blur(); + } + } // Box blur + + // Rebuild the default program with no build options. + blur.build_program(""); + + // The gauss blur operation is performed when the "-b gauss" option or + // no option is passed. The following examples use a manually created + // gaussian kernel passed as an argument to functions from blur.cl + if (blur.option_active("gauss")) + { + std::cout << "Dual-pass Gaussian blur" << std::endl; + // Create a gaussian kernel to be used for the next blurs. + blur.create_gaussian_kernel(); + + // Basic blur operation using kernel functor and gaussian kernel. + blur.dual_pass_kernel_blur(); + + // Local memory exchange Gaussian blur with kernel functors. Note + // that the variable type cl::Local is used for local memory + // arguments in kernel functor calls. + if (use_local_mem) + { + std::cout << "Dual-pass local memory exchange Gaussian blur" + << std::endl; + blur.dual_pass_local_memory_exchange_kernel_blur(); + } + + // Similar to dual_pass_subgroup_exchange_box_blur but with a gauss + // kernel. + if (use_subgroup_exchange_relative) + { + std::cout + << "Dual-pass subgroup relative exchange Gaussian blur" + << std::endl; + + blur.build_program("-D USE_SUBGROUP_EXCHANGE_RELATIVE "); + blur.dual_pass_subgroup_exchange_kernel_blur(); + } + + // Same as the previous one, but with a different build switch. See + // the blur.cl file for more info about the switch. 
+ if (use_subgroup_exchange) + { + std::cout << "Dual-pass subgroup exchange Gaussian blur" + << std::endl; + + blur.build_program("-D USE_SUBGROUP_EXCHANGE "); + blur.dual_pass_subgroup_exchange_kernel_blur(); + } + } // Gaussian blur + } catch (cl::Error& e) + { + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; + std::exit(e.err()); + } catch (std::exception& e) + { + std::cerr << "Error: " << e.what() << std::endl; + std::exit(EXIT_FAILURE); + } + return 0; +}