From d8a73fdfc2fee2022ba96c7d2408765db3dcb9a4 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sun, 29 Mar 2015 11:32:53 +0200 Subject: [PATCH 01/39] * Change the OpenCL version verification system - Removed internal funciton: bool check_version(int major, int minor) const. - Created function: uint_ version_number() const version_number function computes and caches the OpenCL device version for faster runtime checks. * Aditional runtime version checks. Static linking checks are not enough: You can compile a funcition using a OpenCL 2.0 enabled system, but if the final device is only 1.2, only 1.2 functions must be invoked. --- include/boost/compute/device.hpp | 57 +++++++++++++++----------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/include/boost/compute/device.hpp b/include/boost/compute/device.hpp index ab4cbacc0..a8b5f72b5 100644 --- a/include/boost/compute/device.hpp +++ b/include/boost/compute/device.hpp @@ -52,14 +52,14 @@ class device /// Creates a null device object. device() - : m_id(0) + : m_id(0), m_version(0) { } /// Creates a new device object for \p id. If \p retain is \c true, /// the reference count for the device will be incremented. explicit device(cl_device_id id, bool retain = true) - : m_id(id) + : m_id(id), m_version(0) { #ifdef CL_VERSION_1_2 if(m_id && retain && is_subdevice()){ @@ -72,7 +72,7 @@ class device /// Creates a new device object as a copy of \p other. device(const device &other) - : m_id(other.m_id) + : m_id(other.m_id), m_version(other.m_version) { #ifdef CL_VERSION_1_2 if(m_id && is_subdevice()){ @@ -118,7 +118,7 @@ class device if(m_id && is_subdevice()){ clReleaseDevice(m_id); } - #endif + #endif // CL_VERSION_1_2 m_id = other.m_id; other.m_id = 0; @@ -136,7 +136,7 @@ class device clReleaseDevice(m_id) ); } - #endif + #endif // CL_VERSION_1_2 } /// Returns the ID of the device. @@ -188,6 +188,21 @@ class device return get_info(CL_DEVICE_VERSION); } + /// Returns the device version number. (eg. 1.1 is 101, 1.2 is 102, 2.0 is 200) + uint_ version_number() const + { + if (m_version == 0) { + std::stringstream ss(version()); + ushort_ major, minor; + ss.ignore(7); // 'OpenCL ' + ss >> major; + ss.ignore(1); // '.' + ss >> minor; + m_version = major * 100 + minor; // cache + } + return m_version; + } + /// Returns the driver version string. std::string driver_version() const { @@ -281,17 +296,11 @@ class device bool is_subdevice() const { #if defined(CL_VERSION_1_2) - try { + if (version_number() >= 102) return get_info(CL_DEVICE_PARENT_DEVICE) != 0; - } - catch(opencl_error&){ - // the get_info() call above will throw if the device's opencl version - // is less than 1.2 (in which case it can't be a sub-device). + else + #endif // CL_VERSION_1_2 return false; - } - #else - return false; - #endif } /// Returns information about the device. @@ -327,6 +336,9 @@ class device std::vector partition(const cl_device_partition_property *properties) const { + if (version_number() < 102) + return std::vector(); + // get sub-device count uint_ count = 0; int_ ret = clCreateSubDevices(m_id, properties, 0, 0, &count); @@ -405,24 +417,9 @@ class device return m_id != other.m_id; } - /// \internal_ - bool check_version(int major, int minor) const - { - std::stringstream stream; - stream << version(); - - int actual_major, actual_minor; - stream.ignore(7); // 'OpenCL ' - stream >> actual_major; - stream.ignore(1); // '.' - stream >> actual_minor; - - return actual_major > major || - (actual_major == major && actual_minor >= minor); - } - private: cl_device_id m_id; + mutable uint_ m_version; // Cached ICD OpenCL version number }; /// \internal_ From 3df09fe990c18c85cd419cd0ab59aee9c0ae5dac Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sun, 29 Mar 2015 11:46:21 +0200 Subject: [PATCH 02/39] * Additional runtime version checks. Link time checks are not enough: You can compile a function using a OpenCL 2.0 enabled system, but if the final device is only 1.2, only 1.2 functions must be invoked. * Added enqueue_map_image functions. * enqueue_unmap_buffer uses memory_object instead of buffer objects, so it can be used with images. * Added static assets: BOOST_STATIC_ASSERT * Deprecated functions (eg. clEnqueueTask) must be compiled for backward compatibility. * Removed check_device_version --- include/boost/compute/command_queue.hpp | 146 ++++++++++++++++++------ 1 file changed, 109 insertions(+), 37 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 433156425..d1388a340 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -118,24 +118,29 @@ class command_queue cl_int error = 0; #ifdef CL_VERSION_2_0 - std::vector queue_properties; - if(properties){ - queue_properties.push_back(CL_QUEUE_PROPERTIES); - queue_properties.push_back(cl_queue_properties(properties)); - queue_properties.push_back(cl_queue_properties(0)); - } + if (device.version_number() >= 200) + { + std::vector queue_properties; + if(properties){ + queue_properties.push_back(CL_QUEUE_PROPERTIES); + queue_properties.push_back(cl_queue_properties(properties)); + queue_properties.push_back(cl_queue_properties(0)); + } - const cl_queue_properties *queue_properties_ptr = - queue_properties.empty() ? 0 : &queue_properties[0]; + const cl_queue_properties *queue_properties_ptr = + queue_properties.empty() ? 0 : &queue_properties[0]; - m_queue = clCreateCommandQueueWithProperties( - context, device.id(), queue_properties_ptr, &error - ); - #else - m_queue = clCreateCommandQueue( - context, device.id(), properties, &error - ); + m_queue = clCreateCommandQueueWithProperties( + context, device.id(), queue_properties_ptr, &error + ); + } + else #endif + { + m_queue = clCreateCommandQueue( + context, device.id(), properties, &error + ); + } if(!m_queue){ BOOST_THROW_EXCEPTION(opencl_error(error)); @@ -605,20 +610,20 @@ class command_queue /// Enqueues a command to map \p buffer into the host address space. /// /// \see_opencl_ref{clEnqueueMapBuffer} - void* enqueue_map_buffer(const buffer &buffer, + void* enqueue_map_buffer(const buffer &buffer_, cl_map_flags flags, size_t offset, size_t size, const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); - BOOST_ASSERT(offset + size <= buffer.size()); - BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(offset + size <= buffer_.size()); + BOOST_ASSERT(buffer_.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapBuffer( m_queue, - buffer.get(), + buffer_.get(), CL_TRUE, flags, offset, @@ -636,16 +641,78 @@ class command_queue return pointer; } + /// Enqueues a command to map \p image into the host address space. + /// + /// \see_opencl_ref{clEnqueueMapImage} + void* enqueue_map_image(const image_object &image, + cl_map_flags flags, + const size_t *origin, + const size_t *region, + size_t *row_pitch, + size_t *slice_pitch = NULL, + const wait_list &events = wait_list()) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(image.get_context() == this->get_context()); + + cl_int ret = 0; + void *pointer = clEnqueueMapImage( + m_queue, + image.get(), + CL_TRUE, + flags, + origin, + region, + row_pitch, + slice_pitch, + events.size(), + events.get_event_ptr(), + 0, + &ret + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return pointer; + } + + + /// \overload + template + void* enqueue_map_image(const image_object& image, + cl_map_flags flags, + const extents origin, + const extents region, + size_t *row_pitch, + size_t *slice_pitch = NULL, + const wait_list &events = wait_list()) + { + BOOST_STATIC_ASSERT(N <= 3); + BOOST_ASSERT(image.get_context() == this->get_context()); + + size_t origin3[3] = { 0, 0, 0 }; + size_t region3[3] = { 1, 1, 1 }; + + std::copy(origin.data(), origin.data() + N, origin3); + std::copy(region.data(), region.data() + N, region3); + + return enqueue_map_image( + image, flags ,origin3, region3, row_pitch, slice_pitch, events + ); + } + /// Enqueues a command to unmap \p buffer from the host memory space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} - event enqueue_unmap_buffer(const buffer &buffer, + event enqueue_unmap_buffer(const memory_object &mem_object, void *mapped_ptr, const wait_list &events = wait_list()) { - BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(mem_object.get_context() == this->get_context()); - return enqueue_unmap_mem_object(buffer.get(), mapped_ptr, events); + return enqueue_unmap_mem_object(mem_object.get(), mapped_ptr, events); } /// Enqueues a command to unmap \p mem from the host memory space. @@ -721,6 +788,7 @@ class command_queue size_t slice_pitch = 0, const wait_list &events = wait_list()) { + BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; @@ -780,6 +848,7 @@ class command_queue const size_t input_slice_pitch = 0, const wait_list &events = wait_list()) { + BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; @@ -835,6 +904,7 @@ class command_queue const extents region, const wait_list &events = wait_list()) { + BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(src_image.get_context() == this->get_context()); BOOST_ASSERT(dst_image.get_context() == this->get_context()); BOOST_ASSERT_MSG(src_image.format() == dst_image.format(), @@ -932,6 +1002,7 @@ class command_queue const wait_list &events = wait_list()) { BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(image.get_context() == this->get_context()); event event_; @@ -961,6 +1032,7 @@ class command_queue const extents region, const wait_list &events = wait_list()) { + BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; @@ -1085,20 +1157,26 @@ class command_queue BOOST_ASSERT(kernel.get_context() == this->get_context()); event event_; + cl_int ret; // clEnqueueTask() was deprecated in OpenCL 2.0. In that case we // just forward to the equivalent clEnqueueNDRangeKernel() call. #ifdef CL_VERSION_2_0 - size_t one = 1; - cl_int ret = clEnqueueNDRangeKernel( - m_queue, kernel, 1, 0, &one, &one, - events.size(), events.get_event_ptr(), &event_.get() - ); - #else - cl_int ret = clEnqueueTask( - m_queue, kernel, events.size(), events.get_event_ptr(), &event_.get() - ); + if (this->get_context().get_device().version_number() >= 200) + { + size_t one = 1; + ret = clEnqueueNDRangeKernel( + m_queue, kernel, 1, 0, &one, &one, + events.size(), events.get_event_ptr(), &event_.get() + ); + } + else #endif + { + ret = clEnqueueTask( + m_queue, kernel, events.size(), events.get_event_ptr(), &event_.get() + ); + } if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); @@ -1432,12 +1510,6 @@ class command_queue return m_queue; } - /// \internal_ - bool check_device_version(int major, int minor) const - { - return get_device().check_version(major, minor); - } - private: cl_command_queue m_queue; }; From 384adc2963e72d3320ae54c0a23bc102189a75c5 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sun, 29 Mar 2015 11:47:24 +0200 Subject: [PATCH 03/39] * static cast to avoid a compiler warning. --- include/boost/compute/utility/wait_list.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/compute/utility/wait_list.hpp b/include/boost/compute/utility/wait_list.hpp index 60ebb2b0d..ca45773d2 100644 --- a/include/boost/compute/utility/wait_list.hpp +++ b/include/boost/compute/utility/wait_list.hpp @@ -97,7 +97,7 @@ class wait_list /// Returns the number of events in the wait-list. uint_ size() const { - return m_events.size(); + return static_cast(m_events.size()); } /// Removes all of the events from the wait-list. From 6bb769647a376282b617bfa4e22bf3e8ef215ec7 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sun, 29 Mar 2015 11:48:25 +0200 Subject: [PATCH 04/39] * Removed redundant const. --- include/boost/compute/exception/context_error.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/compute/exception/context_error.hpp b/include/boost/compute/exception/context_error.hpp index 2d4580b1a..56abee769 100644 --- a/include/boost/compute/exception/context_error.hpp +++ b/include/boost/compute/exception/context_error.hpp @@ -70,7 +70,7 @@ class context_error : public std::exception } /// Returns the size of the private info memory block. - const size_t get_private_info_size() const throw() + size_t get_private_info_size() const throw() { return m_private_info_size; } From a815f277cd040b7cf94f1bcffb56726b1986f76a Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sun, 29 Mar 2015 11:50:59 +0200 Subject: [PATCH 05/39] * Additional runtime version checks. Link time checks are not enough: You can compile a function using a OpenCL 2.0 enabled system, but if the final device is only 1.2, only 1.2 functions must be invoked. * Deprecated functions (eg. clCreateImage2D) must be maintained for backward compatibility. --- include/boost/compute/image/image2d.hpp | 134 +++++++++++++----------- 1 file changed, 72 insertions(+), 62 deletions(-) diff --git a/include/boost/compute/image/image2d.hpp b/include/boost/compute/image/image2d.hpp index cd903fe91..ce23b8948 100644 --- a/include/boost/compute/image/image2d.hpp +++ b/include/boost/compute/image/image2d.hpp @@ -59,38 +59,43 @@ class image2d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = image_width; - desc.image_height = image_height; - desc.image_depth = 1; - desc.image_array_size = 0; - desc.image_row_pitch = image_row_pitch; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - #ifdef CL_VERSION_2_0 - desc.mem_object = 0; - #else - desc.buffer = 0; - #endif - - m_mem = clCreateImage(context, - flags, - format.get_format_ptr(), - &desc, - host_ptr, - &error); - #else - m_mem = clCreateImage2D(context, - flags, - format.get_format_ptr(), - image_width, - image_height, - image_row_pitch, - host_ptr, - &error); + if (context.get_device().version_number() >= 102) + { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = 1; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + } + else #endif + { + m_mem = clCreateImage2D(context, + flags, + format.get_format_ptr(), + image_width, + image_height, + image_row_pitch, + host_ptr, + &error); + } if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); @@ -109,38 +114,43 @@ class image2d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = image_width; - desc.image_height = image_height; - desc.image_depth = 1; - desc.image_array_size = 0; - desc.image_row_pitch = image_row_pitch; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - #ifdef CL_VERSION_2_0 - desc.mem_object = 0; - #else - desc.buffer = 0; - #endif - - m_mem = clCreateImage(context, - flags, - format.get_format_ptr(), - &desc, - host_ptr, - &error); - #else - m_mem = clCreateImage2D(context, - flags, - format.get_format_ptr(), - image_width, - image_height, - image_row_pitch, - host_ptr, - &error); + if (context.get_device().version_number() >= 102) + { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = 1; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + } + else #endif + { + m_mem = clCreateImage2D(context, + flags, + format.get_format_ptr(), + image_width, + image_height, + image_row_pitch, + host_ptr, + &error); + } if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); From d46588bdb6778e7786e5032be415a61916c6b075 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sun, 29 Mar 2015 12:24:22 +0200 Subject: [PATCH 06/39] * Added the version_number() function. --- include/boost/compute/context.hpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/include/boost/compute/context.hpp b/include/boost/compute/context.hpp index 7f8602aec..f3cbadffe 100644 --- a/include/boost/compute/context.hpp +++ b/include/boost/compute/context.hpp @@ -50,7 +50,7 @@ class context public: /// Create a null context object. context() - : m_context(0) + : m_context(0), m_version(0) { } @@ -64,6 +64,7 @@ class context cl_device_id device_id = device.id(); + m_version = 0; cl_int error = 0; m_context = clCreateContext(properties, 1, &device_id, 0, 0, &error); @@ -82,6 +83,7 @@ class context cl_int error = 0; + m_version = 0; m_context = clCreateContext( properties, static_cast(devices.size()), @@ -99,7 +101,7 @@ class context /// Creates a new context object for \p context. If \p retain is /// \c true, the reference count for \p context will be incremented. explicit context(cl_context context, bool retain = true) - : m_context(context) + : m_context(context), m_version(0) { if(m_context && retain){ clRetainContext(m_context); @@ -123,6 +125,7 @@ class context clReleaseContext(m_context); } + m_version = other.m_version; m_context = other.m_context; if(m_context){ @@ -136,9 +139,10 @@ class context #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new context object from \p other. context(context&& other) BOOST_NOEXCEPT - : m_context(other.m_context) + : m_context(other.m_context), m_version(other.m_version) { other.m_context = 0; + other.m_version = 0; } /// Move-assigns the context from \p other to \c *this. @@ -148,7 +152,9 @@ class context clReleaseContext(m_context); } + m_version = other.m_version; m_context = other.m_context; + other.m_version = 0; other.m_context = 0; return *this; @@ -236,8 +242,17 @@ class context return m_context; } + /// Returns the device version number. (eg. 1.1 is 101, 1.2 is 102, 2.0 is 200) + uint_ version_number() const + { + if (m_version == 0) + m_version = get_device().version_number(); // The version of the first device + return m_version; + } + private: cl_context m_context; + mutable uint_ m_version; // Cached ICD OpenCL version number }; /// \internal_ define get_info() specializations for context From ed7f213f69a802e8e15bf1ac3228ff792bcc6ea0 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sun, 29 Mar 2015 12:25:07 +0200 Subject: [PATCH 07/39] * Use the context version_number() function. --- include/boost/compute/image/image2d.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/compute/image/image2d.hpp b/include/boost/compute/image/image2d.hpp index ce23b8948..14b53a69b 100644 --- a/include/boost/compute/image/image2d.hpp +++ b/include/boost/compute/image/image2d.hpp @@ -59,7 +59,7 @@ class image2d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - if (context.get_device().version_number() >= 102) + if (context.version_number() >= 102) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; @@ -114,7 +114,7 @@ class image2d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - if (context.get_device().version_number() >= 102) + if (context.version_number() >= 102) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; From 9078790bcbaf4d8537db0af5061d44c8723b874e Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sun, 29 Mar 2015 12:37:24 +0200 Subject: [PATCH 08/39] * Use the context version_number() function. --- include/boost/compute/image/image1d.hpp | 55 +++++++------- include/boost/compute/image/image3d.hpp | 98 +++++++++++++------------ 2 files changed, 81 insertions(+), 72 deletions(-) diff --git a/include/boost/compute/image/image1d.hpp b/include/boost/compute/image/image1d.hpp index 59a5f2557..773d9c26e 100644 --- a/include/boost/compute/image/image1d.hpp +++ b/include/boost/compute/image/image1d.hpp @@ -51,35 +51,38 @@ class image1d : public image_object void *host_ptr = 0) { #ifdef CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE1D; - desc.image_width = image_width; - desc.image_height = 1; - desc.image_depth = 1; - desc.image_array_size = 0; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - #ifdef CL_VERSION_2_0 - desc.mem_object = 0; - #else - desc.buffer = 0; - #endif - - cl_int error = 0; - - m_mem = clCreateImage( - context, flags, format.get_format_ptr(), &desc, host_ptr, &error - ); - - if(!m_mem){ - BOOST_THROW_EXCEPTION(opencl_error(error)); + if (context.version_number() >= 102) + { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE1D; + desc.image_width = image_width; + desc.image_height = 1; + desc.image_depth = 1; + desc.image_array_size = 0; + desc.image_row_pitch = 0; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + cl_int error = 0; + + m_mem = clCreateImage( + context, flags, format.get_format_ptr(), &desc, host_ptr, &error + ); + + if(!m_mem){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } } - #else + else + #endif // image1d objects are only supported in OpenCL 1.2 and later BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); - #endif } /// Creates a new image1d as a copy of \p other. diff --git a/include/boost/compute/image/image3d.hpp b/include/boost/compute/image/image3d.hpp index ee3768ac6..f77211b70 100644 --- a/include/boost/compute/image/image3d.hpp +++ b/include/boost/compute/image/image3d.hpp @@ -52,29 +52,33 @@ class image3d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE3D; - desc.image_width = image_width; - desc.image_height = image_height; - desc.image_depth = image_depth; - desc.image_array_size = 0; - desc.image_row_pitch = image_row_pitch; - desc.image_slice_pitch = image_slice_pitch; - desc.num_mip_levels = 0; - desc.num_samples = 0; - #ifdef CL_VERSION_2_0 - desc.mem_object = 0; - #else - desc.buffer = 0; + if (context.version_number() >= 102) + { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE3D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = image_depth; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = image_slice_pitch; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + } + else #endif - - m_mem = clCreateImage(context, - flags, - format.get_format_ptr(), - &desc, - host_ptr, - &error); - #else m_mem = clCreateImage3D(context, flags, format.get_format_ptr(), @@ -85,7 +89,6 @@ class image3d : public image_object image_slice_pitch, host_ptr, &error); - #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); @@ -106,29 +109,33 @@ class image3d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE3D; - desc.image_width = image_width; - desc.image_height = image_height; - desc.image_depth = image_depth; - desc.image_array_size = 0; - desc.image_row_pitch = image_row_pitch; - desc.image_slice_pitch = image_slice_pitch; - desc.num_mip_levels = 0; - desc.num_samples = 0; - #ifdef CL_VERSION_2_0 - desc.mem_object = 0; - #else - desc.buffer = 0; + if (context.version_number() >= 102) + { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE3D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = image_depth; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = image_slice_pitch; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + } + else #endif - - m_mem = clCreateImage(context, - flags, - format.get_format_ptr(), - &desc, - host_ptr, - &error); - #else m_mem = clCreateImage3D(context, flags, format.get_format_ptr(), @@ -139,7 +146,6 @@ class image3d : public image_object image_slice_pitch, host_ptr, &error); - #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); From 4eb3a3b33ccca08bb7d2b3a58affcb1759a079a6 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Mon, 30 Mar 2015 01:17:31 +0200 Subject: [PATCH 09/39] * Changed the function name version_number() to get_version() to match other get_*() functions. * Added the get_version() member function to command_queue and to context for fast runtime checks. * Runtime checks. * More synchronizable functions in command_queue (enqueue_read_buffer_rect, enqueue_write_buffer_rect, enqueue_map_buffer, enqueue_map_image, enqueue_svm_map,...) * Fixed a bad use of `cl_bool blocking_read = CL_TRUE` when trying to use the returned event (enqueue_read_image, enqueue_write_image,...) --- include/boost/compute/command_queue.hpp | 490 ++++++++++++------------ include/boost/compute/context.hpp | 4 +- include/boost/compute/device.hpp | 11 +- include/boost/compute/image/image1d.hpp | 2 +- include/boost/compute/image/image2d.hpp | 4 +- include/boost/compute/image/image3d.hpp | 4 +- 6 files changed, 250 insertions(+), 265 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index d1388a340..58b0f48f9 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -93,12 +93,12 @@ class command_queue /// Creates a null command queue. command_queue() - : m_queue(0) + : m_queue(0), m_version(0) { } explicit command_queue(cl_command_queue queue, bool retain = true) - : m_queue(queue) + : m_queue(queue), m_version(0) { if(m_queue && retain){ clRetainCommandQueue(m_queue); @@ -116,9 +116,10 @@ class command_queue BOOST_ASSERT(device.id() != 0); cl_int error = 0; + m_version = device.get_version(); #ifdef CL_VERSION_2_0 - if (device.version_number() >= 200) + if (get_version() >= 200) { std::vector queue_properties; if(properties){ @@ -149,7 +150,7 @@ class command_queue /// Creates a new command queue object as a copy of \p other. command_queue(const command_queue &other) - : m_queue(other.m_queue) + : m_queue(other.m_queue), m_version(other.m_version) { if(m_queue){ clRetainCommandQueue(m_queue); @@ -165,6 +166,7 @@ class command_queue } m_queue = other.m_queue; + m_version = other.m_version; if(m_queue){ clRetainCommandQueue(m_queue); @@ -177,9 +179,10 @@ class command_queue #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new command queue object from \p other. command_queue(command_queue&& other) BOOST_NOEXCEPT - : m_queue(other.m_queue) + : m_queue(other.m_queue), m_version(other.m_version) { other.m_queue = 0; + other.m_version = 0; } /// Move-assigns the command queue from \p other to \c *this. @@ -190,6 +193,7 @@ class command_queue } m_queue = other.m_queue; + m_version = other.m_version; other.m_queue = 0; return *this; @@ -226,6 +230,14 @@ class command_queue return context(get_info(CL_QUEUE_CONTEXT)); } + /// Returns the numeric version: major * 100 + minor. + uint_ get_version() const + { + if (m_version == 0) + m_version = get_device().get_version(); // The version of the first device + return m_version; + } + /// Returns information about the command queue. /// /// \see_opencl_ref{clGetCommandQueueInfo} @@ -255,7 +267,8 @@ class command_queue size_t offset, size_t size, void *host_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); @@ -265,13 +278,13 @@ class command_queue cl_int ret = clEnqueueReadBuffer( m_queue, buffer.get(), - CL_TRUE, + clevent ? CL_FALSE : CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), - 0 + clevent ); if(ret != CL_SUCCESS){ @@ -291,28 +304,14 @@ class command_queue void *host_ptr, const wait_list &events = wait_list()) { - BOOST_ASSERT(m_queue != 0); - BOOST_ASSERT(size <= buffer.size()); - BOOST_ASSERT(buffer.get_context() == this->get_context()); - BOOST_ASSERT(host_ptr != 0); - event event_; - cl_int ret = clEnqueueReadBuffer( - m_queue, - buffer.get(), - CL_FALSE, - offset, - size, - host_ptr, - events.size(), - events.get_event_ptr(), - &event_.get() - ); - - if(ret != CL_SUCCESS){ - BOOST_THROW_EXCEPTION(opencl_error(ret)); - } + enqueue_read_buffer(buffer, + offset, + size, + host_ptr, + events, + &event_.get()); return event_; } @@ -333,16 +332,20 @@ class command_queue size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); + if (get_version() < 101) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); + cl_int ret = clEnqueueReadBufferRect( m_queue, buffer.get(), - CL_TRUE, + clevent ? CL_FALSE : CL_TRUE, buffer_origin, host_origin, region, @@ -353,7 +356,7 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - 0 + clevent ); if(ret != CL_SUCCESS){ @@ -371,7 +374,8 @@ class command_queue size_t offset, size_t size, const void *host_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); @@ -381,13 +385,13 @@ class command_queue cl_int ret = clEnqueueWriteBuffer( m_queue, buffer.get(), - CL_TRUE, + clevent ? CL_FALSE : CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), - 0 + clevent ); if(ret != CL_SUCCESS){ @@ -407,30 +411,16 @@ class command_queue const void *host_ptr, const wait_list &events = wait_list()) { - BOOST_ASSERT(m_queue != 0); - BOOST_ASSERT(size <= buffer.size()); - BOOST_ASSERT(buffer.get_context() == this->get_context()); - BOOST_ASSERT(host_ptr != 0); - event event_; - cl_int ret = clEnqueueWriteBuffer( - m_queue, - buffer.get(), - CL_FALSE, - offset, - size, - host_ptr, - events.size(), - events.get_event_ptr(), - &event_.get() - ); + enqueue_write_buffer(buffer, + offset, + size, + host_ptr, + events, + &event_.get()); - if(ret != CL_SUCCESS){ - BOOST_THROW_EXCEPTION(opencl_error(ret)); - } - - return event_; + return event_; } #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) @@ -449,16 +439,20 @@ class command_queue size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); + if (get_version() < 101) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); + cl_int ret = clEnqueueWriteBufferRect( m_queue, buffer.get(), - CL_TRUE, + clevent ? CL_FALSE : CL_TRUE, buffer_origin, host_origin, region, @@ -469,7 +463,7 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - 0 + clevent ); if(ret != CL_SUCCESS){ @@ -484,12 +478,13 @@ class command_queue /// \see_opencl_ref{clEnqueueCopyBuffer} /// /// \see copy() - event enqueue_copy_buffer(const buffer &src_buffer, + void enqueue_copy_buffer(const buffer &src_buffer, const buffer &dst_buffer, size_t src_offset, size_t dst_offset, size_t size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_offset + size <= src_buffer.size()); @@ -497,8 +492,6 @@ class command_queue BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); - event event_; - cl_int ret = clEnqueueCopyBuffer( m_queue, src_buffer.get(), @@ -508,14 +501,12 @@ class command_queue size, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) @@ -525,7 +516,7 @@ class command_queue /// \see_opencl_ref{clEnqueueCopyBufferRect} /// /// \opencl_version_warning{1,1} - event enqueue_copy_buffer_rect(const buffer &src_buffer, + void enqueue_copy_buffer_rect(const buffer &src_buffer, const buffer &dst_buffer, const size_t src_origin[3], const size_t dst_origin[3], @@ -534,13 +525,15 @@ class command_queue size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); - event event_; + if (get_version() < 101) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueCopyBufferRect( m_queue, @@ -555,14 +548,12 @@ class command_queue host_slice_pitch, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #endif // CL_VERSION_1_1 @@ -574,18 +565,20 @@ class command_queue /// \opencl_version_warning{1,2} /// /// \see fill() - event enqueue_fill_buffer(const buffer &buffer, + void enqueue_fill_buffer(const buffer &buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); - event event_; + if (get_version() < 102) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueFillBuffer( m_queue, @@ -596,14 +589,12 @@ class command_queue size, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #endif // CL_VERSION_1_2 @@ -614,7 +605,8 @@ class command_queue cl_map_flags flags, size_t offset, size_t size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer_.size()); @@ -624,13 +616,13 @@ class command_queue void *pointer = clEnqueueMapBuffer( m_queue, buffer_.get(), - CL_TRUE, + clevent ? CL_FALSE : CL_TRUE, flags, offset, size, events.size(), events.get_event_ptr(), - 0, + clevent, &ret ); @@ -650,7 +642,8 @@ class command_queue const size_t *region, size_t *row_pitch, size_t *slice_pitch = NULL, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); @@ -659,7 +652,7 @@ class command_queue void *pointer = clEnqueueMapImage( m_queue, image.get(), - CL_TRUE, + clevent ? CL_TRUE : CL_FALSE, flags, origin, region, @@ -667,7 +660,7 @@ class command_queue slice_pitch, events.size(), events.get_event_ptr(), - 0, + clevent, &ret ); @@ -687,7 +680,8 @@ class command_queue const extents region, size_t *row_pitch, size_t *slice_pitch = NULL, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); @@ -699,68 +693,65 @@ class command_queue std::copy(region.data(), region.data() + N, region3); return enqueue_map_image( - image, flags ,origin3, region3, row_pitch, slice_pitch, events + image, flags ,origin3, region3, row_pitch, slice_pitch, events, clevent ); } /// Enqueues a command to unmap \p buffer from the host memory space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} - event enqueue_unmap_buffer(const memory_object &mem_object, + void enqueue_unmap_buffer(const memory_object &mem_object, void *mapped_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(mem_object.get_context() == this->get_context()); - return enqueue_unmap_mem_object(mem_object.get(), mapped_ptr, events); + enqueue_unmap_mem_object(mem_object.get(), mapped_ptr, events, clevent); } /// Enqueues a command to unmap \p mem from the host memory space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} - event enqueue_unmap_mem_object(cl_mem mem, + void enqueue_unmap_mem_object(cl_mem mem, void *mapped_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueUnmapMemObject( m_queue, mem, mapped_ptr, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Enqueues a command to read data from \p image to host memory. /// /// \see_opencl_ref{clEnqueueReadImage} - event enqueue_read_image(const image_object& image, + void enqueue_read_image(const image_object& image, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *host_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueReadImage( m_queue, image.get(), - CL_TRUE, + clevent ? CL_FALSE : CL_TRUE, origin, region, row_pitch, @@ -768,25 +759,24 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// \overload template - event enqueue_read_image(const image_object& image, + void enqueue_read_image(const image_object& image, const extents origin, const extents region, void *host_ptr, size_t row_pitch = 0, size_t slice_pitch = 0, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); @@ -797,30 +787,29 @@ class command_queue std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); - return enqueue_read_image( - image, origin3, region3, row_pitch, slice_pitch, host_ptr, events + enqueue_read_image( + image, origin3, region3, row_pitch, slice_pitch, host_ptr, events, clevent ); } /// Enqueues a command to write data from host memory to \p image. /// /// \see_opencl_ref{clEnqueueWriteImage} - event enqueue_write_image(image_object& image, + void enqueue_write_image(image_object& image, const size_t *origin, const size_t *region, const void *host_ptr, size_t input_row_pitch = 0, size_t input_slice_pitch = 0, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueWriteImage( m_queue, image.get(), - CL_TRUE, + clevent ? CL_FALSE : CL_TRUE, origin, region, input_row_pitch, @@ -828,25 +817,24 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// \overload template - event enqueue_write_image(image_object& image, + void enqueue_write_image(image_object& image, const extents origin, const extents region, const void *host_ptr, const size_t input_row_pitch = 0, const size_t input_slice_pitch = 0, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); @@ -857,25 +845,24 @@ class command_queue std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); - return enqueue_write_image( - image, origin3, region3, host_ptr, input_row_pitch, input_slice_pitch, events + enqueue_write_image( + image, origin3, region3, host_ptr, input_row_pitch, input_slice_pitch, events, clevent ); } /// Enqueues a command to copy data from \p src_image to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyImage} - event enqueue_copy_image(const image_object& src_image, + void enqueue_copy_image(const image_object& src_image, image_object& dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueCopyImage( m_queue, src_image.get(), @@ -885,24 +872,23 @@ class command_queue region, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// \overload template - event enqueue_copy_image(const image_object& src_image, + void enqueue_copy_image(const image_object& src_image, image_object& dst_image, const extents src_origin, const extents dst_origin, const extents region, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(src_image.get_context() == this->get_context()); @@ -918,25 +904,24 @@ class command_queue std::copy(dst_origin.data(), dst_origin.data() + N, dst_origin3); std::copy(region.data(), region.data() + N, region3); - return enqueue_copy_image( - src_image, dst_image, src_origin3, dst_origin3, region3, events + enqueue_copy_image( + src_image, dst_image, src_origin3, dst_origin3, region3, events, clevent ); } /// Enqueues a command to copy data from \p src_image to \p dst_buffer. /// /// \see_opencl_ref{clEnqueueCopyImageToBuffer} - event enqueue_copy_image_to_buffer(const image_object& src_image, + void enqueue_copy_image_to_buffer(const image_object& src_image, memory_object& dst_buffer, const size_t *src_origin, const size_t *region, size_t dst_offset, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueCopyImageToBuffer( m_queue, src_image.get(), @@ -946,30 +931,27 @@ class command_queue dst_offset, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Enqueues a command to copy data from \p src_buffer to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyBufferToImage} - event enqueue_copy_buffer_to_image(const memory_object& src_buffer, + void enqueue_copy_buffer_to_image(const memory_object& src_buffer, image_object& dst_image, size_t src_offset, const size_t *dst_origin, const size_t *region, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueCopyBufferToImage( m_queue, src_buffer.get(), @@ -979,14 +961,12 @@ class command_queue region, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) @@ -995,16 +975,18 @@ class command_queue /// \see_opencl_ref{clEnqueueFillImage} /// /// \opencl_version_warning{1,2} - event enqueue_fill_image(image_object& image, + void enqueue_fill_image(image_object& image, const void *fill_color, const size_t *origin, const size_t *region, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); - event event_; + if (get_version() < 102) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueFillImage( m_queue, @@ -1014,23 +996,22 @@ class command_queue region, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// \overload template - event enqueue_fill_image(image_object& image, + void enqueue_fill_image(image_object& image, const void *fill_color, const extents origin, const extents region, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); @@ -1041,8 +1022,8 @@ class command_queue std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); - return enqueue_fill_image( - image, fill_color, origin3, region3, events + enqueue_fill_image( + image, fill_color, origin3, region3, events, clevent ); } @@ -1051,14 +1032,16 @@ class command_queue /// \see_opencl_ref{clEnqueueMigrateMemObjects} /// /// \opencl_version_warning{1,2} - event enqueue_migrate_memory_objects(uint_ num_mem_objects, + void enqueue_migrate_memory_objects(uint_ num_mem_objects, const cl_mem *mem_objects, cl_mem_migration_flags flags, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; + if (get_version() < 102) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueMigrateMemObjects( m_queue, @@ -1067,32 +1050,29 @@ class command_queue flags, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #endif // CL_VERSION_1_2 /// Enqueues a kernel for execution. /// /// \see_opencl_ref{clEnqueueNDRangeKernel} - event enqueue_nd_range_kernel(const kernel &kernel, + void enqueue_nd_range_kernel(const kernel &kernel, size_t work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); - event event_; - cl_int ret = clEnqueueNDRangeKernel( m_queue, kernel, @@ -1102,101 +1082,102 @@ class command_queue local_work_size, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// \overload template - event enqueue_nd_range_kernel(const kernel &kernel, + void enqueue_nd_range_kernel(const kernel &kernel, const extents &global_work_offset, const extents &global_work_size, const extents &local_work_size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { - return enqueue_nd_range_kernel( + enqueue_nd_range_kernel( kernel, N, global_work_offset.data(), global_work_size.data(), local_work_size.data(), - events + events, + clevent ); } /// Convenience method which calls enqueue_nd_range_kernel() with a /// one-dimensional range. - event enqueue_1d_range_kernel(const kernel &kernel, + void enqueue_1d_range_kernel(const kernel &kernel, size_t global_work_offset, size_t global_work_size, size_t local_work_size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { - return enqueue_nd_range_kernel( + enqueue_nd_range_kernel( kernel, 1, &global_work_offset, &global_work_size, local_work_size ? &local_work_size : 0, - events + events, + clevent ); } /// Enqueues a kernel to execute using a single work-item. /// /// \see_opencl_ref{clEnqueueTask} - event enqueue_task(const kernel &kernel, const wait_list &events = wait_list()) + void enqueue_task(const kernel &kernel, + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); - event event_; cl_int ret; // clEnqueueTask() was deprecated in OpenCL 2.0. In that case we // just forward to the equivalent clEnqueueNDRangeKernel() call. #ifdef CL_VERSION_2_0 - if (this->get_context().get_device().version_number() >= 200) + if (get_version() >= 200) { size_t one = 1; ret = clEnqueueNDRangeKernel( m_queue, kernel, 1, 0, &one, &one, - events.size(), events.get_event_ptr(), &event_.get() + events.size(), events.get_event_ptr(), clevent ); } else #endif { ret = clEnqueueTask( - m_queue, kernel, events.size(), events.get_event_ptr(), &event_.get() + m_queue, kernel, events.size(), events.get_event_ptr(), clevent ); } if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Enqueues a function to execute on the host. - event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *), - void *args, - size_t cb_args, - uint_ num_mem_objects, - const cl_mem *mem_list, - const void **args_mem_loc, - const wait_list &events = wait_list()) + void enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *), + void *args, + size_t cb_args, + uint_ num_mem_objects, + const cl_mem *mem_list, + const void **args_mem_loc, + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; cl_int ret = clEnqueueNativeKernel( m_queue, user_func, @@ -1207,28 +1188,28 @@ class command_queue args_mem_loc, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Convenience overload for enqueue_native_kernel() which enqueues a /// native kernel on the host with a nullary function. - event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void), - const wait_list &events = wait_list()) + void enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void), + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { - return enqueue_native_kernel( + enqueue_native_kernel( detail::nullary_native_kernel_trampoline, reinterpret_cast(&user_func), sizeof(user_func), 0, 0, 0, - events + events, + clevent ); } @@ -1258,43 +1239,46 @@ class command_queue BOOST_ASSERT(m_queue != 0); #ifdef CL_VERSION_1_2 - clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0); - #else - clEnqueueBarrier(m_queue); + if (get_version() >= 102) + clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0); + else #endif + clEnqueueBarrier(m_queue); } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a barrier in the queue after \p events. /// /// \opencl_version_warning{1,2} - void enqueue_barrier(const wait_list &events) + void enqueue_barrier(const wait_list &events, + cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); + if (get_version() < 102) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); + clEnqueueBarrierWithWaitList( - m_queue, events.size(), events.get_event_ptr(), 0 + m_queue, events.size(), events.get_event_ptr(), clevent ); } #endif // CL_VERSION_1_2 /// Enqueues a marker in the queue and returns an event that can be /// used to track its progress. - event enqueue_marker() + void enqueue_marker(cl_event * clevent) { - event event_; - + cl_int ret; #ifdef CL_VERSION_1_2 - cl_int ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, &event_.get()); - #else - cl_int ret = clEnqueueMarker(m_queue, &event_.get()); + if (get_version() >= 102) + ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, clevent); + else #endif + ret = clEnqueueMarker(m_queue, clevent); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) @@ -1302,19 +1286,19 @@ class command_queue /// event that can be used to track its progress. /// /// \opencl_version_warning{1,2} - event enqueue_marker(const wait_list &events) + void enqueue_marker(const wait_list &events, + cl_event * clevent = NULL) { - event event_; + if (get_version() < 102) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueMarkerWithWaitList( - m_queue, events.size(), events.get_event_ptr(), &event_.get() + m_queue, events.size(), events.get_event_ptr(), clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #endif // CL_VERSION_1_2 @@ -1328,17 +1312,21 @@ class command_queue void enqueue_svm_memcpy(void *dst_ptr, const void *src_ptr, size_t size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { + if (get_version() < 200) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); + cl_int ret = clEnqueueSVMMemcpy( m_queue, - CL_TRUE, + clevent ? CL_FALSE : CL_TRUE, dst_ptr, src_ptr, size, events.size(), events.get_event_ptr(), - 0 + clevent ); if(ret != CL_SUCCESS){ @@ -1359,20 +1347,11 @@ class command_queue { event event_; - cl_int ret = clEnqueueSVMMemcpy( - m_queue, - CL_FALSE, - dst_ptr, - src_ptr, - size, - events.size(), - events.get_event_ptr(), - &event_.get() - ); - - if(ret != CL_SUCCESS){ - BOOST_THROW_EXCEPTION(opencl_error(ret)); - } + enqueue_svm_memcpy(dst_ptr, + src_ptr, + size, + events, + &event_.get()); return event_; } @@ -1383,14 +1362,16 @@ class command_queue /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemFill} - event enqueue_svm_fill(void *svm_ptr, + void enqueue_svm_fill(void *svm_ptr, const void *pattern, size_t pattern_size, size_t size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { - event event_; + if (get_version() < 200) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueSVMMemFill( m_queue, @@ -1400,14 +1381,12 @@ class command_queue size, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Enqueues a command to free \p svm_ptr. @@ -1417,10 +1396,12 @@ class command_queue /// \see_opencl2_ref{clEnqueueSVMFree} /// /// \see svm_free() - event enqueue_svm_free(void *svm_ptr, - const wait_list &events = wait_list()) + void enqueue_svm_free(void *svm_ptr, + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { - event event_; + if (get_version() < 200) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueSVMFree( m_queue, @@ -1430,14 +1411,12 @@ class command_queue 0, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Enqueues a command to map \p svm_ptr to the host memory space. @@ -1448,17 +1427,21 @@ class command_queue void enqueue_svm_map(void *svm_ptr, size_t size, cl_map_flags flags, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { + if (get_version() < 200) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); + cl_int ret = clEnqueueSVMMap( m_queue, - CL_TRUE, + clevent ? CL_FALSE : CL_TRUE, flags, svm_ptr, size, events.size(), events.get_event_ptr(), - 0 + clevent ); if(ret != CL_SUCCESS){ @@ -1471,24 +1454,24 @@ class command_queue /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMUnmap} - event enqueue_svm_unmap(void *svm_ptr, - const wait_list &events = wait_list()) + void enqueue_svm_unmap(void *svm_ptr, + const wait_list &events = wait_list(), + cl_event * clevent = NULL) { - event event_; + if (get_version() < 200) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueSVMUnmap( m_queue, svm_ptr, events.size(), events.get_event_ptr(), - &event_.get() + clevent ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #endif // CL_VERSION_2_0 @@ -1512,6 +1495,7 @@ class command_queue private: cl_command_queue m_queue; + mutable uint_ m_version; }; inline buffer buffer::clone(command_queue &queue) const diff --git a/include/boost/compute/context.hpp b/include/boost/compute/context.hpp index f3cbadffe..c803f2fd7 100644 --- a/include/boost/compute/context.hpp +++ b/include/boost/compute/context.hpp @@ -243,10 +243,10 @@ class context } /// Returns the device version number. (eg. 1.1 is 101, 1.2 is 102, 2.0 is 200) - uint_ version_number() const + uint_ get_version() const { if (m_version == 0) - m_version = get_device().version_number(); // The version of the first device + m_version = get_device().get_version(); // The version of the first device return m_version; } diff --git a/include/boost/compute/device.hpp b/include/boost/compute/device.hpp index a8b5f72b5..d21ece6db 100644 --- a/include/boost/compute/device.hpp +++ b/include/boost/compute/device.hpp @@ -188,11 +188,12 @@ class device return get_info(CL_DEVICE_VERSION); } - /// Returns the device version number. (eg. 1.1 is 101, 1.2 is 102, 2.0 is 200) - uint_ version_number() const + /// Returns the device version number: major * 100 + minor (eg. 1.1 is 101, 1.2 is 102, 2.0 is 200) + uint_ get_version() const { if (m_version == 0) { - std::stringstream ss(version()); + std::string strversion(version()); + std::stringstream ss(strversion); ushort_ major, minor; ss.ignore(7); // 'OpenCL ' ss >> major; @@ -296,7 +297,7 @@ class device bool is_subdevice() const { #if defined(CL_VERSION_1_2) - if (version_number() >= 102) + if (get_version() >= 102) return get_info(CL_DEVICE_PARENT_DEVICE) != 0; else #endif // CL_VERSION_1_2 @@ -336,7 +337,7 @@ class device std::vector partition(const cl_device_partition_property *properties) const { - if (version_number() < 102) + if (get_version() < 102) return std::vector(); // get sub-device count diff --git a/include/boost/compute/image/image1d.hpp b/include/boost/compute/image/image1d.hpp index 773d9c26e..26dbfa74c 100644 --- a/include/boost/compute/image/image1d.hpp +++ b/include/boost/compute/image/image1d.hpp @@ -51,7 +51,7 @@ class image1d : public image_object void *host_ptr = 0) { #ifdef CL_VERSION_1_2 - if (context.version_number() >= 102) + if (context.get_version() >= 102) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE1D; diff --git a/include/boost/compute/image/image2d.hpp b/include/boost/compute/image/image2d.hpp index 14b53a69b..9d208639b 100644 --- a/include/boost/compute/image/image2d.hpp +++ b/include/boost/compute/image/image2d.hpp @@ -59,7 +59,7 @@ class image2d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - if (context.version_number() >= 102) + if (context.get_version() >= 102) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; @@ -114,7 +114,7 @@ class image2d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - if (context.version_number() >= 102) + if (context.get_version() >= 102) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; diff --git a/include/boost/compute/image/image3d.hpp b/include/boost/compute/image/image3d.hpp index f77211b70..f6b083642 100644 --- a/include/boost/compute/image/image3d.hpp +++ b/include/boost/compute/image/image3d.hpp @@ -52,7 +52,7 @@ class image3d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - if (context.version_number() >= 102) + if (context.get_version() >= 102) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; @@ -109,7 +109,7 @@ class image3d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - if (context.version_number() >= 102) + if (context.get_version() >= 102) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; From 8f20857cf6f6641b2566528575c3a8be55c18054 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Mon, 30 Mar 2015 02:36:15 +0200 Subject: [PATCH 10/39] * Allow the automatic computation of the local work size by passing 0 to the first dimension size. --- include/boost/compute/command_queue.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 58b0f48f9..04f8f3e40 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -1071,6 +1071,7 @@ class command_queue cl_event * clevent = NULL) { BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(work_dim > 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); cl_int ret = clEnqueueNDRangeKernel( @@ -1099,12 +1100,13 @@ class command_queue const wait_list &events = wait_list(), cl_event * clevent = NULL) { + BOOST_STATIC_ASSERT(N > 0); enqueue_nd_range_kernel( kernel, N, global_work_offset.data(), global_work_size.data(), - local_work_size.data(), + (local_work_size[0] == 0) ? NULL : local_work_size.data(), events, clevent ); From b570d993c5541b4af0f102741be4aacb0590e8a8 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Mon, 30 Mar 2015 02:45:24 +0200 Subject: [PATCH 11/39] * m_version initialization was incomplete. --- include/boost/compute/device.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/boost/compute/device.hpp b/include/boost/compute/device.hpp index d21ece6db..a26a4500a 100644 --- a/include/boost/compute/device.hpp +++ b/include/boost/compute/device.hpp @@ -92,6 +92,7 @@ class device #endif m_id = other.m_id; + m_version = other.m_version; #ifdef CL_VERSION_1_2 if(m_id && is_subdevice()){ @@ -106,9 +107,10 @@ class device #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new device object from \p other. device(device&& other) BOOST_NOEXCEPT - : m_id(other.m_id) + : m_id(other.m_id), m_version(other.m_version) { other.m_id = 0; + other.m_version = 0; } /// Move-assigns the device from \p other to \c *this. @@ -121,7 +123,9 @@ class device #endif // CL_VERSION_1_2 m_id = other.m_id; + m_version = other.m_version; other.m_id = 0; + other.m_version = 0; return *this; } From cfa1e88c42f0e6414a1da106e54c744f3edd5ae2 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Mon, 30 Mar 2015 15:54:27 +0200 Subject: [PATCH 12/39] * Restored check_version() and check_device_version() for backward compatibility. --- include/boost/compute/command_queue.hpp | 8 ++++++++ include/boost/compute/device.hpp | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 04f8f3e40..6169746f9 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -1495,6 +1495,14 @@ class command_queue return m_queue; } + /// \internal_ + bool check_device_version(int major, int minor) const + { + int ver = static_cast(get_version()); + int check = major * 100 + minor; + return check <= ver; + } + private: cl_command_queue m_queue; mutable uint_ m_version; diff --git a/include/boost/compute/device.hpp b/include/boost/compute/device.hpp index a26a4500a..3a838f7b3 100644 --- a/include/boost/compute/device.hpp +++ b/include/boost/compute/device.hpp @@ -422,6 +422,14 @@ class device return m_id != other.m_id; } + /// \internal_ + bool check_version(int major, int minor) const + { + int ver = static_cast(get_version()); + int check = major * 100 + minor; + return check <= ver; + } + private: cl_device_id m_id; mutable uint_ m_version; // Cached ICD OpenCL version number From e0387db91843697f5a8b67e8db9b6792f06599a7 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 31 Mar 2015 00:18:28 +0200 Subject: [PATCH 13/39] * Compatibility with current code. * XXX_async() functions wher returning event. --- example/matrix_transpose.cpp | 8 +-- include/boost/compute/command_queue.hpp | 70 ++++++++++++++++++-- include/boost/compute/detail/meta_kernel.hpp | 4 +- 3 files changed, 72 insertions(+), 10 deletions(-) diff --git a/example/matrix_transpose.cpp b/example/matrix_transpose.cpp index da8d3ffa7..cff6ae24a 100644 --- a/example/matrix_transpose.cpp +++ b/example/matrix_transpose.cpp @@ -281,7 +281,7 @@ int main(int argc, char *argv[]) kernel.set_arg(1, d_output); compute::event start; - start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + start = queue.enqueue_nd_range_kernel_async(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); uint64_t elapsed = start.duration().count(); @@ -298,7 +298,7 @@ int main(int argc, char *argv[]) kernel.set_arg(0, d_input); kernel.set_arg(1, d_output); - start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + start = queue.enqueue_nd_range_kernel_async(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); elapsed = start.duration().count(); std::cout << " Elapsed: " << elapsed << " ns" << std::endl; @@ -314,7 +314,7 @@ int main(int argc, char *argv[]) kernel.set_arg(0, d_input); kernel.set_arg(1, d_output); - start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + start = queue.enqueue_nd_range_kernel_async(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); elapsed = start.duration().count(); std::cout << " Elapsed: " << elapsed << " ns" << std::endl; @@ -332,7 +332,7 @@ int main(int argc, char *argv[]) kernel.set_arg(0, d_input); kernel.set_arg(1, d_output); - start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + start = queue.enqueue_nd_range_kernel_async(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); elapsed = start.duration().count(); std::cout << " Elapsed: " << elapsed << " ns" << std::endl; diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 6169746f9..bfb930bc8 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -1094,13 +1094,56 @@ class command_queue /// \overload template void enqueue_nd_range_kernel(const kernel &kernel, + const extents &global_work_offset, + const extents &global_work_size, + const extents &local_work_size, + const wait_list &events = wait_list(), + cl_event * clevent = NULL) + { + BOOST_STATIC_ASSERT(N > 0); + enqueue_nd_range_kernel( + kernel, + N, + global_work_offset.data(), + global_work_size.data(), + (local_work_size[0] == 0) ? NULL : local_work_size.data(), + events, + clevent + ); + } + + /// Enqueues a kernel for execution. + /// + /// \see_opencl_ref{clEnqueueNDRangeKernel} + event enqueue_nd_range_kernel_async(const kernel &kernel, + size_t work_dim, + const size_t *global_work_offset, + const size_t *global_work_size, + const size_t *local_work_size, + const wait_list &events = wait_list()) + { + event event_; + + enqueue_nd_range_kernel(kernel, + work_dim, + global_work_offset, + global_work_size, + local_work_size, + events, + &event_.get()); + return event_; + } + + /// \overload + template + event enqueue_nd_range_kernel_async(const kernel &kernel, const extents &global_work_offset, const extents &global_work_size, const extents &local_work_size, - const wait_list &events = wait_list(), - cl_event * clevent = NULL) + const wait_list &events = wait_list()) { - BOOST_STATIC_ASSERT(N > 0); + event event_; + enqueue_nd_range_kernel( kernel, N, @@ -1108,8 +1151,10 @@ class command_queue global_work_size.data(), (local_work_size[0] == 0) ? NULL : local_work_size.data(), events, - clevent + &event_.get() ); + + return event_; } /// Convenience method which calls enqueue_nd_range_kernel() with a @@ -1132,6 +1177,23 @@ class command_queue ); } + event enqueue_1d_range_kernel_async(const kernel &kernel, + size_t global_work_offset, + size_t global_work_size, + size_t local_work_size, + const wait_list &events = wait_list()) + { + event event_; + enqueue_1d_range_kernel(kernel, + global_work_offset, + global_work_size, + local_work_size, + events, + &event_.get()); + return event_; + + } + /// Enqueues a kernel to execute using a single work-item. /// /// \see_opencl_ref{clEnqueueTask} diff --git a/include/boost/compute/detail/meta_kernel.hpp b/include/boost/compute/detail/meta_kernel.hpp index 2d6bd5de7..056275608 100644 --- a/include/boost/compute/detail/meta_kernel.hpp +++ b/include/boost/compute/detail/meta_kernel.hpp @@ -636,7 +636,7 @@ class meta_kernel ::boost::compute::kernel kernel = compile(context); - return queue.enqueue_1d_range_kernel( + return queue.enqueue_1d_range_kernel_async( kernel, global_work_offset, global_work_size, @@ -653,7 +653,7 @@ class meta_kernel ::boost::compute::kernel kernel = compile(context); - return queue.enqueue_1d_range_kernel( + return queue.enqueue_1d_range_kernel_async( kernel, global_work_offset, global_work_size, From 6efcf0a3b82cd7b67d3aa3b12d31e238f7e2d56e Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 31 Mar 2015 00:56:38 +0200 Subject: [PATCH 14/39] * enqueue_map_image() bug --- include/boost/compute/command_queue.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index bfb930bc8..3922c88f2 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -652,7 +652,7 @@ class command_queue void *pointer = clEnqueueMapImage( m_queue, image.get(), - clevent ? CL_TRUE : CL_FALSE, + clevent ? CL_FALSE : CL_TRUE, flags, origin, region, From 104e5f82bef40d1cb4a299d3a57e3e7cc01f51ae Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 31 Mar 2015 14:28:24 +0200 Subject: [PATCH 15/39] * Use of the enqueue_nd_range_kernel_async() function --- include/boost/compute/detail/work_size.hpp | 2 +- test/test_command_queue.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/boost/compute/detail/work_size.hpp b/include/boost/compute/detail/work_size.hpp index 5378941d5..49021e3af 100644 --- a/include/boost/compute/detail/work_size.hpp +++ b/include/boost/compute/detail/work_size.hpp @@ -23,7 +23,7 @@ namespace detail { // passed to clEnqueueNDRangeKernel() for a 1D algorithm. inline size_t calculate_work_size(size_t count, size_t vpt, size_t tpb) { - size_t work_size = std::ceil(float(count) / vpt); + size_t work_size = static_cast(std::ceil(float(count) / vpt)); if(work_size % tpb != 0){ work_size += tpb - work_size % tpb; } diff --git a/test/test_command_queue.cpp b/test/test_command_queue.cpp index e9fe4d4a8..f72315a79 100644 --- a/test/test_command_queue.cpp +++ b/test/test_command_queue.cpp @@ -105,11 +105,11 @@ BOOST_AUTO_TEST_CASE(kernel_profiling) size_t global_work_size = 8; boost::compute::event event = - queue.enqueue_nd_range_kernel(kernel, - size_t(1), - &global_work_offset, - &global_work_size, - 0); + queue.enqueue_nd_range_kernel_async(kernel, + size_t(1), + &global_work_offset, + &global_work_size, + 0); // wait until kernel is finished event.wait(); From bffd8469107f2bb88bdf84ca27241dd3b04a4b5f Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 31 Mar 2015 19:47:17 +0200 Subject: [PATCH 16/39] * nbody.cpp - Use of: #include #include instead of: #ifdef __APPLE__ #include #include #else #include #include #endif * Created: enqueue_copy_buffer_async(), enqueue_copy_buffer_async(). * In Windows Inlude "windows.h" before --- example/nbody.cpp | 15 +++----- include/boost/compute/algorithm/copy.hpp | 2 +- include/boost/compute/algorithm/fill.hpp | 10 +++--- include/boost/compute/command_queue.hpp | 40 +++++++++++++++++++++ include/boost/compute/interop/opengl/gl.hpp | 4 +++ 5 files changed, 55 insertions(+), 16 deletions(-) diff --git a/example/nbody.cpp b/example/nbody.cpp index 76c720d1f..0b113f892 100644 --- a/example/nbody.cpp +++ b/example/nbody.cpp @@ -10,14 +10,9 @@ #include -#define GL_GLEXT_PROTOTYPES -#ifdef __APPLE__ -#include -#include -#else -#include -#include -#endif +//#define GL_GLEXT_PROTOTYPES +#include +#include #include #if QT_VERSION >= 0x050000 @@ -193,8 +188,8 @@ void NBodyWidget::updateParticles() { // enqueue kernels to update particles and make sure that the command queue is finished compute::opengl_enqueue_acquire_buffer(m_position, m_queue); - m_queue.enqueue_1d_range_kernel(m_velocity_kernel, 0, m_particles, 0).wait(); - m_queue.enqueue_1d_range_kernel(m_position_kernel, 0, m_particles, 0).wait(); + m_queue.enqueue_1d_range_kernel_async(m_velocity_kernel, 0, m_particles, 0).wait(); + m_queue.enqueue_1d_range_kernel_async(m_position_kernel, 0, m_particles, 0).wait(); m_queue.finish(); compute::opengl_enqueue_release_buffer(m_position, m_queue); } diff --git a/include/boost/compute/algorithm/copy.hpp b/include/boost/compute/algorithm/copy.hpp index 3bb65af5f..0a3cd8bb3 100644 --- a/include/boost/compute/algorithm/copy.hpp +++ b/include/boost/compute/algorithm/copy.hpp @@ -250,7 +250,7 @@ dispatch_copy_async(InputIterator first, } event event_ = - queue.enqueue_copy_buffer( + queue.enqueue_copy_buffer_async( first.get_buffer(), result.get_buffer(), first.get_index() * sizeof(value_type), diff --git a/include/boost/compute/algorithm/fill.hpp b/include/boost/compute/algorithm/fill.hpp index 481ec4785..7c3aac3e2 100644 --- a/include/boost/compute/algorithm/fill.hpp +++ b/include/boost/compute/algorithm/fill.hpp @@ -163,11 +163,11 @@ dispatch_fill_async(BufferIterator first, size_t offset = static_cast(first.get_index()); event event_ = - queue.enqueue_fill_buffer(first.get_buffer(), - &pattern, - sizeof(value_type), - offset * sizeof(value_type), - count * sizeof(value_type)); + queue.enqueue_fill_buffer_async(first.get_buffer(), + &pattern, + sizeof(value_type), + offset * sizeof(value_type), + count * sizeof(value_type)); return future(event_); } diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 3922c88f2..16a8537c7 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -509,6 +509,26 @@ class command_queue } } + event enqueue_copy_buffer_async(const buffer &src_buffer, + const buffer &dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + const wait_list &events) + { + event event_; + + enqueue_copy_buffer(src_buffer, + dst_buffer, + src_offset, + dst_offset, + size, + events, + &event_.get()); + + return event_; + } + #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to copy a rectangular region from /// \p src_buffer to \p dst_buffer. @@ -596,6 +616,26 @@ class command_queue BOOST_THROW_EXCEPTION(opencl_error(ret)); } } + + event enqueue_fill_buffer_async(const buffer &buffer, + const void *pattern, + size_t pattern_size, + size_t offset, + size_t size, + const wait_list &events = wait_list()) + { + event event_; + + enqueue_fill_buffer(buffer, + pattern, + pattern_size, + offset, + size, + events, + &event_.get()); + + return event_; + } #endif // CL_VERSION_1_2 /// Enqueues a command to map \p buffer into the host address space. diff --git a/include/boost/compute/interop/opengl/gl.hpp b/include/boost/compute/interop/opengl/gl.hpp index c00fb7a8f..2c6f703ef 100644 --- a/include/boost/compute/interop/opengl/gl.hpp +++ b/include/boost/compute/interop/opengl/gl.hpp @@ -14,6 +14,10 @@ #if defined(__APPLE__) #include #else +#if defined(_WIN32) +// Avoid error: 'APIENTRY' : illegal use of type 'void' +#include "windows.h" +#endif #include #endif From d0c4e173bd1db7df3736b7da7d9e35027b232312 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 31 Mar 2015 20:03:59 +0200 Subject: [PATCH 17/39] * Added a default argument --- include/boost/compute/command_queue.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 16a8537c7..ac98bad51 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -514,7 +514,7 @@ class command_queue size_t src_offset, size_t dst_offset, size_t size, - const wait_list &events) + const wait_list &events = wait_list()) { event event_; From 731c26464aae078c6f4999fcbff0580241359451 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 31 Mar 2015 23:23:38 +0200 Subject: [PATCH 18/39] * Do not assume a device holds only a cl_device_id --- include/boost/compute/context.hpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/include/boost/compute/context.hpp b/include/boost/compute/context.hpp index c803f2fd7..a4512ffa0 100644 --- a/include/boost/compute/context.hpp +++ b/include/boost/compute/context.hpp @@ -83,11 +83,19 @@ class context cl_int error = 0; + std::vector device_ids; + std::string name, version; + for (size_t i = 0; i < devices.size(); ++i) { + const device &dev = devices[i]; + name = dev.name(); + version = dev.version(); + device_ids.push_back(devices[i].get()); + } m_version = 0; m_context = clCreateContext( properties, - static_cast(devices.size()), - reinterpret_cast(&devices[0]), + static_cast(device_ids.size()), + reinterpret_cast(&device_ids[0]), 0, 0, &error @@ -207,7 +215,12 @@ class context /// Returns a vector of devices for the context. std::vector get_devices() const { - return get_info >(CL_CONTEXT_DEVICES); + std::vector out; + std::vector id_vector = get_info >(CL_CONTEXT_DEVICES); + for (std::vector::iterator it = id_vector.begin(); it != id_vector.end(); ++it) { + out.push_back(device(*it)); + } + return out; } /// Returns information about the context. From 9043e987da0ed49f1e722f7ccda1e76b3fc78ce4 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Wed, 1 Apr 2015 01:36:48 +0200 Subject: [PATCH 19/39] * find_device() overload to allow partial search and min version check --- include/boost/compute/system.hpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/boost/compute/system.hpp b/include/boost/compute/system.hpp index 5bc1cec34..ec4190a5f 100644 --- a/include/boost/compute/system.hpp +++ b/include/boost/compute/system.hpp @@ -94,6 +94,22 @@ class system BOOST_THROW_EXCEPTION(no_device_found()); } + /// Returns the device with \p name contained in its name and + /// a minimun version of \p min_version (eg. 1.1 is 101, 1.2 is 102, 2.0 is 200). + /// + /// \throws no_device_found if no device with \p name is found. + static device find_device(const std::string &name, uint_ min_version = 100) + { + BOOST_FOREACH(const device &device, devices()){ + if(device.name().find(name.c_str()) != std::string::npos + && device.get_version() >= min_version){ + return device; + } + } + + BOOST_THROW_EXCEPTION(no_device_found()); + } + /// Returns a vector containing all of the compute devices on /// the system. /// From 98210a5de0d0edbe8728725ba417a7cbebe6e970 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Wed, 1 Apr 2015 02:17:14 +0200 Subject: [PATCH 20/39] * Changed the way the version is parsed. Now parses 3.01 as 301 and 1.2 as 120. --- include/boost/compute/command_queue.hpp | 20 ++++++++++---------- include/boost/compute/context.hpp | 2 +- include/boost/compute/device.hpp | 9 ++++++--- include/boost/compute/image/image1d.hpp | 2 +- include/boost/compute/image/image2d.hpp | 4 ++-- include/boost/compute/image/image3d.hpp | 4 ++-- test/test_fill.cpp | 2 +- 7 files changed, 23 insertions(+), 20 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index ac98bad51..cb11b7118 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -339,7 +339,7 @@ class command_queue BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); - if (get_version() < 101) + if (get_version() < 110) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueReadBufferRect( @@ -446,7 +446,7 @@ class command_queue BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); - if (get_version() < 101) + if (get_version() < 110) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueWriteBufferRect( @@ -552,7 +552,7 @@ class command_queue BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); - if (get_version() < 101) + if (get_version() < 110) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueCopyBufferRect( @@ -597,7 +597,7 @@ class command_queue BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); - if (get_version() < 102) + if (get_version() < 120) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueFillBuffer( @@ -1025,7 +1025,7 @@ class command_queue BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); - if (get_version() < 102) + if (get_version() < 120) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueFillImage( @@ -1080,7 +1080,7 @@ class command_queue { BOOST_ASSERT(m_queue != 0); - if (get_version() < 102) + if (get_version() < 120) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueMigrateMemObjects( @@ -1343,7 +1343,7 @@ class command_queue BOOST_ASSERT(m_queue != 0); #ifdef CL_VERSION_1_2 - if (get_version() >= 102) + if (get_version() >= 120) clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0); else #endif @@ -1359,7 +1359,7 @@ class command_queue { BOOST_ASSERT(m_queue != 0); - if (get_version() < 102) + if (get_version() < 120) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); clEnqueueBarrierWithWaitList( @@ -1374,7 +1374,7 @@ class command_queue { cl_int ret; #ifdef CL_VERSION_1_2 - if (get_version() >= 102) + if (get_version() >= 120) ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, clevent); else #endif @@ -1393,7 +1393,7 @@ class command_queue void enqueue_marker(const wait_list &events, cl_event * clevent = NULL) { - if (get_version() < 102) + if (get_version() < 120) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueMarkerWithWaitList( diff --git a/include/boost/compute/context.hpp b/include/boost/compute/context.hpp index a4512ffa0..58ebc02f8 100644 --- a/include/boost/compute/context.hpp +++ b/include/boost/compute/context.hpp @@ -255,7 +255,7 @@ class context return m_context; } - /// Returns the device version number. (eg. 1.1 is 101, 1.2 is 102, 2.0 is 200) + /// Returns the device version number. (eg. 1.1 is 110, 1.2 is 120, 2.0 is 200, 3.11 is 311) uint_ get_version() const { if (m_version == 0) diff --git a/include/boost/compute/device.hpp b/include/boost/compute/device.hpp index 3a838f7b3..0d8604de5 100644 --- a/include/boost/compute/device.hpp +++ b/include/boost/compute/device.hpp @@ -192,7 +192,7 @@ class device return get_info(CL_DEVICE_VERSION); } - /// Returns the device version number: major * 100 + minor (eg. 1.1 is 101, 1.2 is 102, 2.0 is 200) + /// Returns the device version number: major.minor * 100 (eg. 1.1 is 110, 1.2 is 120, 2.0 is 200, 3.01 is 301) uint_ get_version() const { if (m_version == 0) { @@ -202,7 +202,10 @@ class device ss.ignore(7); // 'OpenCL ' ss >> major; ss.ignore(1); // '.' + bool is_zero = ss.peek() == '0'; ss >> minor; + if (!is_zero && minor < 10) + minor *= 10; m_version = major * 100 + minor; // cache } return m_version; @@ -301,7 +304,7 @@ class device bool is_subdevice() const { #if defined(CL_VERSION_1_2) - if (get_version() >= 102) + if (get_version() >= 120) return get_info(CL_DEVICE_PARENT_DEVICE) != 0; else #endif // CL_VERSION_1_2 @@ -341,7 +344,7 @@ class device std::vector partition(const cl_device_partition_property *properties) const { - if (get_version() < 102) + if (get_version() < 120) return std::vector(); // get sub-device count diff --git a/include/boost/compute/image/image1d.hpp b/include/boost/compute/image/image1d.hpp index 26dbfa74c..470f22850 100644 --- a/include/boost/compute/image/image1d.hpp +++ b/include/boost/compute/image/image1d.hpp @@ -51,7 +51,7 @@ class image1d : public image_object void *host_ptr = 0) { #ifdef CL_VERSION_1_2 - if (context.get_version() >= 102) + if (context.get_version() >= 120) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE1D; diff --git a/include/boost/compute/image/image2d.hpp b/include/boost/compute/image/image2d.hpp index 9d208639b..db3b84018 100644 --- a/include/boost/compute/image/image2d.hpp +++ b/include/boost/compute/image/image2d.hpp @@ -59,7 +59,7 @@ class image2d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - if (context.get_version() >= 102) + if (context.get_version() >= 120) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; @@ -114,7 +114,7 @@ class image2d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - if (context.get_version() >= 102) + if (context.get_version() >= 120) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE2D; diff --git a/include/boost/compute/image/image3d.hpp b/include/boost/compute/image/image3d.hpp index f6b083642..c463e2b84 100644 --- a/include/boost/compute/image/image3d.hpp +++ b/include/boost/compute/image/image3d.hpp @@ -52,7 +52,7 @@ class image3d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - if (context.get_version() >= 102) + if (context.get_version() >= 120) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; @@ -109,7 +109,7 @@ class image3d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - if (context.get_version() >= 102) + if (context.get_version() >= 120) { cl_image_desc desc; desc.image_type = CL_MEM_OBJECT_IMAGE3D; diff --git a/test/test_fill.cpp b/test/test_fill.cpp index c0d517dd3..f6176cf03 100644 --- a/test/test_fill.cpp +++ b/test/test_fill.cpp @@ -105,7 +105,7 @@ BOOST_AUTO_TEST_CASE(check_fill_type) #ifdef CL_VERSION_1_2 BOOST_CHECK_EQUAL( future.get_event().get_command_type(), - device.check_version(1,2) ? CL_COMMAND_FILL_BUFFER : CL_COMMAND_NDRANGE_KERNEL + device.check_version(1,20) ? CL_COMMAND_FILL_BUFFER : CL_COMMAND_NDRANGE_KERNEL ); #else BOOST_CHECK( From 93a5be6fdd72d4f7f9fc920cd2449367cb6c35d2 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Wed, 1 Apr 2015 02:22:23 +0200 Subject: [PATCH 21/39] * Resolve ambiguity find_device_name() --- include/boost/compute/system.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/boost/compute/system.hpp b/include/boost/compute/system.hpp index ec4190a5f..313e9ef9f 100644 --- a/include/boost/compute/system.hpp +++ b/include/boost/compute/system.hpp @@ -98,7 +98,7 @@ class system /// a minimun version of \p min_version (eg. 1.1 is 101, 1.2 is 102, 2.0 is 200). /// /// \throws no_device_found if no device with \p name is found. - static device find_device(const std::string &name, uint_ min_version = 100) + static device find_device_name(const std::string &name, uint_ min_version = 100) { BOOST_FOREACH(const device &device, devices()){ if(device.name().find(name.c_str()) != std::string::npos From ee08966ae8bd3656e57fc451a9747d8dbad7312b Mon Sep 17 00:00:00 2001 From: David Jurado Date: Mon, 6 Apr 2015 00:02:33 +0200 Subject: [PATCH 22/39] * Do not use the internal cl_event type in the functions interface --- include/boost/compute/command_queue.hpp | 186 ++++++++++++------------ 1 file changed, 93 insertions(+), 93 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index cb11b7118..8e6151be1 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -268,7 +268,7 @@ class command_queue size_t size, void *host_ptr, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); @@ -278,13 +278,13 @@ class command_queue cl_int ret = clEnqueueReadBuffer( m_queue, buffer.get(), - clevent ? CL_FALSE : CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -311,7 +311,7 @@ class command_queue size, host_ptr, events, - &event_.get()); + &event_); return event_; } @@ -333,7 +333,7 @@ class command_queue size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); @@ -345,7 +345,7 @@ class command_queue cl_int ret = clEnqueueReadBufferRect( m_queue, buffer.get(), - clevent ? CL_FALSE : CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, buffer_origin, host_origin, region, @@ -356,7 +356,7 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -375,7 +375,7 @@ class command_queue size_t size, const void *host_ptr, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); @@ -385,13 +385,13 @@ class command_queue cl_int ret = clEnqueueWriteBuffer( m_queue, buffer.get(), - clevent ? CL_FALSE : CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -418,7 +418,7 @@ class command_queue size, host_ptr, events, - &event_.get()); + &event_); return event_; } @@ -440,7 +440,7 @@ class command_queue size_t host_slice_pitch, void *host_ptr, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); @@ -452,7 +452,7 @@ class command_queue cl_int ret = clEnqueueWriteBufferRect( m_queue, buffer.get(), - clevent ? CL_FALSE : CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, buffer_origin, host_origin, region, @@ -463,7 +463,7 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -484,7 +484,7 @@ class command_queue size_t dst_offset, size_t size, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_offset + size <= src_buffer.size()); @@ -501,7 +501,7 @@ class command_queue size, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -524,7 +524,7 @@ class command_queue dst_offset, size, events, - &event_.get()); + &event_); return event_; } @@ -546,7 +546,7 @@ class command_queue size_t host_row_pitch, size_t host_slice_pitch, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_buffer.get_context() == this->get_context()); @@ -568,7 +568,7 @@ class command_queue host_slice_pitch, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -591,7 +591,7 @@ class command_queue size_t offset, size_t size, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); @@ -609,7 +609,7 @@ class command_queue size, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -632,7 +632,7 @@ class command_queue offset, size, events, - &event_.get()); + &event_); return event_; } @@ -646,7 +646,7 @@ class command_queue size_t offset, size_t size, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer_.size()); @@ -656,13 +656,13 @@ class command_queue void *pointer = clEnqueueMapBuffer( m_queue, buffer_.get(), - clevent ? CL_FALSE : CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, flags, offset, size, events.size(), events.get_event_ptr(), - clevent, + event_ ? &event_->get() : NULL, &ret ); @@ -683,7 +683,7 @@ class command_queue size_t *row_pitch, size_t *slice_pitch = NULL, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); @@ -692,7 +692,7 @@ class command_queue void *pointer = clEnqueueMapImage( m_queue, image.get(), - clevent ? CL_FALSE : CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, flags, origin, region, @@ -700,7 +700,7 @@ class command_queue slice_pitch, events.size(), events.get_event_ptr(), - clevent, + event_ ? &event_->get() : NULL, &ret ); @@ -721,7 +721,7 @@ class command_queue size_t *row_pitch, size_t *slice_pitch = NULL, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); @@ -733,7 +733,7 @@ class command_queue std::copy(region.data(), region.data() + N, region3); return enqueue_map_image( - image, flags ,origin3, region3, row_pitch, slice_pitch, events, clevent + image, flags ,origin3, region3, row_pitch, slice_pitch, events, event_ ); } @@ -743,11 +743,11 @@ class command_queue void enqueue_unmap_buffer(const memory_object &mem_object, void *mapped_ptr, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(mem_object.get_context() == this->get_context()); - enqueue_unmap_mem_object(mem_object.get(), mapped_ptr, events, clevent); + enqueue_unmap_mem_object(mem_object.get(), mapped_ptr, events, event_); } /// Enqueues a command to unmap \p mem from the host memory space. @@ -756,7 +756,7 @@ class command_queue void enqueue_unmap_mem_object(cl_mem mem, void *mapped_ptr, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); @@ -766,7 +766,7 @@ class command_queue mapped_ptr, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -784,14 +784,14 @@ class command_queue size_t slice_pitch, void *host_ptr, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); cl_int ret = clEnqueueReadImage( m_queue, image.get(), - clevent ? CL_FALSE : CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, origin, region, row_pitch, @@ -799,7 +799,7 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -816,7 +816,7 @@ class command_queue size_t row_pitch = 0, size_t slice_pitch = 0, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); @@ -828,7 +828,7 @@ class command_queue std::copy(region.data(), region.data() + N, region3); enqueue_read_image( - image, origin3, region3, row_pitch, slice_pitch, host_ptr, events, clevent + image, origin3, region3, row_pitch, slice_pitch, host_ptr, events, event_ ); } @@ -842,14 +842,14 @@ class command_queue size_t input_row_pitch = 0, size_t input_slice_pitch = 0, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); cl_int ret = clEnqueueWriteImage( m_queue, image.get(), - clevent ? CL_FALSE : CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, origin, region, input_row_pitch, @@ -857,7 +857,7 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -874,7 +874,7 @@ class command_queue const size_t input_row_pitch = 0, const size_t input_slice_pitch = 0, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); @@ -886,7 +886,7 @@ class command_queue std::copy(region.data(), region.data() + N, region3); enqueue_write_image( - image, origin3, region3, host_ptr, input_row_pitch, input_slice_pitch, events, clevent + image, origin3, region3, host_ptr, input_row_pitch, input_slice_pitch, events, event_ ); } @@ -899,7 +899,7 @@ class command_queue const size_t *dst_origin, const size_t *region, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); @@ -912,7 +912,7 @@ class command_queue region, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -928,7 +928,7 @@ class command_queue const extents dst_origin, const extents region, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(src_image.get_context() == this->get_context()); @@ -945,7 +945,7 @@ class command_queue std::copy(region.data(), region.data() + N, region3); enqueue_copy_image( - src_image, dst_image, src_origin3, dst_origin3, region3, events, clevent + src_image, dst_image, src_origin3, dst_origin3, region3, events, event_ ); } @@ -958,7 +958,7 @@ class command_queue const size_t *region, size_t dst_offset, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); @@ -971,7 +971,7 @@ class command_queue dst_offset, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -988,7 +988,7 @@ class command_queue const size_t *dst_origin, const size_t *region, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); @@ -1001,7 +1001,7 @@ class command_queue region, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -1020,7 +1020,7 @@ class command_queue const size_t *origin, const size_t *region, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(image.get_context() == this->get_context()); @@ -1036,7 +1036,7 @@ class command_queue region, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -1051,7 +1051,7 @@ class command_queue const extents origin, const extents region, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); @@ -1063,7 +1063,7 @@ class command_queue std::copy(region.data(), region.data() + N, region3); enqueue_fill_image( - image, fill_color, origin3, region3, events, clevent + image, fill_color, origin3, region3, events, event_ ); } @@ -1076,7 +1076,7 @@ class command_queue const cl_mem *mem_objects, cl_mem_migration_flags flags, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); @@ -1090,7 +1090,7 @@ class command_queue flags, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -1108,7 +1108,7 @@ class command_queue const size_t *global_work_size, const size_t *local_work_size, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(work_dim > 0); @@ -1123,7 +1123,7 @@ class command_queue local_work_size, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -1138,7 +1138,7 @@ class command_queue const extents &global_work_size, const extents &local_work_size, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_STATIC_ASSERT(N > 0); enqueue_nd_range_kernel( @@ -1148,7 +1148,7 @@ class command_queue global_work_size.data(), (local_work_size[0] == 0) ? NULL : local_work_size.data(), events, - clevent + event_ ); } @@ -1170,7 +1170,7 @@ class command_queue global_work_size, local_work_size, events, - &event_.get()); + &event_); return event_; } @@ -1204,7 +1204,7 @@ class command_queue size_t global_work_size, size_t local_work_size, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { enqueue_nd_range_kernel( kernel, @@ -1213,7 +1213,7 @@ class command_queue &global_work_size, local_work_size ? &local_work_size : 0, events, - clevent + event_ ); } @@ -1229,7 +1229,7 @@ class command_queue global_work_size, local_work_size, events, - &event_.get()); + &event_); return event_; } @@ -1239,7 +1239,7 @@ class command_queue /// \see_opencl_ref{clEnqueueTask} void enqueue_task(const kernel &kernel, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); @@ -1254,14 +1254,14 @@ class command_queue size_t one = 1; ret = clEnqueueNDRangeKernel( m_queue, kernel, 1, 0, &one, &one, - events.size(), events.get_event_ptr(), clevent + events.size(), events.get_event_ptr(), event_ ? &event_->get() : NULL ); } else #endif { ret = clEnqueueTask( - m_queue, kernel, events.size(), events.get_event_ptr(), clevent + m_queue, kernel, events.size(), events.get_event_ptr(), event_ ? &event_->get() : NULL ); } @@ -1278,7 +1278,7 @@ class command_queue const cl_mem *mem_list, const void **args_mem_loc, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); @@ -1292,7 +1292,7 @@ class command_queue args_mem_loc, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); @@ -1303,7 +1303,7 @@ class command_queue /// native kernel on the host with a nullary function. void enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void), const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { enqueue_native_kernel( detail::nullary_native_kernel_trampoline, @@ -1313,7 +1313,7 @@ class command_queue 0, 0, events, - clevent + event_ ); } @@ -1355,7 +1355,7 @@ class command_queue /// /// \opencl_version_warning{1,2} void enqueue_barrier(const wait_list &events, - cl_event * clevent = NULL) + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); @@ -1363,22 +1363,22 @@ class command_queue BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); clEnqueueBarrierWithWaitList( - m_queue, events.size(), events.get_event_ptr(), clevent + m_queue, events.size(), events.get_event_ptr(), event_ ? &event_->get() : NULL ); } #endif // CL_VERSION_1_2 /// Enqueues a marker in the queue and returns an event that can be /// used to track its progress. - void enqueue_marker(cl_event * clevent) + void enqueue_marker(event * event_) { cl_int ret; #ifdef CL_VERSION_1_2 if (get_version() >= 120) - ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, clevent); + ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, event_ ? &event_->get() : NULL); else #endif - ret = clEnqueueMarker(m_queue, clevent); + ret = clEnqueueMarker(m_queue, event_ ? &event_->get() : NULL); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); @@ -1391,13 +1391,13 @@ class command_queue /// /// \opencl_version_warning{1,2} void enqueue_marker(const wait_list &events, - cl_event * clevent = NULL) + event * event_ = NULL) { if (get_version() < 120) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueMarkerWithWaitList( - m_queue, events.size(), events.get_event_ptr(), clevent + m_queue, events.size(), events.get_event_ptr(), event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -1417,20 +1417,20 @@ class command_queue const void *src_ptr, size_t size, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { if (get_version() < 200) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueSVMMemcpy( m_queue, - clevent ? CL_FALSE : CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, dst_ptr, src_ptr, size, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -1455,7 +1455,7 @@ class command_queue src_ptr, size, events, - &event_.get()); + &event_); return event_; } @@ -1471,7 +1471,7 @@ class command_queue size_t pattern_size, size_t size, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { if (get_version() < 200) @@ -1485,7 +1485,7 @@ class command_queue size, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -1502,7 +1502,7 @@ class command_queue /// \see svm_free() void enqueue_svm_free(void *svm_ptr, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { if (get_version() < 200) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); @@ -1515,7 +1515,7 @@ class command_queue 0, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -1532,20 +1532,20 @@ class command_queue size_t size, cl_map_flags flags, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { if (get_version() < 200) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueSVMMap( m_queue, - clevent ? CL_FALSE : CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, flags, svm_ptr, size, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -1560,7 +1560,7 @@ class command_queue /// \see_opencl2_ref{clEnqueueSVMUnmap} void enqueue_svm_unmap(void *svm_ptr, const wait_list &events = wait_list(), - cl_event * clevent = NULL) + event * event_ = NULL) { if (get_version() < 200) BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); @@ -1570,7 +1570,7 @@ class command_queue svm_ptr, events.size(), events.get_event_ptr(), - clevent + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ From d3947001a6618b6b53b34d236970c5ead908b4b3 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Mon, 6 Apr 2015 00:03:03 +0200 Subject: [PATCH 23/39] * Avoid the use of min max macros in Windows --- include/boost/compute/algorithm/accumulate.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/boost/compute/algorithm/accumulate.hpp b/include/boost/compute/algorithm/accumulate.hpp index 38d47c5cd..652ae02bb 100644 --- a/include/boost/compute/algorithm/accumulate.hpp +++ b/include/boost/compute/algorithm/accumulate.hpp @@ -11,6 +11,10 @@ #ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP #define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP +#ifdef _WIN32 +#define NOMINMAX +#endif + #include #include From 3ea3157189130cba5c6cd50ebd58c6a491a6ae22 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sat, 11 Apr 2015 14:58:54 +0200 Subject: [PATCH 24/39] * Use the execution_status enum * Declare const those functions than does not modify class members (even if is modified the refered data). * Empty constructor for user_event. --- include/boost/compute/event.hpp | 12 ++++++------ include/boost/compute/user_event.hpp | 7 ++++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/include/boost/compute/event.hpp b/include/boost/compute/event.hpp index d92b2e354..3b6a9e24b 100644 --- a/include/boost/compute/event.hpp +++ b/include/boost/compute/event.hpp @@ -170,9 +170,9 @@ class event } /// Returns the status of the event. - cl_int status() const + execution_status status() const { - return get_info(CL_EVENT_COMMAND_EXECUTION_STATUS); + return static_cast(get_info(CL_EVENT_COMMAND_EXECUTION_STATUS)); } /// Returns the command type for the event. @@ -210,7 +210,7 @@ class event /// Blocks until the actions corresponding to the event have /// completed. - void wait() + void wait() const { cl_int ret = clWaitForEvents(1, &m_event); if(ret != CL_SUCCESS){ @@ -230,7 +230,7 @@ class event cl_event event, cl_int status, void *user_data ), cl_int status = CL_COMPLETE, - void *user_data = 0) + void *user_data = 0) const { cl_int ret = clSetEventCallback(m_event, status, callback, user_data); if(ret != CL_SUCCESS){ @@ -246,7 +246,7 @@ class event /// /// \opencl_version_warning{1,1} template - void set_callback(Function callback, cl_int status = CL_COMPLETE) + void set_callback(Function callback, cl_int status = CL_COMPLETE) const { set_callback( event_callback_invoker, @@ -294,7 +294,7 @@ class event } /// \internal_ (deprecated) - cl_int get_status() const + execution_status get_status() const { return status(); } diff --git a/include/boost/compute/user_event.hpp b/include/boost/compute/user_event.hpp index f67fb73e4..f4489686f 100644 --- a/include/boost/compute/user_event.hpp +++ b/include/boost/compute/user_event.hpp @@ -27,6 +27,11 @@ namespace compute { class user_event : public event { public: + /// Creates a null user_event object. + user_event() : event() + { + } + /// Creates a new user-event object. /// /// \see_opencl_ref{clCreateUserEvent} @@ -72,7 +77,7 @@ class user_event : public event /// Sets the execution status for the user-event. /// /// \see_opencl_ref{clSetUserEventStatus} - void set_status(cl_int execution_status) + void set_status(cl_int execution_status) const { cl_int ret = clSetUserEventStatus(m_event, execution_status); if(ret != CL_SUCCESS){ From 6453d856b784ba73e4a95fa05b7ff81659797113 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sat, 11 Apr 2015 15:01:20 +0200 Subject: [PATCH 25/39] * Use const specifier --- include/boost/compute/command_queue.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 8e6151be1..f6715c21a 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -1015,7 +1015,7 @@ class command_queue /// \see_opencl_ref{clEnqueueFillImage} /// /// \opencl_version_warning{1,2} - void enqueue_fill_image(image_object& image, + void enqueue_fill_image(const image_object& image, const void *fill_color, const size_t *origin, const size_t *region, @@ -1046,7 +1046,7 @@ class command_queue /// \overload template - void enqueue_fill_image(image_object& image, + void enqueue_fill_image(const image_object& image, const void *fill_color, const extents origin, const extents region, From 07b5659abf553f08989a497597ce87ca087a2eef Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sat, 11 Apr 2015 15:02:33 +0200 Subject: [PATCH 26/39] * Rename the variable future so it is not equal to the template future --- include/boost/compute/utility/wait_list.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/compute/utility/wait_list.hpp b/include/boost/compute/utility/wait_list.hpp index ca45773d2..bfbfe7de3 100644 --- a/include/boost/compute/utility/wait_list.hpp +++ b/include/boost/compute/utility/wait_list.hpp @@ -128,9 +128,9 @@ class wait_list /// Inserts the event from \p future into the wait-list. template - void insert(const future &future) + void insert(const future &future_) { - insert(future.get_event()); + insert(future_.get_event()); } /// Blocks until all of the events in the wait-list have completed. From 8cf1a0fed0117c3548dc77b5d9ccecf8aa44cc54 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Sat, 11 Apr 2015 15:03:26 +0200 Subject: [PATCH 27/39] * Addede two utility functions: channel_order get_channel_order() channel_data_type get_channel_data_type() const --- include/boost/compute/image/image_format.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/boost/compute/image/image_format.hpp b/include/boost/compute/image/image_format.hpp index bd7ab0ad8..1e1801685 100644 --- a/include/boost/compute/image/image_format.hpp +++ b/include/boost/compute/image/image_format.hpp @@ -110,6 +110,18 @@ class image_format return &m_format; } + /// Returns the imgage channel order. + channel_order get_channel_order() const + { + return static_cast(m_format.image_channel_order); + } + + /// Returns the imgage channel data type. + channel_data_type get_channel_data_type() const + { + return static_cast(m_format.image_channel_data_type); + } + /// Returns \c true if \c *this is the same as \p other. bool operator==(const image_format &other) const { From 2a099056be8dd6d4017052927921015f03930f3e Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 14 Apr 2015 23:12:56 +0200 Subject: [PATCH 28/39] get_type for not colliding with 'type' enum symbol --- include/boost/compute/device.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/boost/compute/device.hpp b/include/boost/compute/device.hpp index 0d8604de5..e2ba757a8 100644 --- a/include/boost/compute/device.hpp +++ b/include/boost/compute/device.hpp @@ -45,9 +45,12 @@ class device { public: enum type { + default_type = CL_DEVICE_TYPE_DEFAULT, cpu = CL_DEVICE_TYPE_CPU, gpu = CL_DEVICE_TYPE_GPU, - accelerator = CL_DEVICE_TYPE_ACCELERATOR + accelerator = CL_DEVICE_TYPE_ACCELERATOR, + custom = CL_DEVICE_TYPE_CUSTOM, + all = CL_DEVICE_TYPE_ALL }; /// Creates a null device object. @@ -156,7 +159,7 @@ class device } /// Returns the type of the device. - cl_device_type type() const + cl_device_type get_type() const { return get_info(CL_DEVICE_TYPE); } From 8103bd33c80a96f0c2ffbcc784b81face159ed4c Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 14 Apr 2015 23:14:02 +0200 Subject: [PATCH 29/39] find_device() improvements --- include/boost/compute/system.hpp | 38 ++++++++++++++------------------ 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/include/boost/compute/system.hpp b/include/boost/compute/system.hpp index 313e9ef9f..211675742 100644 --- a/include/boost/compute/system.hpp +++ b/include/boost/compute/system.hpp @@ -80,29 +80,23 @@ class system return default_device; } - /// Returns the device with \p name. - /// - /// \throws no_device_found if no device with \p name is found. - static device find_device(const std::string &name) - { - BOOST_FOREACH(const device &device, devices()){ - if(device.name() == name){ - return device; - } - } - - BOOST_THROW_EXCEPTION(no_device_found()); - } - - /// Returns the device with \p name contained in its name and + /// Returns the device with \p device_name contained in its name + /// with \p platform_name contained in its platform name + /// with a matching \p device_type and /// a minimun version of \p min_version (eg. 1.1 is 101, 1.2 is 102, 2.0 is 200). /// /// \throws no_device_found if no device with \p name is found. - static device find_device_name(const std::string &name, uint_ min_version = 100) + static device find_device(const std::string &device_name = std::string(), + const std::string &platform_name = std::string(), + device::type device_type = device::all, + uint_ min_version = 100) { BOOST_FOREACH(const device &device, devices()){ - if(device.name().find(name.c_str()) != std::string::npos - && device.get_version() >= min_version){ + std::string platform_name_ = device.platform().name(); + if((platform_name.empty() || platform_name_.find(platform_name.c_str()) != std::string::npos) + && (device_name.empty() || device.name().find(device_name.c_str()) != std::string::npos) + && (device.get_type() & device_type) + && device.get_version() >= min_version){ return device; } } @@ -235,11 +229,11 @@ class system continue; if (type && matches(std::string("GPU"), type)) - if (device.type() != device::gpu) + if (device.get_type() != device::gpu) continue; if (type && matches(std::string("CPU"), type)) - if (device.type() != device::cpu) + if (device.get_type() != device::cpu) continue; if (platform && !matches(device.platform().name(), platform)) @@ -254,14 +248,14 @@ class system // find the first gpu device BOOST_FOREACH(const device &device, devices_){ - if(device.type() == device::gpu){ + if(device.get_type() == device::gpu){ return device; } } // find the first cpu device BOOST_FOREACH(const device &device, devices_){ - if(device.type() == device::cpu){ + if(device.get_type() == device::cpu){ return device; } } From 432665c47a90bbb1d4c1a81088e6e690f6fcedcd Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 14 Apr 2015 23:15:33 +0200 Subject: [PATCH 30/39] swap() facility --- include/boost/compute/memory_object.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/boost/compute/memory_object.hpp b/include/boost/compute/memory_object.hpp index 163c9ffd1..82ca4086d 100644 --- a/include/boost/compute/memory_object.hpp +++ b/include/boost/compute/memory_object.hpp @@ -151,6 +151,15 @@ class memory_object return m_mem != other.m_mem; } + /// Swaps \c this memory object with the \p other + memory_object& swap(memory_object &other) + { + cl_mem temp = m_mem; + m_mem = other.m_mem; + other.m_mem = temp; + return *this; + } + private: #ifdef CL_VERSION_1_1 /// \internal_ From 64992da81b6b11538b39edfec333bf2823e56510 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 14 Apr 2015 23:16:44 +0200 Subject: [PATCH 31/39] enqueue_fill_image() fallback for pre 1.2 devices --- include/boost/compute/command_queue.hpp | 134 ++++++++++++++++++++---- 1 file changed, 116 insertions(+), 18 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index f6715c21a..7ac7b6932 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -1009,6 +1009,94 @@ class command_queue } } + /// The function specified by \p walk_elemets must be invokable with arguments + /// (void *pElem, size_t x, size_t y, size_t z), + /// like std::function. + template + void enqueue_walk_image(const image_object& image, + Function walk_elemets, + cl_map_flags flags, + const size_t *origin, + const size_t *region, + const wait_list &events = wait_list(), + event * pevent = NULL) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(image.get_context() == this->get_context()); + + size_t row_pitch = 0; + size_t slice_pitch = 0; + compute::event map_event, *pmap_event = NULL; + compute::user_event user_event; + compute::wait_list unmap_wait; + + if (pevent) { + // Async exec + user_event = compute::user_event(get_context()); + unmap_wait.insert(user_event); + pmap_event = &map_event; + } + + char * const pImage3D = reinterpret_cast( + enqueue_map_image( + image, + flags, + origin, + region, + &row_pitch, + &slice_pitch, + events, + pmap_event) + ); + + size_t element_size = image.get_image_info(CL_IMAGE_ELEMENT_SIZE); + + auto func = [=]() + { + // Walks all the image elements + char * pImage2D = pImage3D; + for(size_t d = origin[2]; d < region[2]; ++d) { + char * pImage1D = pImage2D; + for(size_t h = origin[1]; h < region[1]; ++h) { + char *pElem = pImage1D; + for(size_t w = origin[0]; w < region[0]; ++w) { + walk_elemets((void *)pElem, w, h, d); + pElem += element_size; + } + pImage1D += row_pitch; + } + pImage2D += slice_pitch; + } + if(pevent) { + user_event.set_status(compute::event::complete); + } + }; + + if (pevent) { + // Async exec + pmap_event->set_callback(func); + } else { + func(); + } + enqueue_unmap_buffer(image, pImage3D, unmap_wait, pevent); + } + + /// Enqueues a command to fill \p image with \p fill_color. + void enqueue_fill_image_walking(const image_object& image, + const void *fill_color, + const size_t *origin, + const size_t *region, + const wait_list &events = wait_list(), + event * event_ = NULL) + { + size_t element_size = image.get_image_info(CL_IMAGE_ELEMENT_SIZE); + enqueue_walk_image(image, [=](void *pelem, size_t, size_t, size_t) + { + std::copy_n(static_cast(fill_color), element_size, static_cast(pelem)); + }, + compute::command_queue::map_write, origin, region, events, event_); + } + #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to fill \p image with \p fill_color. /// @@ -1022,25 +1110,35 @@ class command_queue const wait_list &events = wait_list(), event * event_ = NULL) { - BOOST_ASSERT(m_queue != 0); - BOOST_ASSERT(image.get_context() == this->get_context()); - if (get_version() < 120) - BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); - - cl_int ret = clEnqueueFillImage( - m_queue, - image.get(), - fill_color, - origin, - region, - events.size(), - events.get_event_ptr(), - event_ ? &event_->get() : NULL - ); - - if(ret != CL_SUCCESS){ - BOOST_THROW_EXCEPTION(opencl_error(ret)); + { + // fallback + enqueue_fill_image_walking(image, + fill_color, + origin, + region, + events, + event_); + } + else + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(image.get_context() == this->get_context()); + + cl_int ret = clEnqueueFillImage( + m_queue, + image.get(), + fill_color, + origin, + region, + events.size(), + events.get_event_ptr(), + event_ ? &event_->get() : NULL + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } } } From b3ce7b7c9fc6307949eaf5734611393b7a09baa9 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Wed, 15 Apr 2015 18:51:22 +0200 Subject: [PATCH 32/39] * Optional use of CL_DEVICE_TYPE_CUSTOM --- include/boost/compute/device.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/boost/compute/device.hpp b/include/boost/compute/device.hpp index e2ba757a8..eac72baad 100644 --- a/include/boost/compute/device.hpp +++ b/include/boost/compute/device.hpp @@ -49,7 +49,9 @@ class device cpu = CL_DEVICE_TYPE_CPU, gpu = CL_DEVICE_TYPE_GPU, accelerator = CL_DEVICE_TYPE_ACCELERATOR, +#ifdef CL_DEVICE_TYPE_CUSTOM custom = CL_DEVICE_TYPE_CUSTOM, +#endif all = CL_DEVICE_TYPE_ALL }; From 0d2fb59fbc687e19677dc6a891a3b4826db298be Mon Sep 17 00:00:00 2001 From: David Jurado Date: Wed, 15 Apr 2015 19:00:18 +0200 Subject: [PATCH 33/39] * deleted compute:: --- include/boost/compute/command_queue.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 7ac7b6932..4f295da3f 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -1026,13 +1026,13 @@ class command_queue size_t row_pitch = 0; size_t slice_pitch = 0; - compute::event map_event, *pmap_event = NULL; - compute::user_event user_event; - compute::wait_list unmap_wait; + event map_event, *pmap_event = NULL; + user_event user_event; + wait_list unmap_wait; if (pevent) { // Async exec - user_event = compute::user_event(get_context()); + user_event = user_event(get_context()); unmap_wait.insert(user_event); pmap_event = &map_event; } @@ -1068,7 +1068,7 @@ class command_queue pImage2D += slice_pitch; } if(pevent) { - user_event.set_status(compute::event::complete); + user_event.set_status(event::complete); } }; @@ -1094,7 +1094,7 @@ class command_queue { std::copy_n(static_cast(fill_color), element_size, static_cast(pelem)); }, - compute::command_queue::map_write, origin, region, events, event_); + command_queue::map_write, origin, region, events, event_); } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) From 0f0b8c1fb2dbdd5574314ed3d950e024077a0b18 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Wed, 15 Apr 2015 22:44:52 +0200 Subject: [PATCH 34/39] * Pre c++11 adaptation. Warning: Code under development --- include/boost/compute/command_queue.hpp | 91 +++++++++++++++++++++---- 1 file changed, 79 insertions(+), 12 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 4f295da3f..d75b051ba 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -14,10 +14,12 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -1027,13 +1029,15 @@ class command_queue size_t row_pitch = 0; size_t slice_pitch = 0; event map_event, *pmap_event = NULL; - user_event user_event; + user_event user_ev; wait_list unmap_wait; + size_t origin3[3] = { origin[0], origin[1], origin[2] }; + size_t region3[3] = { region[0], region[1], region[2] }; if (pevent) { // Async exec - user_event = user_event(get_context()); - unmap_wait.insert(user_event); + user_ev = user_event(get_context()); + unmap_wait.insert(user_ev); pmap_event = &map_event; } @@ -1051,15 +1055,68 @@ class command_queue size_t element_size = image.get_image_info(CL_IMAGE_ELEMENT_SIZE); +#ifdef BOOST_NO_CXX11_LAMBDAS + // Resolve the lambda syntax sugar + struct walk_image + { + Function m_walk_elemets; + char * m_pImage3D; + size_t m_origin3[3]; + size_t m_region3[3]; + size_t m_row_pitch; + size_t m_slice_pitch; + size_t m_element_size; + user_event m_user_ev; + walk_image(Function walk_elemets, + char * pImage3D, + const size_t *origin, + const size_t *region, + size_t row_pitch, + size_t slice_pitch, + size_t element_size, + user_event user_ev) : + m_walk_elemets(walk_elemets), + m_pImage3D(pImage3D), + m_row_pitch(row_pitch), + m_slice_pitch(slice_pitch), + m_element_size(element_size), + m_user_ev(user_ev) + { + std::copy_n(origin, 3, m_origin3); + std::copy_n(region, 3, m_region3); + } + void operator () () const + { + // Walks all the image elements + char * pImage2D = m_pImage3D; + for(size_t d = m_origin3[2]; d < m_region3[2]; ++d) { + char * pImage1D = pImage2D; + for(size_t h = m_origin3[1]; h < m_region3[1]; ++h) { + char *pElem = pImage1D; + for(size_t w = m_origin3[0]; w < m_region3[0]; ++w) { + m_walk_elemets((void *)pElem, w, h, d); + pElem += m_element_size; + } + pImage1D += m_row_pitch; + } + pImage2D += m_slice_pitch; + } + if(m_user_ev.get()) { + m_user_ev.set_status(event::complete); + } + } + }; + walk_image func(walk_elemets, pImage3D, origin3, region3, row_pitch, slice_pitch, element_size, user_ev); +#else auto func = [=]() { // Walks all the image elements char * pImage2D = pImage3D; - for(size_t d = origin[2]; d < region[2]; ++d) { + for(size_t d = origin3[2]; d < region3[2]; ++d) { char * pImage1D = pImage2D; - for(size_t h = origin[1]; h < region[1]; ++h) { + for(size_t h = origin3[1]; h < region3[1]; ++h) { char *pElem = pImage1D; - for(size_t w = origin[0]; w < region[0]; ++w) { + for(size_t w = origin3[0]; w < region3[0]; ++w) { walk_elemets((void *)pElem, w, h, d); pElem += element_size; } @@ -1068,10 +1125,10 @@ class command_queue pImage2D += slice_pitch; } if(pevent) { - user_event.set_status(event::complete); + user_ev.set_status(event::complete); } }; - +#endif if (pevent) { // Async exec pmap_event->set_callback(func); @@ -1090,11 +1147,21 @@ class command_queue event * event_ = NULL) { size_t element_size = image.get_image_info(CL_IMAGE_ELEMENT_SIZE); - enqueue_walk_image(image, [=](void *pelem, size_t, size_t, size_t) + struct fillc { - std::copy_n(static_cast(fill_color), element_size, static_cast(pelem)); - }, - command_queue::map_write, origin, region, events, event_); + size_t m_element_size; + char m_fill_color[16]; + fillc(size_t element_size, const void * fill_color) : m_element_size(element_size) + { + std::copy_n(static_cast(fill_color), 16, static_cast(m_fill_color)); + } + void operator () (void *pelem, size_t, size_t, size_t) const + { + // Bug: m_fill_color must be converted + std::copy_n(m_fill_color, m_element_size, static_cast(pelem)); + } + }; + enqueue_walk_image(image, fillc(element_size, fill_color), command_queue::map_write, origin, region, events, event_); } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) From 30c69ed82c093422590fda7c4bd268cde3416f3d Mon Sep 17 00:00:00 2001 From: David Jurado Date: Mon, 20 Apr 2015 18:14:06 +0200 Subject: [PATCH 35/39] * Introduced a raw version of fill image --- include/boost/compute/command_queue.hpp | 37 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index d75b051ba..55eab7238 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -1031,8 +1031,8 @@ class command_queue event map_event, *pmap_event = NULL; user_event user_ev; wait_list unmap_wait; - size_t origin3[3] = { origin[0], origin[1], origin[2] }; - size_t region3[3] = { region[0], region[1], region[2] }; + extents<3> origin3 = { origin[0], origin[1], origin[2] }; + extents<3> region3 = { region[0], region[1], region[2] }; if (pevent) { // Async exec @@ -1139,7 +1139,7 @@ class command_queue } /// Enqueues a command to fill \p image with \p fill_color. - void enqueue_fill_image_walking(const image_object& image, + void enqueue_rawfill_image_walking(const image_object& image, const void *fill_color, const size_t *origin, const size_t *region, @@ -1164,6 +1164,29 @@ class command_queue enqueue_walk_image(image, fillc(element_size, fill_color), command_queue::map_write, origin, region, events, event_); } + /// \overload + template + void enqueue_rawfill_image_walking(const image_object& image, + const void *fill_color, + const extents origin, + const extents region, + const wait_list &events = wait_list(), + event * event_ = NULL) + { + BOOST_STATIC_ASSERT(N <= 3); + BOOST_ASSERT(image.get_context() == this->get_context()); + + size_t origin3[3] = { 0, 0, 0 }; + size_t region3[3] = { 1, 1, 1 }; + + std::copy(origin.data(), origin.data() + N, origin3); + std::copy(region.data(), region.data() + N, region3); + + enqueue_rawfill_image_walking( + image, fill_color, origin3, region3, events, event_ + ); + } + #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to fill \p image with \p fill_color. /// @@ -1179,13 +1202,7 @@ class command_queue { if (get_version() < 120) { - // fallback - enqueue_fill_image_walking(image, - fill_color, - origin, - region, - events, - event_); + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); } else { From 893248d231840c599167efbb9481de0c7b84358a Mon Sep 17 00:00:00 2001 From: David Jurado Date: Mon, 20 Apr 2015 23:16:55 +0200 Subject: [PATCH 36/39] * enqueue_walk_image Default args --- include/boost/compute/command_queue.hpp | 22 +++++++++++++++------- include/boost/compute/image/image1d.hpp | 3 +++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 55eab7238..2695cab26 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -1017,9 +1017,9 @@ class command_queue template void enqueue_walk_image(const image_object& image, Function walk_elemets, - cl_map_flags flags, - const size_t *origin, - const size_t *region, + cl_map_flags flags = compute::command_queue::map_read, + const size_t *origin = NULL, + const size_t *region = NULL, const wait_list &events = wait_list(), event * pevent = NULL) { @@ -1031,8 +1031,16 @@ class command_queue event map_event, *pmap_event = NULL; user_event user_ev; wait_list unmap_wait; - extents<3> origin3 = { origin[0], origin[1], origin[2] }; - extents<3> region3 = { region[0], region[1], region[2] }; + extents<3> origin3 = { 0, 0, 0 }; + extents<3> region3 = { image.width(), std::max((size_t)1, image.height()), std::max((size_t)1, image.depth()) }; + + if (origin) { + origin3[0] = origin[0]; origin3[1] = origin[1]; origin3[2] = origin[2]; + } + + if (region) { + region3[0] = region[0]; region3[1] = region[1]; region3[2] = region[2]; + } if (pevent) { // Async exec @@ -1045,8 +1053,8 @@ class command_queue enqueue_map_image( image, flags, - origin, - region, + origin3.data(), + region3.data(), &row_pitch, &slice_pitch, events, diff --git a/include/boost/compute/image/image1d.hpp b/include/boost/compute/image/image1d.hpp index 470f22850..04f745ea0 100644 --- a/include/boost/compute/image/image1d.hpp +++ b/include/boost/compute/image/image1d.hpp @@ -81,6 +81,7 @@ class image1d : public image_object } else #endif + (void) context; (void) image_width; (void) format; (void) flags; (void) host_ptr; // image1d objects are only supported in OpenCL 1.2 and later BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); } @@ -157,6 +158,7 @@ class image1d : public image_object #ifdef CL_VERSION_1_2 return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE1D, flags); #else + (void) context; (void) flags; return std::vector(); #endif } @@ -172,6 +174,7 @@ class image1d : public image_object format, context, CL_MEM_OBJECT_IMAGE1D, flags ); #else + (void) format; (void) context; (void) flags; return false; #endif } From 35709b3e7923613090f0fb7368d4bea60254fe3b Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 21 Apr 2015 12:58:59 +0200 Subject: [PATCH 37/39] Adapt 1 for old compilers --- include/boost/compute/command_queue.hpp | 27 ++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 2695cab26..4bfc0fd1d 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -1031,8 +1031,11 @@ class command_queue event map_event, *pmap_event = NULL; user_event user_ev; wait_list unmap_wait; - extents<3> origin3 = { 0, 0, 0 }; - extents<3> region3 = { image.width(), std::max((size_t)1, image.height()), std::max((size_t)1, image.depth()) }; + extents<3> origin3( 0 ); + extents<3> region3; + region3[0] = image.width(); + region3[1] = (size_t)std::max((size_t)1, image.height()); + region3[2] = (size_t)std::max((size_t)1, image.depth()); if (origin) { origin3[0] = origin[0]; origin3[1] = origin[1]; origin3[2] = origin[2]; @@ -1090,8 +1093,8 @@ class command_queue m_element_size(element_size), m_user_ev(user_ev) { - std::copy_n(origin, 3, m_origin3); - std::copy_n(region, 3, m_region3); + std::copy(origin, origin + 3, m_origin3); + std::copy(region, region + 3, m_region3); } void operator () () const { @@ -1159,14 +1162,24 @@ class command_queue { size_t m_element_size; char m_fill_color[16]; + fillc(const fillc & o) : m_element_size(o.m_element_size) + { + const char * origin = static_cast(o.m_fill_color); + std::copy(origin, origin + 16, static_cast(m_fill_color)); + } + fillc(size_t element_size, const void * fill_color) : m_element_size(element_size) { - std::copy_n(static_cast(fill_color), 16, static_cast(m_fill_color)); + const char * origin = static_cast(fill_color); + std::copy(origin, origin + 16, static_cast(m_fill_color)); } void operator () (void *pelem, size_t, size_t, size_t) const { - // Bug: m_fill_color must be converted - std::copy_n(m_fill_color, m_element_size, static_cast(pelem)); + std::copy(m_fill_color, m_fill_color + m_element_size, static_cast(pelem)); + } + void operator () (void *pelem, size_t, size_t, size_t) + { + std::copy(m_fill_color, m_fill_color + m_element_size, static_cast(pelem)); } }; enqueue_walk_image(image, fillc(element_size, fill_color), command_queue::map_write, origin, region, events, event_); From ef865b03ca28e93f5288dcfb5e4bc4325c2a869d Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 21 Apr 2015 14:43:35 +0200 Subject: [PATCH 38/39] Adapt 2 for old compilers --- include/boost/compute/command_queue.hpp | 160 +++++++++++++----------- 1 file changed, 85 insertions(+), 75 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 4bfc0fd1d..57b4de858 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -1011,6 +1011,62 @@ class command_queue } } + +#ifdef BOOST_NO_CXX11_LAMBDAS + // Resolve the lambda syntax sugar + template + struct walk_image + { + Function m_walk_elemets; + char * m_pImage3D; + size_t m_origin3[3]; + size_t m_region3[3]; + size_t m_row_pitch; + size_t m_slice_pitch; + size_t m_element_size; + user_event m_user_ev; + walk_image(Function walk_elemets, + char * pImage3D, + const size_t *origin, + const size_t *region, + size_t row_pitch, + size_t slice_pitch, + size_t element_size, + user_event user_ev) : + m_walk_elemets(walk_elemets), + m_pImage3D(pImage3D), + m_row_pitch(row_pitch), + m_slice_pitch(slice_pitch), + m_element_size(element_size), + m_user_ev(user_ev) + { + std::copy(origin, origin + 3, m_origin3); + std::copy(region, region + 3, m_region3); + } + + void operator () () const + { + // Walks all the image elements + char * pImage2D = m_pImage3D; + for(size_t d = m_origin3[2]; d < m_region3[2]; ++d) { + char * pImage1D = pImage2D; + for(size_t h = m_origin3[1]; h < m_region3[1]; ++h) { + char *pElem = pImage1D; + for(size_t w = m_origin3[0]; w < m_region3[0]; ++w) { + m_walk_elemets((void *)pElem, w, h, d); + pElem += m_element_size; + } + pImage1D += m_row_pitch; + } + pImage2D += m_slice_pitch; + } + if(m_user_ev.get()) { + m_user_ev.set_status(event::complete); + } + } + }; +#endif + /// The function specified by \p walk_elemets must be invokable with arguments /// (void *pElem, size_t x, size_t y, size_t z), /// like std::function. @@ -1067,57 +1123,7 @@ class command_queue size_t element_size = image.get_image_info(CL_IMAGE_ELEMENT_SIZE); #ifdef BOOST_NO_CXX11_LAMBDAS - // Resolve the lambda syntax sugar - struct walk_image - { - Function m_walk_elemets; - char * m_pImage3D; - size_t m_origin3[3]; - size_t m_region3[3]; - size_t m_row_pitch; - size_t m_slice_pitch; - size_t m_element_size; - user_event m_user_ev; - walk_image(Function walk_elemets, - char * pImage3D, - const size_t *origin, - const size_t *region, - size_t row_pitch, - size_t slice_pitch, - size_t element_size, - user_event user_ev) : - m_walk_elemets(walk_elemets), - m_pImage3D(pImage3D), - m_row_pitch(row_pitch), - m_slice_pitch(slice_pitch), - m_element_size(element_size), - m_user_ev(user_ev) - { - std::copy(origin, origin + 3, m_origin3); - std::copy(region, region + 3, m_region3); - } - void operator () () const - { - // Walks all the image elements - char * pImage2D = m_pImage3D; - for(size_t d = m_origin3[2]; d < m_region3[2]; ++d) { - char * pImage1D = pImage2D; - for(size_t h = m_origin3[1]; h < m_region3[1]; ++h) { - char *pElem = pImage1D; - for(size_t w = m_origin3[0]; w < m_region3[0]; ++w) { - m_walk_elemets((void *)pElem, w, h, d); - pElem += m_element_size; - } - pImage1D += m_row_pitch; - } - pImage2D += m_slice_pitch; - } - if(m_user_ev.get()) { - m_user_ev.set_status(event::complete); - } - } - }; - walk_image func(walk_elemets, pImage3D, origin3, region3, row_pitch, slice_pitch, element_size, user_ev); + walk_image func(walk_elemets, pImage3D, origin3.data(), region3.data(), row_pitch, slice_pitch, element_size, user_ev); #else auto func = [=]() { @@ -1149,6 +1155,31 @@ class command_queue enqueue_unmap_buffer(image, pImage3D, unmap_wait, pevent); } + struct fillc + { + size_t m_element_size; + char m_fill_color[16]; + fillc(const fillc & o) : m_element_size(o.m_element_size) + { + const char * origin = static_cast(o.m_fill_color); + std::copy(origin, origin + 16, static_cast(m_fill_color)); + } + + fillc(size_t element_size, const void * fill_color) : m_element_size(element_size) + { + const char * origin = static_cast(fill_color); + std::copy(origin, origin + 16, static_cast(m_fill_color)); + } + void operator () (void *pelem, size_t, size_t, size_t) const + { + std::copy(m_fill_color, m_fill_color + m_element_size, static_cast(pelem)); + } + void operator () (void *pelem, size_t, size_t, size_t) + { + std::copy(m_fill_color, m_fill_color + m_element_size, static_cast(pelem)); + } + }; + /// Enqueues a command to fill \p image with \p fill_color. void enqueue_rawfill_image_walking(const image_object& image, const void *fill_color, @@ -1158,31 +1189,10 @@ class command_queue event * event_ = NULL) { size_t element_size = image.get_image_info(CL_IMAGE_ELEMENT_SIZE); - struct fillc - { - size_t m_element_size; - char m_fill_color[16]; - fillc(const fillc & o) : m_element_size(o.m_element_size) - { - const char * origin = static_cast(o.m_fill_color); - std::copy(origin, origin + 16, static_cast(m_fill_color)); - } - fillc(size_t element_size, const void * fill_color) : m_element_size(element_size) - { - const char * origin = static_cast(fill_color); - std::copy(origin, origin + 16, static_cast(m_fill_color)); - } - void operator () (void *pelem, size_t, size_t, size_t) const - { - std::copy(m_fill_color, m_fill_color + m_element_size, static_cast(pelem)); - } - void operator () (void *pelem, size_t, size_t, size_t) - { - std::copy(m_fill_color, m_fill_color + m_element_size, static_cast(pelem)); - } - }; - enqueue_walk_image(image, fillc(element_size, fill_color), command_queue::map_write, origin, region, events, event_); + fillc f(element_size, fill_color); + + enqueue_walk_image(image, f, command_queue::map_write, origin, region, events, event_); } /// \overload From a8947bdd50ab6f5ebcff02bb9ebee2740fc664a5 Mon Sep 17 00:00:00 2001 From: David Jurado Date: Tue, 21 Apr 2015 21:18:47 +0200 Subject: [PATCH 39/39] * Added BOOST_COMPUTE_USE_CPP11 * changed enum device_type name --- include/boost/compute/command_queue.hpp | 4 ++-- include/boost/compute/device.hpp | 4 ++-- include/boost/compute/system.hpp | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 57b4de858..deea331ba 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -1012,7 +1012,7 @@ class command_queue } -#ifdef BOOST_NO_CXX11_LAMBDAS +#if defined(BOOST_NO_CXX11_LAMBDAS) || !defined(BOOST_COMPUTE_USE_CPP11) // Resolve the lambda syntax sugar template struct walk_image @@ -1122,7 +1122,7 @@ class command_queue size_t element_size = image.get_image_info(CL_IMAGE_ELEMENT_SIZE); -#ifdef BOOST_NO_CXX11_LAMBDAS +#if defined(BOOST_NO_CXX11_LAMBDAS) || !defined(BOOST_COMPUTE_USE_CPP11) walk_image func(walk_elemets, pImage3D, origin3.data(), region3.data(), row_pitch, slice_pitch, element_size, user_ev); #else auto func = [=]() diff --git a/include/boost/compute/device.hpp b/include/boost/compute/device.hpp index eac72baad..a67f5d90d 100644 --- a/include/boost/compute/device.hpp +++ b/include/boost/compute/device.hpp @@ -44,7 +44,7 @@ class platform; class device { public: - enum type { + enum device_type { default_type = CL_DEVICE_TYPE_DEFAULT, cpu = CL_DEVICE_TYPE_CPU, gpu = CL_DEVICE_TYPE_GPU, @@ -161,7 +161,7 @@ class device } /// Returns the type of the device. - cl_device_type get_type() const + cl_device_type type() const { return get_info(CL_DEVICE_TYPE); } diff --git a/include/boost/compute/system.hpp b/include/boost/compute/system.hpp index 211675742..d08c48a9d 100644 --- a/include/boost/compute/system.hpp +++ b/include/boost/compute/system.hpp @@ -88,14 +88,14 @@ class system /// \throws no_device_found if no device with \p name is found. static device find_device(const std::string &device_name = std::string(), const std::string &platform_name = std::string(), - device::type device_type = device::all, + device::device_type device_type = device::all, uint_ min_version = 100) { BOOST_FOREACH(const device &device, devices()){ std::string platform_name_ = device.platform().name(); if((platform_name.empty() || platform_name_.find(platform_name.c_str()) != std::string::npos) && (device_name.empty() || device.name().find(device_name.c_str()) != std::string::npos) - && (device.get_type() & device_type) + && (device.type() & device_type) && device.get_version() >= min_version){ return device; } @@ -229,11 +229,11 @@ class system continue; if (type && matches(std::string("GPU"), type)) - if (device.get_type() != device::gpu) + if (device.type() != device::gpu) continue; if (type && matches(std::string("CPU"), type)) - if (device.get_type() != device::cpu) + if (device.type() != device::cpu) continue; if (platform && !matches(device.platform().name(), platform)) @@ -248,14 +248,14 @@ class system // find the first gpu device BOOST_FOREACH(const device &device, devices_){ - if(device.get_type() == device::gpu){ + if(device.type() == device::gpu){ return device; } } // find the first cpu device BOOST_FOREACH(const device &device, devices_){ - if(device.get_type() == device::cpu){ + if(device.type() == device::cpu){ return device; } }