diff --git a/example/matrix_transpose.cpp b/example/matrix_transpose.cpp index da8d3ffa7..cff6ae24a 100644 --- a/example/matrix_transpose.cpp +++ b/example/matrix_transpose.cpp @@ -281,7 +281,7 @@ int main(int argc, char *argv[]) kernel.set_arg(1, d_output); compute::event start; - start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + start = queue.enqueue_nd_range_kernel_async(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); uint64_t elapsed = start.duration().count(); @@ -298,7 +298,7 @@ int main(int argc, char *argv[]) kernel.set_arg(0, d_input); kernel.set_arg(1, d_output); - start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + start = queue.enqueue_nd_range_kernel_async(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); elapsed = start.duration().count(); std::cout << " Elapsed: " << elapsed << " ns" << std::endl; @@ -314,7 +314,7 @@ int main(int argc, char *argv[]) kernel.set_arg(0, d_input); kernel.set_arg(1, d_output); - start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + start = queue.enqueue_nd_range_kernel_async(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); elapsed = start.duration().count(); std::cout << " Elapsed: " << elapsed << " ns" << std::endl; @@ -332,7 +332,7 @@ int main(int argc, char *argv[]) kernel.set_arg(0, d_input); kernel.set_arg(1, d_output); - start = queue.enqueue_nd_range_kernel(kernel, 2, 0, global_work_size, local_work_size); + start = queue.enqueue_nd_range_kernel_async(kernel, 2, 0, global_work_size, local_work_size); queue.finish(); elapsed = start.duration().count(); std::cout << " Elapsed: " << elapsed << " ns" << std::endl; diff --git a/example/nbody.cpp b/example/nbody.cpp index 76c720d1f..0b113f892 100644 --- a/example/nbody.cpp +++ b/example/nbody.cpp @@ -10,14 +10,9 @@ #include -#define GL_GLEXT_PROTOTYPES -#ifdef __APPLE__ -#include -#include -#else -#include -#include -#endif +//#define GL_GLEXT_PROTOTYPES +#include +#include #include #if QT_VERSION >= 0x050000 @@ -193,8 +188,8 @@ void NBodyWidget::updateParticles() { // enqueue kernels to update particles and make sure that the command queue is finished compute::opengl_enqueue_acquire_buffer(m_position, m_queue); - m_queue.enqueue_1d_range_kernel(m_velocity_kernel, 0, m_particles, 0).wait(); - m_queue.enqueue_1d_range_kernel(m_position_kernel, 0, m_particles, 0).wait(); + m_queue.enqueue_1d_range_kernel_async(m_velocity_kernel, 0, m_particles, 0).wait(); + m_queue.enqueue_1d_range_kernel_async(m_position_kernel, 0, m_particles, 0).wait(); m_queue.finish(); compute::opengl_enqueue_release_buffer(m_position, m_queue); } diff --git a/include/boost/compute/algorithm/accumulate.hpp b/include/boost/compute/algorithm/accumulate.hpp index 38d47c5cd..652ae02bb 100644 --- a/include/boost/compute/algorithm/accumulate.hpp +++ b/include/boost/compute/algorithm/accumulate.hpp @@ -11,6 +11,10 @@ #ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP #define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP +#ifdef _WIN32 +#define NOMINMAX +#endif + #include #include diff --git a/include/boost/compute/algorithm/copy.hpp b/include/boost/compute/algorithm/copy.hpp index 3bb65af5f..0a3cd8bb3 100644 --- a/include/boost/compute/algorithm/copy.hpp +++ b/include/boost/compute/algorithm/copy.hpp @@ -250,7 +250,7 @@ dispatch_copy_async(InputIterator first, } event event_ = - queue.enqueue_copy_buffer( + queue.enqueue_copy_buffer_async( first.get_buffer(), result.get_buffer(), first.get_index() * sizeof(value_type), diff --git a/include/boost/compute/algorithm/fill.hpp b/include/boost/compute/algorithm/fill.hpp index 481ec4785..7c3aac3e2 100644 --- a/include/boost/compute/algorithm/fill.hpp +++ b/include/boost/compute/algorithm/fill.hpp @@ -163,11 +163,11 @@ dispatch_fill_async(BufferIterator first, size_t offset = static_cast(first.get_index()); event event_ = - queue.enqueue_fill_buffer(first.get_buffer(), - &pattern, - sizeof(value_type), - offset * sizeof(value_type), - count * sizeof(value_type)); + queue.enqueue_fill_buffer_async(first.get_buffer(), + &pattern, + sizeof(value_type), + offset * sizeof(value_type), + count * sizeof(value_type)); return future(event_); } diff --git a/include/boost/compute/command_queue.hpp b/include/boost/compute/command_queue.hpp index 433156425..deea331ba 100644 --- a/include/boost/compute/command_queue.hpp +++ b/include/boost/compute/command_queue.hpp @@ -14,10 +14,12 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -93,12 +95,12 @@ class command_queue /// Creates a null command queue. command_queue() - : m_queue(0) + : m_queue(0), m_version(0) { } explicit command_queue(cl_command_queue queue, bool retain = true) - : m_queue(queue) + : m_queue(queue), m_version(0) { if(m_queue && retain){ clRetainCommandQueue(m_queue); @@ -116,26 +118,32 @@ class command_queue BOOST_ASSERT(device.id() != 0); cl_int error = 0; + m_version = device.get_version(); #ifdef CL_VERSION_2_0 - std::vector queue_properties; - if(properties){ - queue_properties.push_back(CL_QUEUE_PROPERTIES); - queue_properties.push_back(cl_queue_properties(properties)); - queue_properties.push_back(cl_queue_properties(0)); - } + if (get_version() >= 200) + { + std::vector queue_properties; + if(properties){ + queue_properties.push_back(CL_QUEUE_PROPERTIES); + queue_properties.push_back(cl_queue_properties(properties)); + queue_properties.push_back(cl_queue_properties(0)); + } - const cl_queue_properties *queue_properties_ptr = - queue_properties.empty() ? 0 : &queue_properties[0]; + const cl_queue_properties *queue_properties_ptr = + queue_properties.empty() ? 0 : &queue_properties[0]; - m_queue = clCreateCommandQueueWithProperties( - context, device.id(), queue_properties_ptr, &error - ); - #else - m_queue = clCreateCommandQueue( - context, device.id(), properties, &error - ); + m_queue = clCreateCommandQueueWithProperties( + context, device.id(), queue_properties_ptr, &error + ); + } + else #endif + { + m_queue = clCreateCommandQueue( + context, device.id(), properties, &error + ); + } if(!m_queue){ BOOST_THROW_EXCEPTION(opencl_error(error)); @@ -144,7 +152,7 @@ class command_queue /// Creates a new command queue object as a copy of \p other. command_queue(const command_queue &other) - : m_queue(other.m_queue) + : m_queue(other.m_queue), m_version(other.m_version) { if(m_queue){ clRetainCommandQueue(m_queue); @@ -160,6 +168,7 @@ class command_queue } m_queue = other.m_queue; + m_version = other.m_version; if(m_queue){ clRetainCommandQueue(m_queue); @@ -172,9 +181,10 @@ class command_queue #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new command queue object from \p other. command_queue(command_queue&& other) BOOST_NOEXCEPT - : m_queue(other.m_queue) + : m_queue(other.m_queue), m_version(other.m_version) { other.m_queue = 0; + other.m_version = 0; } /// Move-assigns the command queue from \p other to \c *this. @@ -185,6 +195,7 @@ class command_queue } m_queue = other.m_queue; + m_version = other.m_version; other.m_queue = 0; return *this; @@ -221,6 +232,14 @@ class command_queue return context(get_info(CL_QUEUE_CONTEXT)); } + /// Returns the numeric version: major * 100 + minor. + uint_ get_version() const + { + if (m_version == 0) + m_version = get_device().get_version(); // The version of the first device + return m_version; + } + /// Returns information about the command queue. /// /// \see_opencl_ref{clGetCommandQueueInfo} @@ -250,7 +269,8 @@ class command_queue size_t offset, size_t size, void *host_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); @@ -260,13 +280,13 @@ class command_queue cl_int ret = clEnqueueReadBuffer( m_queue, buffer.get(), - CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), - 0 + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -286,28 +306,14 @@ class command_queue void *host_ptr, const wait_list &events = wait_list()) { - BOOST_ASSERT(m_queue != 0); - BOOST_ASSERT(size <= buffer.size()); - BOOST_ASSERT(buffer.get_context() == this->get_context()); - BOOST_ASSERT(host_ptr != 0); - event event_; - cl_int ret = clEnqueueReadBuffer( - m_queue, - buffer.get(), - CL_FALSE, - offset, - size, - host_ptr, - events.size(), - events.get_event_ptr(), - &event_.get() - ); - - if(ret != CL_SUCCESS){ - BOOST_THROW_EXCEPTION(opencl_error(ret)); - } + enqueue_read_buffer(buffer, + offset, + size, + host_ptr, + events, + &event_); return event_; } @@ -328,16 +334,20 @@ class command_queue size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); + if (get_version() < 110) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); + cl_int ret = clEnqueueReadBufferRect( m_queue, buffer.get(), - CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, buffer_origin, host_origin, region, @@ -348,7 +358,7 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - 0 + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -366,7 +376,8 @@ class command_queue size_t offset, size_t size, const void *host_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(size <= buffer.size()); @@ -376,13 +387,13 @@ class command_queue cl_int ret = clEnqueueWriteBuffer( m_queue, buffer.get(), - CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, offset, size, host_ptr, events.size(), events.get_event_ptr(), - 0 + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -402,30 +413,16 @@ class command_queue const void *host_ptr, const wait_list &events = wait_list()) { - BOOST_ASSERT(m_queue != 0); - BOOST_ASSERT(size <= buffer.size()); - BOOST_ASSERT(buffer.get_context() == this->get_context()); - BOOST_ASSERT(host_ptr != 0); - event event_; - cl_int ret = clEnqueueWriteBuffer( - m_queue, - buffer.get(), - CL_FALSE, - offset, - size, - host_ptr, - events.size(), - events.get_event_ptr(), - &event_.get() - ); - - if(ret != CL_SUCCESS){ - BOOST_THROW_EXCEPTION(opencl_error(ret)); - } + enqueue_write_buffer(buffer, + offset, + size, + host_ptr, + events, + &event_); - return event_; + return event_; } #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) @@ -444,16 +441,20 @@ class command_queue size_t host_row_pitch, size_t host_slice_pitch, void *host_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(buffer.get_context() == this->get_context()); BOOST_ASSERT(host_ptr != 0); + if (get_version() < 110) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); + cl_int ret = clEnqueueWriteBufferRect( m_queue, buffer.get(), - CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, buffer_origin, host_origin, region, @@ -464,7 +465,7 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - 0 + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -479,12 +480,13 @@ class command_queue /// \see_opencl_ref{clEnqueueCopyBuffer} /// /// \see copy() - event enqueue_copy_buffer(const buffer &src_buffer, + void enqueue_copy_buffer(const buffer &src_buffer, const buffer &dst_buffer, size_t src_offset, size_t dst_offset, size_t size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_offset + size <= src_buffer.size()); @@ -492,8 +494,6 @@ class command_queue BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); - event event_; - cl_int ret = clEnqueueCopyBuffer( m_queue, src_buffer.get(), @@ -503,16 +503,34 @@ class command_queue size, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } + event enqueue_copy_buffer_async(const buffer &src_buffer, + const buffer &dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + const wait_list &events = wait_list()) + { + event event_; + + enqueue_copy_buffer(src_buffer, + dst_buffer, + src_offset, + dst_offset, + size, + events, + &event_); + + return event_; + } + #if defined(CL_VERSION_1_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a command to copy a rectangular region from /// \p src_buffer to \p dst_buffer. @@ -520,7 +538,7 @@ class command_queue /// \see_opencl_ref{clEnqueueCopyBufferRect} /// /// \opencl_version_warning{1,1} - event enqueue_copy_buffer_rect(const buffer &src_buffer, + void enqueue_copy_buffer_rect(const buffer &src_buffer, const buffer &dst_buffer, const size_t src_origin[3], const size_t dst_origin[3], @@ -529,13 +547,15 @@ class command_queue size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(src_buffer.get_context() == this->get_context()); BOOST_ASSERT(dst_buffer.get_context() == this->get_context()); - event event_; + if (get_version() < 110) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueCopyBufferRect( m_queue, @@ -550,14 +570,12 @@ class command_queue host_slice_pitch, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #endif // CL_VERSION_1_1 @@ -569,18 +587,20 @@ class command_queue /// \opencl_version_warning{1,2} /// /// \see fill() - event enqueue_fill_buffer(const buffer &buffer, + void enqueue_fill_buffer(const buffer &buffer, const void *pattern, size_t pattern_size, size_t offset, size_t size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(offset + size <= buffer.size()); BOOST_ASSERT(buffer.get_context() == this->get_context()); - event event_; + if (get_version() < 120) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueFillBuffer( m_queue, @@ -591,41 +611,98 @@ class command_queue size, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } + + event enqueue_fill_buffer_async(const buffer &buffer, + const void *pattern, + size_t pattern_size, + size_t offset, + size_t size, + const wait_list &events = wait_list()) + { + event event_; + + enqueue_fill_buffer(buffer, + pattern, + pattern_size, + offset, + size, + events, + &event_); + + return event_; + } #endif // CL_VERSION_1_2 /// Enqueues a command to map \p buffer into the host address space. /// /// \see_opencl_ref{clEnqueueMapBuffer} - void* enqueue_map_buffer(const buffer &buffer, + void* enqueue_map_buffer(const buffer &buffer_, cl_map_flags flags, size_t offset, size_t size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); - BOOST_ASSERT(offset + size <= buffer.size()); - BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(offset + size <= buffer_.size()); + BOOST_ASSERT(buffer_.get_context() == this->get_context()); cl_int ret = 0; void *pointer = clEnqueueMapBuffer( m_queue, - buffer.get(), - CL_TRUE, + buffer_.get(), + event_ ? CL_FALSE : CL_TRUE, flags, offset, size, events.size(), events.get_event_ptr(), - 0, + event_ ? &event_->get() : NULL, + &ret + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } + + return pointer; + } + + /// Enqueues a command to map \p image into the host address space. + /// + /// \see_opencl_ref{clEnqueueMapImage} + void* enqueue_map_image(const image_object &image, + cl_map_flags flags, + const size_t *origin, + const size_t *region, + size_t *row_pitch, + size_t *slice_pitch = NULL, + const wait_list &events = wait_list(), + event * event_ = NULL) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(image.get_context() == this->get_context()); + + cl_int ret = 0; + void *pointer = clEnqueueMapImage( + m_queue, + image.get(), + event_ ? CL_FALSE : CL_TRUE, + flags, + origin, + region, + row_pitch, + slice_pitch, + events.size(), + events.get_event_ptr(), + event_ ? &event_->get() : NULL, &ret ); @@ -636,64 +713,87 @@ class command_queue return pointer; } + + /// \overload + template + void* enqueue_map_image(const image_object& image, + cl_map_flags flags, + const extents origin, + const extents region, + size_t *row_pitch, + size_t *slice_pitch = NULL, + const wait_list &events = wait_list(), + event * event_ = NULL) + { + BOOST_STATIC_ASSERT(N <= 3); + BOOST_ASSERT(image.get_context() == this->get_context()); + + size_t origin3[3] = { 0, 0, 0 }; + size_t region3[3] = { 1, 1, 1 }; + + std::copy(origin.data(), origin.data() + N, origin3); + std::copy(region.data(), region.data() + N, region3); + + return enqueue_map_image( + image, flags ,origin3, region3, row_pitch, slice_pitch, events, event_ + ); + } + /// Enqueues a command to unmap \p buffer from the host memory space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} - event enqueue_unmap_buffer(const buffer &buffer, + void enqueue_unmap_buffer(const memory_object &mem_object, void *mapped_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { - BOOST_ASSERT(buffer.get_context() == this->get_context()); + BOOST_ASSERT(mem_object.get_context() == this->get_context()); - return enqueue_unmap_mem_object(buffer.get(), mapped_ptr, events); + enqueue_unmap_mem_object(mem_object.get(), mapped_ptr, events, event_); } /// Enqueues a command to unmap \p mem from the host memory space. /// /// \see_opencl_ref{clEnqueueUnmapMemObject} - event enqueue_unmap_mem_object(cl_mem mem, + void enqueue_unmap_mem_object(cl_mem mem, void *mapped_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueUnmapMemObject( m_queue, mem, mapped_ptr, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Enqueues a command to read data from \p image to host memory. /// /// \see_opencl_ref{clEnqueueReadImage} - event enqueue_read_image(const image_object& image, + void enqueue_read_image(const image_object& image, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *host_ptr, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueReadImage( m_queue, image.get(), - CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, origin, region, row_pitch, @@ -701,26 +801,26 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// \overload template - event enqueue_read_image(const image_object& image, + void enqueue_read_image(const image_object& image, const extents origin, const extents region, void *host_ptr, size_t row_pitch = 0, size_t slice_pitch = 0, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { + BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; @@ -729,30 +829,29 @@ class command_queue std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); - return enqueue_read_image( - image, origin3, region3, row_pitch, slice_pitch, host_ptr, events + enqueue_read_image( + image, origin3, region3, row_pitch, slice_pitch, host_ptr, events, event_ ); } /// Enqueues a command to write data from host memory to \p image. /// /// \see_opencl_ref{clEnqueueWriteImage} - event enqueue_write_image(image_object& image, + void enqueue_write_image(image_object& image, const size_t *origin, const size_t *region, const void *host_ptr, size_t input_row_pitch = 0, size_t input_slice_pitch = 0, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueWriteImage( m_queue, image.get(), - CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, origin, region, input_row_pitch, @@ -760,26 +859,26 @@ class command_queue host_ptr, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// \overload template - event enqueue_write_image(image_object& image, + void enqueue_write_image(image_object& image, const extents origin, const extents region, const void *host_ptr, const size_t input_row_pitch = 0, const size_t input_slice_pitch = 0, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { + BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; @@ -788,25 +887,24 @@ class command_queue std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); - return enqueue_write_image( - image, origin3, region3, host_ptr, input_row_pitch, input_slice_pitch, events + enqueue_write_image( + image, origin3, region3, host_ptr, input_row_pitch, input_slice_pitch, events, event_ ); } /// Enqueues a command to copy data from \p src_image to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyImage} - event enqueue_copy_image(const image_object& src_image, + void enqueue_copy_image(const image_object& src_image, image_object& dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueCopyImage( m_queue, src_image.get(), @@ -816,25 +914,25 @@ class command_queue region, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// \overload template - event enqueue_copy_image(const image_object& src_image, + void enqueue_copy_image(const image_object& src_image, image_object& dst_image, const extents src_origin, const extents dst_origin, const extents region, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { + BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(src_image.get_context() == this->get_context()); BOOST_ASSERT(dst_image.get_context() == this->get_context()); BOOST_ASSERT_MSG(src_image.format() == dst_image.format(), @@ -848,25 +946,24 @@ class command_queue std::copy(dst_origin.data(), dst_origin.data() + N, dst_origin3); std::copy(region.data(), region.data() + N, region3); - return enqueue_copy_image( - src_image, dst_image, src_origin3, dst_origin3, region3, events + enqueue_copy_image( + src_image, dst_image, src_origin3, dst_origin3, region3, events, event_ ); } /// Enqueues a command to copy data from \p src_image to \p dst_buffer. /// /// \see_opencl_ref{clEnqueueCopyImageToBuffer} - event enqueue_copy_image_to_buffer(const image_object& src_image, + void enqueue_copy_image_to_buffer(const image_object& src_image, memory_object& dst_buffer, const size_t *src_origin, const size_t *region, size_t dst_offset, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueCopyImageToBuffer( m_queue, src_image.get(), @@ -876,30 +973,27 @@ class command_queue dst_offset, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Enqueues a command to copy data from \p src_buffer to \p dst_image. /// /// \see_opencl_ref{clEnqueueCopyBufferToImage} - event enqueue_copy_buffer_to_image(const memory_object& src_buffer, + void enqueue_copy_buffer_to_image(const memory_object& src_buffer, image_object& dst_image, size_t src_offset, const size_t *dst_origin, const size_t *region, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; - cl_int ret = clEnqueueCopyBufferToImage( m_queue, src_buffer.get(), @@ -909,14 +1003,219 @@ class command_queue region, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } + } - return event_; + +#if defined(BOOST_NO_CXX11_LAMBDAS) || !defined(BOOST_COMPUTE_USE_CPP11) + // Resolve the lambda syntax sugar + template + struct walk_image + { + Function m_walk_elemets; + char * m_pImage3D; + size_t m_origin3[3]; + size_t m_region3[3]; + size_t m_row_pitch; + size_t m_slice_pitch; + size_t m_element_size; + user_event m_user_ev; + walk_image(Function walk_elemets, + char * pImage3D, + const size_t *origin, + const size_t *region, + size_t row_pitch, + size_t slice_pitch, + size_t element_size, + user_event user_ev) : + m_walk_elemets(walk_elemets), + m_pImage3D(pImage3D), + m_row_pitch(row_pitch), + m_slice_pitch(slice_pitch), + m_element_size(element_size), + m_user_ev(user_ev) + { + std::copy(origin, origin + 3, m_origin3); + std::copy(region, region + 3, m_region3); + } + + void operator () () const + { + // Walks all the image elements + char * pImage2D = m_pImage3D; + for(size_t d = m_origin3[2]; d < m_region3[2]; ++d) { + char * pImage1D = pImage2D; + for(size_t h = m_origin3[1]; h < m_region3[1]; ++h) { + char *pElem = pImage1D; + for(size_t w = m_origin3[0]; w < m_region3[0]; ++w) { + m_walk_elemets((void *)pElem, w, h, d); + pElem += m_element_size; + } + pImage1D += m_row_pitch; + } + pImage2D += m_slice_pitch; + } + if(m_user_ev.get()) { + m_user_ev.set_status(event::complete); + } + } + }; +#endif + + /// The function specified by \p walk_elemets must be invokable with arguments + /// (void *pElem, size_t x, size_t y, size_t z), + /// like std::function. + template + void enqueue_walk_image(const image_object& image, + Function walk_elemets, + cl_map_flags flags = compute::command_queue::map_read, + const size_t *origin = NULL, + const size_t *region = NULL, + const wait_list &events = wait_list(), + event * pevent = NULL) + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(image.get_context() == this->get_context()); + + size_t row_pitch = 0; + size_t slice_pitch = 0; + event map_event, *pmap_event = NULL; + user_event user_ev; + wait_list unmap_wait; + extents<3> origin3( 0 ); + extents<3> region3; + region3[0] = image.width(); + region3[1] = (size_t)std::max((size_t)1, image.height()); + region3[2] = (size_t)std::max((size_t)1, image.depth()); + + if (origin) { + origin3[0] = origin[0]; origin3[1] = origin[1]; origin3[2] = origin[2]; + } + + if (region) { + region3[0] = region[0]; region3[1] = region[1]; region3[2] = region[2]; + } + + if (pevent) { + // Async exec + user_ev = user_event(get_context()); + unmap_wait.insert(user_ev); + pmap_event = &map_event; + } + + char * const pImage3D = reinterpret_cast( + enqueue_map_image( + image, + flags, + origin3.data(), + region3.data(), + &row_pitch, + &slice_pitch, + events, + pmap_event) + ); + + size_t element_size = image.get_image_info(CL_IMAGE_ELEMENT_SIZE); + +#if defined(BOOST_NO_CXX11_LAMBDAS) || !defined(BOOST_COMPUTE_USE_CPP11) + walk_image func(walk_elemets, pImage3D, origin3.data(), region3.data(), row_pitch, slice_pitch, element_size, user_ev); +#else + auto func = [=]() + { + // Walks all the image elements + char * pImage2D = pImage3D; + for(size_t d = origin3[2]; d < region3[2]; ++d) { + char * pImage1D = pImage2D; + for(size_t h = origin3[1]; h < region3[1]; ++h) { + char *pElem = pImage1D; + for(size_t w = origin3[0]; w < region3[0]; ++w) { + walk_elemets((void *)pElem, w, h, d); + pElem += element_size; + } + pImage1D += row_pitch; + } + pImage2D += slice_pitch; + } + if(pevent) { + user_ev.set_status(event::complete); + } + }; +#endif + if (pevent) { + // Async exec + pmap_event->set_callback(func); + } else { + func(); + } + enqueue_unmap_buffer(image, pImage3D, unmap_wait, pevent); + } + + struct fillc + { + size_t m_element_size; + char m_fill_color[16]; + fillc(const fillc & o) : m_element_size(o.m_element_size) + { + const char * origin = static_cast(o.m_fill_color); + std::copy(origin, origin + 16, static_cast(m_fill_color)); + } + + fillc(size_t element_size, const void * fill_color) : m_element_size(element_size) + { + const char * origin = static_cast(fill_color); + std::copy(origin, origin + 16, static_cast(m_fill_color)); + } + void operator () (void *pelem, size_t, size_t, size_t) const + { + std::copy(m_fill_color, m_fill_color + m_element_size, static_cast(pelem)); + } + void operator () (void *pelem, size_t, size_t, size_t) + { + std::copy(m_fill_color, m_fill_color + m_element_size, static_cast(pelem)); + } + }; + + /// Enqueues a command to fill \p image with \p fill_color. + void enqueue_rawfill_image_walking(const image_object& image, + const void *fill_color, + const size_t *origin, + const size_t *region, + const wait_list &events = wait_list(), + event * event_ = NULL) + { + size_t element_size = image.get_image_info(CL_IMAGE_ELEMENT_SIZE); + + fillc f(element_size, fill_color); + + enqueue_walk_image(image, f, command_queue::map_write, origin, region, events, event_); + } + + /// \overload + template + void enqueue_rawfill_image_walking(const image_object& image, + const void *fill_color, + const extents origin, + const extents region, + const wait_list &events = wait_list(), + event * event_ = NULL) + { + BOOST_STATIC_ASSERT(N <= 3); + BOOST_ASSERT(image.get_context() == this->get_context()); + + size_t origin3[3] = { 0, 0, 0 }; + size_t region3[3] = { 1, 1, 1 }; + + std::copy(origin.data(), origin.data() + N, origin3); + std::copy(region.data(), region.data() + N, region3); + + enqueue_rawfill_image_walking( + image, fill_color, origin3, region3, events, event_ + ); } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) @@ -925,42 +1224,49 @@ class command_queue /// \see_opencl_ref{clEnqueueFillImage} /// /// \opencl_version_warning{1,2} - event enqueue_fill_image(image_object& image, + void enqueue_fill_image(const image_object& image, const void *fill_color, const size_t *origin, const size_t *region, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { - BOOST_ASSERT(m_queue != 0); - - event event_; - - cl_int ret = clEnqueueFillImage( - m_queue, - image.get(), - fill_color, - origin, - region, - events.size(), - events.get_event_ptr(), - &event_.get() - ); - - if(ret != CL_SUCCESS){ - BOOST_THROW_EXCEPTION(opencl_error(ret)); + if (get_version() < 120) + { + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); + } + else + { + BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(image.get_context() == this->get_context()); + + cl_int ret = clEnqueueFillImage( + m_queue, + image.get(), + fill_color, + origin, + region, + events.size(), + events.get_event_ptr(), + event_ ? &event_->get() : NULL + ); + + if(ret != CL_SUCCESS){ + BOOST_THROW_EXCEPTION(opencl_error(ret)); + } } - - return event_; } /// \overload template - event enqueue_fill_image(image_object& image, + void enqueue_fill_image(const image_object& image, const void *fill_color, const extents origin, const extents region, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { + BOOST_STATIC_ASSERT(N <= 3); BOOST_ASSERT(image.get_context() == this->get_context()); size_t origin3[3] = { 0, 0, 0 }; @@ -969,8 +1275,8 @@ class command_queue std::copy(origin.data(), origin.data() + N, origin3); std::copy(region.data(), region.data() + N, region3); - return enqueue_fill_image( - image, fill_color, origin3, region3, events + enqueue_fill_image( + image, fill_color, origin3, region3, events, event_ ); } @@ -979,14 +1285,16 @@ class command_queue /// \see_opencl_ref{clEnqueueMigrateMemObjects} /// /// \opencl_version_warning{1,2} - event enqueue_migrate_memory_objects(uint_ num_mem_objects, + void enqueue_migrate_memory_objects(uint_ num_mem_objects, const cl_mem *mem_objects, cl_mem_migration_flags flags, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; + if (get_version() < 120) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueMigrateMemObjects( m_queue, @@ -995,32 +1303,30 @@ class command_queue flags, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #endif // CL_VERSION_1_2 /// Enqueues a kernel for execution. /// /// \see_opencl_ref{clEnqueueNDRangeKernel} - event enqueue_nd_range_kernel(const kernel &kernel, + void enqueue_nd_range_kernel(const kernel &kernel, size_t work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); + BOOST_ASSERT(work_dim > 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); - event event_; - cl_int ret = clEnqueueNDRangeKernel( m_queue, kernel, @@ -1030,95 +1336,165 @@ class command_queue local_work_size, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } + } + /// \overload + template + void enqueue_nd_range_kernel(const kernel &kernel, + const extents &global_work_offset, + const extents &global_work_size, + const extents &local_work_size, + const wait_list &events = wait_list(), + event * event_ = NULL) + { + BOOST_STATIC_ASSERT(N > 0); + enqueue_nd_range_kernel( + kernel, + N, + global_work_offset.data(), + global_work_size.data(), + (local_work_size[0] == 0) ? NULL : local_work_size.data(), + events, + event_ + ); + } + + /// Enqueues a kernel for execution. + /// + /// \see_opencl_ref{clEnqueueNDRangeKernel} + event enqueue_nd_range_kernel_async(const kernel &kernel, + size_t work_dim, + const size_t *global_work_offset, + const size_t *global_work_size, + const size_t *local_work_size, + const wait_list &events = wait_list()) + { + event event_; + + enqueue_nd_range_kernel(kernel, + work_dim, + global_work_offset, + global_work_size, + local_work_size, + events, + &event_); return event_; } /// \overload template - event enqueue_nd_range_kernel(const kernel &kernel, + event enqueue_nd_range_kernel_async(const kernel &kernel, const extents &global_work_offset, const extents &global_work_size, const extents &local_work_size, const wait_list &events = wait_list()) { - return enqueue_nd_range_kernel( + event event_; + + enqueue_nd_range_kernel( kernel, N, global_work_offset.data(), global_work_size.data(), - local_work_size.data(), - events + (local_work_size[0] == 0) ? NULL : local_work_size.data(), + events, + &event_.get() ); + + return event_; } /// Convenience method which calls enqueue_nd_range_kernel() with a /// one-dimensional range. - event enqueue_1d_range_kernel(const kernel &kernel, + void enqueue_1d_range_kernel(const kernel &kernel, size_t global_work_offset, size_t global_work_size, size_t local_work_size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { - return enqueue_nd_range_kernel( + enqueue_nd_range_kernel( kernel, 1, &global_work_offset, &global_work_size, local_work_size ? &local_work_size : 0, - events + events, + event_ ); } + event enqueue_1d_range_kernel_async(const kernel &kernel, + size_t global_work_offset, + size_t global_work_size, + size_t local_work_size, + const wait_list &events = wait_list()) + { + event event_; + enqueue_1d_range_kernel(kernel, + global_work_offset, + global_work_size, + local_work_size, + events, + &event_); + return event_; + + } + /// Enqueues a kernel to execute using a single work-item. /// /// \see_opencl_ref{clEnqueueTask} - event enqueue_task(const kernel &kernel, const wait_list &events = wait_list()) + void enqueue_task(const kernel &kernel, + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); BOOST_ASSERT(kernel.get_context() == this->get_context()); - event event_; + cl_int ret; // clEnqueueTask() was deprecated in OpenCL 2.0. In that case we // just forward to the equivalent clEnqueueNDRangeKernel() call. #ifdef CL_VERSION_2_0 - size_t one = 1; - cl_int ret = clEnqueueNDRangeKernel( - m_queue, kernel, 1, 0, &one, &one, - events.size(), events.get_event_ptr(), &event_.get() - ); - #else - cl_int ret = clEnqueueTask( - m_queue, kernel, events.size(), events.get_event_ptr(), &event_.get() - ); + if (get_version() >= 200) + { + size_t one = 1; + ret = clEnqueueNDRangeKernel( + m_queue, kernel, 1, 0, &one, &one, + events.size(), events.get_event_ptr(), event_ ? &event_->get() : NULL + ); + } + else #endif + { + ret = clEnqueueTask( + m_queue, kernel, events.size(), events.get_event_ptr(), event_ ? &event_->get() : NULL + ); + } if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Enqueues a function to execute on the host. - event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *), - void *args, - size_t cb_args, - uint_ num_mem_objects, - const cl_mem *mem_list, - const void **args_mem_loc, - const wait_list &events = wait_list()) + void enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void *), + void *args, + size_t cb_args, + uint_ num_mem_objects, + const cl_mem *mem_list, + const void **args_mem_loc, + const wait_list &events = wait_list(), + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); - event event_; cl_int ret = clEnqueueNativeKernel( m_queue, user_func, @@ -1129,28 +1505,28 @@ class command_queue args_mem_loc, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Convenience overload for enqueue_native_kernel() which enqueues a /// native kernel on the host with a nullary function. - event enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void), - const wait_list &events = wait_list()) + void enqueue_native_kernel(void (BOOST_COMPUTE_CL_CALLBACK *user_func)(void), + const wait_list &events = wait_list(), + event * event_ = NULL) { - return enqueue_native_kernel( + enqueue_native_kernel( detail::nullary_native_kernel_trampoline, reinterpret_cast(&user_func), sizeof(user_func), 0, 0, 0, - events + events, + event_ ); } @@ -1180,43 +1556,46 @@ class command_queue BOOST_ASSERT(m_queue != 0); #ifdef CL_VERSION_1_2 - clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0); - #else - clEnqueueBarrier(m_queue); + if (get_version() >= 120) + clEnqueueBarrierWithWaitList(m_queue, 0, 0, 0); + else #endif + clEnqueueBarrier(m_queue); } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) /// Enqueues a barrier in the queue after \p events. /// /// \opencl_version_warning{1,2} - void enqueue_barrier(const wait_list &events) + void enqueue_barrier(const wait_list &events, + event * event_ = NULL) { BOOST_ASSERT(m_queue != 0); + if (get_version() < 120) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); + clEnqueueBarrierWithWaitList( - m_queue, events.size(), events.get_event_ptr(), 0 + m_queue, events.size(), events.get_event_ptr(), event_ ? &event_->get() : NULL ); } #endif // CL_VERSION_1_2 /// Enqueues a marker in the queue and returns an event that can be /// used to track its progress. - event enqueue_marker() + void enqueue_marker(event * event_) { - event event_; - + cl_int ret; #ifdef CL_VERSION_1_2 - cl_int ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, &event_.get()); - #else - cl_int ret = clEnqueueMarker(m_queue, &event_.get()); + if (get_version() >= 120) + ret = clEnqueueMarkerWithWaitList(m_queue, 0, 0, event_ ? &event_->get() : NULL); + else #endif + ret = clEnqueueMarker(m_queue, event_ ? &event_->get() : NULL); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #if defined(CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED) @@ -1224,19 +1603,19 @@ class command_queue /// event that can be used to track its progress. /// /// \opencl_version_warning{1,2} - event enqueue_marker(const wait_list &events) + void enqueue_marker(const wait_list &events, + event * event_ = NULL) { - event event_; + if (get_version() < 120) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueMarkerWithWaitList( - m_queue, events.size(), events.get_event_ptr(), &event_.get() + m_queue, events.size(), events.get_event_ptr(), event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #endif // CL_VERSION_1_2 @@ -1250,17 +1629,21 @@ class command_queue void enqueue_svm_memcpy(void *dst_ptr, const void *src_ptr, size_t size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { + if (get_version() < 200) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); + cl_int ret = clEnqueueSVMMemcpy( m_queue, - CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, dst_ptr, src_ptr, size, events.size(), events.get_event_ptr(), - 0 + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -1281,20 +1664,11 @@ class command_queue { event event_; - cl_int ret = clEnqueueSVMMemcpy( - m_queue, - CL_FALSE, - dst_ptr, - src_ptr, - size, - events.size(), - events.get_event_ptr(), - &event_.get() - ); - - if(ret != CL_SUCCESS){ - BOOST_THROW_EXCEPTION(opencl_error(ret)); - } + enqueue_svm_memcpy(dst_ptr, + src_ptr, + size, + events, + &event_); return event_; } @@ -1305,14 +1679,16 @@ class command_queue /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMMemFill} - event enqueue_svm_fill(void *svm_ptr, + void enqueue_svm_fill(void *svm_ptr, const void *pattern, size_t pattern_size, size_t size, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { - event event_; + if (get_version() < 200) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueSVMMemFill( m_queue, @@ -1322,14 +1698,12 @@ class command_queue size, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Enqueues a command to free \p svm_ptr. @@ -1339,10 +1713,12 @@ class command_queue /// \see_opencl2_ref{clEnqueueSVMFree} /// /// \see svm_free() - event enqueue_svm_free(void *svm_ptr, - const wait_list &events = wait_list()) + void enqueue_svm_free(void *svm_ptr, + const wait_list &events = wait_list(), + event * event_ = NULL) { - event event_; + if (get_version() < 200) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueSVMFree( m_queue, @@ -1352,14 +1728,12 @@ class command_queue 0, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } /// Enqueues a command to map \p svm_ptr to the host memory space. @@ -1370,17 +1744,21 @@ class command_queue void enqueue_svm_map(void *svm_ptr, size_t size, cl_map_flags flags, - const wait_list &events = wait_list()) + const wait_list &events = wait_list(), + event * event_ = NULL) { + if (get_version() < 200) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); + cl_int ret = clEnqueueSVMMap( m_queue, - CL_TRUE, + event_ ? CL_FALSE : CL_TRUE, flags, svm_ptr, size, events.size(), events.get_event_ptr(), - 0 + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ @@ -1393,24 +1771,24 @@ class command_queue /// \opencl_version_warning{2,0} /// /// \see_opencl2_ref{clEnqueueSVMUnmap} - event enqueue_svm_unmap(void *svm_ptr, - const wait_list &events = wait_list()) + void enqueue_svm_unmap(void *svm_ptr, + const wait_list &events = wait_list(), + event * event_ = NULL) { - event event_; + if (get_version() < 200) + BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_DEVICE)); cl_int ret = clEnqueueSVMUnmap( m_queue, svm_ptr, events.size(), events.get_event_ptr(), - &event_.get() + event_ ? &event_->get() : NULL ); if(ret != CL_SUCCESS){ BOOST_THROW_EXCEPTION(opencl_error(ret)); } - - return event_; } #endif // CL_VERSION_2_0 @@ -1435,11 +1813,14 @@ class command_queue /// \internal_ bool check_device_version(int major, int minor) const { - return get_device().check_version(major, minor); + int ver = static_cast(get_version()); + int check = major * 100 + minor; + return check <= ver; } private: cl_command_queue m_queue; + mutable uint_ m_version; }; inline buffer buffer::clone(command_queue &queue) const diff --git a/include/boost/compute/context.hpp b/include/boost/compute/context.hpp index 7f8602aec..58ebc02f8 100644 --- a/include/boost/compute/context.hpp +++ b/include/boost/compute/context.hpp @@ -50,7 +50,7 @@ class context public: /// Create a null context object. context() - : m_context(0) + : m_context(0), m_version(0) { } @@ -64,6 +64,7 @@ class context cl_device_id device_id = device.id(); + m_version = 0; cl_int error = 0; m_context = clCreateContext(properties, 1, &device_id, 0, 0, &error); @@ -82,10 +83,19 @@ class context cl_int error = 0; + std::vector device_ids; + std::string name, version; + for (size_t i = 0; i < devices.size(); ++i) { + const device &dev = devices[i]; + name = dev.name(); + version = dev.version(); + device_ids.push_back(devices[i].get()); + } + m_version = 0; m_context = clCreateContext( properties, - static_cast(devices.size()), - reinterpret_cast(&devices[0]), + static_cast(device_ids.size()), + reinterpret_cast(&device_ids[0]), 0, 0, &error @@ -99,7 +109,7 @@ class context /// Creates a new context object for \p context. If \p retain is /// \c true, the reference count for \p context will be incremented. explicit context(cl_context context, bool retain = true) - : m_context(context) + : m_context(context), m_version(0) { if(m_context && retain){ clRetainContext(m_context); @@ -123,6 +133,7 @@ class context clReleaseContext(m_context); } + m_version = other.m_version; m_context = other.m_context; if(m_context){ @@ -136,9 +147,10 @@ class context #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new context object from \p other. context(context&& other) BOOST_NOEXCEPT - : m_context(other.m_context) + : m_context(other.m_context), m_version(other.m_version) { other.m_context = 0; + other.m_version = 0; } /// Move-assigns the context from \p other to \c *this. @@ -148,7 +160,9 @@ class context clReleaseContext(m_context); } + m_version = other.m_version; m_context = other.m_context; + other.m_version = 0; other.m_context = 0; return *this; @@ -201,7 +215,12 @@ class context /// Returns a vector of devices for the context. std::vector get_devices() const { - return get_info >(CL_CONTEXT_DEVICES); + std::vector out; + std::vector id_vector = get_info >(CL_CONTEXT_DEVICES); + for (std::vector::iterator it = id_vector.begin(); it != id_vector.end(); ++it) { + out.push_back(device(*it)); + } + return out; } /// Returns information about the context. @@ -236,8 +255,17 @@ class context return m_context; } + /// Returns the device version number. (eg. 1.1 is 110, 1.2 is 120, 2.0 is 200, 3.11 is 311) + uint_ get_version() const + { + if (m_version == 0) + m_version = get_device().get_version(); // The version of the first device + return m_version; + } + private: cl_context m_context; + mutable uint_ m_version; // Cached ICD OpenCL version number }; /// \internal_ define get_info() specializations for context diff --git a/include/boost/compute/detail/meta_kernel.hpp b/include/boost/compute/detail/meta_kernel.hpp index 2d6bd5de7..056275608 100644 --- a/include/boost/compute/detail/meta_kernel.hpp +++ b/include/boost/compute/detail/meta_kernel.hpp @@ -636,7 +636,7 @@ class meta_kernel ::boost::compute::kernel kernel = compile(context); - return queue.enqueue_1d_range_kernel( + return queue.enqueue_1d_range_kernel_async( kernel, global_work_offset, global_work_size, @@ -653,7 +653,7 @@ class meta_kernel ::boost::compute::kernel kernel = compile(context); - return queue.enqueue_1d_range_kernel( + return queue.enqueue_1d_range_kernel_async( kernel, global_work_offset, global_work_size, diff --git a/include/boost/compute/detail/work_size.hpp b/include/boost/compute/detail/work_size.hpp index 5378941d5..49021e3af 100644 --- a/include/boost/compute/detail/work_size.hpp +++ b/include/boost/compute/detail/work_size.hpp @@ -23,7 +23,7 @@ namespace detail { // passed to clEnqueueNDRangeKernel() for a 1D algorithm. inline size_t calculate_work_size(size_t count, size_t vpt, size_t tpb) { - size_t work_size = std::ceil(float(count) / vpt); + size_t work_size = static_cast(std::ceil(float(count) / vpt)); if(work_size % tpb != 0){ work_size += tpb - work_size % tpb; } diff --git a/include/boost/compute/device.hpp b/include/boost/compute/device.hpp index ab4cbacc0..a67f5d90d 100644 --- a/include/boost/compute/device.hpp +++ b/include/boost/compute/device.hpp @@ -44,22 +44,27 @@ class platform; class device { public: - enum type { + enum device_type { + default_type = CL_DEVICE_TYPE_DEFAULT, cpu = CL_DEVICE_TYPE_CPU, gpu = CL_DEVICE_TYPE_GPU, - accelerator = CL_DEVICE_TYPE_ACCELERATOR + accelerator = CL_DEVICE_TYPE_ACCELERATOR, +#ifdef CL_DEVICE_TYPE_CUSTOM + custom = CL_DEVICE_TYPE_CUSTOM, +#endif + all = CL_DEVICE_TYPE_ALL }; /// Creates a null device object. device() - : m_id(0) + : m_id(0), m_version(0) { } /// Creates a new device object for \p id. If \p retain is \c true, /// the reference count for the device will be incremented. explicit device(cl_device_id id, bool retain = true) - : m_id(id) + : m_id(id), m_version(0) { #ifdef CL_VERSION_1_2 if(m_id && retain && is_subdevice()){ @@ -72,7 +77,7 @@ class device /// Creates a new device object as a copy of \p other. device(const device &other) - : m_id(other.m_id) + : m_id(other.m_id), m_version(other.m_version) { #ifdef CL_VERSION_1_2 if(m_id && is_subdevice()){ @@ -92,6 +97,7 @@ class device #endif m_id = other.m_id; + m_version = other.m_version; #ifdef CL_VERSION_1_2 if(m_id && is_subdevice()){ @@ -106,9 +112,10 @@ class device #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES /// Move-constructs a new device object from \p other. device(device&& other) BOOST_NOEXCEPT - : m_id(other.m_id) + : m_id(other.m_id), m_version(other.m_version) { other.m_id = 0; + other.m_version = 0; } /// Move-assigns the device from \p other to \c *this. @@ -118,10 +125,12 @@ class device if(m_id && is_subdevice()){ clReleaseDevice(m_id); } - #endif + #endif // CL_VERSION_1_2 m_id = other.m_id; + m_version = other.m_version; other.m_id = 0; + other.m_version = 0; return *this; } @@ -136,7 +145,7 @@ class device clReleaseDevice(m_id) ); } - #endif + #endif // CL_VERSION_1_2 } /// Returns the ID of the device. @@ -188,6 +197,25 @@ class device return get_info(CL_DEVICE_VERSION); } + /// Returns the device version number: major.minor * 100 (eg. 1.1 is 110, 1.2 is 120, 2.0 is 200, 3.01 is 301) + uint_ get_version() const + { + if (m_version == 0) { + std::string strversion(version()); + std::stringstream ss(strversion); + ushort_ major, minor; + ss.ignore(7); // 'OpenCL ' + ss >> major; + ss.ignore(1); // '.' + bool is_zero = ss.peek() == '0'; + ss >> minor; + if (!is_zero && minor < 10) + minor *= 10; + m_version = major * 100 + minor; // cache + } + return m_version; + } + /// Returns the driver version string. std::string driver_version() const { @@ -281,17 +309,11 @@ class device bool is_subdevice() const { #if defined(CL_VERSION_1_2) - try { + if (get_version() >= 120) return get_info(CL_DEVICE_PARENT_DEVICE) != 0; - } - catch(opencl_error&){ - // the get_info() call above will throw if the device's opencl version - // is less than 1.2 (in which case it can't be a sub-device). + else + #endif // CL_VERSION_1_2 return false; - } - #else - return false; - #endif } /// Returns information about the device. @@ -327,6 +349,9 @@ class device std::vector partition(const cl_device_partition_property *properties) const { + if (get_version() < 120) + return std::vector(); + // get sub-device count uint_ count = 0; int_ ret = clCreateSubDevices(m_id, properties, 0, 0, &count); @@ -408,21 +433,14 @@ class device /// \internal_ bool check_version(int major, int minor) const { - std::stringstream stream; - stream << version(); - - int actual_major, actual_minor; - stream.ignore(7); // 'OpenCL ' - stream >> actual_major; - stream.ignore(1); // '.' - stream >> actual_minor; - - return actual_major > major || - (actual_major == major && actual_minor >= minor); + int ver = static_cast(get_version()); + int check = major * 100 + minor; + return check <= ver; } private: cl_device_id m_id; + mutable uint_ m_version; // Cached ICD OpenCL version number }; /// \internal_ diff --git a/include/boost/compute/event.hpp b/include/boost/compute/event.hpp index d92b2e354..3b6a9e24b 100644 --- a/include/boost/compute/event.hpp +++ b/include/boost/compute/event.hpp @@ -170,9 +170,9 @@ class event } /// Returns the status of the event. - cl_int status() const + execution_status status() const { - return get_info(CL_EVENT_COMMAND_EXECUTION_STATUS); + return static_cast(get_info(CL_EVENT_COMMAND_EXECUTION_STATUS)); } /// Returns the command type for the event. @@ -210,7 +210,7 @@ class event /// Blocks until the actions corresponding to the event have /// completed. - void wait() + void wait() const { cl_int ret = clWaitForEvents(1, &m_event); if(ret != CL_SUCCESS){ @@ -230,7 +230,7 @@ class event cl_event event, cl_int status, void *user_data ), cl_int status = CL_COMPLETE, - void *user_data = 0) + void *user_data = 0) const { cl_int ret = clSetEventCallback(m_event, status, callback, user_data); if(ret != CL_SUCCESS){ @@ -246,7 +246,7 @@ class event /// /// \opencl_version_warning{1,1} template - void set_callback(Function callback, cl_int status = CL_COMPLETE) + void set_callback(Function callback, cl_int status = CL_COMPLETE) const { set_callback( event_callback_invoker, @@ -294,7 +294,7 @@ class event } /// \internal_ (deprecated) - cl_int get_status() const + execution_status get_status() const { return status(); } diff --git a/include/boost/compute/exception/context_error.hpp b/include/boost/compute/exception/context_error.hpp index 2d4580b1a..56abee769 100644 --- a/include/boost/compute/exception/context_error.hpp +++ b/include/boost/compute/exception/context_error.hpp @@ -70,7 +70,7 @@ class context_error : public std::exception } /// Returns the size of the private info memory block. - const size_t get_private_info_size() const throw() + size_t get_private_info_size() const throw() { return m_private_info_size; } diff --git a/include/boost/compute/image/image1d.hpp b/include/boost/compute/image/image1d.hpp index 59a5f2557..04f745ea0 100644 --- a/include/boost/compute/image/image1d.hpp +++ b/include/boost/compute/image/image1d.hpp @@ -51,35 +51,39 @@ class image1d : public image_object void *host_ptr = 0) { #ifdef CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE1D; - desc.image_width = image_width; - desc.image_height = 1; - desc.image_depth = 1; - desc.image_array_size = 0; - desc.image_row_pitch = 0; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - #ifdef CL_VERSION_2_0 - desc.mem_object = 0; - #else - desc.buffer = 0; - #endif - - cl_int error = 0; - - m_mem = clCreateImage( - context, flags, format.get_format_ptr(), &desc, host_ptr, &error - ); - - if(!m_mem){ - BOOST_THROW_EXCEPTION(opencl_error(error)); + if (context.get_version() >= 120) + { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE1D; + desc.image_width = image_width; + desc.image_height = 1; + desc.image_depth = 1; + desc.image_array_size = 0; + desc.image_row_pitch = 0; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + cl_int error = 0; + + m_mem = clCreateImage( + context, flags, format.get_format_ptr(), &desc, host_ptr, &error + ); + + if(!m_mem){ + BOOST_THROW_EXCEPTION(opencl_error(error)); + } } - #else + else + #endif + (void) context; (void) image_width; (void) format; (void) flags; (void) host_ptr; // image1d objects are only supported in OpenCL 1.2 and later BOOST_THROW_EXCEPTION(opencl_error(CL_IMAGE_FORMAT_NOT_SUPPORTED)); - #endif } /// Creates a new image1d as a copy of \p other. @@ -154,6 +158,7 @@ class image1d : public image_object #ifdef CL_VERSION_1_2 return image_object::get_supported_formats(context, CL_MEM_OBJECT_IMAGE1D, flags); #else + (void) context; (void) flags; return std::vector(); #endif } @@ -169,6 +174,7 @@ class image1d : public image_object format, context, CL_MEM_OBJECT_IMAGE1D, flags ); #else + (void) format; (void) context; (void) flags; return false; #endif } diff --git a/include/boost/compute/image/image2d.hpp b/include/boost/compute/image/image2d.hpp index cd903fe91..db3b84018 100644 --- a/include/boost/compute/image/image2d.hpp +++ b/include/boost/compute/image/image2d.hpp @@ -59,38 +59,43 @@ class image2d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = image_width; - desc.image_height = image_height; - desc.image_depth = 1; - desc.image_array_size = 0; - desc.image_row_pitch = image_row_pitch; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - #ifdef CL_VERSION_2_0 - desc.mem_object = 0; - #else - desc.buffer = 0; - #endif - - m_mem = clCreateImage(context, - flags, - format.get_format_ptr(), - &desc, - host_ptr, - &error); - #else - m_mem = clCreateImage2D(context, - flags, - format.get_format_ptr(), - image_width, - image_height, - image_row_pitch, - host_ptr, - &error); + if (context.get_version() >= 120) + { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = 1; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + } + else #endif + { + m_mem = clCreateImage2D(context, + flags, + format.get_format_ptr(), + image_width, + image_height, + image_row_pitch, + host_ptr, + &error); + } if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); @@ -109,38 +114,43 @@ class image2d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = image_width; - desc.image_height = image_height; - desc.image_depth = 1; - desc.image_array_size = 0; - desc.image_row_pitch = image_row_pitch; - desc.image_slice_pitch = 0; - desc.num_mip_levels = 0; - desc.num_samples = 0; - #ifdef CL_VERSION_2_0 - desc.mem_object = 0; - #else - desc.buffer = 0; - #endif - - m_mem = clCreateImage(context, - flags, - format.get_format_ptr(), - &desc, - host_ptr, - &error); - #else - m_mem = clCreateImage2D(context, - flags, - format.get_format_ptr(), - image_width, - image_height, - image_row_pitch, - host_ptr, - &error); + if (context.get_version() >= 120) + { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = 1; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = 0; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + } + else #endif + { + m_mem = clCreateImage2D(context, + flags, + format.get_format_ptr(), + image_width, + image_height, + image_row_pitch, + host_ptr, + &error); + } if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); diff --git a/include/boost/compute/image/image3d.hpp b/include/boost/compute/image/image3d.hpp index ee3768ac6..c463e2b84 100644 --- a/include/boost/compute/image/image3d.hpp +++ b/include/boost/compute/image/image3d.hpp @@ -52,29 +52,33 @@ class image3d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE3D; - desc.image_width = image_width; - desc.image_height = image_height; - desc.image_depth = image_depth; - desc.image_array_size = 0; - desc.image_row_pitch = image_row_pitch; - desc.image_slice_pitch = image_slice_pitch; - desc.num_mip_levels = 0; - desc.num_samples = 0; - #ifdef CL_VERSION_2_0 - desc.mem_object = 0; - #else - desc.buffer = 0; + if (context.get_version() >= 120) + { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE3D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = image_depth; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = image_slice_pitch; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + } + else #endif - - m_mem = clCreateImage(context, - flags, - format.get_format_ptr(), - &desc, - host_ptr, - &error); - #else m_mem = clCreateImage3D(context, flags, format.get_format_ptr(), @@ -85,7 +89,6 @@ class image3d : public image_object image_slice_pitch, host_ptr, &error); - #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); @@ -106,29 +109,33 @@ class image3d : public image_object cl_int error = 0; #ifdef CL_VERSION_1_2 - cl_image_desc desc; - desc.image_type = CL_MEM_OBJECT_IMAGE3D; - desc.image_width = image_width; - desc.image_height = image_height; - desc.image_depth = image_depth; - desc.image_array_size = 0; - desc.image_row_pitch = image_row_pitch; - desc.image_slice_pitch = image_slice_pitch; - desc.num_mip_levels = 0; - desc.num_samples = 0; - #ifdef CL_VERSION_2_0 - desc.mem_object = 0; - #else - desc.buffer = 0; + if (context.get_version() >= 120) + { + cl_image_desc desc; + desc.image_type = CL_MEM_OBJECT_IMAGE3D; + desc.image_width = image_width; + desc.image_height = image_height; + desc.image_depth = image_depth; + desc.image_array_size = 0; + desc.image_row_pitch = image_row_pitch; + desc.image_slice_pitch = image_slice_pitch; + desc.num_mip_levels = 0; + desc.num_samples = 0; + #ifdef CL_VERSION_2_0 + desc.mem_object = 0; + #else + desc.buffer = 0; + #endif + + m_mem = clCreateImage(context, + flags, + format.get_format_ptr(), + &desc, + host_ptr, + &error); + } + else #endif - - m_mem = clCreateImage(context, - flags, - format.get_format_ptr(), - &desc, - host_ptr, - &error); - #else m_mem = clCreateImage3D(context, flags, format.get_format_ptr(), @@ -139,7 +146,6 @@ class image3d : public image_object image_slice_pitch, host_ptr, &error); - #endif if(!m_mem){ BOOST_THROW_EXCEPTION(opencl_error(error)); diff --git a/include/boost/compute/image/image_format.hpp b/include/boost/compute/image/image_format.hpp index bd7ab0ad8..1e1801685 100644 --- a/include/boost/compute/image/image_format.hpp +++ b/include/boost/compute/image/image_format.hpp @@ -110,6 +110,18 @@ class image_format return &m_format; } + /// Returns the imgage channel order. + channel_order get_channel_order() const + { + return static_cast(m_format.image_channel_order); + } + + /// Returns the imgage channel data type. + channel_data_type get_channel_data_type() const + { + return static_cast(m_format.image_channel_data_type); + } + /// Returns \c true if \c *this is the same as \p other. bool operator==(const image_format &other) const { diff --git a/include/boost/compute/interop/opengl/gl.hpp b/include/boost/compute/interop/opengl/gl.hpp index c00fb7a8f..2c6f703ef 100644 --- a/include/boost/compute/interop/opengl/gl.hpp +++ b/include/boost/compute/interop/opengl/gl.hpp @@ -14,6 +14,10 @@ #if defined(__APPLE__) #include #else +#if defined(_WIN32) +// Avoid error: 'APIENTRY' : illegal use of type 'void' +#include "windows.h" +#endif #include #endif diff --git a/include/boost/compute/memory_object.hpp b/include/boost/compute/memory_object.hpp index 163c9ffd1..82ca4086d 100644 --- a/include/boost/compute/memory_object.hpp +++ b/include/boost/compute/memory_object.hpp @@ -151,6 +151,15 @@ class memory_object return m_mem != other.m_mem; } + /// Swaps \c this memory object with the \p other + memory_object& swap(memory_object &other) + { + cl_mem temp = m_mem; + m_mem = other.m_mem; + other.m_mem = temp; + return *this; + } + private: #ifdef CL_VERSION_1_1 /// \internal_ diff --git a/include/boost/compute/system.hpp b/include/boost/compute/system.hpp index 5bc1cec34..d08c48a9d 100644 --- a/include/boost/compute/system.hpp +++ b/include/boost/compute/system.hpp @@ -80,13 +80,23 @@ class system return default_device; } - /// Returns the device with \p name. + /// Returns the device with \p device_name contained in its name + /// with \p platform_name contained in its platform name + /// with a matching \p device_type and + /// a minimun version of \p min_version (eg. 1.1 is 101, 1.2 is 102, 2.0 is 200). /// /// \throws no_device_found if no device with \p name is found. - static device find_device(const std::string &name) + static device find_device(const std::string &device_name = std::string(), + const std::string &platform_name = std::string(), + device::device_type device_type = device::all, + uint_ min_version = 100) { BOOST_FOREACH(const device &device, devices()){ - if(device.name() == name){ + std::string platform_name_ = device.platform().name(); + if((platform_name.empty() || platform_name_.find(platform_name.c_str()) != std::string::npos) + && (device_name.empty() || device.name().find(device_name.c_str()) != std::string::npos) + && (device.type() & device_type) + && device.get_version() >= min_version){ return device; } } diff --git a/include/boost/compute/user_event.hpp b/include/boost/compute/user_event.hpp index f67fb73e4..f4489686f 100644 --- a/include/boost/compute/user_event.hpp +++ b/include/boost/compute/user_event.hpp @@ -27,6 +27,11 @@ namespace compute { class user_event : public event { public: + /// Creates a null user_event object. + user_event() : event() + { + } + /// Creates a new user-event object. /// /// \see_opencl_ref{clCreateUserEvent} @@ -72,7 +77,7 @@ class user_event : public event /// Sets the execution status for the user-event. /// /// \see_opencl_ref{clSetUserEventStatus} - void set_status(cl_int execution_status) + void set_status(cl_int execution_status) const { cl_int ret = clSetUserEventStatus(m_event, execution_status); if(ret != CL_SUCCESS){ diff --git a/include/boost/compute/utility/wait_list.hpp b/include/boost/compute/utility/wait_list.hpp index 60ebb2b0d..bfbfe7de3 100644 --- a/include/boost/compute/utility/wait_list.hpp +++ b/include/boost/compute/utility/wait_list.hpp @@ -97,7 +97,7 @@ class wait_list /// Returns the number of events in the wait-list. uint_ size() const { - return m_events.size(); + return static_cast(m_events.size()); } /// Removes all of the events from the wait-list. @@ -128,9 +128,9 @@ class wait_list /// Inserts the event from \p future into the wait-list. template - void insert(const future &future) + void insert(const future &future_) { - insert(future.get_event()); + insert(future_.get_event()); } /// Blocks until all of the events in the wait-list have completed. diff --git a/test/test_command_queue.cpp b/test/test_command_queue.cpp index e9fe4d4a8..f72315a79 100644 --- a/test/test_command_queue.cpp +++ b/test/test_command_queue.cpp @@ -105,11 +105,11 @@ BOOST_AUTO_TEST_CASE(kernel_profiling) size_t global_work_size = 8; boost::compute::event event = - queue.enqueue_nd_range_kernel(kernel, - size_t(1), - &global_work_offset, - &global_work_size, - 0); + queue.enqueue_nd_range_kernel_async(kernel, + size_t(1), + &global_work_offset, + &global_work_size, + 0); // wait until kernel is finished event.wait(); diff --git a/test/test_fill.cpp b/test/test_fill.cpp index c0d517dd3..f6176cf03 100644 --- a/test/test_fill.cpp +++ b/test/test_fill.cpp @@ -105,7 +105,7 @@ BOOST_AUTO_TEST_CASE(check_fill_type) #ifdef CL_VERSION_1_2 BOOST_CHECK_EQUAL( future.get_event().get_command_type(), - device.check_version(1,2) ? CL_COMMAND_FILL_BUFFER : CL_COMMAND_NDRANGE_KERNEL + device.check_version(1,20) ? CL_COMMAND_FILL_BUFFER : CL_COMMAND_NDRANGE_KERNEL ); #else BOOST_CHECK(