From 36b6feb98716e78acab09988f4c4c6ee385eb9c5 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Thu, 30 Mar 2023 15:25:45 +0300 Subject: [PATCH 1/8] Update OpenCL headers in include/CL this is required for cl_khr_command_buffer --- include/CL/cl.h | 53 +- include/CL/cl.hpp | 12966 ------------------- include/CL/cl_d3d10.h | 38 +- include/CL/cl_d3d11.h | 40 +- include/CL/cl_dx9_media_sharing.h | 158 +- include/CL/cl_dx9_media_sharing_intel.h | 156 +- include/CL/cl_egl.h | 8 +- include/CL/cl_ext.h | 1816 ++- include/CL/cl_ext_intel.h | 716 +- include/CL/cl_gl.h | 47 +- include/CL/cl_gl_ext.h | 26 +- include/CL/cl_icd.h | 318 +- include/CL/cl_layer.h | 61 + include/CL/cl_platform.h | 164 +- include/CL/cl_va_api_media_sharing_intel.h | 75 +- include/CL/opencl.hpp | 342 +- 16 files changed, 2620 insertions(+), 14364 deletions(-) delete mode 100644 include/CL/cl.hpp create mode 100644 include/CL/cl_layer.h diff --git a/include/CL/cl.h b/include/CL/cl.h index 3a5aae486..6c700ab17 100644 --- a/include/CL/cl.h +++ b/include/CL/cl.h @@ -141,6 +141,10 @@ typedef struct _cl_image_desc { #pragma warning( push ) #pragma warning( disable : 4201 ) /* Prevents warning about nameless struct/union in /W4 builds */ #endif +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wc11-extensions" /* Prevents warning about nameless union being C11 extension*/ +#endif #if defined(_MSC_VER) && defined(__STDC__) /* Anonymous unions are not supported in /Za builds */ #else @@ -158,6 +162,9 @@ typedef struct _cl_image_desc { #if defined(_MSC_VER) && !defined(__STDC__) #pragma warning( pop ) #endif +#ifdef __clang__ +#pragma clang diagnostic pop +#endif #endif } cl_image_desc; @@ -1311,11 +1318,11 @@ clLinkProgram(cl_context context, #ifdef CL_VERSION_2_2 -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int CL_API_CALL clSetProgramReleaseCallback(cl_program program, void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), - void * user_data) CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED; + void * user_data) CL_API_SUFFIX__VERSION_2_2_DEPRECATED; extern CL_API_ENTRY cl_int CL_API_CALL clSetProgramSpecializationConstant(cl_program program, @@ -1857,11 +1864,11 @@ clGetExtensionFunctionAddressForPlatform(cl_platform_id platform, clSetCommandQueueProperty(cl_command_queue command_queue, cl_command_queue_properties properties, cl_bool enable, - cl_command_queue_properties * old_properties) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; + cl_command_queue_properties * old_properties) CL_API_SUFFIX__VERSION_1_0_DEPRECATED; #endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */ /* Deprecated OpenCL 1.1 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateImage2D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, @@ -1869,9 +1876,9 @@ clCreateImage2D(cl_context context, size_t image_height, size_t image_row_pitch, void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateImage3D(cl_context context, cl_mem_flags flags, const cl_image_format * image_format, @@ -1881,46 +1888,46 @@ clCreateImage3D(cl_context context, size_t image_row_pitch, size_t image_slice_pitch, void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueMarker(cl_command_queue command_queue, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_event * event) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL clEnqueueWaitForEvents(cl_command_queue command_queue, cl_uint num_events, - const cl_event * event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + const cl_event * event_list) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL -clEnqueueBarrier(cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clEnqueueBarrier(cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL -clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL +clUnloadCompiler(void) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL -clGetExtensionFunctionAddress(const char * func_name) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL +clGetExtensionFunctionAddress(const char * func_name) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; /* Deprecated OpenCL 2.0 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context context, cl_device_id device, cl_command_queue_properties properties, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL clCreateSampler(cl_context context, cl_bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL clEnqueueTask(cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED; + cl_event * event) CL_API_SUFFIX__VERSION_1_2_DEPRECATED; #ifdef __cplusplus } diff --git a/include/CL/cl.hpp b/include/CL/cl.hpp deleted file mode 100644 index 3e739e73a..000000000 --- a/include/CL/cl.hpp +++ /dev/null @@ -1,12966 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2008-2015 The Khronos Group Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and/or associated documentation files (the - * "Materials"), to deal in the Materials without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Materials, and to - * permit persons to whom the Materials are furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Materials. - * - * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS - * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS - * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT - * https://www.khronos.org/registry/ - * - * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. - ******************************************************************************/ - -/*! \file - * - * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and - * OpenCL 1.2 (rev 15) - * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes - * - * Additions and fixes from: - * Brian Cole, March 3rd 2010 and April 2012 - * Matt Gruenke, April 2012. - * Bruce Merry, February 2013. - * Tom Deakin and Simon McIntosh-Smith, July 2013 - * - * \version 1.2.9 - * \date December 2015 - * - * Optional extension support - * - * cl - * cl_ext_device_fission - * #define USE_CL_DEVICE_FISSION - */ - -/*! \mainpage - * \section intro Introduction - * For many large applications C++ is the language of choice and so it seems - * reasonable to define C++ bindings for OpenCL. - * - * - * The interface is contained with a single C++ header file \em cl.hpp and all - * definitions are contained within the namespace \em cl. There is no additional - * requirement to include \em cl.h and to use either the C++ or original C - * bindings it is enough to simply include \em cl.hpp. - * - * The bindings themselves are lightweight and correspond closely to the - * underlying C API. Using the C++ bindings introduces no additional execution - * overhead. - * - * For detail documentation on the bindings see: - * - * The OpenCL C++ Wrapper API 1.2 (revision 09) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf - * - * \section example Example - * - * The following example shows a general use case for the C++ - * bindings, including support for the optional exception feature and - * also the supplied vector and string classes, see following sections for - * decriptions of these features. - * - * \code - * #define __CL_ENABLE_EXCEPTIONS - * - * #if defined(__APPLE__) || defined(__MACOSX) - * #include - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } - * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * - * \endcode - * - */ -#ifndef CL_HPP_ -#define CL_HPP_ - -// The latest version of the OpenCL C++ bindings can be found on GitHub: -// -> https://github.com/KhronosGroup/OpenCL-CLHPP -#pragma message("This version of the OpenCL Host API C++ bindings is deprecated, please use cl2.hpp instead.") - -#ifdef _WIN32 - -#include - -#if defined(USE_DX_INTEROP) -#include -#include -#endif -#endif // _WIN32 - -#if defined(_MSC_VER) -#include -#endif // _MSC_VER - -// -#if defined(USE_CL_DEVICE_FISSION) -#include -#endif - -#if defined(__APPLE__) || defined(__MACOSX) -#include -#else -#include -#endif // !__APPLE__ - -#if (_MSC_VER >= 1700) || (__cplusplus >= 201103L) -#define CL_HPP_RVALUE_REFERENCES_SUPPORTED -#define CL_HPP_CPP11_ATOMICS_SUPPORTED -#include -#endif - -#if (__cplusplus >= 201103L) -#define CL_HPP_NOEXCEPT noexcept -#else -#define CL_HPP_NOEXCEPT -#endif - - -// To avoid accidentally taking ownership of core OpenCL types -// such as cl_kernel constructors are made explicit -// under OpenCL 1.2 -#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS explicit -#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -// Define deprecated prefixes and suffixes to ensure compilation -// in case they are not pre-defined -#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) - -#if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK - -#include -#include -#include - -#if defined(__CL_ENABLE_EXCEPTIONS) -#include -#endif // #if defined(__CL_ENABLE_EXCEPTIONS) - -#if !defined(__NO_STD_VECTOR) -#include -#endif - -#if !defined(__NO_STD_STRING) -#include -#endif - -#if defined(__ANDROID__) || defined(linux) || defined(__APPLE__) || defined(__MACOSX) -#include -#endif // linux - -#include - -// Compiler specific weak linking -#ifndef CL_WEAK_ATTRIB_PREFIX -// C++17: use inline variables/functions -#if __cplusplus >= 201703L -#define CL_USE_INLINE -#endif - -#ifdef CL_USE_INLINE -#define CL_WEAK_ATTRIB_PREFIX inline -#define CL_WEAK_ATTRIB_SUFFIX -#elif _WIN32 -#define CL_WEAK_ATTRIB_PREFIX __declspec(selectany) -#define CL_WEAK_ATTRIB_SUFFIX -#else // GCC, CLANG, etc. -#define CL_WEAK_ATTRIB_PREFIX -#define CL_WEAK_ATTRIB_SUFFIX __attribute__((weak)) -#endif // CL_USE_INLINE - -#endif // CL_WEAK_ATTRIB_PREFIX - -/*! \namespace cl - * - * \brief The OpenCL C++ bindings are defined within this namespace. - * - */ -namespace cl { - -class Memory; - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __INIT_CL_EXT_FCN_PTR(name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if(!pfn_##name) { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -#if defined(CL_VERSION_1_2) -#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddressForPlatform(platform, #name); \ - if(!pfn_##name) { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; -class Buffer; - -#if defined(__CL_ENABLE_EXCEPTIONS) -/*! \brief Exception class - * - * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined. - */ -class Error : public std::exception -{ -private: - cl_int err_; - const char * errStr_; -public: - /*! \brief Create a new CL error exception for a given error code - * and corresponding message. - * - * \param err error code value. - * - * \param errStr a descriptive string that must remain in scope until - * handling of the exception has concluded. If set, it - * will be returned by what(). - */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} - - ~Error() throw() {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - cl_int err(void) const { return err_; } -}; - -#define __ERR_STR(x) #x -#else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS - - -namespace detail -{ -#if defined(__CL_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = NULL) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} -#else -static inline cl_int errHandler (cl_int err, const char * errStr = NULL) -{ - (void) errStr; // suppress unused variable warning - return err; -} -#endif // __CL_ENABLE_EXCEPTIONS -} - - - -//! \cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#if defined(CL_VERSION_1_2) -#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo) -#endif // #if defined(CL_VERSION_1_2) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define __COPY_ERR __ERR_STR(cl::copy) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#if defined(CL_VERSION_1_2) -#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage) -#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture) -#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions) -#endif // #if defined(CL_VERSION_1_2) -#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#if defined(CL_VERSION_1_2) -#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels) -#endif // #if defined(CL_VERSION_1_2) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#if defined(CL_VERSION_1_2) -#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram) -#define __LINK_PROGRAM_ERR __ERR_STR(clLinkProgram) -#endif // #if defined(CL_VERSION_1_2) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define __ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects) -#endif // #if defined(CL_VERSION_1_2) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - - -#define __RETAIN_ERR __ERR_STR(Retain Object) -#define __RELEASE_ERR __ERR_STR(Release Object) -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) -#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error) - -/** - * CL 1.2 version that uses device fission. - */ -#if defined(CL_VERSION_1_2) -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices) -#else -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // #if defined(CL_VERSION_1_2) - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) -#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) -#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#endif // #if defined(CL_VERSION_1_1) - -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond - -/** - * CL 1.2 marker and barrier commands - */ -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList) -#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList) -#endif // #if defined(CL_VERSION_1_2) - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - - * \note Deprecated. Please use std::string as default or - * re-define the string class to match the std::string - * interface by defining STRING_CLASS - */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string -{ -private: - ::size_t size_; - char * str_; -public: - //! \brief Constructs an empty string, allocating no memory. - string(void) : size_(0), str_(NULL) - { - } - - /*! \brief Constructs a string populated from an arbitrary value of - * specified size. - * - * An extra '\0' is added, in case none was contained in str. - * - * \param str the initial value of the string instance. Note that '\0' - * characters receive no special treatment. If NULL, - * the string is left empty, with a size of 0. - * - * \param size the number of characters to copy from str. - */ - string(const char * str, ::size_t size) : - size_(size), - str_(NULL) - { - if( size > 0 ) { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; - } - } - } - - /*! \brief Constructs a string populated from a null-terminated value. - * - * \param str the null-terminated initial value of the string instance. - * If NULL, the string is left empty, with a size of 0. - */ - string(const char * str) : - size_(0), - str_(NULL) - { - if( str ) { - size_= ::strlen(str); - } - if( size_ > 0 ) { - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } - } - } - - void resize( ::size_t n ) - { - if( size_ == n ) { - return; - } - if (n == 0) { - if( str_ ) { - delete [] str_; - } - str_ = NULL; - size_ = 0; - } - else { - char *newString = new char[n + 1]; - ::size_t copySize = n; - if( size_ < n ) { - copySize = size_; - } - size_ = n; - - if(str_) { - memcpy(newString, str_, (copySize + 1) * sizeof(char)); - } - if( copySize < size_ ) { - memset(newString + copySize, 0, size_ - copySize); - } - newString[size_] = '\0'; - - delete [] str_; - str_ = newString; - } - } - - const char& operator[] ( ::size_t pos ) const - { - return str_[pos]; - } - - char& operator[] ( ::size_t pos ) - { - return str_[pos]; - } - - /*! \brief Copies the value of another string to this one. - * - * \param rhs the string to copy. - * - * \returns a reference to the modified instance. - */ - string& operator=(const string& rhs) - { - if (this == &rhs) { - return *this; - } - - if( str_ != NULL ) { - delete [] str_; - str_ = NULL; - size_ = 0; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) { - str_ = NULL; - size_ = 0; - } - else { - str_ = new char[rhs.size_ + 1]; - size_ = rhs.size_; - - if (str_ != NULL) { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - return *this; - } - - /*! \brief Constructs a string by copying the value of another instance. - * - * \param rhs the string to copy. - */ - string(const string& rhs) : - size_(0), - str_(NULL) - { - *this = rhs; - } - - //! \brief Destructor - frees memory used to hold the current value. - ~string() - { - delete[] str_; - str_ = NULL; - } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t size(void) const { return size_; } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t length(void) const { return size(); } - - /*! \brief Returns a pointer to the private copy held by this instance, - * or "" if empty/unset. - */ - const char * c_str(void) const { return (str_) ? str_ : "";} -} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -typedef cl::string STRING_CLASS; -#endif // #elif !defined(__USE_DEV_STRING) - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif - -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * - * \note Deprecated. Please use std::vector as default or - * re-define the vector class to match the std::vector - * interface by defining VECTOR_CLASS - - * \note Not recommended for use with custom objects as - * current implementation will construct N elements - * - * std::vector functionality. - * \brief Fixed sized vector compatible with std::vector. - * - * \note - * This differs from std::vector<> not just in memory allocation, - * but also in terms of when members are constructed, destroyed, - * and assigned instead of being copy constructed. - * - * \param T type of element contained in the vector. - * - * \param N maximum size of the vector. - */ -template -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector -{ -private: - T data_[N]; - unsigned int size_; - -public: - //! \brief Constructs an empty vector with no memory allocated. - vector() : - size_(static_cast(0)) - {} - - //! \brief Deallocates the vector's memory and destroys all of its elements. - ~vector() - { - clear(); - } - - //! \brief Returns the number of elements currently contained. - unsigned int size(void) const - { - return size_; - } - - /*! \brief Empties the vector of all elements. - * \note - * This does not deallocate memory but will invoke destructors - * on contained elements. - */ - void clear() - { - while(!empty()) { - pop_back(); - } - } - - /*! \brief Appends an element after the last valid element. - * Calling this on a vector that has reached capacity will throw an - * exception if exceptions are enabled. - */ - void push_back (const T& x) - { - if (size() < N) { - new (&data_[size_]) T(x); - size_++; - } else { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - } - - /*! \brief Removes the last valid element from the vector. - * Calling this on an empty vector will throw an exception - * if exceptions are enabled. - */ - void pop_back(void) - { - if (size_ != 0) { - --size_; - data_[size_].~T(); - } else { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - } - - /*! \brief Constructs with a value copied from another. - * - * \param vec the vector to copy. - */ - vector(const vector& vec) : - size_(vec.size_) - { - if (size_ != 0) { - assign(vec.begin(), vec.end()); - } - } - - /*! \brief Constructs with a specified number of initial elements. - * - * \param size number of initial elements. - * - * \param val value of initial elements. - */ - vector(unsigned int size, const T& val = T()) : - size_(0) - { - for (unsigned int i = 0; i < size; i++) { - push_back(val); - } - } - - /*! \brief Overwrites the current content with that copied from another - * instance. - * - * \param rhs vector to copy. - * - * \returns a reference to this. - */ - vector& operator=(const vector& rhs) - { - if (this == &rhs) { - return *this; - } - - if (rhs.size_ != 0) { - assign(rhs.begin(), rhs.end()); - } else { - clear(); - } - - return *this; - } - - /*! \brief Tests equality against another instance. - * - * \param vec the vector against which to compare. - */ - bool operator==(vector &vec) - { - if (size() != vec.size()) { - return false; - } - - for( unsigned int i = 0; i < size(); ++i ) { - if( operator[](i) != vec[i] ) { - return false; - } - } - return true; - } - - //! \brief Conversion operator to T*. - operator T* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const T* () const { return data_; } - - //! \brief Tests whether this instance has any elements. - bool empty (void) const - { - return size_==0; - } - - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int max_size (void) const - { - return N; - } - - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int capacity () const - { - return N; - } - - //! \brief Resizes the vector to the given size - void resize(unsigned int newSize, T fill = T()) - { - if (newSize > N) - { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - else - { - while (size_ < newSize) - { - new (&data_[size_]) T(fill); - size_++; - } - while (size_ > newSize) - { - --size_; - data_[size_].~T(); - } - } - } - - /*! \brief Returns a reference to a given element. - * - * \param index which element to access. * - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). - */ - T& operator[](int index) - { - return data_[index]; - } - - /*! \brief Returns a const reference to a given element. - * - * \param index which element to access. - * - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). - */ - const T& operator[](int index) const - { - return data_[index]; - } - - /*! \brief Assigns elements of the vector based on a source iterator range. - * - * \param start Beginning iterator of source range - * \param end Enditerator of source range - * - * \note - * Will throw an exception if exceptions are enabled and size exceeded. - */ - template - void assign(I start, I end) - { - clear(); - while(start != end) { - push_back(*start); - start++; - } - } - - /*! \class iterator - * \brief Const iterator class for vectors - */ - class iterator - { - private: - const vector *vec_; - int index_; - - /** - * Internal iterator constructor to capture reference - * to the vector it iterates over rather than taking - * the vector by copy. - */ - iterator (const vector &vec, int index) : - vec_(&vec) - { - if( !vec.empty() ) { - index_ = index; - } else { - index_ = -1; - } - } - - public: - iterator(void) : - index_(-1), - vec_(NULL) - { - } - - iterator(const iterator& rhs) : - vec_(rhs.vec_), - index_(rhs.index_) - { - } - - ~iterator(void) {} - - static iterator begin(const cl::vector &vec) - { - iterator i(vec, 0); - - return i; - } - - static iterator end(const cl::vector &vec) - { - iterator i(vec, vec.size()); - - return i; - } - - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_)); - } - - bool operator!=(iterator i) - { - return (!(*this==i)); - } - - iterator& operator++() - { - ++index_; - return *this; - } - - iterator operator++(int) - { - iterator retVal(*this); - ++index_; - return retVal; - } - - iterator& operator--() - { - --index_; - return *this; - } - - iterator operator--(int) - { - iterator retVal(*this); - --index_; - return retVal; - } - - const T& operator *() const - { - return (*vec_)[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); - } - - iterator begin(void) const - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - iterator end(void) const - { - return iterator::end(*this); - } - - T& front(void) - { - return data_[0]; - } - - T& back(void) - { - return data_[size_]; - } - - const T& front(void) const - { - return data_[0]; - } - - const T& back(void) const - { - return data_[size_-1]; - } -} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) - - - - - -namespace detail { -#define __DEFAULT_NOT_INITIALIZED 1 -#define __DEFAULT_BEING_INITIALIZED 2 -#define __DEFAULT_INITIALIZED 4 - - /* - * Compare and exchange primitives are needed for handling of defaults - */ - -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - inline int compare_exchange(std::atomic * dest, int exchange, int comparand) -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED - inline int compare_exchange(volatile int * dest, int exchange, int comparand) -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - { -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - std::atomic_compare_exchange_strong(dest, &comparand, exchange); - return comparand; -#elif _MSC_VER - return (int)(_InterlockedCompareExchange( - (volatile long*)dest, - (long)exchange, - (long)comparand)); -#else // !_MSC_VER && !CL_HPP_CPP11_ATOMICS_SUPPORTED - return (__sync_val_compare_and_swap( - dest, - comparand, - exchange)); -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - } - - inline void fence() { -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - std::atomic_thread_fence(std::memory_order_seq_cst); -#elif _MSC_VER // !CL_HPP_CPP11_ATOMICS_SUPPORTED - _ReadWriteBarrier(); -#else // !_MSC_VER && !CL_HPP_CPP11_ATOMICS_SUPPORTED - __sync_synchronize(); -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - } -} // namespace detail - - -/*! \brief class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, whose - * size is known statically. - */ -template -class size_t -{ -private: - ::size_t data_[N]; - -public: - //! \brief Initialize size_t to all 0s - size_t() - { - for( int i = 0; i < N; ++i ) { - data_[i] = 0; - } - } - - ::size_t& operator[](int index) - { - return data_[index]; - } - - const ::size_t& operator[](int index) const - { - return data_[index]; - } - - //! \brief Conversion operator to T*. - operator ::size_t* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const ::size_t* () const { return data_; } -}; - -namespace detail { - -// Generic getInfoHelper. The final parameter is used to guide overload -// resolution: the actual parameter passed is an int, which makes this -// a worse conversion sequence than a specialization that declares the -// parameter as an int. -template -inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) -{ - return f(name, sizeof(T), param, NULL); -} - -// Specialized getInfoHelper for VECTOR_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; -} - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int, typename T::cl_type = 0) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - typename T::cl_type * value = (typename T::cl_type *) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - ::size_t elements = required / sizeof(typename T::cl_type); - param->assign(&value[0], &value[elements]); - for (::size_t i = 0; i < elements; i++) - { - if (value[i] != NULL) - { - err = (*param)[i].retain(); - if (err != CL_SUCCESS) { - return err; - } - } - } - return CL_SUCCESS; -} - -// Specialized for getInfo -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int) -{ - cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - - if (err != CL_SUCCESS) { - return err; - } - - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for STRING_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS* param, long) -{ -#if defined(__NO_STD_VECTOR) || defined(__NO_STD_STRING) - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - char* value = (char*)alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - *param = value; - return CL_SUCCESS; -#else - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - if (required > 0) { - // std::string has a constant data member - // a char vector does not - VECTOR_CLASS value(required); - err = f(name, required, value.data(), NULL); - if (err != CL_SUCCESS) { - return err; - } - if (param) { - param->assign(value.begin(), value.end() - 1u); - } - } - else if (param) { - param->assign(""); - } -#endif - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for cl::size_t params -template -inline cl_int getInfoHelper(Func f, cl_uint name, size_t* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - ::size_t* value = (::size_t*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - for(int i = 0; i < N; ++i) { - (*param)[i] = value[i]; - } - - return CL_SUCCESS; -} - -template struct ReferenceHandler; - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) -{ - typename T::cl_type value; - cl_int err = f(name, sizeof(value), &value, NULL); - if (err != CL_SUCCESS) { - return err; - } - *param = value; - if (value != NULL) - { - err = param->retain(); - if (err != CL_SUCCESS) { - return err; - } - } - return CL_SUCCESS; -} - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_int) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ - \ - F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - - -#if defined(CL_VERSION_1_2) -#define __PARAM_NAME_INFO_1_2(F) \ - F(cl_image_info, CL_IMAGE_ARRAY_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \ - F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ - F(cl_image_info, CL_IMAGE_NUM_SAMPLES, cl_uint) \ - \ - F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \ - F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \ - \ - F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ - \ - F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \ - \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \ - \ - F(cl_device_info, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_LINKER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PRINTF_BUFFER_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, cl_bool) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) -#endif // #if defined(CL_VERSION_1_2) - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION - -template -struct param_traits {}; - -#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 -#if defined(CL_VERSION_1_2) -__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION - -#ifdef CL_PLATFORM_ICD_SUFFIX_KHR -__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) -#endif - -#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) -#endif - -#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>) -#endif -#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) -#endif -#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) -#endif - -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) -#endif -#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) -#endif -#ifdef CL_DEVICE_WARP_SIZE_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) -#endif -#ifdef CL_DEVICE_GPU_OVERLAP_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) -#endif -#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) -#endif -#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) -#endif - -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return getInfoHelper(f, name, param, 0); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return getInfoHelper(f0, name, param, 0); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return getInfoHelper(f0, name, param, 0); -} - -template -struct ReferenceHandler -{ }; - -#if defined(CL_VERSION_1_2) -/** - * OpenCL 1.2 devices do have retain/release. - */ -template <> -struct ReferenceHandler -{ - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int retain(cl_device_id device) - { return ::clRetainDevice(device); } - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int release(cl_device_id device) - { return ::clReleaseDevice(device); } -}; -#else // #if defined(CL_VERSION_1_2) -/** - * OpenCL 1.1 devices do not have retain/release. - */ -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_SUCCESS; } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { return CL_SUCCESS; } -}; -#endif // #if defined(CL_VERSION_1_2) - -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_SUCCESS; } - // cl_platform_id does not have release(). - static cl_int release(cl_platform_id) - { return CL_SUCCESS; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; - - -// Extracts version number with major in the upper 16 bits, minor in the lower 16 -static cl_uint getVersion(const char *versionInfo) -{ - int highVersion = 0; - int lowVersion = 0; - int index = 7; - while(versionInfo[index] != '.' ) { - highVersion *= 10; - highVersion += versionInfo[index]-'0'; - ++index; - } - ++index; - while(versionInfo[index] != ' ' && versionInfo[index] != '\0') { - lowVersion *= 10; - lowVersion += versionInfo[index]-'0'; - ++index; - } - return (highVersion << 16) | lowVersion; -} - -static cl_uint getPlatformVersion(cl_platform_id platform) -{ - ::size_t size = 0; - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); - char *versionInfo = (char *) alloca(size); - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size); - return getVersion(versionInfo); -} - -static cl_uint getDevicePlatformVersion(cl_device_id device) -{ - cl_platform_id platform; - clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); - return getPlatformVersion(platform); -} - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -static cl_uint getContextPlatformVersion(cl_context context) -{ - // The platform cannot be queried directly, so we first have to grab a - // device and obtain its context - ::size_t size = 0; - clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); - if (size == 0) - return 0; - cl_device_id *devices = (cl_device_id *) alloca(size); - clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL); - return getDevicePlatformVersion(devices[0]); -} -#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(NULL) { } - - Wrapper(const cl_type &obj) : object_(obj) { } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT - { - object_ = rhs.object_; - rhs.object_ = NULL; - } -#endif - - Wrapper& operator = (const Wrapper& rhs) - { - if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper& operator = (Wrapper&& rhs) - { - if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - rhs.object_ = NULL; - } - return *this; - } -#endif - - Wrapper& operator = (const cl_type &rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs; - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - cl_int retain() const - { - return ReferenceHandler::retain(object_); - } - - cl_int release() const - { - return ReferenceHandler::release(object_); - } -}; - -template <> -class Wrapper -{ -public: - typedef cl_device_id cl_type; - -protected: - cl_type object_; - bool referenceCountable_; - - static bool isReferenceCountable(cl_device_id device) - { - bool retVal = false; - if (device != NULL) { - int version = getDevicePlatformVersion(device); - if(version > ((1 << 16) + 1)) { - retVal = true; - } - } - return retVal; - } - -public: - Wrapper() : object_(NULL), referenceCountable_(false) - { - } - - Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) - { - referenceCountable_ = isReferenceCountable(obj); - } - - ~Wrapper() - { - if (object_ != NULL) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - referenceCountable_ = isReferenceCountable(object_); - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT - { - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - rhs.object_ = NULL; - rhs.referenceCountable_ = false; - } -#endif - - Wrapper& operator = (const Wrapper& rhs) - { - if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } - } - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper& operator = (Wrapper&& rhs) - { - if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - rhs.object_ = NULL; - rhs.referenceCountable_ = false; - } - return *this; - } -#endif - - Wrapper& operator = (const cl_type &rhs) - { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } - object_ = rhs; - referenceCountable_ = isReferenceCountable(object_); - return *this; - } - - cl_type operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - template - friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS*, int, typename U::cl_type); - - cl_int retain() const - { - if( referenceCountable_ ) { - return ReferenceHandler::retain(object_); - } - else { - return CL_SUCCESS; - } - } - - cl_int release() const - { - if( referenceCountable_ ) { - return ReferenceHandler::release(object_); - } - else { - return CL_SUCCESS; - } - } -}; - -} // namespace detail -//! \endcond - -/*! \stuct ImageFormat - * \brief Adds constructors and member functions for cl_image_format. - * - * \see cl_image_format - */ -struct ImageFormat : public cl_image_format -{ - //! \brief Default constructor - performs no initialization. - ImageFormat(){} - - //! \brief Initializing constructor. - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - //! \brief Assignment operator. - ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \brief Class interface for cl_device_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_device_id - */ -class Device : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Device() : detail::Wrapper() { } - - /*! \brief Constructor from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - __CL_EXPLICIT_CONSTRUCTORS Device(const cl_device_id &device) : detail::Wrapper(device) { } - - /*! \brief Returns the first device on the default context. - * - * \see Context::getDefault() - */ - static Device getDefault(cl_int * err = NULL); - - /*! \brief Assignment operator from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device& operator = (const cl_device_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Device(const Device& dev) : detail::Wrapper(dev) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Device& operator = (const Device &dev) - { - detail::Wrapper::operator=(dev); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Device(Device&& dev) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(dev)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Device& operator = (Device &&dev) - { - detail::Wrapper::operator=(std::move(dev)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetDeviceInfo(). - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - //! \brief Wrapper for clGetDeviceInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /** - * CL 1.2 version - */ -#if defined(CL_VERSION_1_2) - //! \brief Wrapper for clCreateSubDevicesEXT(). - cl_int createSubDevices( - const cl_device_partition_property * properties, - VECTOR_CLASS* devices) - { - cl_uint n = 0; - cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = clCreateSubDevices(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif // #if defined(CL_VERSION_1_2) - -/** - * CL 1.1 version that uses device fission. - */ -#if defined(CL_VERSION_1_1) -#if defined(USE_CL_DEVICE_FISSION) - cl_int createSubDevices( - const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) - { - typedef CL_API_ENTRY cl_int - ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext * /* properties */, - cl_uint /*num_entries*/, - cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif // #if defined(USE_CL_DEVICE_FISSION) -#endif // #if defined(CL_VERSION_1_1) -}; - -/*! \brief Class interface for cl_platform_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_platform_id - */ -class Platform : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Platform() : detail::Wrapper() { } - - /*! \brief Constructor from cl_platform_id. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - __CL_EXPLICIT_CONSTRUCTORS Platform(const cl_platform_id &platform) : detail::Wrapper(platform) { } - - /*! \brief Assignment operator from cl_platform_id. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform& operator = (const cl_platform_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetPlatformInfo(). - cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - //! \brief Wrapper for clGetPlatformInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of devices for this platform. - * - * Wraps clGetDeviceIDs(). - */ - cl_int getDevices( - cl_device_type type, - VECTOR_CLASS* devices) const - { - cl_uint n = 0; - if( devices == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - -#if defined(USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is NULL, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. - */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - if( devices == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR); - - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - NULL, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } -#endif - - /*! \brief Gets a list of available platforms. - * - * Wraps clGetPlatformIDs(). - */ - static cl_int get( - VECTOR_CLASS* platforms) - { - cl_uint n = 0; - - if( platforms == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - platforms->assign(&ids[0], &ids[n]); - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform. - * - * Wraps clGetPlatformIDs(), returning the first result. - */ - static cl_int get( - Platform * platform) - { - cl_uint n = 0; - - if( platform == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - *platform = ids[0]; - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform, returning it by value. - * - * Wraps clGetPlatformIDs(), returning the first result. - */ - static Platform get( - cl_int * errResult = NULL) - { - Platform platform; - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - if (errResult != NULL) { - *errResult = err; - } - return Platform(); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - - if (err != CL_SUCCESS) { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - if (errResult != NULL) { - *errResult = err; - } - return Platform(); - } - - - return Platform(ids[0]); - } - - static Platform getDefault( - cl_int *errResult = NULL ) - { - return get(errResult); - } - - -#if defined(CL_VERSION_1_2) - //! \brief Wrapper for clUnloadCompiler(). - cl_int - unloadCompiler() - { - return ::clUnloadPlatformCompiler(object_); - } -#endif // #if defined(CL_VERSION_1_2) -}; // class Platform - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -/** - * Unload the OpenCL compiler. - * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int -UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} -#endif // #if defined(CL_VERSION_1_1) - -/*! \brief Class interface for cl_context. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_context as the original. For details, see - * clRetainContext() and clReleaseContext(). - * - * \see cl_context - */ -class Context - : public detail::Wrapper -{ -private: - -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - static std::atomic default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static volatile int default_initialized_; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static Context default_; - static volatile cl_int default_error_; -public: - /*! \brief Constructs a context including a list of specified devices. - * - * Wraps clCreateContext(). - */ - Context( - const VECTOR_CLASS& devices, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateContext( - properties, (cl_uint) numDevices, - deviceIDs, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) { - *err = error; - } - } - - Context( - const Device& device, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - - cl_device_id deviceID = device(); - - object_ = ::clCreateContext( - properties, 1, - &deviceID, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructs a context including all or a subset of devices of a specified type. - * - * Wraps clCreateContextFromType(). - */ - Context( - cl_device_type type, - cl_context_properties* properties = NULL, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - ::size_t, - void *) = NULL, - void* data = NULL, - cl_int* err = NULL) - { - cl_int error; - -#if !defined(__APPLE__) && !defined(__MACOS) - cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; - - if (properties == NULL) { - // Get a valid platform ID as we cannot send in a blank one - VECTOR_CLASS platforms; - error = Platform::get(&platforms); - if (error != CL_SUCCESS) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - return; - } - - // Check the platforms we found for a device of our specified type - cl_context_properties platform_id = 0; - for (unsigned int i = 0; i < platforms.size(); i++) { - - VECTOR_CLASS devices; - -#if defined(__CL_ENABLE_EXCEPTIONS) - try { -#endif - - error = platforms[i].getDevices(type, &devices); - -#if defined(__CL_ENABLE_EXCEPTIONS) - } catch (Error &) {} - // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type - // We do error checking next anyway, and can throw there if needed -#endif - - // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND - if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - if (devices.size() > 0) { - platform_id = (cl_context_properties)platforms[i](); - break; - } - } - - if (platform_id == 0) { - detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = CL_DEVICE_NOT_FOUND; - } - return; - } - - prop[1] = platform_id; - properties = &prop[0]; - } -#endif - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Context(const Context& ctx) : detail::Wrapper(ctx) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Context& operator = (const Context &ctx) - { - detail::Wrapper::operator=(ctx); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Context(Context&& ctx) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(ctx)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Context& operator = (Context &&ctx) - { - detail::Wrapper::operator=(std::move(ctx)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. - * - * \note All calls to this function return the same cl_context as the first. - */ - static Context getDefault(cl_int * err = NULL) - { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); - - if (state & __DEFAULT_INITIALIZED) { - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - if (state & __DEFAULT_BEING_INITIALIZED) { - // Assume writes will propagate eventually... - while(default_initialized_ != __DEFAULT_INITIALIZED) { - detail::fence(); - } - - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - cl_int error; - default_ = Context( - CL_DEVICE_TYPE_DEFAULT, - NULL, - NULL, - NULL, - &error); - - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... - default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - - if (err != NULL) { - *err = default_error_; - } - return default_; - - } - - //! \brief Default constructor - initializes to NULL. - Context() : detail::Wrapper() { } - - /*! \brief Constructor from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_context - * into the new Context object. - */ - __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper(context) { } - - /*! \brief Assignment operator from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseContext() on the value previously held by this instance. - */ - Context& operator = (const cl_context& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetContextInfo(). - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - //! \brief Wrapper for clGetContextInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of supported image formats. - * - * Wraps clGetSupportedImageFormats(). - */ - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - VECTOR_CLASS* formats) const - { - cl_uint numEntries; - - if (!formats) { - return CL_SUCCESS; - } - - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - NULL, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - if (numEntries > 0) { - ImageFormat* value = (ImageFormat*) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*)value, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(&value[0], &value[numEntries]); - } - else { - formats->clear(); - } - return CL_SUCCESS; - } -}; - -inline Device Device::getDefault(cl_int * err) -{ - cl_int error; - Device device; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - device = context.getInfo()[0]; - if (err != NULL) { - *err = CL_SUCCESS; - } - } - - return device; -} - -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -CL_WEAK_ATTRIB_PREFIX std::atomic CL_WEAK_ATTRIB_SUFFIX Context::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -CL_WEAK_ATTRIB_PREFIX volatile int CL_WEAK_ATTRIB_SUFFIX Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - -CL_WEAK_ATTRIB_PREFIX Context CL_WEAK_ATTRIB_SUFFIX Context::default_; -CL_WEAK_ATTRIB_PREFIX volatile cl_int CL_WEAK_ATTRIB_SUFFIX Context::default_error_ = CL_SUCCESS; - -/*! \brief Class interface for cl_event. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_event as the original. For details, see - * clRetainEvent() and clReleaseEvent(). - * - * \see cl_event - */ -class Event : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Event() : detail::Wrapper() { } - - /*! \brief Constructor from cl_event - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_event - * into the new Event object. - */ - __CL_EXPLICIT_CONSTRUCTORS Event(const cl_event& event) : detail::Wrapper(event) { } - - /*! \brief Assignment operator from cl_event - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseEvent() on the value previously held by this instance. - */ - Event& operator = (const cl_event& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetEventInfo(). - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - //! \brief Wrapper for clGetEventInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - //! \brief Wrapper for clGetEventProfilingInfo(). - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. - template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - /*! \brief Blocks the calling thread until this event completes. - * - * Wraps clWaitForEvents(). - */ - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } - -#if defined(CL_VERSION_1_1) - /*! \brief Registers a user callback function for a specific command execution status. - * - * Wraps clSetEventCallback(). - */ - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } -#endif - - /*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ - static cl_int - waitForEvents(const VECTOR_CLASS& events) - { - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), - __WAIT_FOR_EVENTS_ERR); - } -}; - -#if defined(CL_VERSION_1_1) -/*! \brief Class interface for user events (a subset of cl_event's). - * - * See Event for details about copy semantics, etc. - */ -class UserEvent : public Event -{ -public: - /*! \brief Constructs a user event on a given context. - * - * Wraps clCreateUserEvent(). - */ - UserEvent( - const Context& context, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - UserEvent() : Event() { } - - /*! \brief Sets the execution status of a user event object. - * - * Wraps clSetUserEventStatus(). - */ - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; -#endif - -/*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ -inline static cl_int -WaitForEvents(const VECTOR_CLASS& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : NULL), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \brief Class interface for cl_mem. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_mem as the original. For details, see - * clRetainMemObject() and clReleaseMemObject(). - * - * \see cl_mem - */ -class Memory : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Memory() : detail::Wrapper() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_mem - * into the new Memory object. - */ - __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper(memory) { } - - /*! \brief Assignment operator from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseMemObject() on the value previously held by this instance. - */ - Memory& operator = (const cl_mem& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Memory(const Memory& mem) : detail::Wrapper(mem) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Memory& operator = (const Memory &mem) - { - detail::Wrapper::operator=(mem); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Memory(Memory&& mem) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(mem)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Memory& operator = (Memory &&mem) - { - detail::Wrapper::operator=(std::move(mem)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetMemObjectInfo(). - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - //! \brief Wrapper for clGetMemObjectInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_1) - /*! \brief Registers a callback function to be called when the memory object - * is no longer needed. - * - * Wraps clSetMemObjectDestructorCallback(). - * - * Repeated calls to this function, for a given cl_mem value, will append - * to the list of functions called (in reverse order) when memory object's - * resources are freed and the memory object is deleted. - * - * \note - * The registered callbacks are associated with the underlying cl_mem - * value - not the Memory class instance. - */ - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = NULL) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } -#endif - -}; - -// Pre-declare copy functions -class Buffer; -template< typename IteratorType > -cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); - - -/*! \brief Class interface for Buffer Memory Objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Buffer : public Memory -{ -public: - - /*! \brief Constructs a Buffer in a specified context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - */ - Buffer( - const Context& context, - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructs a Buffer in the default context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - * - * \see Context::getDefault() - */ - Buffer( - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(err); - - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer( - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr = false, - cl_int* err = NULL) - { - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType)*(endIterator - startIterator); - - Context context = Context::getDefault(err); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if( !useHostPtr ) { - error = cl::copy(startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators using a specified context. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int* err = NULL); - - /*! - * \brief Construct a Buffer from a host container via iterators using a specified queue. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int* err = NULL); - - //! \brief Default constructor - initializes to NULL. - Buffer() : Memory() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Buffer& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Buffer(const Buffer& buf) : Memory(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Buffer& operator = (const Buffer &buf) - { - Memory::operator=(buf); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Buffer(Buffer&& buf) CL_HPP_NOEXCEPT : Memory(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Buffer& operator = (Buffer &&buf) - { - Memory::operator=(std::move(buf)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - -#if defined(CL_VERSION_1_1) - /*! \brief Creates a new buffer object from this. - * - * Wraps clCreateSubBuffer(). - */ - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = NULL) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != NULL) { - *err = error; - } - - return result; - } -#endif -}; - -#if defined (USE_DX_INTEROP) -/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. - * - * This is provided to facilitate interoperability with Direct3D. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferD3D10 : public Buffer -{ -public: - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - - /*! \brief Constructs a BufferD3D10, in a specified context, from a - * given ID3D10Buffer. - * - * Wraps clCreateFromD3D10BufferKHR(). - */ - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = NULL) - { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - -#if defined(CL_VERSION_1_2) - vector props = context.getInfo(); - cl_platform platform = -1; - for( int i = 0; i < props.size(); ++i ) { - if( props[i] == CL_CONTEXT_PLATFORM ) { - platform = props[i+1]; - } - } - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR); -#endif -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); -#endif - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferD3D10() : Buffer() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferD3D10& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10(const BufferD3D10& buf) : Buffer(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10& operator = (const BufferD3D10 &buf) - { - Buffer::operator=(buf); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10(BufferD3D10&& buf) CL_HPP_NOEXCEPT : Buffer(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10& operator = (BufferD3D10 &&buf) - { - Buffer::operator=(std::move(buf)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif - -/*! \brief Class interface for GL Buffer Memory Objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferGL : public Buffer -{ -public: - /*! \brief Constructs a BufferGL in a specified context, from a given - * GL buffer. - * - * Wraps clCreateFromGLBuffer(). - */ - BufferGL( - const Context& context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - BufferGL() : Buffer() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferGL& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferGL(const BufferGL& buf) : Buffer(buf) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferGL& operator = (const BufferGL &buf) - { - Buffer::operator=(buf); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferGL(BufferGL&& buf) CL_HPP_NOEXCEPT : Buffer(std::move(buf)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferGL& operator = (BufferGL &&buf) - { - Buffer::operator=(std::move(buf)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - cl_GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief C++ base class for Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image : public Memory -{ -protected: - //! \brief Default constructor - initializes to NULL. - Image() : Memory() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image(const Image& img) : Memory(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image& operator = (const Image &img) - { - Memory::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image(Image&& img) CL_HPP_NOEXCEPT : Memory(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image& operator = (Image &&img) - { - Memory::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - -public: - //! \brief Wrapper for clGetImageInfo(). - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - //! \brief Wrapper for clGetImageInfo() that returns by value. - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -#if defined(CL_VERSION_1_2) -/*! \brief Class interface for 1D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image1D : public Image -{ -public: - /*! \brief Constructs a 1D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image1D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D, - width, - 0, 0, 0, 0, 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image1D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image1D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1D(const Image1D& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1D& operator = (const Image1D &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1D(Image1D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1D& operator = (Image1D &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - -/*! \class Image1DBuffer - * \brief Image interface for 1D buffer images. - */ -class Image1DBuffer : public Image -{ -public: - Image1DBuffer( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - const Buffer &buffer, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_BUFFER, - width, - 0, 0, 0, 0, 0, 0, 0, - buffer() - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - NULL, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image1DBuffer() { } - - __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { } - - Image1DBuffer& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer(const Image1DBuffer& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer& operator = (const Image1DBuffer &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer(Image1DBuffer&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DBuffer& operator = (Image1DBuffer &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - -/*! \class Image1DArray - * \brief Image interface for arrays of 1D images. - */ -class Image1DArray : public Image -{ -public: - Image1DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t rowPitch, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_ARRAY, - width, - 0, 0, // height, depth (unused) - arraySize, - rowPitch, - 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image1DArray() { } - - __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { } - - Image1DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DArray(const Image1DArray& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image1DArray& operator = (const Image1DArray &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DArray(Image1DArray&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image1DArray& operator = (Image1DArray &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if defined(CL_VERSION_1_2) - - -/*! \brief Class interface for 2D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image2D : public Image -{ -public: - /*! \brief Constructs a 1D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - bool useCreateImage; - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif defined(CL_VERSION_1_2) - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if defined(CL_VERSION_1_2) - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - width, - height, - 0, 0, // depth, array size (unused) - row_pitch, - 0, 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - if (!useCreateImage) - { - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - } - - //! \brief Default constructor - initializes to NULL. - Image2D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image2D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2D(const Image2D& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2D& operator = (const Image2D &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2D(Image2D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2D& operator = (Image2D &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - - -#if !defined(CL_VERSION_1_2) -/*! \brief Class interface for GL 2D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. - */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D -{ -public: - /*! \brief Constructs an Image2DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture2D(). - */ - Image2DGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); - if (err != NULL) { - *err = error; - } - - } - - //! \brief Default constructor - initializes to NULL. - Image2DGL() : Image2D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image2DGL& operator = (const cl_mem& rhs) - { - Image2D::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DGL(const Image2DGL& img) : Image2D(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DGL& operator = (const Image2DGL &img) - { - Image2D::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DGL(Image2DGL&& img) CL_HPP_NOEXCEPT : Image2D(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DGL& operator = (Image2DGL &&img) - { - Image2D::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if !defined(CL_VERSION_1_2) - -#if defined(CL_VERSION_1_2) -/*! \class Image2DArray - * \brief Image interface for arrays of 2D images. - */ -class Image2DArray : public Image -{ -public: - Image2DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t height, - ::size_t rowPitch, - ::size_t slicePitch, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D_ARRAY, - width, - height, - 0, // depth (unused) - arraySize, - rowPitch, - slicePitch, - 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } - - Image2DArray() { } - - __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { } - - Image2DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DArray(const Image2DArray& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image2DArray& operator = (const Image2DArray &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DArray(Image2DArray&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image2DArray& operator = (Image2DArray &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if defined(CL_VERSION_1_2) - -/*! \brief Class interface for 3D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3D : public Image -{ -public: - /*! \brief Constructs a 3D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, - void* host_ptr = NULL, - cl_int* err = NULL) - { - cl_int error; - bool useCreateImage; - -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } -#elif defined(CL_VERSION_1_2) - useCreateImage = true; -#else - useCreateImage = false; -#endif - -#if defined(CL_VERSION_1_2) - if (useCreateImage) - { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE3D, - width, - height, - depth, - 0, // array size (unused) - row_pitch, - slice_pitch, - 0, 0, 0 - }; - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - if (!useCreateImage) - { - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - } - - //! \brief Default constructor - initializes to NULL. - Image3D() : Image() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3D(const Image3D& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3D& operator = (const Image3D &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3D(Image3D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3D& operator = (Image3D &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; - -#if !defined(CL_VERSION_1_2) -/*! \brief Class interface for GL 3D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3DGL : public Image3D -{ -public: - /*! \brief Constructs an Image3DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture3D(). - */ - Image3DGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. - Image3DGL() : Image3D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3DGL& operator = (const cl_mem& rhs) - { - Image3D::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3DGL(const Image3DGL& img) : Image3D(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Image3DGL& operator = (const Image3DGL &img) - { - Image3D::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3DGL(Image3DGL&& img) CL_HPP_NOEXCEPT : Image3D(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Image3DGL& operator = (Image3DGL &&img) - { - Image3D::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if !defined(CL_VERSION_1_2) - -#if defined(CL_VERSION_1_2) -/*! \class ImageGL - * \brief general image interface for GL interop. - * We abstract the 2D and 3D GL images into a single instance here - * that wraps all GL sourced images on the grounds that setup information - * was performed by OpenCL anyway. - */ -class ImageGL : public Image -{ -public: - ImageGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLTexture( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); - if (err != NULL) { - *err = error; - } - } - - ImageGL() : Image() { } - - __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { } - - ImageGL& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - ImageGL(const ImageGL& img) : Image(img) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - ImageGL& operator = (const ImageGL &img) - { - Image::operator=(img); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - ImageGL(ImageGL&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - ImageGL& operator = (ImageGL &&img) - { - Image::operator=(std::move(img)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if defined(CL_VERSION_1_2) - -/*! \brief Class interface for GL Render Buffer Memory Objects. -* -* This is provided to facilitate interoperability with OpenGL. -* -* See Memory for details about copy semantics, etc. -* -* \see Memory -*/ -class BufferRenderGL : -#if defined(CL_VERSION_1_2) - public ImageGL -#else // #if defined(CL_VERSION_1_2) - public Image2DGL -#endif //#if defined(CL_VERSION_1_2) -{ -public: - /*! \brief Constructs a BufferRenderGL in a specified context, from a given - * GL Renderbuffer. - * - * Wraps clCreateFromGLRenderbuffer(). - */ - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int * err = NULL) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } - - //! \brief Default constructor - initializes to NULL. -#if defined(CL_VERSION_1_2) - BufferRenderGL() : ImageGL() {}; -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL() : Image2DGL() {}; -#endif //#if defined(CL_VERSION_1_2) - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ -#if defined(CL_VERSION_1_2) - __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : ImageGL(buffer) { } -#else // #if defined(CL_VERSION_1_2) - __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Image2DGL(buffer) { } -#endif //#if defined(CL_VERSION_1_2) - - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL& operator = (const cl_mem& rhs) - { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(rhs); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(rhs); -#endif //#if defined(CL_VERSION_1_2) - - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ -#if defined(CL_VERSION_1_2) - BufferRenderGL(const BufferRenderGL& buf) : ImageGL(buf) {} -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL(const BufferRenderGL& buf) : Image2DGL(buf) {} -#endif //#if defined(CL_VERSION_1_2) - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferRenderGL& operator = (const BufferRenderGL &rhs) - { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(rhs); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(rhs); -#endif //#if defined(CL_VERSION_1_2) - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ -#if defined(CL_VERSION_1_2) - BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT : ImageGL(std::move(buf)) {} -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT : Image2DGL(std::move(buf)) {} -#endif //#if defined(CL_VERSION_1_2) - - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferRenderGL& operator = (BufferRenderGL &&buf) - { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(std::move(buf)); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(std::move(buf)); -#endif //#if defined(CL_VERSION_1_2) - - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - cl_GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_, type, gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief Class interface for cl_sampler. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_sampler as the original. For details, see - * clRetainSampler() and clReleaseSampler(). - * - * \see cl_sampler - */ -class Sampler : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to NULL. - Sampler() { } - - /*! \brief Constructs a Sampler in a specified context. - * - * Wraps clCreateSampler(). - */ - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Constructor from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_sampler - * into the new Sampler object. - */ - __CL_EXPLICIT_CONSTRUCTORS Sampler(const cl_sampler& sampler) : detail::Wrapper(sampler) { } - - /*! \brief Assignment operator from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseSampler() on the value previously held by this instance. - */ - Sampler& operator = (const cl_sampler& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Sampler(const Sampler& sam) : detail::Wrapper(sam) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Sampler& operator = (const Sampler &sam) - { - detail::Wrapper::operator=(sam); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Sampler(Sampler&& sam) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(sam)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Sampler& operator = (Sampler &&sam) - { - detail::Wrapper::operator=(std::move(sam)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - //! \brief Wrapper for clGetSamplerInfo(). - template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - //! \brief Wrapper for clGetSamplerInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -}; - -class Program; -class CommandQueue; -class Kernel; - -//! \brief Class interface for specifying NDRange values. -class NDRange -{ -private: - size_t<3> sizes_; - cl_uint dimensions_; - -public: - //! \brief Default constructor - resulting range has zero dimensions. - NDRange() - : dimensions_(0) - { } - - //! \brief Constructs one-dimensional range. - NDRange(::size_t size0) - : dimensions_(1) - { - sizes_[0] = size0; - } - - //! \brief Constructs two-dimensional range. - NDRange(::size_t size0, ::size_t size1) - : dimensions_(2) - { - sizes_[0] = size0; - sizes_[1] = size1; - } - - //! \brief Constructs three-dimensional range. - NDRange(::size_t size0, ::size_t size1, ::size_t size2) - : dimensions_(3) - { - sizes_[0] = size0; - sizes_[1] = size1; - sizes_[2] = size2; - } - - /*! \brief Conversion operator to const ::size_t *. - * - * \returns a pointer to the size of the first dimension. - */ - operator const ::size_t*() const { - return (const ::size_t*) sizes_; - } - - //! \brief Queries the number of dimensions in the range. - ::size_t dimensions() const { return dimensions_; } -}; - -//! \brief A zero-dimensional range. -static const NDRange NullRange; - -//! \brief Local address wrapper for use with Kernel::setArg -struct LocalSpaceArg -{ - ::size_t size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler -{ - static ::size_t size(const T&) { return sizeof(T); } - static const T* ptr(const T& value) { return &value; } -}; - -template <> -struct KernelArgumentHandler -{ - static ::size_t size(const LocalSpaceArg& value) { return value.size_; } - static const void* ptr(const LocalSpaceArg&) { return NULL; } -}; - -} -//! \endcond - -/*! __local - * \brief Helper function for generating LocalSpaceArg objects. - * Deprecated. Replaced with Local. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg -__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -/*! Local - * \brief Helper function for generating LocalSpaceArg objects. - */ -inline LocalSpaceArg -Local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -//class KernelFunctor; - -/*! \brief Class interface for cl_kernel. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_kernel as the original. For details, see - * clRetainKernel() and clReleaseKernel(). - * - * \see cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = NULL); - - //! \brief Default constructor - initializes to NULL. - Kernel() { } - - /*! \brief Constructor from cl_kernel - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_kernel - * into the new Kernel object. - */ - __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper(kernel) { } - - /*! \brief Assignment operator from cl_kernel - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseKernel() on the value previously held by this instance. - */ - Kernel& operator = (const cl_kernel& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Kernel(const Kernel& kernel) : detail::Wrapper(kernel) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Kernel& operator = (const Kernel &kernel) - { - detail::Wrapper::operator=(kernel); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Kernel(Kernel&& kernel) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(kernel)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Kernel& operator = (Kernel &&kernel) - { - detail::Wrapper::operator=(std::move(kernel)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - -#if defined(CL_VERSION_1_2) - template - cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), - __GET_KERNEL_ARG_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getArgInfo(cl_uint argIndex, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_arg_info, name>::param_type param; - cl_int result = getArgInfo(argIndex, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } -#endif // #if defined(CL_VERSION_1_2) - - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int setArg(cl_uint index, const T &value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, ::size_t size, const void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } -}; - -/*! \class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ -public: - typedef VECTOR_CLASS > Binaries; - typedef VECTOR_CLASS > Sources; - - Program( - const STRING_CLASS& source, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - - const char * strings = source.c_str(); - const ::size_t length = source.size(); - - Context context = Context::getDefault(err); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) { - - error = ::clBuildProgram( - object_, - 0, - NULL, - "", - NULL, - NULL); - - detail::errHandler(error, __BUILD_PROGRAM_ERR); - } - - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const STRING_CLASS& source, - bool build = false, - cl_int* err = NULL) - { - cl_int error; - - const char * strings = source.c_str(); - const ::size_t length = source.size(); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) { - - error = ::clBuildProgram( - object_, - 0, - NULL, - "", - NULL, - NULL); - - detail::errHandler(error, __BUILD_PROGRAM_ERR); - } - - if (err != NULL) { - *err = error; - } - } - - Program( - const Context& context, - const Sources& sources, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t n = (::size_t)sources.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); - - for (::size_t i = 0; i < n; ++i) { - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != NULL) { - *err = error; - } - } - - /** - * Construct a program object from a list of devices and a per-device list of binaries. - * \param context A valid OpenCL context in which to construct the program. - * \param devices A vector of OpenCL device objects for which the program will be created. - * \param binaries A vector of pairs of a pointer to a binary object and its length. - * \param binaryStatus An optional vector that on completion will be resized to - * match the size of binaries and filled with values to specify if each binary - * was successfully loaded. - * Set to CL_SUCCESS if the binary was successfully loaded. - * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL. - * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. - * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors: - * CL_INVALID_CONTEXT if context is not a valid context. - * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; - * or if any entry in binaries is NULL or has length 0. - * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. - * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. - */ - Program( - const Context& context, - const VECTOR_CLASS& devices, - const Binaries& binaries, - VECTOR_CLASS* binaryStatus = NULL, - cl_int* err = NULL) - { - cl_int error; - - const ::size_t numDevices = devices.size(); - - // Catch size mismatch early and return - if(binaries.size() != numDevices) { - error = CL_INVALID_VALUE; - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - return; - } - - ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**)); - - for (::size_t i = 0; i < numDevices; ++i) { - images[i] = (const unsigned char*)binaries[i].first; - lengths[i] = binaries[(int)i].second; - } - - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - if(binaryStatus) { - binaryStatus->resize(numDevices); - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - deviceIDs, - lengths, images, (binaryStatus != NULL && numDevices > 0) - ? &binaryStatus->front() - : NULL, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != NULL) { - *err = error; - } - } - - -#if defined(CL_VERSION_1_2) - /** - * Create program using builtin kernels. - * \param kernelNames Semi-colon separated list of builtin kernel names - */ - Program( - const Context& context, - const VECTOR_CLASS& devices, - const STRING_CLASS& kernelNames, - cl_int* err = NULL) - { - cl_int error; - - - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateProgramWithBuiltInKernels( - context(), - (cl_uint) devices.size(), - deviceIDs, - kernelNames.c_str(), - &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); - if (err != NULL) { - *err = error; - } - } -#endif // #if defined(CL_VERSION_1_2) - - Program() { } - - __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper(program) { } - - Program& operator = (const cl_program& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - Program(const Program& program) : detail::Wrapper(program) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - Program& operator = (const Program &program) - { - detail::Wrapper::operator=(program); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Program(Program&& program) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(program)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - Program& operator = (Program &&program) - { - detail::Wrapper::operator=(std::move(program)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - cl_int build( - const VECTOR_CLASS& devices, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - deviceIDs, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - - cl_int build( - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clBuildProgram( - object_, - 0, - NULL, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); - } - -#if defined(CL_VERSION_1_2) - cl_int compile( - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL) const - { - return detail::errHandler( - ::clCompileProgram( - object_, - 0, - NULL, - options, - 0, - NULL, - NULL, - notifyFptr, - data), - __COMPILE_PROGRAM_ERR); - } -#endif - - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int createKernels(VECTOR_CLASS* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); - err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel*) value, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - kernels->assign(&value[0], &value[numKernels]); - return CL_SUCCESS; - } -}; - -#if defined(CL_VERSION_1_2) -inline Program linkProgram( - Program input1, - Program input2, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL, - cl_int* err = NULL) -{ - cl_int error_local = CL_SUCCESS; - - cl_program programs[2] = { input1(), input2() }; - - Context ctx = input1.getInfo(&error_local); - if(error_local!=CL_SUCCESS) { - detail::errHandler(error_local, __LINK_PROGRAM_ERR); - } - - cl_program prog = ::clLinkProgram( - ctx(), - 0, - NULL, - options, - 2, - programs, - notifyFptr, - data, - &error_local); - - detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); - if (err != NULL) { - *err = error_local; - } - - return Program(prog); -} - -inline Program linkProgram( - VECTOR_CLASS inputPrograms, - const char* options = NULL, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, - void* data = NULL, - cl_int* err = NULL) -{ - cl_int error_local = CL_SUCCESS; - - cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program)); - - if (programs != NULL) { - for (unsigned int i = 0; i < inputPrograms.size(); i++) { - programs[i] = inputPrograms[i](); - } - } - - Context ctx; - if(inputPrograms.size() > 0) { - ctx = inputPrograms[0].getInfo(&error_local); - if(error_local!=CL_SUCCESS) { - detail::errHandler(error_local, __LINK_PROGRAM_ERR); - } - } - cl_program prog = ::clLinkProgram( - ctx(), - 0, - NULL, - options, - (cl_uint)inputPrograms.size(), - programs, - notifyFptr, - data, - &error_local); - - detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); - if (err != NULL) { - *err = error_local; - } - - return Program(prog); -} -#endif - -template<> -inline VECTOR_CLASS cl::Program::getInfo(cl_int* err) const -{ - VECTOR_CLASS< ::size_t> sizes = getInfo(); - VECTOR_CLASS binaries; - for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) - { - char *ptr = NULL; - if (*s != 0) - ptr = new char[*s]; - binaries.push_back(ptr); - } - - cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); - if (err != NULL) { - *err = result; - } - return binaries; -} - -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); - - if (err != NULL) { - *err = error; - } - -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ -private: -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - static std::atomic default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static volatile int default_initialized_; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static CommandQueue default_; - static volatile cl_int default_error_; -public: - CommandQueue( - cl_command_queue_properties properties, - cl_int* err = NULL) - { - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - } - /*! - * \brief Constructs a CommandQueue for an implementation defined device in the given context - */ - explicit CommandQueue( - const Context& context, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - VECTOR_CLASS devices; - error = context.getInfo(CL_CONTEXT_DEVICES, &devices); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) - { - if (err != NULL) { - *err = error; - } - return; - } - - object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (err != NULL) { - *err = error; - } - - } - - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = NULL) - { - cl_int error; - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - CommandQueue(const CommandQueue& queue) : detail::Wrapper(queue) {} - - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - CommandQueue& operator = (const CommandQueue &queue) - { - detail::Wrapper::operator=(queue); - return *this; - } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - CommandQueue(CommandQueue&& queue) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(queue)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - CommandQueue& operator = (CommandQueue &&queue) - { - detail::Wrapper::operator=(std::move(queue)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - - static CommandQueue getDefault(cl_int * err = NULL) - { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); - - if (state & __DEFAULT_INITIALIZED) { - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - if (state & __DEFAULT_BEING_INITIALIZED) { - // Assume writes will propagate eventually... - while(default_initialized_ != __DEFAULT_INITIALIZED) { - detail::fence(); - } - - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - - default_ = CommandQueue(context, device, 0, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != NULL) { - *err = error; - } - } - - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... - default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - - if (err != NULL) { - *err = default_error_; - } - return default_; - - } - - CommandQueue() { } - - __CL_EXPLICIT_CONSTRUCTORS CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper(commandQueue) { } - - CommandQueue& operator = (const cl_command_queue& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = NULL) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != NULL) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQEUE_COPY_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#if defined(CL_VERSION_1_1) - - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - const void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQEUE_COPY_BUFFER_RECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif //if defined(CL_VERSION_1_1) - -#if defined(CL_VERSION_1_2) - /** - * Enqueue a command to fill a buffer object with a pattern - * of a given size. The pattern is specified a as vector. - * \tparam PatternType The datatype of the pattern field. - * The pattern type must be an accepted OpenCL data type. - */ - template - cl_int enqueueFillBuffer( - const Buffer& buffer, - PatternType pattern, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillBuffer( - object_, - buffer(), - static_cast(&pattern), - sizeof(PatternType), - offset, - size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_READ_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_WRITE_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *)dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA floating-point color value if - * the image channel data type is not an unnormalized signed or - * unsigned data type. - */ - cl_int enqueueFillImage( - const Image& image, - cl_float4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA signed integer color value if - * the image channel data type is an unnormalized signed integer - * type. - */ - cl_int enqueueFillImage( - const Image& image, - cl_int4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA unsigned integer color value if - * the image channel data type is an unnormalized unsigned integer - * type. - */ - cl_int enqueueFillImage( - const Image& image, - cl_uint4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *) region, dst_offset, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *) dst_origin, (const ::size_t *) region, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_event tmp; - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - if (event != NULL && error == CL_SUCCESS) - *event = tmp; - - return result; - } - - void* enqueueMapImage( - const Image& buffer, - cl_bool blocking, - cl_map_flags flags, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t * row_pitch, - ::size_t * slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) const - { - cl_event tmp; - cl_int error; - void * result = ::clEnqueueMapImage( - object_, buffer(), blocking, flags, - (const ::size_t *) origin, (const ::size_t *) region, - row_pitch, slice_pitch, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != NULL) { - *err = error; - } - if (event != NULL && error == CL_SUCCESS) - *event = tmp; - return result; - } - - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined(CL_VERSION_1_2) - /** - * Enqueues a marker command which waits for either a list of events to complete, - * or all previously enqueued commands to complete. - * - * Enqueues a marker command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command returns an event which can be waited on, - * i.e. this event can be waited on to insure that all events either in the event_wait_list - * or all previously enqueued commands, queued before this command to command_queue, - * have completed. - */ - cl_int enqueueMarkerWithWaitList( - const VECTOR_CLASS *events = 0, - Event *event = 0) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarkerWithWaitList( - object_, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MARKER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * A synchronization point that enqueues a barrier operation. - * - * Enqueues a barrier command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command blocks command execution, that is, any - * following commands enqueued after it do not execute until it completes. This command - * returns an event which can be waited on, i.e. this event can be waited on to insure that - * all events either in the event_wait_list or all previously enqueued commands, queued - * before this command to command_queue, have completed. - */ - cl_int enqueueBarrierWithWaitList( - const VECTOR_CLASS *events = 0, - Event *event = 0) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueBarrierWithWaitList( - object_, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_BARRIER_WAIT_LIST_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command to indicate with which device a set of memory objects - * should be associated. - */ - cl_int enqueueMigrateMemObjects( - const VECTOR_CLASS &memObjects, - cl_mem_migration_flags flags, - const VECTOR_CLASS* events = NULL, - Event* event = NULL - ) const - { - cl_event tmp; - - cl_mem* localMemObjects = static_cast(alloca(memObjects.size() * sizeof(cl_mem))); - for( int i = 0; i < (int)memObjects.size(); ++i ) { - localMemObjects[i] = memObjects[i](); - } - - - cl_int err = detail::errHandler( - ::clEnqueueMigrateMemObjects( - object_, - (cl_uint)memObjects.size(), - static_cast(localMemObjects), - flags, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif // #if defined(CL_VERSION_1_2) - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local = NullRange, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, - (const ::size_t*) global, - local.dimensions() != 0 ? (const ::size_t*) local : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_NDRANGE_KERNEL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueTask( - const Kernel& kernel, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_TASK_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueNativeKernel( - void (CL_CALLBACK *userFptr)(void *), - std::pair args, - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* mem_locs = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) - ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) { - for (unsigned int i = 0; i < mem_objects->size(); i++) { - mems[i] = ((*mem_objects)[i])(); - } - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems, - (mem_locs != NULL && mem_locs->size() > 0) ? (const void **) &mem_locs->front() : NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_NATIVE_KERNEL); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarker( - object_, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MARKER_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - events.size() > 0 ? (const cl_event*) &events.front() : NULL), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } -#endif // #if defined(CL_VERSION_1_1) - - cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined (USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR); -#endif -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); -#endif - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_2) -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_1) - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; - } -#endif - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } -#endif // #if defined(CL_VERSION_1_1) - - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } - - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } -}; - -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -CL_WEAK_ATTRIB_PREFIX std::atomic CL_WEAK_ATTRIB_SUFFIX CommandQueue::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -CL_WEAK_ATTRIB_PREFIX volatile int CL_WEAK_ATTRIB_SUFFIX CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - -CL_WEAK_ATTRIB_PREFIX CommandQueue CL_WEAK_ATTRIB_SUFFIX CommandQueue::default_; -CL_WEAK_ATTRIB_PREFIX volatile cl_int CL_WEAK_ATTRIB_SUFFIX CommandQueue::default_error_ = CL_SUCCESS; - -template< typename IteratorType > -Buffer::Buffer( - const Context &context, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType)*(endIterator - startIterator); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if( !useHostPtr ) { - CommandQueue queue(context, 0, &error); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } -} - -template< typename IteratorType > -Buffer::Buffer( - const CommandQueue &queue, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if (readOnly) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if (useHostPtr) { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType)*(endIterator - startIterator); - - Context context = queue.getInfo(); - - if (useHostPtr) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } - else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - if (!useHostPtr) { - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - } -} - -inline cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - void * result = ::clEnqueueMapBuffer( - queue(), buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; -} - -inline cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (error != CL_SUCCESS) { - return error; - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - queue(), memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; -} - -inline cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, startIterator, endIterator, buffer); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, buffer, startIterator, endIterator); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses specified queue. - */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator-startIterator; - ::size_t byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } -#if defined(_MSC_VER) - std::copy( - startIterator, - endIterator, - stdext::checked_array_iterator( - pointer, length)); -#else - std::copy(startIterator, endIterator, pointer); -#endif - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses specified queue. - */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator-startIterator; - ::size_t byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } - std::copy(pointer, pointer + length, startIterator); - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -#if defined(CL_VERSION_1_1) -inline cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - const void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferRect( - src, - dst, - src_origin, - dst_origin, - region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - events, - event); -} -#endif - -inline cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImage( - src, - dst, - src_origin, - dst_origin, - region, - events, - event); -} - -inline cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImageToBuffer( - src, - dst, - src_origin, - region, - dst_offset, - events, - event); -} - -inline cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferToImage( - src, - dst, - src_offset, - dst_origin, - region, - events, - event); -} - - -inline cl_int flush(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.flush(); -} - -inline cl_int finish(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - - return queue.finish(); -} - -// Kernel Functor support -// New interface as of September 2011 -// Requires the C++11 std::tr1::function (note do not support TR1) -// Visual Studio 2010 and GCC 4.2 - -struct EnqueueArgs -{ - CommandQueue queue_; - const NDRange offset_; - const NDRange global_; - const NDRange local_; - VECTOR_CLASS events_; - - EnqueueArgs(NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - - } - - EnqueueArgs(NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(Event e, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } -}; - -namespace detail { - -class NullType {}; - -template -struct SetArg -{ - static void set (Kernel kernel, T0 arg) - { - kernel.setArg(index, arg); - } -}; - -template -struct SetArg -{ - static void set (Kernel, NullType) - { - } -}; - -template < - typename T0, typename T1, typename T2, typename T3, - typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, - typename T12, typename T13, typename T14, typename T15, - typename T16, typename T17, typename T18, typename T19, - typename T20, typename T21, typename T22, typename T23, - typename T24, typename T25, typename T26, typename T27, - typename T28, typename T29, typename T30, typename T31 - -> -class KernelFunctorGlobal -{ -private: - Kernel kernel_; - -public: - KernelFunctorGlobal( - Kernel kernel) : - kernel_(kernel) - {} - - KernelFunctorGlobal( - const Program& program, - const STRING_CLASS name, - cl_int * err = NULL) : - kernel_(program, name.c_str(), err) - {} - - Event operator() ( - const EnqueueArgs& args, - T0 t0, - T1 t1 = NullType(), - T2 t2 = NullType(), - T3 t3 = NullType(), - T4 t4 = NullType(), - T5 t5 = NullType(), - T6 t6 = NullType(), - T7 t7 = NullType(), - T8 t8 = NullType(), - T9 t9 = NullType(), - T10 t10 = NullType(), - T11 t11 = NullType(), - T12 t12 = NullType(), - T13 t13 = NullType(), - T14 t14 = NullType(), - T15 t15 = NullType(), - T16 t16 = NullType(), - T17 t17 = NullType(), - T18 t18 = NullType(), - T19 t19 = NullType(), - T20 t20 = NullType(), - T21 t21 = NullType(), - T22 t22 = NullType(), - T23 t23 = NullType(), - T24 t24 = NullType(), - T25 t25 = NullType(), - T26 t26 = NullType(), - T27 t27 = NullType(), - T28 t28 = NullType(), - T29 t29 = NullType(), - T30 t30 = NullType(), - T31 t31 = NullType() - - ) - { - Event event; - SetArg<0, T0>::set(kernel_, t0); - SetArg<1, T1>::set(kernel_, t1); - SetArg<2, T2>::set(kernel_, t2); - SetArg<3, T3>::set(kernel_, t3); - SetArg<4, T4>::set(kernel_, t4); - SetArg<5, T5>::set(kernel_, t5); - SetArg<6, T6>::set(kernel_, t6); - SetArg<7, T7>::set(kernel_, t7); - SetArg<8, T8>::set(kernel_, t8); - SetArg<9, T9>::set(kernel_, t9); - SetArg<10, T10>::set(kernel_, t10); - SetArg<11, T11>::set(kernel_, t11); - SetArg<12, T12>::set(kernel_, t12); - SetArg<13, T13>::set(kernel_, t13); - SetArg<14, T14>::set(kernel_, t14); - SetArg<15, T15>::set(kernel_, t15); - SetArg<16, T16>::set(kernel_, t16); - SetArg<17, T17>::set(kernel_, t17); - SetArg<18, T18>::set(kernel_, t18); - SetArg<19, T19>::set(kernel_, t19); - SetArg<20, T20>::set(kernel_, t20); - SetArg<21, T21>::set(kernel_, t21); - SetArg<22, T22>::set(kernel_, t22); - SetArg<23, T23>::set(kernel_, t23); - SetArg<24, T24>::set(kernel_, t24); - SetArg<25, T25>::set(kernel_, t25); - SetArg<26, T26>::set(kernel_, t26); - SetArg<27, T27>::set(kernel_, t27); - SetArg<28, T28>::set(kernel_, t28); - SetArg<29, T29>::set(kernel_, t29); - SetArg<30, T30>::set(kernel_, t30); - SetArg<31, T31>::set(kernel_, t31); - - - args.queue_.enqueueNDRangeKernel( - kernel_, - args.offset_, - args.global_, - args.local_, - &args.events_, - &event); - - return event; - } - -}; - -//------------------------------------------------------------------------------------------------------ - - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30, - typename T31> -struct functionImplementation_ -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30, - T31 arg31) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30, - arg31); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3); - } - - -}; - -template< - typename T0, - typename T1, - typename T2> -struct functionImplementation_ -< T0, - T1, - T2, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2); - } - - -}; - -template< - typename T0, - typename T1> -struct functionImplementation_ -< T0, - T1, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1) - { - return functor_( - enqueueArgs, - arg0, - arg1); - } - - -}; - -template< - typename T0> -struct functionImplementation_ -< T0, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0) - { - return functor_( - enqueueArgs, - arg0); - } - - -}; - - - - - -} // namespace detail - -//---------------------------------------------------------------------------------------------- - -template < - typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, - typename T3 = detail::NullType, typename T4 = detail::NullType, - typename T5 = detail::NullType, typename T6 = detail::NullType, - typename T7 = detail::NullType, typename T8 = detail::NullType, - typename T9 = detail::NullType, typename T10 = detail::NullType, - typename T11 = detail::NullType, typename T12 = detail::NullType, - typename T13 = detail::NullType, typename T14 = detail::NullType, - typename T15 = detail::NullType, typename T16 = detail::NullType, - typename T17 = detail::NullType, typename T18 = detail::NullType, - typename T19 = detail::NullType, typename T20 = detail::NullType, - typename T21 = detail::NullType, typename T22 = detail::NullType, - typename T23 = detail::NullType, typename T24 = detail::NullType, - typename T25 = detail::NullType, typename T26 = detail::NullType, - typename T27 = detail::NullType, typename T28 = detail::NullType, - typename T29 = detail::NullType, typename T30 = detail::NullType, - typename T31 = detail::NullType - -> -struct make_kernel : - public detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - - > -{ -public: - typedef detail::KernelFunctorGlobal< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - - > FunctorType; - - make_kernel( - const Program& program, - const STRING_CLASS name, - cl_int * err = NULL) : - detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - - >( - FunctorType(program, name, err)) - {} - - make_kernel( - const Kernel kernel) : - detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - - >( - FunctorType(kernel)) - {} -}; - - -//---------------------------------------------------------------------------------------------------------------------- - -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_ARG_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR - -#undef __CREATE_CONTEXT_ERR -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR - -#undef __CREATE_BUFFER_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_SAMPLER_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR - -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR -#undef __SET_PRINTF_CALLBACK_ERR - -#undef __WAIT_FOR_EVENTS_ERR - -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR -#undef __BUILD_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR - -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef __ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_TASK_ERR -#undef __ENQUEUE_NATIVE_KERNEL - -#undef __CL_EXPLICIT_CONSTRUCTORS - -#undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS - -#undef __CL_FUNCTION_TYPE - -// Extensions -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_VERSION_1_1) -#undef __INIT_CL_EXT_FCN_PTR -#endif // #if defined(CL_VERSION_1_1) -#undef __CREATE_SUB_DEVICES - -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION - -#undef __DEFAULT_NOT_INITIALIZED -#undef __DEFAULT_BEING_INITIALIZED -#undef __DEFAULT_INITIALIZED - -#undef CL_HPP_RVALUE_REFERENCES_SUPPORTED -#undef CL_HPP_NOEXCEPT - -} // namespace cl - -#endif // CL_HPP_ diff --git a/include/CL/cl_d3d10.h b/include/CL/cl_d3d10.h index 2b80d90cb..0d9950bed 100644 --- a/include/CL/cl_d3d10.h +++ b/include/CL/cl_d3d10.h @@ -21,6 +21,7 @@ #if _MSC_VER >=1500 #pragma warning( push ) #pragma warning( disable : 4201 ) +#pragma warning( disable : 5105 ) #endif #endif #include @@ -75,7 +76,7 @@ typedef cl_uint cl_d3d10_device_set_khr; /******************************************************************************/ -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( +typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void * d3d_object, @@ -84,27 +85,27 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)( cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Buffer * resource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture2D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D10Texture3D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -112,7 +113,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)( const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -120,6 +121,31 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)( const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; +/*************************************************************** +* cl_intel_sharing_format_query_d3d10 +***************************************************************/ +#define cl_intel_sharing_format_query_d3d10 1 + +/* when cl_khr_d3d10_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedD3D10TextureFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + DXGI_FORMAT* d3d10_formats, + cl_uint* num_texture_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedD3D10TextureFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + DXGI_FORMAT* d3d10_formats, + cl_uint* num_texture_formats) ; + #ifdef __cplusplus } #endif diff --git a/include/CL/cl_d3d11.h b/include/CL/cl_d3d11.h index 10023dde0..9393e5c84 100644 --- a/include/CL/cl_d3d11.h +++ b/include/CL/cl_d3d11.h @@ -21,6 +21,7 @@ #if _MSC_VER >=1500 #pragma warning( push ) #pragma warning( disable : 4201 ) +#pragma warning( disable : 5105 ) #endif #endif #include @@ -75,7 +76,7 @@ typedef cl_uint cl_d3d11_device_set_khr; /******************************************************************************/ -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( +typedef cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, void * d3d_object, @@ -84,27 +85,27 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)( cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Buffer * resource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Texture2D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)( cl_context context, cl_mem_flags flags, ID3D11Texture3D * resource, UINT subresource, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -112,7 +113,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)( const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -120,6 +121,33 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)( const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; +/*************************************************************** +* cl_intel_sharing_format_query_d3d11 +***************************************************************/ +#define cl_intel_sharing_format_query_d3d11 1 + +/* when cl_khr_d3d11_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedD3D11TextureFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + DXGI_FORMAT* d3d11_formats, + cl_uint* num_texture_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedD3D11TextureFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + DXGI_FORMAT* d3d11_formats, + cl_uint* num_texture_formats) ; + #ifdef __cplusplus } #endif diff --git a/include/CL/cl_dx9_media_sharing.h b/include/CL/cl_dx9_media_sharing.h index 048937005..fd03bbdc2 100644 --- a/include/CL/cl_dx9_media_sharing.h +++ b/include/CL/cl_dx9_media_sharing.h @@ -32,7 +32,19 @@ typedef cl_uint cl_dx9_media_adapter_type_khr; typedef cl_uint cl_dx9_media_adapter_set_khr; #if defined(_WIN32) +#if defined(_MSC_VER) +#if _MSC_VER >=1500 +#pragma warning( push ) +#pragma warning( disable : 4201 ) +#pragma warning( disable : 5105 ) +#endif +#endif #include +#if defined(_MSC_VER) +#if _MSC_VER >=1500 +#pragma warning( pop ) +#endif +#endif typedef struct _cl_dx9_surface_info_khr { IDirect3DSurface9 *resource; @@ -76,7 +88,7 @@ typedef struct _cl_dx9_surface_info_khr /******************************************************************************/ -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( +typedef cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)( cl_platform_id platform, cl_uint num_media_adapters, cl_dx9_media_adapter_type_khr * media_adapter_type, @@ -86,7 +98,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_f cl_device_id * devices, cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapter_type, @@ -94,7 +106,7 @@ typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)( cl_uint plane, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -102,7 +114,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -110,6 +122,144 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_2; +/*************************************** +* cl_intel_dx9_media_sharing extension * +****************************************/ + +#define cl_intel_dx9_media_sharing 1 + +typedef cl_uint cl_dx9_device_source_intel; +typedef cl_uint cl_dx9_device_set_intel; + +/* error codes */ +#define CL_INVALID_DX9_DEVICE_INTEL -1010 +#define CL_INVALID_DX9_RESOURCE_INTEL -1011 +#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012 +#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013 + +/* cl_dx9_device_source_intel */ +#define CL_D3D9_DEVICE_INTEL 0x4022 +#define CL_D3D9EX_DEVICE_INTEL 0x4070 +#define CL_DXVA_DEVICE_INTEL 0x4071 + +/* cl_dx9_device_set_intel */ +#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024 +#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025 + +/* cl_context_info */ +#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026 +#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072 +#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073 + +/* cl_mem_info */ +#define CL_MEM_DX9_RESOURCE_INTEL 0x4027 +#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074 + +/* cl_image_info */ +#define CL_IMAGE_DX9_PLANE_INTEL 0x4075 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A +#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B +/******************************************************************************/ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetDeviceIDsFromDX9INTEL( + cl_platform_id platform, + cl_dx9_device_source_intel dx9_device_source, + void* dx9_object, + cl_dx9_device_set_intel dx9_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)( + cl_platform_id platform, + cl_dx9_device_source_intel dx9_device_source, + void* dx9_object, + cl_dx9_device_set_intel dx9_device_set, + cl_uint num_entries, + cl_device_id* devices, + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateFromDX9MediaSurfaceINTEL( + cl_context context, + cl_mem_flags flags, + IDirect3DSurface9* resource, + HANDLE sharedHandle, + UINT plane, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)( + cl_context context, + cl_mem_flags flags, + IDirect3DSurface9* resource, + HANDLE sharedHandle, + UINT plane, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseDX9ObjectsINTEL( + cl_command_queue command_queue, + cl_uint num_objects, + cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +typedef cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)( + cl_command_queue command_queue, + cl_uint num_objects, + cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_1; + +/*************************************************************** +* cl_intel_sharing_format_query_dx9 +***************************************************************/ +#define cl_intel_sharing_format_query_dx9 1 + +/* when cl_khr_dx9_media_sharing or cl_intel_dx9_media_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedDX9MediaSurfaceFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + D3DFORMAT* dx9_formats, + cl_uint* num_surface_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedDX9MediaSurfaceFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + D3DFORMAT* dx9_formats, + cl_uint* num_surface_formats) ; + #ifdef __cplusplus } #endif diff --git a/include/CL/cl_dx9_media_sharing_intel.h b/include/CL/cl_dx9_media_sharing_intel.h index 4525a175e..f6518d7f6 100644 --- a/include/CL/cl_dx9_media_sharing_intel.h +++ b/include/CL/cl_dx9_media_sharing_intel.h @@ -13,158 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ -/*****************************************************************************\ - -Copyright (c) 2013-2019 Intel Corporation All Rights Reserved. - -THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE -MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -File Name: cl_dx9_media_sharing_intel.h - -Abstract: - -Notes: - -\*****************************************************************************/ - -#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H -#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H - -#include -#include -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/*************************************** -* cl_intel_dx9_media_sharing extension * -****************************************/ - -#define cl_intel_dx9_media_sharing 1 - -typedef cl_uint cl_dx9_device_source_intel; -typedef cl_uint cl_dx9_device_set_intel; - -/* error codes */ -#define CL_INVALID_DX9_DEVICE_INTEL -1010 -#define CL_INVALID_DX9_RESOURCE_INTEL -1011 -#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012 -#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013 - -/* cl_dx9_device_source_intel */ -#define CL_D3D9_DEVICE_INTEL 0x4022 -#define CL_D3D9EX_DEVICE_INTEL 0x4070 -#define CL_DXVA_DEVICE_INTEL 0x4071 - -/* cl_dx9_device_set_intel */ -#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024 -#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025 - -/* cl_context_info */ -#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026 -#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072 -#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073 - -/* cl_mem_info */ -#define CL_MEM_DX9_RESOURCE_INTEL 0x4027 -#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074 - -/* cl_image_info */ -#define CL_IMAGE_DX9_PLANE_INTEL 0x4075 - -/* cl_command_type */ -#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A -#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B -/******************************************************************************/ - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetDeviceIDsFromDX9INTEL( - cl_platform_id platform, - cl_dx9_device_source_intel dx9_device_source, - void* dx9_object, - cl_dx9_device_set_intel dx9_device_set, - cl_uint num_entries, - cl_device_id* devices, - cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)( - cl_platform_id platform, - cl_dx9_device_source_intel dx9_device_source, - void* dx9_object, - cl_dx9_device_set_intel dx9_device_set, - cl_uint num_entries, - cl_device_id* devices, - cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_1; - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateFromDX9MediaSurfaceINTEL( - cl_context context, - cl_mem_flags flags, - IDirect3DSurface9* resource, - HANDLE sharedHandle, - UINT plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)( - cl_context context, - cl_mem_flags flags, - IDirect3DSurface9* resource, - HANDLE sharedHandle, - UINT plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueAcquireDX9ObjectsINTEL( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - const cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueReleaseDX9ObjectsINTEL( - cl_command_queue command_queue, - cl_uint num_objects, - cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)( - cl_command_queue command_queue, - cl_uint num_objects, - cl_mem* mem_objects, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_1; - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */ +#include +#pragma message("The Intel DX9 media sharing extensions have been moved into cl_dx9_media_sharing.h. Please include cl_dx9_media_sharing.h directly.") diff --git a/include/CL/cl_egl.h b/include/CL/cl_egl.h index c8bde80e1..357a37c02 100644 --- a/include/CL/cl_egl.h +++ b/include/CL/cl_egl.h @@ -56,7 +56,7 @@ clCreateFromEGLImageKHR(cl_context context, const cl_egl_image_properties_khr * properties, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( +typedef cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)( cl_context context, CLeglDisplayKHR egldisplay, CLeglImageKHR eglimage, @@ -73,7 +73,7 @@ clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -90,7 +90,7 @@ clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( +typedef cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem * mem_objects, @@ -107,7 +107,7 @@ clCreateEventFromEGLSyncKHR(cl_context context, CLeglDisplayKHR display, cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( +typedef cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)( cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, diff --git a/include/CL/cl_ext.h b/include/CL/cl_ext.h index b57acfc87..b6ea825b5 100644 --- a/include/CL/cl_ext.h +++ b/include/CL/cl_ext.h @@ -26,6 +26,494 @@ extern "C" { #include +/*************************************************************** +* cl_khr_command_buffer +***************************************************************/ +#define cl_khr_command_buffer 1 +#define CL_KHR_COMMAND_BUFFER_EXTENSION_NAME \ + "cl_khr_command_buffer" + +typedef cl_bitfield cl_device_command_buffer_capabilities_khr; +typedef struct _cl_command_buffer_khr* cl_command_buffer_khr; +typedef cl_uint cl_sync_point_khr; +typedef cl_uint cl_command_buffer_info_khr; +typedef cl_uint cl_command_buffer_state_khr; +typedef cl_properties cl_command_buffer_properties_khr; +typedef cl_bitfield cl_command_buffer_flags_khr; +typedef cl_properties cl_ndrange_kernel_command_properties_khr; +typedef struct _cl_mutable_command_khr* cl_mutable_command_khr; + +/* cl_device_info */ +#define CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR 0x12A9 +#define CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR 0x12AA + +/* cl_device_command_buffer_capabilities_khr - bitfield */ +#define CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR (1 << 0) +#define CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR (1 << 1) +#define CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR (1 << 2) +#define CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR (1 << 3) + +/* cl_command_buffer_properties_khr */ +#define CL_COMMAND_BUFFER_FLAGS_KHR 0x1293 + +/* cl_command_buffer_flags_khr */ +#define CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR (1 << 0) + +/* Error codes */ +#define CL_INVALID_COMMAND_BUFFER_KHR -1138 +#define CL_INVALID_SYNC_POINT_WAIT_LIST_KHR -1139 +#define CL_INCOMPATIBLE_COMMAND_QUEUE_KHR -1140 + +/* cl_command_buffer_info_khr */ +#define CL_COMMAND_BUFFER_QUEUES_KHR 0x1294 +#define CL_COMMAND_BUFFER_NUM_QUEUES_KHR 0x1295 +#define CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR 0x1296 +#define CL_COMMAND_BUFFER_STATE_KHR 0x1297 +#define CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR 0x1298 + +/* cl_command_buffer_state_khr */ +#define CL_COMMAND_BUFFER_STATE_RECORDING_KHR 0 +#define CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR 1 +#define CL_COMMAND_BUFFER_STATE_PENDING_KHR 2 +#define CL_COMMAND_BUFFER_STATE_INVALID_KHR 3 + +/* cl_command_type */ +#define CL_COMMAND_COMMAND_BUFFER_KHR 0x12A8 + + +typedef cl_command_buffer_khr (CL_API_CALL * +clCreateCommandBufferKHR_fn)( + cl_uint num_queues, + const cl_command_queue* queues, + const cl_command_buffer_properties_khr* properties, + cl_int* errcode_ret) ; + +typedef cl_int (CL_API_CALL * +clFinalizeCommandBufferKHR_fn)( + cl_command_buffer_khr command_buffer) ; + +typedef cl_int (CL_API_CALL * +clRetainCommandBufferKHR_fn)( + cl_command_buffer_khr command_buffer) ; + +typedef cl_int (CL_API_CALL * +clReleaseCommandBufferKHR_fn)( + cl_command_buffer_khr command_buffer) ; + +typedef cl_int (CL_API_CALL * +clEnqueueCommandBufferKHR_fn)( + cl_uint num_queues, + cl_command_queue* queues, + cl_command_buffer_khr command_buffer, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +typedef cl_int (CL_API_CALL * +clCommandBarrierWithWaitListKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyBufferKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyBufferRectKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyBufferToImageKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyImageKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandCopyImageToBufferKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* region, + size_t dst_offset, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandFillBufferKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const void* pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandFillImageKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const void* fill_color, + const size_t* origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clCommandNDRangeKernelKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + const cl_ndrange_kernel_command_properties_khr* properties, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +typedef cl_int (CL_API_CALL * +clGetCommandBufferInfoKHR_fn)( + cl_command_buffer_khr command_buffer, + cl_command_buffer_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_command_buffer_khr CL_API_CALL +clCreateCommandBufferKHR( + cl_uint num_queues, + const cl_command_queue* queues, + const cl_command_buffer_properties_khr* properties, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clFinalizeCommandBufferKHR( + cl_command_buffer_khr command_buffer) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainCommandBufferKHR( + cl_command_buffer_khr command_buffer) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseCommandBufferKHR( + cl_command_buffer_khr command_buffer) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueCommandBufferKHR( + cl_uint num_queues, + cl_command_queue* queues, + cl_command_buffer_khr command_buffer, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandBarrierWithWaitListKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyBufferKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + size_t src_offset, + size_t dst_offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyBufferRectKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + size_t src_row_pitch, + size_t src_slice_pitch, + size_t dst_row_pitch, + size_t dst_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyBufferToImageKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_buffer, + cl_mem dst_image, + size_t src_offset, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyImageKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_image, + const size_t* src_origin, + const size_t* dst_origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandCopyImageToBufferKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem src_image, + cl_mem dst_buffer, + const size_t* src_origin, + const size_t* region, + size_t dst_offset, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandFillBufferKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const void* pattern, + size_t pattern_size, + size_t offset, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandFillImageKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const void* fill_color, + const size_t* origin, + const size_t* region, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandNDRangeKernelKHR( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + const cl_ndrange_kernel_command_properties_khr* properties, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + const size_t* local_work_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetCommandBufferInfoKHR( + cl_command_buffer_khr command_buffer, + cl_command_buffer_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#endif /* CL_NO_PROTOTYPES */ + +/*************************************************************** +* cl_khr_command_buffer_mutable_dispatch +***************************************************************/ +#define cl_khr_command_buffer_mutable_dispatch 1 +#define CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_EXTENSION_NAME \ + "cl_khr_command_buffer_mutable_dispatch" + +typedef cl_uint cl_command_buffer_structure_type_khr; +typedef cl_bitfield cl_mutable_dispatch_fields_khr; +typedef cl_uint cl_mutable_command_info_khr; +typedef struct _cl_mutable_dispatch_arg_khr { + cl_uint arg_index; + size_t arg_size; + const void* arg_value; +} cl_mutable_dispatch_arg_khr; +typedef struct _cl_mutable_dispatch_exec_info_khr { + cl_uint param_name; + size_t param_value_size; + const void* param_value; +} cl_mutable_dispatch_exec_info_khr; +typedef struct _cl_mutable_dispatch_config_khr { + cl_command_buffer_structure_type_khr type; + const void* next; + cl_mutable_command_khr command; + cl_uint num_args; + cl_uint num_svm_args; + cl_uint num_exec_infos; + cl_uint work_dim; + const cl_mutable_dispatch_arg_khr* arg_list; + const cl_mutable_dispatch_arg_khr* arg_svm_list; + const cl_mutable_dispatch_exec_info_khr* exec_info_list; + const size_t* global_work_offset; + const size_t* global_work_size; + const size_t* local_work_size; +} cl_mutable_dispatch_config_khr; +typedef struct _cl_mutable_base_config_khr { + cl_command_buffer_structure_type_khr type; + const void* next; + cl_uint num_mutable_dispatch; + const cl_mutable_dispatch_config_khr* mutable_dispatch_list; +} cl_mutable_base_config_khr; + +/* cl_command_buffer_flags_khr - bitfield */ +#define CL_COMMAND_BUFFER_MUTABLE_KHR (1 << 1) + +/* Error codes */ +#define CL_INVALID_MUTABLE_COMMAND_KHR -1141 + +/* cl_device_info */ +#define CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR 0x12B0 + +/* cl_ndrange_kernel_command_properties_khr */ +#define CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR 0x12B1 + +/* cl_mutable_dispatch_fields_khr - bitfield */ +#define CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR (1 << 0) +#define CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR (1 << 1) +#define CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR (1 << 2) +#define CL_MUTABLE_DISPATCH_ARGUMENTS_KHR (1 << 3) +#define CL_MUTABLE_DISPATCH_EXEC_INFO_KHR (1 << 4) + +/* cl_mutable_command_info_khr */ +#define CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR 0x12A0 +#define CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR 0x12A1 +#define CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR 0x12AD +#define CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR 0x12A2 +#define CL_MUTABLE_DISPATCH_KERNEL_KHR 0x12A3 +#define CL_MUTABLE_DISPATCH_DIMENSIONS_KHR 0x12A4 +#define CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR 0x12A5 +#define CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR 0x12A6 +#define CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR 0x12A7 + +/* cl_command_buffer_structure_type_khr */ +#define CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR 0 +#define CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR 1 + + +typedef cl_int (CL_API_CALL * +clUpdateMutableCommandsKHR_fn)( + cl_command_buffer_khr command_buffer, + const cl_mutable_base_config_khr* mutable_config) ; + +typedef cl_int (CL_API_CALL * +clGetMutableCommandInfoKHR_fn)( + cl_mutable_command_khr command, + cl_mutable_command_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_int CL_API_CALL +clUpdateMutableCommandsKHR( + cl_command_buffer_khr command_buffer, + const cl_mutable_base_config_khr* mutable_config) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMutableCommandInfoKHR( + cl_mutable_command_khr command, + cl_mutable_command_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +#endif /* CL_NO_PROTOTYPES */ + /* cl_khr_fp64 extension - no extension #define since it has no functions */ /* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */ @@ -54,9 +542,9 @@ extern "C" { * before using. */ #define cl_APPLE_SetMemObjectDestructor 1 -cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj, +extern CL_API_ENTRY cl_int CL_API_CALL clSetMemObjectDestructorAPPLE( cl_mem memobj, void (* pfn_notify)(cl_mem memobj, void * user_data), - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + void * user_data) CL_API_SUFFIX__VERSION_1_0; /* Context Logging Functions @@ -68,22 +556,22 @@ cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj, * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger */ #define cl_APPLE_ContextLoggingFunctions 1 -extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr, +extern CL_API_ENTRY void CL_API_CALL clLogMessagesToSystemLogAPPLE( const char * errstr, const void * private_info, size_t cb, - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + void * user_data) CL_API_SUFFIX__VERSION_1_0; /* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ -extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr, +extern CL_API_ENTRY void CL_API_CALL clLogMessagesToStdoutAPPLE( const char * errstr, const void * private_info, size_t cb, - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + void * user_data) CL_API_SUFFIX__VERSION_1_0; /* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ -extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr, +extern CL_API_ENTRY void CL_API_CALL clLogMessagesToStderrAPPLE( const char * errstr, const void * private_info, size_t cb, - void * user_data) CL_EXT_SUFFIX__VERSION_1_0; + void * user_data) CL_API_SUFFIX__VERSION_1_0; /************************ @@ -102,7 +590,7 @@ clIcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms); -typedef CL_API_ENTRY cl_int +typedef cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint num_entries, cl_platform_id * platforms, cl_uint * num_platforms); @@ -129,11 +617,11 @@ clCreateProgramWithILKHR(cl_context context, size_t length, cl_int * errcode_ret); -typedef CL_API_ENTRY cl_program +typedef cl_program (CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context context, const void * il, size_t length, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; /* Extension: cl_khr_image2d_from_buffer * @@ -176,10 +664,10 @@ typedef CL_API_ENTRY cl_program #define cl_khr_terminate_context 1 extern CL_API_ENTRY cl_int CL_API_CALL -clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; +clTerminateContextKHR(cl_context context) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int -(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2; +typedef cl_int +(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_API_SUFFIX__VERSION_1_2; /* @@ -204,13 +692,13 @@ extern CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueueWithPropertiesKHR(cl_context context, cl_device_id device, const cl_queue_properties_khr* properties, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_command_queue +typedef cl_command_queue (CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context, cl_device_id device, const cl_queue_properties_khr* properties, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; /****************************************** @@ -226,6 +714,11 @@ typedef CL_API_ENTRY cl_command_queue #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 +/* extension to cl_nv_device_attribute_query */ +#define CL_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT_NV 0x4007 +#define CL_DEVICE_PCI_BUS_ID_NV 0x4008 +#define CL_DEVICE_PCI_SLOT_ID_NV 0x4009 +#define CL_DEVICE_PCI_DOMAIN_ID_NV 0x400A /********************************* * cl_amd_device_attribute_query * @@ -268,16 +761,16 @@ typedef CL_API_ENTRY cl_command_queue #define cl_ext_device_fission 1 extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; +clReleaseDeviceEXT(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_int -(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; +typedef cl_int +(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL -clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; +clRetainDeviceEXT(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_int -(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1; +typedef cl_int +(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_API_SUFFIX__VERSION_1_1; typedef cl_ulong cl_device_partition_property_ext; extern CL_API_ENTRY cl_int CL_API_CALL @@ -285,14 +778,14 @@ clCreateSubDevicesEXT(cl_device_id in_device, const cl_device_partition_property_ext * properties, cl_uint num_entries, cl_device_id * out_devices, - cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_int +typedef cl_int (CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id in_device, const cl_device_partition_property_ext * properties, cl_uint num_entries, cl_device_id * out_devices, - cl_uint * num_devices) CL_EXT_SUFFIX__VERSION_1_1; + cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_1; /* cl_device_partition_property_ext */ #define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 @@ -346,7 +839,7 @@ clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event); -typedef CL_API_ENTRY cl_int +typedef cl_int (CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem * mem_objects, @@ -490,7 +983,7 @@ clEnqueueAcquireGrallocObjectsIMG(cl_command_queue command_queue, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue, @@ -498,7 +991,7 @@ clEnqueueReleaseGrallocObjectsIMG(cl_command_queue command_queue, const cl_mem * mem_objects, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; /****************************************** * cl_img_generate_mipmap extension * @@ -523,7 +1016,7 @@ clEnqueueGenerateMipmapIMG(cl_command_queue command_queue, const size_t *mip_region, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event *event) CL_API_SUFFIX__VERSION_1_2; /****************************************** * cl_img_mem_properties extension * @@ -564,9 +1057,9 @@ clGetKernelSubGroupInfoKHR(cl_kernel in_kernel, const void * input_value, size_t param_value_size, void * param_value, - size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0_DEPRECATED; -typedef CL_API_ENTRY cl_int +typedef cl_int (CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel in_kernel, cl_device_id in_device, cl_kernel_sub_group_info param_name, @@ -574,7 +1067,7 @@ typedef CL_API_ENTRY cl_int const void * input_value, size_t param_value_size, void * param_value, - size_t * param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED; + size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_2_0_DEPRECATED; /********************************* @@ -694,6 +1187,361 @@ typedef struct _cl_name_version_khr #define CL_DEVICE_NODE_MASK_KHR 0x106E +/*************************************************************** +* cl_khr_pci_bus_info +***************************************************************/ +#define cl_khr_pci_bus_info 1 + +typedef struct _cl_device_pci_bus_info_khr { + cl_uint pci_domain; + cl_uint pci_bus; + cl_uint pci_device; + cl_uint pci_function; +} cl_device_pci_bus_info_khr; + +/* cl_device_info */ +#define CL_DEVICE_PCI_BUS_INFO_KHR 0x410F + + +/*************************************************************** +* cl_khr_suggested_local_work_size +***************************************************************/ +#define cl_khr_suggested_local_work_size 1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetKernelSuggestedLocalWorkSizeKHR( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0; + +typedef cl_int (CL_API_CALL * +clGetKernelSuggestedLocalWorkSizeKHR_fn)( + cl_command_queue command_queue, + cl_kernel kernel, + cl_uint work_dim, + const size_t* global_work_offset, + const size_t* global_work_size, + size_t* suggested_local_work_size) CL_API_SUFFIX__VERSION_3_0; + + +/*************************************************************** +* cl_khr_integer_dot_product +***************************************************************/ +#define cl_khr_integer_dot_product 1 + +typedef cl_bitfield cl_device_integer_dot_product_capabilities_khr; + +/* cl_device_integer_dot_product_capabilities_khr */ +#define CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR (1 << 0) +#define CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR (1 << 1) + +typedef struct _cl_device_integer_dot_product_acceleration_properties_khr { + cl_bool signed_accelerated; + cl_bool unsigned_accelerated; + cl_bool mixed_signedness_accelerated; + cl_bool accumulating_saturating_signed_accelerated; + cl_bool accumulating_saturating_unsigned_accelerated; + cl_bool accumulating_saturating_mixed_signedness_accelerated; +} cl_device_integer_dot_product_acceleration_properties_khr; + +/* cl_device_info */ +#define CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR 0x1073 +#define CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR 0x1074 +#define CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR 0x1075 + + +/*************************************************************** +* cl_khr_external_memory +***************************************************************/ +#define cl_khr_external_memory 1 + +typedef cl_uint cl_external_memory_handle_type_khr; + +/* cl_platform_info */ +#define CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x2044 + +/* cl_device_info */ +#define CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR 0x204F + +/* cl_mem_properties */ +#define CL_DEVICE_HANDLE_LIST_KHR 0x2051 +#define CL_DEVICE_HANDLE_LIST_END_KHR 0 + +/* cl_command_type */ +#define CL_COMMAND_ACQUIRE_EXTERNAL_MEM_OBJECTS_KHR 0x2047 +#define CL_COMMAND_RELEASE_EXTERNAL_MEM_OBJECTS_KHR 0x2048 + + +typedef cl_int (CL_API_CALL * +clEnqueueAcquireExternalMemObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +typedef cl_int (CL_API_CALL * +clEnqueueReleaseExternalMemObjectsKHR_fn)( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueAcquireExternalMemObjectsKHR( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueReleaseExternalMemObjectsKHR( + cl_command_queue command_queue, + cl_uint num_mem_objects, + const cl_mem* mem_objects, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_3_0; + +/*************************************************************** +* cl_khr_external_memory_dma_buf +***************************************************************/ +#define cl_khr_external_memory_dma_buf 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR 0x2067 + +/*************************************************************** +* cl_khr_external_memory_dx +***************************************************************/ +#define cl_khr_external_memory_dx 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR 0x2063 +#define CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR 0x2064 +#define CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR 0x2065 +#define CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR 0x2066 + +/*************************************************************** +* cl_khr_external_memory_opaque_fd +***************************************************************/ +#define cl_khr_external_memory_opaque_fd 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR 0x2060 + +/*************************************************************** +* cl_khr_external_memory_win32 +***************************************************************/ +#define cl_khr_external_memory_win32 1 + +/* cl_external_memory_handle_type_khr */ +#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR 0x2061 +#define CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2062 + +/*************************************************************** +* cl_khr_external_semaphore +***************************************************************/ +#define cl_khr_external_semaphore 1 + +typedef struct _cl_semaphore_khr * cl_semaphore_khr; +typedef cl_uint cl_external_semaphore_handle_type_khr; + +/* cl_platform_info */ +#define CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x2037 +#define CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x2038 + +/* cl_device_info */ +#define CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR 0x204D +#define CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x204E + +/* cl_semaphore_properties_khr */ +#define CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR 0x203F +#define CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR 0 + + +typedef cl_int (CL_API_CALL * +clGetSemaphoreHandleForTypeKHR_fn)( + cl_semaphore_khr sema_object, + cl_device_id device, + cl_external_semaphore_handle_type_khr handle_type, + size_t handle_size, + void* handle_ptr, + size_t* handle_size_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSemaphoreHandleForTypeKHR( + cl_semaphore_khr sema_object, + cl_device_id device, + cl_external_semaphore_handle_type_khr handle_type, + size_t handle_size, + void* handle_ptr, + size_t* handle_size_ret) CL_API_SUFFIX__VERSION_1_2; + +/*************************************************************** +* cl_khr_external_semaphore_dx_fence +***************************************************************/ +#define cl_khr_external_semaphore_dx_fence 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR 0x2059 + +/*************************************************************** +* cl_khr_external_semaphore_opaque_fd +***************************************************************/ +#define cl_khr_external_semaphore_opaque_fd 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR 0x2055 + +/*************************************************************** +* cl_khr_external_semaphore_sync_fd +***************************************************************/ +#define cl_khr_external_semaphore_sync_fd 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_SYNC_FD_KHR 0x2058 + +/*************************************************************** +* cl_khr_external_semaphore_win32 +***************************************************************/ +#define cl_khr_external_semaphore_win32 1 + +/* cl_external_semaphore_handle_type_khr */ +#define CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR 0x2056 +#define CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR 0x2057 + +/*************************************************************** +* cl_khr_semaphore +***************************************************************/ +#define cl_khr_semaphore 1 + +/* type cl_semaphore_khr */ +typedef cl_properties cl_semaphore_properties_khr; +typedef cl_uint cl_semaphore_info_khr; +typedef cl_uint cl_semaphore_type_khr; +typedef cl_ulong cl_semaphore_payload_khr; + +/* cl_semaphore_type */ +#define CL_SEMAPHORE_TYPE_BINARY_KHR 1 + +/* cl_platform_info */ +#define CL_PLATFORM_SEMAPHORE_TYPES_KHR 0x2036 + +/* cl_device_info */ +#define CL_DEVICE_SEMAPHORE_TYPES_KHR 0x204C + +/* cl_semaphore_info_khr */ +#define CL_SEMAPHORE_CONTEXT_KHR 0x2039 +#define CL_SEMAPHORE_REFERENCE_COUNT_KHR 0x203A +#define CL_SEMAPHORE_PROPERTIES_KHR 0x203B +#define CL_SEMAPHORE_PAYLOAD_KHR 0x203C + +/* cl_semaphore_info_khr or cl_semaphore_properties_khr */ +#define CL_SEMAPHORE_TYPE_KHR 0x203D +/* enum CL_DEVICE_HANDLE_LIST_KHR */ +/* enum CL_DEVICE_HANDLE_LIST_END_KHR */ + +/* cl_command_type */ +#define CL_COMMAND_SEMAPHORE_WAIT_KHR 0x2042 +#define CL_COMMAND_SEMAPHORE_SIGNAL_KHR 0x2043 + +/* Error codes */ +#define CL_INVALID_SEMAPHORE_KHR -1142 + + +typedef cl_semaphore_khr (CL_API_CALL * +clCreateSemaphoreWithPropertiesKHR_fn)( + cl_context context, + const cl_semaphore_properties_khr* sema_props, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clEnqueueWaitSemaphoresKHR_fn)( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clEnqueueSignalSemaphoresKHR_fn)( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clGetSemaphoreInfoKHR_fn)( + cl_semaphore_khr sema_object, + cl_semaphore_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clReleaseSemaphoreKHR_fn)( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL * +clRetainSemaphoreKHR_fn)( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_semaphore_khr CL_API_CALL +clCreateSemaphoreWithPropertiesKHR( + cl_context context, + const cl_semaphore_properties_khr* sema_props, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueWaitSemaphoresKHR( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSignalSemaphoresKHR( + cl_command_queue command_queue, + cl_uint num_sema_objects, + const cl_semaphore_khr* sema_objects, + const cl_semaphore_payload_khr* sema_payload_list, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSemaphoreInfoKHR( + cl_semaphore_khr sema_object, + cl_semaphore_info_khr param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseSemaphoreKHR( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainSemaphoreKHR( + cl_semaphore_khr sema_object) CL_API_SUFFIX__VERSION_1_2; + /********************************** * cl_arm_import_memory extension * **********************************/ @@ -719,6 +1567,12 @@ typedef intptr_t cl_import_properties_arm; /* Data consistency with host property */ #define CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM 0x41E3 +/* Index of plane in a multiplanar hardware buffer */ +#define CL_IMPORT_ANDROID_HARDWARE_BUFFER_PLANE_INDEX_ARM 0x41EF + +/* Index of layer in a multilayer hardware buffer */ +#define CL_IMPORT_ANDROID_HARDWARE_BUFFER_LAYER_INDEX_ARM 0x41F0 + /* Import memory size value to indicate a size for the whole buffer */ #define CL_IMPORT_MEMORY_WHOLE_ALLOCATION_ARM SIZE_MAX @@ -744,7 +1598,7 @@ clImportMemoryARM( cl_context context, const cl_import_properties_arm *properties, void *memory, size_t size, - cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; /****************************************** @@ -787,11 +1641,11 @@ extern CL_API_ENTRY void * CL_API_CALL clSVMAllocARM(cl_context context, cl_svm_mem_flags_arm flags, size_t size, - cl_uint alignment) CL_EXT_SUFFIX__VERSION_1_2; + cl_uint alignment) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY void CL_API_CALL clSVMFreeARM(cl_context context, - void * svm_pointer) CL_EXT_SUFFIX__VERSION_1_2; + void * svm_pointer) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMFreeARM(cl_command_queue command_queue, @@ -804,7 +1658,7 @@ clEnqueueSVMFreeARM(cl_command_queue command_queue, void * user_data, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemcpyARM(cl_command_queue command_queue, @@ -814,7 +1668,7 @@ clEnqueueSVMMemcpyARM(cl_command_queue command_queue, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMemFillARM(cl_command_queue command_queue, @@ -824,7 +1678,7 @@ clEnqueueSVMMemFillARM(cl_command_queue command_queue, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMMapARM(cl_command_queue command_queue, @@ -834,25 +1688,25 @@ clEnqueueSVMMapARM(cl_command_queue command_queue, size_t size, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueSVMUnmapARM(cl_command_queue command_queue, void * svm_ptr, cl_uint num_events_in_wait_list, const cl_event * event_wait_list, - cl_event * event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event * event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelArgSVMPointerARM(cl_kernel kernel, cl_uint arg_index, - const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2; + const void * arg_value) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clSetKernelExecInfoARM(cl_kernel kernel, cl_kernel_exec_info_arm param_name, size_t param_value_size, - const void * param_value) CL_EXT_SUFFIX__VERSION_1_2; + const void * param_value) CL_API_SUFFIX__VERSION_1_2; /******************************** * cl_arm_get_core_id extension * @@ -885,6 +1739,8 @@ clSetKernelExecInfoARM(cl_kernel kernel, #define cl_arm_scheduling_controls 1 +typedef cl_bitfield cl_device_scheduling_controls_capabilities_arm; + /* cl_device_info */ #define CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM 0x41E4 @@ -892,15 +1748,889 @@ clSetKernelExecInfoARM(cl_kernel kernel, #define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_ARM (1 << 1) #define CL_DEVICE_SCHEDULING_WORKGROUP_BATCH_SIZE_MODIFIER_ARM (1 << 2) #define CL_DEVICE_SCHEDULING_DEFERRED_FLUSH_ARM (1 << 3) +#define CL_DEVICE_SCHEDULING_REGISTER_ALLOCATION_ARM (1 << 4) +#define CL_DEVICE_SCHEDULING_WARP_THROTTLING_ARM (1 << 5) +#define CL_DEVICE_SCHEDULING_COMPUTE_UNIT_BATCH_QUEUE_SIZE_ARM (1 << 6) + +#define CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM 0x41EB +#define CL_DEVICE_MAX_WARP_COUNT_ARM 0x41EA /* cl_kernel_info */ +#define CL_KERNEL_MAX_WARP_COUNT_ARM 0x41E9 + +/* cl_kernel_exec_info */ #define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM 0x41E5 #define CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM 0x41E6 +#define CL_KERNEL_EXEC_INFO_WARP_COUNT_LIMIT_ARM 0x41E8 +#define CL_KERNEL_EXEC_INFO_COMPUTE_UNIT_MAX_QUEUED_BATCHES_ARM 0x41F1 /* cl_queue_properties */ #define CL_QUEUE_KERNEL_BATCHING_ARM 0x41E7 #define CL_QUEUE_DEFERRED_FLUSH_ARM 0x41EC +/************************************** +* cl_arm_controlled_kernel_termination +***************************************/ + +#define cl_arm_controlled_kernel_termination 1 + +/* Error code to indicate kernel terminated with failure */ +#define CL_COMMAND_TERMINATED_ITSELF_WITH_FAILURE_ARM -1108 + +/* cl_device_info */ +#define CL_DEVICE_CONTROLLED_TERMINATION_CAPABILITIES_ARM 0x41EE + +/* Bit fields for controlled termination feature query */ +typedef cl_bitfield cl_device_controlled_termination_capabilities_arm; + +#define CL_DEVICE_CONTROLLED_TERMINATION_SUCCESS_ARM (1 << 0) +#define CL_DEVICE_CONTROLLED_TERMINATION_FAILURE_ARM (1 << 1) +#define CL_DEVICE_CONTROLLED_TERMINATION_QUERY_ARM (1 << 2) + +/* cl_event_info */ +#define CL_EVENT_COMMAND_TERMINATION_REASON_ARM 0x41ED + +/* Values returned for event termination reason query */ +typedef cl_uint cl_command_termination_reason_arm; + +#define CL_COMMAND_TERMINATION_COMPLETION_ARM 0 +#define CL_COMMAND_TERMINATION_CONTROLLED_SUCCESS_ARM 1 +#define CL_COMMAND_TERMINATION_CONTROLLED_FAILURE_ARM 2 +#define CL_COMMAND_TERMINATION_ERROR_ARM 3 + +/************************************* +* cl_arm_protected_memory_allocation * +*************************************/ + +#define cl_arm_protected_memory_allocation 1 + +#define CL_MEM_PROTECTED_ALLOC_ARM (1ULL << 36) + +/****************************************** +* cl_intel_exec_by_local_thread extension * +******************************************/ + +#define cl_intel_exec_by_local_thread 1 + +#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31) + +/*************************************************************** +* cl_intel_device_attribute_query +***************************************************************/ + +#define cl_intel_device_attribute_query 1 + +typedef cl_bitfield cl_device_feature_capabilities_intel; + +/* cl_device_feature_capabilities_intel */ +#define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL (1 << 0) +#define CL_DEVICE_FEATURE_FLAG_DPAS_INTEL (1 << 1) + +/* cl_device_info */ +#define CL_DEVICE_IP_VERSION_INTEL 0x4250 +#define CL_DEVICE_ID_INTEL 0x4251 +#define CL_DEVICE_NUM_SLICES_INTEL 0x4252 +#define CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_INTEL 0x4253 +#define CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL 0x4254 +#define CL_DEVICE_NUM_THREADS_PER_EU_INTEL 0x4255 +#define CL_DEVICE_FEATURE_CAPABILITIES_INTEL 0x4256 + +/*********************************************** +* cl_intel_device_partition_by_names extension * +************************************************/ + +#define cl_intel_device_partition_by_names 1 + +#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 +#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1 + +/************************************************ +* cl_intel_accelerator extension * +* cl_intel_motion_estimation extension * +* cl_intel_advanced_motion_estimation extension * +*************************************************/ + +#define cl_intel_accelerator 1 +#define cl_intel_motion_estimation 1 +#define cl_intel_advanced_motion_estimation 1 + +typedef struct _cl_accelerator_intel* cl_accelerator_intel; +typedef cl_uint cl_accelerator_type_intel; +typedef cl_uint cl_accelerator_info_intel; + +typedef struct _cl_motion_estimation_desc_intel { + cl_uint mb_block_type; + cl_uint subpixel_mode; + cl_uint sad_adjust_mode; + cl_uint search_path_type; +} cl_motion_estimation_desc_intel; + +/* error codes */ +#define CL_INVALID_ACCELERATOR_INTEL -1094 +#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095 +#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096 +#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097 + +/* cl_accelerator_type_intel */ +#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 + +/* cl_accelerator_info_intel */ +#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090 +#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091 +#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092 +#define CL_ACCELERATOR_TYPE_INTEL 0x4093 + +/* cl_motion_detect_desc_intel flags */ +#define CL_ME_MB_TYPE_16x16_INTEL 0x0 +#define CL_ME_MB_TYPE_8x8_INTEL 0x1 +#define CL_ME_MB_TYPE_4x4_INTEL 0x2 + +#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 +#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 +#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 + +#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 +#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1 + +#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 +#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 +#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5 + +#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0 +#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1 +#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2 +#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4 + +#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1 +#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2 +#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3 + +#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16 +#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21 +#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32 +#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43 +#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48 + +#define CL_ME_COST_PENALTY_NONE_INTEL 0x0 +#define CL_ME_COST_PENALTY_LOW_INTEL 0x1 +#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2 +#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3 + +#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0 +#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1 +#define CL_ME_COST_PRECISION_PEL_INTEL 0x2 +#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3 + +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 +#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 + +#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 +#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 +#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 +#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 + +#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 +#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 +#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 + +/* cl_device_info */ +#define CL_DEVICE_ME_VERSION_INTEL 0x407E + +#define CL_ME_VERSION_LEGACY_INTEL 0x0 +#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1 +#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2 + +extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL +clCreateAcceleratorINTEL( + cl_context context, + cl_accelerator_type_intel accelerator_type, + size_t descriptor_size, + const void* descriptor, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)( + cl_context context, + cl_accelerator_type_intel accelerator_type, + size_t descriptor_size, + const void* descriptor, + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetAcceleratorInfoINTEL( + cl_accelerator_intel accelerator, + cl_accelerator_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)( + cl_accelerator_intel accelerator, + cl_accelerator_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clRetainAcceleratorINTEL( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +extern CL_API_ENTRY cl_int CL_API_CALL +clReleaseAcceleratorINTEL( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +typedef cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)( + cl_accelerator_intel accelerator) CL_API_SUFFIX__VERSION_1_2; + +/****************************************** +* cl_intel_simultaneous_sharing extension * +*******************************************/ + +#define cl_intel_simultaneous_sharing 1 + +#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 +#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 + +/*********************************** +* cl_intel_egl_image_yuv extension * +************************************/ + +#define cl_intel_egl_image_yuv 1 + +#define CL_EGL_YUV_PLANE_INTEL 0x4107 + +/******************************** +* cl_intel_packed_yuv extension * +*********************************/ + +#define cl_intel_packed_yuv 1 + +#define CL_YUYV_INTEL 0x4076 +#define CL_UYVY_INTEL 0x4077 +#define CL_YVYU_INTEL 0x4078 +#define CL_VYUY_INTEL 0x4079 + +/******************************************** +* cl_intel_required_subgroup_size extension * +*********************************************/ + +#define cl_intel_required_subgroup_size 1 + +#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108 +#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109 +#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A + +/**************************************** +* cl_intel_driver_diagnostics extension * +*****************************************/ + +#define cl_intel_driver_diagnostics 1 + +typedef cl_uint cl_diagnostics_verbose_level; + +#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106 + +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 ) +#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 ) + +/******************************** +* cl_intel_planar_yuv extension * +*********************************/ + +#define CL_NV12_INTEL 0x410E + +#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 ) +#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 ) + +#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E +#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F + +/******************************************************* +* cl_intel_device_side_avc_motion_estimation extension * +********************************************************/ + +#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B +#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C +#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D + +#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */ +#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */ + +#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0 +#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1 +#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2 +#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3 + +#define CL_AVC_ME_MINOR_8x8_INTEL 0x0 +#define CL_AVC_ME_MINOR_8x4_INTEL 0x1 +#define CL_AVC_ME_MINOR_4x8_INTEL 0x2 +#define CL_AVC_ME_MINOR_4x4_INTEL 0x3 + +#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0 +#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 +#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 + +#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 +#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E +#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D +#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B +#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 +#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F +#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F +#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F + +#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 +#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 +#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 +#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 +#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 +#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 +#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 +#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 +#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 +#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9 +#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2 +#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa + +#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 +#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 + +#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 +#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 +#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 + +#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 +#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 +#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 +#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 + +#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 +#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 +#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 +#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B +#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 + +#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 +#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 +#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 +#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 + +#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 +#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 + +#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 ) +#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 ) + +#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 +#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 + +#define CL_AVC_ME_INTRA_16x16_INTEL 0x0 +#define CL_AVC_ME_INTRA_8x8_INTEL 0x1 +#define CL_AVC_ME_INTRA_4x4_INTEL 0x2 + +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 +#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 + +#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 +#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 +#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 +#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 + +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 +#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 +#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 + +#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1 +#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2 +#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3 + +#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 +#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 +#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 + +#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 +#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 + +/******************************************* +* cl_intel_unified_shared_memory extension * +********************************************/ +#define cl_intel_unified_shared_memory 1 + +typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel; +typedef cl_properties cl_mem_properties_intel; +typedef cl_bitfield cl_mem_alloc_flags_intel; +typedef cl_uint cl_mem_info_intel; +typedef cl_uint cl_unified_shared_memory_type_intel; +typedef cl_uint cl_mem_advice_intel; + +/* cl_device_info */ +#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190 +#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191 +#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192 +#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193 +#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194 + +/* cl_device_unified_shared_memory_capabilities_intel - bitfield */ +#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0) +#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1) +#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2) +#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3) + +/* cl_mem_properties_intel */ +#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195 + +/* cl_mem_alloc_flags_intel - bitfield */ +#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0) +#define CL_MEM_ALLOC_INITIAL_PLACEMENT_DEVICE_INTEL (1 << 1) +#define CL_MEM_ALLOC_INITIAL_PLACEMENT_HOST_INTEL (1 << 2) + +/* cl_mem_alloc_info_intel */ +#define CL_MEM_ALLOC_TYPE_INTEL 0x419A +#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B +#define CL_MEM_ALLOC_SIZE_INTEL 0x419C +#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D + +/* cl_unified_shared_memory_type_intel */ +#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196 +#define CL_MEM_TYPE_HOST_INTEL 0x4197 +#define CL_MEM_TYPE_DEVICE_INTEL 0x4198 +#define CL_MEM_TYPE_SHARED_INTEL 0x4199 + +/* cl_kernel_exec_info */ +#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200 +#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201 +#define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202 +#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203 + +/* cl_command_type */ +#define CL_COMMAND_MEMFILL_INTEL 0x4204 +#define CL_COMMAND_MEMCPY_INTEL 0x4205 +#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206 +#define CL_COMMAND_MEMADVISE_INTEL 0x4207 + + +typedef void* (CL_API_CALL * +clHostMemAllocINTEL_fn)( + cl_context context, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +typedef void* (CL_API_CALL * +clDeviceMemAllocINTEL_fn)( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +typedef void* (CL_API_CALL * +clSharedMemAllocINTEL_fn)( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +typedef cl_int (CL_API_CALL * +clMemFreeINTEL_fn)( + cl_context context, + void* ptr) ; + +typedef cl_int (CL_API_CALL * +clMemBlockingFreeINTEL_fn)( + cl_context context, + void* ptr) ; + +typedef cl_int (CL_API_CALL * +clGetMemAllocInfoINTEL_fn)( + cl_context context, + const void* ptr, + cl_mem_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +typedef cl_int (CL_API_CALL * +clSetKernelArgMemPointerINTEL_fn)( + cl_kernel kernel, + cl_uint arg_index, + const void* arg_value) ; + +typedef cl_int (CL_API_CALL * +clEnqueueMemFillINTEL_fn)( + cl_command_queue command_queue, + void* dst_ptr, + const void* pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +typedef cl_int (CL_API_CALL * +clEnqueueMemcpyINTEL_fn)( + cl_command_queue command_queue, + cl_bool blocking, + void* dst_ptr, + const void* src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +typedef cl_int (CL_API_CALL * +clEnqueueMemAdviseINTEL_fn)( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_advice_intel advice, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY void* CL_API_CALL +clHostMemAllocINTEL( + cl_context context, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY void* CL_API_CALL +clDeviceMemAllocINTEL( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY void* CL_API_CALL +clSharedMemAllocINTEL( + cl_context context, + cl_device_id device, + const cl_mem_properties_intel* properties, + size_t size, + cl_uint alignment, + cl_int* errcode_ret) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clMemFreeINTEL( + cl_context context, + void* ptr) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clMemBlockingFreeINTEL( + cl_context context, + void* ptr) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetMemAllocInfoINTEL( + cl_context context, + const void* ptr, + cl_mem_info_intel param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetKernelArgMemPointerINTEL( + cl_kernel kernel, + cl_uint arg_index, + const void* arg_value) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemFillINTEL( + cl_command_queue command_queue, + void* dst_ptr, + const void* pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemcpyINTEL( + cl_command_queue command_queue, + cl_bool blocking, + void* dst_ptr, + const void* src_ptr, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemAdviseINTEL( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_advice_intel advice, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#endif /* CL_NO_PROTOTYPES */ + +#if defined(CL_VERSION_1_2) +/* Requires OpenCL 1.2 for cl_mem_migration_flags: */ + +typedef cl_int (CL_API_CALL * +clEnqueueMigrateMemINTEL_fn)( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMigrateMemINTEL( + cl_command_queue command_queue, + const void* ptr, + size_t size, + cl_mem_migration_flags flags, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#endif /* CL_NO_PROTOTYPES */ + +#endif /* defined(CL_VERSION_1_2) */ + +/* deprecated, use clEnqueueMemFillINTEL instead */ + +typedef cl_int (CL_API_CALL * +clEnqueueMemsetINTEL_fn)( + cl_command_queue command_queue, + void* dst_ptr, + cl_int value, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueMemsetINTEL( + cl_command_queue command_queue, + void* dst_ptr, + cl_int value, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, + cl_event* event) ; + +#endif /* CL_NO_PROTOTYPES */ + +/*************************************************************** +* cl_intel_mem_alloc_buffer_location +***************************************************************/ +#define cl_intel_mem_alloc_buffer_location 1 +#define CL_INTEL_MEM_ALLOC_BUFFER_LOCATION_EXTENSION_NAME \ + "cl_intel_mem_alloc_buffer_location" + +/* cl_mem_properties_intel */ +#define CL_MEM_ALLOC_BUFFER_LOCATION_INTEL 0x419E + +/* cl_mem_alloc_info_intel */ +/* enum CL_MEM_ALLOC_BUFFER_LOCATION_INTEL */ + +/*************************************************** +* cl_intel_create_buffer_with_properties extension * +****************************************************/ + +#define cl_intel_create_buffer_with_properties 1 + +extern CL_API_ENTRY cl_mem CL_API_CALL +clCreateBufferWithPropertiesINTEL( + cl_context context, + const cl_mem_properties_intel* properties, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +typedef cl_mem (CL_API_CALL * +clCreateBufferWithPropertiesINTEL_fn)( + cl_context context, + const cl_mem_properties_intel* properties, + cl_mem_flags flags, + size_t size, + void * host_ptr, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0; + +/****************************************** +* cl_intel_mem_channel_property extension * +*******************************************/ + +#define CL_MEM_CHANNEL_INTEL 0x4213 + +/********************************* +* cl_intel_mem_force_host_memory * +**********************************/ + +#define cl_intel_mem_force_host_memory 1 + +/* cl_mem_flags */ +#define CL_MEM_FORCE_HOST_MEMORY_INTEL (1 << 20) + +/*************************************************************** +* cl_intel_command_queue_families +***************************************************************/ +#define cl_intel_command_queue_families 1 + +typedef cl_bitfield cl_command_queue_capabilities_intel; + +#define CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL 64 + +typedef struct _cl_queue_family_properties_intel { + cl_command_queue_properties properties; + cl_command_queue_capabilities_intel capabilities; + cl_uint count; + char name[CL_QUEUE_FAMILY_MAX_NAME_SIZE_INTEL]; +} cl_queue_family_properties_intel; + +/* cl_device_info */ +#define CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL 0x418B + +/* cl_queue_properties */ +#define CL_QUEUE_FAMILY_INTEL 0x418C +#define CL_QUEUE_INDEX_INTEL 0x418D + +/* cl_command_queue_capabilities_intel */ +#define CL_QUEUE_DEFAULT_CAPABILITIES_INTEL 0 +#define CL_QUEUE_CAPABILITY_CREATE_SINGLE_QUEUE_EVENTS_INTEL (1 << 0) +#define CL_QUEUE_CAPABILITY_CREATE_CROSS_QUEUE_EVENTS_INTEL (1 << 1) +#define CL_QUEUE_CAPABILITY_SINGLE_QUEUE_EVENT_WAIT_LIST_INTEL (1 << 2) +#define CL_QUEUE_CAPABILITY_CROSS_QUEUE_EVENT_WAIT_LIST_INTEL (1 << 3) +#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_INTEL (1 << 8) +#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_RECT_INTEL (1 << 9) +#define CL_QUEUE_CAPABILITY_MAP_BUFFER_INTEL (1 << 10) +#define CL_QUEUE_CAPABILITY_FILL_BUFFER_INTEL (1 << 11) +#define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_INTEL (1 << 12) +#define CL_QUEUE_CAPABILITY_MAP_IMAGE_INTEL (1 << 13) +#define CL_QUEUE_CAPABILITY_FILL_IMAGE_INTEL (1 << 14) +#define CL_QUEUE_CAPABILITY_TRANSFER_BUFFER_IMAGE_INTEL (1 << 15) +#define CL_QUEUE_CAPABILITY_TRANSFER_IMAGE_BUFFER_INTEL (1 << 16) +#define CL_QUEUE_CAPABILITY_MARKER_INTEL (1 << 24) +#define CL_QUEUE_CAPABILITY_BARRIER_INTEL (1 << 25) +#define CL_QUEUE_CAPABILITY_KERNEL_INTEL (1 << 26) + +/*************************************************************** +* cl_intel_queue_no_sync_operations +***************************************************************/ + +#define cl_intel_queue_no_sync_operations 1 + +/* addition to cl_command_queue_properties */ +#define CL_QUEUE_NO_SYNC_OPERATIONS_INTEL (1 << 29) + +/*************************************************************** +* cl_intel_sharing_format_query +***************************************************************/ +#define cl_intel_sharing_format_query 1 + +/*************************************************************** +* cl_ext_image_requirements_info +***************************************************************/ + +#ifdef CL_VERSION_3_0 + +#define cl_ext_image_requirements_info 1 + +typedef cl_uint cl_image_requirements_info_ext; + +#define CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT 0x1290 +#define CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT 0x1292 +#define CL_IMAGE_REQUIREMENTS_SIZE_EXT 0x12B2 +#define CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT 0x12B3 +#define CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT 0x12B4 +#define CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT 0x12B5 +#define CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT 0x12B6 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetImageRequirementsInfoEXT( + cl_context context, + const cl_mem_properties* properties, + cl_mem_flags flags, + const cl_image_format* image_format, + const cl_image_desc* image_desc, + cl_image_requirements_info_ext param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_3_0; + +typedef cl_int (CL_API_CALL * +clGetImageRequirementsInfoEXT_fn)( + cl_context context, + const cl_mem_properties* properties, + cl_mem_flags flags, + const cl_image_format* image_format, + const cl_image_desc* image_desc, + cl_image_requirements_info_ext param_name, + size_t param_value_size, + void* param_value, + size_t* param_value_size_ret) CL_API_SUFFIX__VERSION_3_0; + +#endif + +/*************************************************************** +* cl_ext_image_from_buffer +***************************************************************/ + +#ifdef CL_VERSION_3_0 + +#define cl_ext_image_from_buffer 1 + +#define CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT 0x1291 + +#endif + #ifdef __cplusplus } #endif diff --git a/include/CL/cl_ext_intel.h b/include/CL/cl_ext_intel.h index aab82284c..a7ae87a34 100644 --- a/include/CL/cl_ext_intel.h +++ b/include/CL/cl_ext_intel.h @@ -14,718 +14,6 @@ * limitations under the License. * ******************************************************************************/ -/*****************************************************************************\ -Copyright (c) 2013-2020 Intel Corporation All Rights Reserved. - -THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE -MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -File Name: cl_ext_intel.h - -Abstract: - -Notes: - -\*****************************************************************************/ - -#ifndef __CL_EXT_INTEL_H -#define __CL_EXT_INTEL_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/*************************************** -* cl_intel_thread_local_exec extension * -****************************************/ - -#define cl_intel_thread_local_exec 1 - -#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31) - -/*********************************************** -* cl_intel_device_partition_by_names extension * -************************************************/ - -#define cl_intel_device_partition_by_names 1 - -#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052 -#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1 - -/************************************************ -* cl_intel_accelerator extension * -* cl_intel_motion_estimation extension * -* cl_intel_advanced_motion_estimation extension * -*************************************************/ - -#define cl_intel_accelerator 1 -#define cl_intel_motion_estimation 1 -#define cl_intel_advanced_motion_estimation 1 - -typedef struct _cl_accelerator_intel* cl_accelerator_intel; -typedef cl_uint cl_accelerator_type_intel; -typedef cl_uint cl_accelerator_info_intel; - -typedef struct _cl_motion_estimation_desc_intel { - cl_uint mb_block_type; - cl_uint subpixel_mode; - cl_uint sad_adjust_mode; - cl_uint search_path_type; -} cl_motion_estimation_desc_intel; - -/* error codes */ -#define CL_INVALID_ACCELERATOR_INTEL -1094 -#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095 -#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096 -#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097 - -/* cl_accelerator_type_intel */ -#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 - -/* cl_accelerator_info_intel */ -#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090 -#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091 -#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092 -#define CL_ACCELERATOR_TYPE_INTEL 0x4093 - -/* cl_motion_detect_desc_intel flags */ -#define CL_ME_MB_TYPE_16x16_INTEL 0x0 -#define CL_ME_MB_TYPE_8x8_INTEL 0x1 -#define CL_ME_MB_TYPE_4x4_INTEL 0x2 - -#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 -#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 -#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 - -#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 -#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1 - -#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 -#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 -#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5 - -#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0 -#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1 -#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2 -#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4 - -#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1 -#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2 -#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3 - -#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16 -#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21 -#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32 -#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43 -#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48 - -#define CL_ME_COST_PENALTY_NONE_INTEL 0x0 -#define CL_ME_COST_PENALTY_LOW_INTEL 0x1 -#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2 -#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3 - -#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0 -#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1 -#define CL_ME_COST_PRECISION_PEL_INTEL 0x2 -#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3 - -#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 -#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 -#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 - -#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 -#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 -#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 -#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 -#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 -#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 - -#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 -#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 -#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 - -/* cl_device_info */ -#define CL_DEVICE_ME_VERSION_INTEL 0x407E - -#define CL_ME_VERSION_LEGACY_INTEL 0x0 -#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1 -#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2 - -extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL -clCreateAcceleratorINTEL( - cl_context context, - cl_accelerator_type_intel accelerator_type, - size_t descriptor_size, - const void* descriptor, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)( - cl_context context, - cl_accelerator_type_intel accelerator_type, - size_t descriptor_size, - const void* descriptor, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetAcceleratorInfoINTEL( - cl_accelerator_intel accelerator, - cl_accelerator_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)( - cl_accelerator_intel accelerator, - cl_accelerator_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clRetainAcceleratorINTEL( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -extern CL_API_ENTRY cl_int CL_API_CALL -clReleaseAcceleratorINTEL( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)( - cl_accelerator_intel accelerator) CL_EXT_SUFFIX__VERSION_1_2; - -/****************************************** -* cl_intel_simultaneous_sharing extension * -*******************************************/ - -#define cl_intel_simultaneous_sharing 1 - -#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104 -#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105 - -/*********************************** -* cl_intel_egl_image_yuv extension * -************************************/ - -#define cl_intel_egl_image_yuv 1 - -#define CL_EGL_YUV_PLANE_INTEL 0x4107 - -/******************************** -* cl_intel_packed_yuv extension * -*********************************/ - -#define cl_intel_packed_yuv 1 - -#define CL_YUYV_INTEL 0x4076 -#define CL_UYVY_INTEL 0x4077 -#define CL_YVYU_INTEL 0x4078 -#define CL_VYUY_INTEL 0x4079 - -/******************************************** -* cl_intel_required_subgroup_size extension * -*********************************************/ - -#define cl_intel_required_subgroup_size 1 - -#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108 -#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109 -#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A - -/**************************************** -* cl_intel_driver_diagnostics extension * -*****************************************/ - -#define cl_intel_driver_diagnostics 1 - -typedef cl_uint cl_diagnostics_verbose_level; - -#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106 - -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff ) -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 ) -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 ) -#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 ) - -/******************************** -* cl_intel_planar_yuv extension * -*********************************/ - -#define CL_NV12_INTEL 0x410E - -#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 ) -#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 ) - -#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E -#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F - -/******************************************************* -* cl_intel_device_side_avc_motion_estimation extension * -********************************************************/ - -#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B -#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C -#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D - -#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */ -#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */ - -#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0 -#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1 -#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2 -#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3 - -#define CL_AVC_ME_MINOR_8x8_INTEL 0x0 -#define CL_AVC_ME_MINOR_8x4_INTEL 0x1 -#define CL_AVC_ME_MINOR_4x8_INTEL 0x2 -#define CL_AVC_ME_MINOR_4x4_INTEL 0x3 - -#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0 -#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 -#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 - -#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 -#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E -#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D -#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B -#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 -#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F -#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F -#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F - -#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 -#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 -#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 -#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 -#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 -#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 -#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 -#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 -#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 -#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9 -#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2 -#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa - -#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 -#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 - -#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 -#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 -#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 - -#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 -#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 -#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 -#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 - -#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 -#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 -#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 -#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B -#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 - -#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 -#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 -#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 -#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 - -#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 -#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 - -#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 ) -#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 ) - -#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 -#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 - -#define CL_AVC_ME_INTRA_16x16_INTEL 0x0 -#define CL_AVC_ME_INTRA_8x8_INTEL 0x1 -#define CL_AVC_ME_INTRA_4x4_INTEL 0x2 - -#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 -#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 -#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 - -#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 -#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 -#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 -#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 - -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 -#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 -#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 - -#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1 -#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2 -#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3 - -#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 -#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 -#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 - -#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 -#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 - -/******************************************* -* cl_intel_unified_shared_memory extension * -********************************************/ - -/* These APIs are in sync with Revision Q of the cl_intel_unified_shared_memory spec! */ - -#define cl_intel_unified_shared_memory 1 - -/* cl_device_info */ -#define CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL 0x4190 -#define CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL 0x4191 -#define CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4192 -#define CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL 0x4193 -#define CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL 0x4194 - -typedef cl_bitfield cl_device_unified_shared_memory_capabilities_intel; - -/* cl_device_unified_shared_memory_capabilities_intel - bitfield */ -#define CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL (1 << 0) -#define CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL (1 << 1) -#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL (1 << 2) -#define CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL (1 << 3) - -typedef cl_properties cl_mem_properties_intel; - -/* cl_mem_properties_intel */ -#define CL_MEM_ALLOC_FLAGS_INTEL 0x4195 - -typedef cl_bitfield cl_mem_alloc_flags_intel; - -/* cl_mem_alloc_flags_intel - bitfield */ -#define CL_MEM_ALLOC_WRITE_COMBINED_INTEL (1 << 0) - -typedef cl_uint cl_mem_info_intel; - -/* cl_mem_alloc_info_intel */ -#define CL_MEM_ALLOC_TYPE_INTEL 0x419A -#define CL_MEM_ALLOC_BASE_PTR_INTEL 0x419B -#define CL_MEM_ALLOC_SIZE_INTEL 0x419C -#define CL_MEM_ALLOC_DEVICE_INTEL 0x419D -/* Enum values 0x419E-0x419F are reserved for future queries. */ - -typedef cl_uint cl_unified_shared_memory_type_intel; - -/* cl_unified_shared_memory_type_intel */ -#define CL_MEM_TYPE_UNKNOWN_INTEL 0x4196 -#define CL_MEM_TYPE_HOST_INTEL 0x4197 -#define CL_MEM_TYPE_DEVICE_INTEL 0x4198 -#define CL_MEM_TYPE_SHARED_INTEL 0x4199 - -typedef cl_uint cl_mem_advice_intel; - -/* cl_mem_advice_intel */ -/* Enum values 0x4208-0x420F are reserved for future memory advices. */ - -/* cl_kernel_exec_info */ -#define CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL 0x4200 -#define CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL 0x4201 -#define CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL 0x4202 -#define CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL 0x4203 - -/* cl_command_type */ -#define CL_COMMAND_MEMFILL_INTEL 0x4204 -#define CL_COMMAND_MEMCPY_INTEL 0x4205 -#define CL_COMMAND_MIGRATEMEM_INTEL 0x4206 -#define CL_COMMAND_MEMADVISE_INTEL 0x4207 - -extern CL_API_ENTRY void* CL_API_CALL -clHostMemAllocINTEL( - cl_context context, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -typedef CL_API_ENTRY void* (CL_API_CALL * -clHostMemAllocINTEL_fn)( - cl_context context, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -extern CL_API_ENTRY void* CL_API_CALL -clDeviceMemAllocINTEL( - cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -typedef CL_API_ENTRY void* (CL_API_CALL * -clDeviceMemAllocINTEL_fn)( - cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -extern CL_API_ENTRY void* CL_API_CALL -clSharedMemAllocINTEL( - cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -typedef CL_API_ENTRY void* (CL_API_CALL * -clSharedMemAllocINTEL_fn)( - cl_context context, - cl_device_id device, - const cl_mem_properties_intel* properties, - size_t size, - cl_uint alignment, - cl_int* errcode_ret); - -extern CL_API_ENTRY cl_int CL_API_CALL -clMemFreeINTEL( - cl_context context, - void* ptr); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clMemFreeINTEL_fn)( - cl_context context, - void* ptr); - -extern CL_API_ENTRY cl_int CL_API_CALL -clMemBlockingFreeINTEL( - cl_context context, - void* ptr); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clMemBlockingFreeINTEL_fn)( - cl_context context, - void* ptr); - -extern CL_API_ENTRY cl_int CL_API_CALL -clGetMemAllocInfoINTEL( - cl_context context, - const void* ptr, - cl_mem_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clGetMemAllocInfoINTEL_fn)( - cl_context context, - const void* ptr, - cl_mem_info_intel param_name, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); - -extern CL_API_ENTRY cl_int CL_API_CALL -clSetKernelArgMemPointerINTEL( - cl_kernel kernel, - cl_uint arg_index, - const void* arg_value); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clSetKernelArgMemPointerINTEL_fn)( - cl_kernel kernel, - cl_uint arg_index, - const void* arg_value); - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemsetINTEL( /* Deprecated */ - cl_command_queue command_queue, - void* dst_ptr, - cl_int value, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemsetINTEL_fn)( /* Deprecated */ - cl_command_queue command_queue, - void* dst_ptr, - cl_int value, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemFillINTEL( - cl_command_queue command_queue, - void* dst_ptr, - const void* pattern, - size_t pattern_size, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemFillINTEL_fn)( - cl_command_queue command_queue, - void* dst_ptr, - const void* pattern, - size_t pattern_size, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemcpyINTEL( - cl_command_queue command_queue, - cl_bool blocking, - void* dst_ptr, - const void* src_ptr, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemcpyINTEL_fn)( - cl_command_queue command_queue, - cl_bool blocking, - void* dst_ptr, - const void* src_ptr, - size_t size, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -#ifdef CL_VERSION_1_2 - -/* Because these APIs use cl_mem_migration_flags, they require - OpenCL 1.2: */ - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMigrateMemINTEL( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_migration_flags flags, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMigrateMemINTEL_fn)( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_migration_flags flags, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -#endif - -extern CL_API_ENTRY cl_int CL_API_CALL -clEnqueueMemAdviseINTEL( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_advice_intel advice, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -typedef CL_API_ENTRY cl_int (CL_API_CALL * -clEnqueueMemAdviseINTEL_fn)( - cl_command_queue command_queue, - const void* ptr, - size_t size, - cl_mem_advice_intel advice, - cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, - cl_event* event); - -/*************************************************** -* cl_intel_create_buffer_with_properties extension * -****************************************************/ - -#define cl_intel_create_buffer_with_properties 1 - -extern CL_API_ENTRY cl_mem CL_API_CALL -clCreateBufferWithPropertiesINTEL( - cl_context context, - const cl_mem_properties_intel* properties, - cl_mem_flags flags, - size_t size, - void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; - -typedef CL_API_ENTRY cl_mem (CL_API_CALL * -clCreateBufferWithPropertiesINTEL_fn)( - cl_context context, - const cl_mem_properties_intel* properties, - cl_mem_flags flags, - size_t size, - void * host_ptr, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_0; - -/****************************************** -* cl_intel_mem_channel_property extension * -*******************************************/ - -#define CL_MEM_CHANNEL_INTEL 0x4213 - -/********************************* -* cl_intel_mem_force_host_memory * -**********************************/ - -#define cl_intel_mem_force_host_memory 1 - -/* cl_mem_flags */ -#define CL_MEM_FORCE_HOST_MEMORY_INTEL (1 << 20) - -#ifdef __cplusplus -} -#endif - -#endif /* __CL_EXT_INTEL_H */ +#include +#pragma message("The Intel extensions have been moved into cl_ext.h. Please include cl_ext.h directly.") diff --git a/include/CL/cl_gl.h b/include/CL/cl_gl.h index b587f02a9..327746508 100644 --- a/include/CL/cl_gl.h +++ b/include/CL/cl_gl.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. + * Copyright (c) 2008-2021 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -102,21 +102,21 @@ clEnqueueReleaseGLObjects(cl_command_queue command_queue, /* Deprecated OpenCL 1.1 APIs */ -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateFromGLTexture2D(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL +extern CL_API_ENTRY CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL clCreateFromGLTexture3D(cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; /* cl_khr_gl_sharing extension */ @@ -145,13 +145,48 @@ clGetGLContextInfoKHR(const cl_context_properties * properties, void * param_value, size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( +typedef cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( const cl_context_properties * properties, cl_gl_context_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret); +/* + * cl_khr_gl_event extension + */ +#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D + +extern CL_API_ENTRY cl_event CL_API_CALL +clCreateEventFromGLsyncKHR(cl_context context, + cl_GLsync sync, + cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_1; + +/*************************************************************** +* cl_intel_sharing_format_query_gl +***************************************************************/ +#define cl_intel_sharing_format_query_gl 1 + +/* when cl_khr_gl_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedGLTextureFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_GLenum* gl_formats, + cl_uint* num_texture_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedGLTextureFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint num_entries, + cl_GLenum* gl_formats, + cl_uint* num_texture_formats) ; + #ifdef __cplusplus } #endif diff --git a/include/CL/cl_gl_ext.h b/include/CL/cl_gl_ext.h index 52107b111..8ec818167 100644 --- a/include/CL/cl_gl_ext.h +++ b/include/CL/cl_gl_ext.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2020 The Khronos Group Inc. + * Copyright (c) 2008-2021 The Khronos Group Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,27 +14,5 @@ * limitations under the License. ******************************************************************************/ -#ifndef __OPENCL_CL_GL_EXT_H -#define __OPENCL_CL_GL_EXT_H - -#ifdef __cplusplus -extern "C" { -#endif - #include - -/* - * cl_khr_gl_event extension - */ -#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D - -extern CL_API_ENTRY cl_event CL_API_CALL -clCreateEventFromGLsyncKHR(cl_context context, - cl_GLsync sync, - cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1; - -#ifdef __cplusplus -} -#endif - -#endif /* __OPENCL_CL_GL_EXT_H */ +#pragma message("All OpenGL-related extensions have been moved into cl_gl.h. Please include cl_gl.h directly.") diff --git a/include/CL/cl_icd.h b/include/CL/cl_icd.h index 8ff8b94f9..360b87030 100644 --- a/include/CL/cl_icd.h +++ b/include/CL/cl_icd.h @@ -41,35 +41,35 @@ extern "C" { /* API function pointer definitions */ // Platform APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformIDs)( +typedef cl_int(CL_API_CALL *cl_api_clGetPlatformIDs)( cl_uint num_entries, cl_platform_id *platforms, cl_uint *num_platforms) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPlatformInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetPlatformInfo)( cl_platform_id platform, cl_platform_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Device APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDs)( +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceIDs)( cl_platform_id platform, cl_device_type device_type, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceInfo)( cl_device_id device, cl_device_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevices)( +typedef cl_int(CL_API_CALL *cl_api_clCreateSubDevices)( cl_device_id in_device, const cl_device_partition_property *partition_properties, cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDevice)( +typedef cl_int(CL_API_CALL *cl_api_clRetainDevice)( cl_device_id device) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDevice)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseDevice)( cl_device_id device) CL_API_SUFFIX__VERSION_1_2; #else @@ -81,36 +81,36 @@ typedef void *cl_api_clReleaseDevice; #endif // Context APIs -typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContext)( +typedef cl_context(CL_API_CALL *cl_api_clCreateContext)( const cl_context_properties *properties, cl_uint num_devices, const cl_device_id *devices, void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_context(CL_API_CALL *cl_api_clCreateContextFromType)( +typedef cl_context(CL_API_CALL *cl_api_clCreateContextFromType)( const cl_context_properties *properties, cl_device_type device_type, void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *), void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainContext)( +typedef cl_int(CL_API_CALL *cl_api_clRetainContext)( cl_context context) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseContext)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseContext)( cl_context context) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetContextInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetContextInfo)( cl_context context, cl_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Command Queue APIs -typedef CL_API_ENTRY cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueue)( +typedef cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueue)( cl_context context, cl_device_id device, cl_command_queue_properties properties, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 -typedef CL_API_ENTRY +typedef cl_command_queue(CL_API_CALL *cl_api_clCreateCommandQueueWithProperties)( cl_context /* context */, cl_device_id /* device */, const cl_queue_properties * /* properties */, @@ -122,25 +122,25 @@ typedef void *cl_api_clCreateCommandQueueWithProperties; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainCommandQueue)( +typedef cl_int(CL_API_CALL *cl_api_clRetainCommandQueue)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseCommandQueue)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseCommandQueue)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetCommandQueueInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetCommandQueueInfo)( cl_command_queue command_queue, cl_command_queue_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Memory Object APIs -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBuffer)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateBuffer)( cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateImage)( cl_context context, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; @@ -153,17 +153,17 @@ typedef void *cl_api_clCreateImage; #ifdef CL_VERSION_3_0 -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateBufferWithProperties)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateBufferWithProperties)( cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, size_t size, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImageWithProperties)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateImageWithProperties)( cl_context context, const cl_mem_properties *properties, cl_mem_flags flags, const cl_image_format *image_format, const cl_image_desc *image_desc, void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_3_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL* cl_api_clSetContextDestructorCallback)( +typedef cl_int(CL_API_CALL* cl_api_clSetContextDestructorCallback)( cl_context context, void(CL_CALLBACK* pfn_notify)(cl_context context, void* user_data), void* user_data) CL_API_SUFFIX__VERSION_3_0; @@ -176,43 +176,43 @@ typedef void *cl_api_clSetContextDestructorCallback; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainMemObject)( +typedef cl_int(CL_API_CALL *cl_api_clRetainMemObject)( cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseMemObject)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseMemObject)( cl_mem memobj) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSupportedImageFormats)( +typedef cl_int(CL_API_CALL *cl_api_clGetSupportedImageFormats)( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, cl_uint num_entries, cl_image_format *image_formats, cl_uint *num_image_formats) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetMemObjectInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetMemObjectInfo)( cl_mem memobj, cl_mem_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetImageInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetImageInfo)( cl_mem image, cl_image_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreatePipe)( +typedef cl_mem(CL_API_CALL *cl_api_clCreatePipe)( cl_context /* context */, cl_mem_flags /* flags */, cl_uint /* pipe_packet_size */, cl_uint /* pipe_max_packets */, const cl_pipe_properties * /* properties */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetPipeInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetPipeInfo)( cl_mem /* pipe */, cl_pipe_info /* param_name */, size_t /* param_value_size */, void * /* param_value */, size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clSVMAlloc)( +typedef void *(CL_API_CALL *cl_api_clSVMAlloc)( cl_context /* context */, cl_svm_mem_flags /* flags */, size_t /* size */, unsigned int /* alignment */)CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY void(CL_API_CALL *cl_api_clSVMFree)( +typedef void(CL_API_CALL *cl_api_clSVMFree)( cl_context /* context */, void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0; @@ -226,24 +226,24 @@ typedef void *cl_api_clSVMFree; #endif // Sampler APIs -typedef CL_API_ENTRY cl_sampler(CL_API_CALL *cl_api_clCreateSampler)( +typedef cl_sampler(CL_API_CALL *cl_api_clCreateSampler)( cl_context context, cl_bool normalized_coords, cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainSampler)( +typedef cl_int(CL_API_CALL *cl_api_clRetainSampler)( cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseSampler)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseSampler)( cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetSamplerInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetSamplerInfo)( cl_sampler sampler, cl_sampler_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 -typedef CL_API_ENTRY +typedef cl_sampler(CL_API_CALL *cl_api_clCreateSamplerWithProperties)( cl_context /* context */, const cl_sampler_properties * /* sampler_properties */, @@ -256,18 +256,18 @@ typedef void *cl_api_clCreateSamplerWithProperties; #endif // Program Object APIs -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithSource)( +typedef cl_program(CL_API_CALL *cl_api_clCreateProgramWithSource)( cl_context context, cl_uint count, const char **strings, const size_t *lengths, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithBinary)( +typedef cl_program(CL_API_CALL *cl_api_clCreateProgramWithBinary)( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const size_t *lengths, const unsigned char **binaries, cl_int *binary_status, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY +typedef cl_program(CL_API_CALL *cl_api_clCreateProgramWithBuiltInKernels)( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const char *kernel_names, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; @@ -278,13 +278,13 @@ typedef void *cl_api_clCreateProgramWithBuiltInKernels; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainProgram)( +typedef cl_int(CL_API_CALL *cl_api_clRetainProgram)( cl_program program) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseProgram)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseProgram)( cl_program program) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clBuildProgram)( +typedef cl_int(CL_API_CALL *cl_api_clBuildProgram)( cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), @@ -292,14 +292,14 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clBuildProgram)( #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCompileProgram)( +typedef cl_int(CL_API_CALL *cl_api_clCompileProgram)( cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), void *user_data) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clLinkProgram)( +typedef cl_program(CL_API_CALL *cl_api_clLinkProgram)( cl_context context, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_programs, const cl_program *input_programs, @@ -315,12 +315,12 @@ typedef void *cl_api_clLinkProgram; #ifdef CL_VERSION_2_2 -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clSetProgramSpecializationConstant)( cl_program program, cl_uint spec_id, size_t spec_size, const void *spec_value) CL_API_SUFFIX__VERSION_2_2; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetProgramReleaseCallback)( +typedef cl_int(CL_API_CALL *cl_api_clSetProgramReleaseCallback)( cl_program program, void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), void *user_data) CL_API_SUFFIX__VERSION_2_2; @@ -334,7 +334,7 @@ typedef void *cl_api_clSetProgramReleaseCallback; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadPlatformCompiler)( +typedef cl_int(CL_API_CALL *cl_api_clUnloadPlatformCompiler)( cl_platform_id platform) CL_API_SUFFIX__VERSION_1_2; #else @@ -343,41 +343,41 @@ typedef void *cl_api_clUnloadPlatformCompiler; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetProgramInfo)( cl_program program, cl_program_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetProgramBuildInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetProgramBuildInfo)( cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Kernel Object APIs -typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCreateKernel)( +typedef cl_kernel(CL_API_CALL *cl_api_clCreateKernel)( cl_program program, const char *kernel_name, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateKernelsInProgram)( +typedef cl_int(CL_API_CALL *cl_api_clCreateKernelsInProgram)( cl_program program, cl_uint num_kernels, cl_kernel *kernels, cl_uint *num_kernels_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainKernel)( +typedef cl_int(CL_API_CALL *cl_api_clRetainKernel)( cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseKernel)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseKernel)( cl_kernel kernel) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArg)( +typedef cl_int(CL_API_CALL *cl_api_clSetKernelArg)( cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetKernelInfo)( cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelArgInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetKernelArgInfo)( cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_2; @@ -388,28 +388,28 @@ typedef void *cl_api_clGetKernelArgInfo; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelWorkGroupInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetKernelWorkGroupInfo)( cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_2_0 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelArgSVMPointer)( +typedef cl_int(CL_API_CALL *cl_api_clSetKernelArgSVMPointer)( cl_kernel /* kernel */, cl_uint /* arg_index */, const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetKernelExecInfo)( +typedef cl_int(CL_API_CALL *cl_api_clSetKernelExecInfo)( cl_kernel /* kernel */, cl_kernel_exec_info /* param_name */, size_t /* param_value_size */, const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfoKHR)( +typedef cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfoKHR)( cl_kernel /* in_kernel */, cl_device_id /*in_device*/, cl_kernel_sub_group_info /* param_name */, size_t /*input_value_size*/, const void * /*input_value*/, size_t /*param_value_size*/, void * /*param_value*/, - size_t * /*param_value_size_ret*/) CL_EXT_SUFFIX__VERSION_2_0; + size_t * /*param_value_size_ret*/) CL_API_SUFFIX__VERSION_2_0; #else @@ -420,33 +420,33 @@ typedef void *cl_api_clGetKernelSubGroupInfoKHR; #endif // Event Object APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clWaitForEvents)( +typedef cl_int(CL_API_CALL *cl_api_clWaitForEvents)( cl_uint num_events, const cl_event *event_list) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetEventInfo)( cl_event event, cl_event_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainEvent)(cl_event event) +typedef cl_int(CL_API_CALL *cl_api_clRetainEvent)(cl_event event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseEvent)(cl_event event) +typedef cl_int(CL_API_CALL *cl_api_clReleaseEvent)(cl_event event) CL_API_SUFFIX__VERSION_1_0; // Profiling APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetEventProfilingInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetEventProfilingInfo)( cl_event event, cl_profiling_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; // Flush and Finish APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFlush)( +typedef cl_int(CL_API_CALL *cl_api_clFlush)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clFinish)( +typedef cl_int(CL_API_CALL *cl_api_clFinish)( cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_0; // Enqueued Commands APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, size_t offset, size_t cb, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -454,7 +454,7 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBuffer)( #ifdef CL_VERSION_1_1 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadBufferRect)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReadBufferRect)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, const size_t *buffer_origin, const size_t *host_origin, const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, @@ -468,7 +468,7 @@ typedef void *cl_api_clEnqueueReadBufferRect; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write, size_t offset, size_t cb, const void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -476,7 +476,7 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBuffer)( #ifdef CL_VERSION_1_1 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteBufferRect)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueWriteBufferRect)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read, const size_t *buffer_origin, const size_t *host_origin, const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, @@ -492,7 +492,7 @@ typedef void *cl_api_clEnqueueWriteBufferRect; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillBuffer)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueFillBuffer)( cl_command_queue command_queue, cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -504,7 +504,7 @@ typedef void *cl_api_clEnqueueFillBuffer; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -512,7 +512,7 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBuffer)( #ifdef CL_VERSION_1_1 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferRect)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferRect)( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_buffer, const size_t *src_origin, const size_t *dst_origin, const size_t *region, size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, @@ -526,14 +526,14 @@ typedef void *cl_api_clEnqueueCopyBufferRect; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReadImage)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReadImage)( cl_command_queue command_queue, cl_mem image, cl_bool blocking_read, const size_t *origin, const size_t *region, size_t row_pitch, size_t slice_pitch, void *ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( cl_command_queue command_queue, cl_mem image, cl_bool blocking_write, const size_t *origin, const size_t *region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, cl_uint num_events_in_wait_list, @@ -542,7 +542,7 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWriteImage)( #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueFillImage)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueFillImage)( cl_command_queue command_queue, cl_mem image, const void *fill_color, const size_t origin[3], const size_t region[3], cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -554,45 +554,45 @@ typedef void *cl_api_clEnqueueFillImage; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImage)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyImage)( cl_command_queue command_queue, cl_mem src_image, cl_mem dst_image, const size_t *src_origin, const size_t *dst_origin, const size_t *region, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyImageToBuffer)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyImageToBuffer)( cl_command_queue command_queue, cl_mem src_image, cl_mem dst_buffer, const size_t *src_origin, const size_t *region, size_t dst_offset, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferToImage)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueCopyBufferToImage)( cl_command_queue command_queue, cl_mem src_buffer, cl_mem dst_image, size_t src_offset, const size_t *dst_origin, const size_t *region, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapBuffer)( +typedef void *(CL_API_CALL *cl_api_clEnqueueMapBuffer)( cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map, cl_map_flags map_flags, size_t offset, size_t cb, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clEnqueueMapImage)( +typedef void *(CL_API_CALL *cl_api_clEnqueueMapImage)( cl_command_queue command_queue, cl_mem image, cl_bool blocking_map, cl_map_flags map_flags, const size_t *origin, const size_t *region, size_t *image_row_pitch, size_t *image_slice_pitch, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event, cl_int *errcode_ret)CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueUnmapMemObject)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueUnmapMemObject)( cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMigrateMemObjects)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueMigrateMemObjects)( cl_command_queue command_queue, cl_uint num_mem_objects, const cl_mem *mem_objects, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, @@ -604,19 +604,19 @@ typedef void *cl_api_clEnqueueMigrateMemObjects; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNDRangeKernel)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueNDRangeKernel)( cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset, const size_t *global_work_size, const size_t *local_work_size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueTask)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueTask)( cl_command_queue command_queue, cl_kernel kernel, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( cl_command_queue command_queue, void(CL_CALLBACK *user_func)(void *), void *args, size_t cb_args, cl_uint num_mem_objects, const cl_mem *mem_list, const void **args_mem_loc, cl_uint num_events_in_wait_list, @@ -625,17 +625,17 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueNativeKernel)( #ifdef CL_VERSION_1_2 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarkerWithWaitList)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueMarkerWithWaitList)( cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrierWithWaitList)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueBarrierWithWaitList)( cl_command_queue command_queue, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY void *( +typedef void *( CL_API_CALL *cl_api_clGetExtensionFunctionAddressForPlatform)( cl_platform_id platform, const char *function_name)CL_API_SUFFIX__VERSION_1_2; @@ -652,7 +652,7 @@ typedef void *cl_api_clGetExtensionFunctionAddressForPlatform; #ifdef CL_VERSION_2_0 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( cl_command_queue /* command_queue */, cl_uint /* num_svm_pointers */, void ** /* svm_pointers */, void(CL_CALLBACK *pfn_free_func)(cl_command_queue /* queue */, @@ -663,28 +663,28 @@ typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMFree)( const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemcpy)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemcpy)( cl_command_queue /* command_queue */, cl_bool /* blocking_copy */, void * /* dst_ptr */, const void * /* src_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemFill)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMemFill)( cl_command_queue /* command_queue */, void * /* svm_ptr */, const void * /* pattern */, size_t /* pattern_size */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMap)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMap)( cl_command_queue /* command_queue */, cl_bool /* blocking_map */, cl_map_flags /* map_flags */, void * /* svm_ptr */, size_t /* size */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMUnmap)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMUnmap)( cl_command_queue /* command_queue */, void * /* svm_ptr */, cl_uint /* num_events_in_wait_list */, const cl_event * /* event_wait_list */, @@ -701,119 +701,119 @@ typedef void *cl_api_clEnqueueSVMUnmap; #endif // Deprecated APIs -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetCommandQueueProperty)( +typedef cl_int(CL_API_CALL *cl_api_clSetCommandQueueProperty)( cl_command_queue command_queue, cl_command_queue_properties properties, cl_bool enable, cl_command_queue_properties *old_properties) - CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED; + CL_API_SUFFIX__VERSION_1_0_DEPRECATED; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage2D)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateImage2D)( cl_context context, cl_mem_flags flags, const cl_image_format *image_format, size_t image_width, size_t image_height, size_t image_row_pitch, - void *host_ptr, cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateImage3D)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateImage3D)( cl_context context, cl_mem_flags flags, const cl_image_format *image_format, size_t image_width, size_t image_height, size_t image_depth, size_t image_row_pitch, size_t image_slice_pitch, void *host_ptr, - cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clUnloadCompiler)(void) - CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +typedef cl_int(CL_API_CALL *cl_api_clUnloadCompiler)(void) + CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueMarker)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueMarker)( cl_command_queue command_queue, - cl_event *event) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + cl_event *event) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueWaitForEvents)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueWaitForEvents)( cl_command_queue command_queue, cl_uint num_events, - const cl_event *event_list) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; + const cl_event *event_list) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueBarrier)( - cl_command_queue command_queue) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +typedef cl_int(CL_API_CALL *cl_api_clEnqueueBarrier)( + cl_command_queue command_queue) CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -typedef CL_API_ENTRY void *(CL_API_CALL *cl_api_clGetExtensionFunctionAddress)( - const char *function_name)CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +typedef void *(CL_API_CALL *cl_api_clGetExtensionFunctionAddress)( + const char *function_name)CL_API_SUFFIX__VERSION_1_1_DEPRECATED; // GL and other APIs -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLBuffer)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLBuffer)( cl_context context, cl_mem_flags flags, cl_GLuint bufobj, int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture)( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture2D)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture2D)( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture3D)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLTexture3D)( cl_context context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, cl_GLuint texture, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromGLRenderbuffer)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromGLRenderbuffer)( cl_context context, cl_mem_flags flags, cl_GLuint renderbuffer, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLObjectInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetGLObjectInfo)( cl_mem memobj, cl_gl_object_type *gl_object_type, cl_GLuint *gl_object_name) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLTextureInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetGLTextureInfo)( cl_mem memobj, cl_gl_texture_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireGLObjects)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireGLObjects)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseGLObjects)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseGLObjects)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; /* cl_khr_gl_sharing */ -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetGLContextInfoKHR)( +typedef cl_int(CL_API_CALL *cl_api_clGetGLContextInfoKHR)( const cl_context_properties *properties, cl_gl_context_info param_name, size_t param_value_size, void *param_value, size_t *param_value_size_ret); /* cl_khr_gl_event */ -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromGLsyncKHR)( +typedef cl_event(CL_API_CALL *cl_api_clCreateEventFromGLsyncKHR)( cl_context context, cl_GLsync sync, cl_int *errcode_ret); #if defined(_WIN32) /* cl_khr_d3d10_sharing */ -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D10KHR)( +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D10KHR)( cl_platform_id platform, cl_d3d10_device_source_khr d3d_device_source, void *d3d_object, cl_d3d10_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10BufferKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10BufferKHR)( cl_context context, cl_mem_flags flags, ID3D10Buffer *resource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture2DKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture2DKHR)( cl_context context, cl_mem_flags flags, ID3D10Texture2D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture3DKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D10Texture3DKHR)( cl_context context, cl_mem_flags flags, ID3D10Texture3D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D10ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D10ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, @@ -848,32 +848,32 @@ extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseD3D10ObjectsKHR( const cl_event *event_wait_list, cl_event *event); /* cl_khr_d3d11_sharing */ -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D11KHR)( +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromD3D11KHR)( cl_platform_id platform, cl_d3d11_device_source_khr d3d_device_source, void *d3d_object, cl_d3d11_device_set_khr d3d_device_set, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11BufferKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11BufferKHR)( cl_context context, cl_mem_flags flags, ID3D11Buffer *resource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture2DKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture2DKHR)( cl_context context, cl_mem_flags flags, ID3D11Texture2D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture3DKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromD3D11Texture3DKHR)( cl_context context, cl_mem_flags flags, ID3D11Texture3D *resource, UINT subresource, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireD3D11ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D11ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, @@ -881,26 +881,26 @@ cl_int(CL_API_CALL *cl_api_clEnqueueReleaseD3D11ObjectsKHR)( cl_event *event) CL_API_SUFFIX__VERSION_1_2; /* cl_khr_dx9_media_sharing */ -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR)( cl_platform_id platform, cl_uint num_media_adapters, cl_dx9_media_adapter_type_khr *media_adapters_type, void *media_adapters, cl_dx9_media_adapter_set_khr media_adapter_set, cl_uint num_entries, cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromDX9MediaSurfaceKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromDX9MediaSurfaceKHR)( cl_context context, cl_mem_flags flags, cl_dx9_media_adapter_type_khr adapter_type, void *surface_info, cl_uint plane, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireDX9MediaSurfacesKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseDX9MediaSurfacesKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, @@ -987,29 +987,29 @@ typedef void *cl_api_clGetDeviceIDsFromDX9MediaAdapterKHR; #ifdef CL_VERSION_1_1 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetEventCallback)( +typedef cl_int(CL_API_CALL *cl_api_clSetEventCallback)( cl_event /* event */, cl_int /* command_exec_callback_type */, void(CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *), void * /* user_data */) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateSubBuffer)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateSubBuffer)( cl_mem /* buffer */, cl_mem_flags /* flags */, cl_buffer_create_type /* buffer_create_type */, const void * /* buffer_create_info */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY +typedef cl_int(CL_API_CALL *cl_api_clSetMemObjectDestructorCallback)( cl_mem /* memobj */, void(CL_CALLBACK * /*pfn_notify*/)(cl_mem /* memobj */, void * /*user_data*/), void * /*user_data */) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateUserEvent)( +typedef cl_event(CL_API_CALL *cl_api_clCreateUserEvent)( cl_context /* context */, cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetUserEventStatus)( +typedef cl_int(CL_API_CALL *cl_api_clSetUserEventStatus)( cl_event /* event */, cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1; @@ -1023,68 +1023,68 @@ typedef void *cl_api_clSetUserEventStatus; #endif -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clCreateSubDevicesEXT)( +typedef cl_int(CL_API_CALL *cl_api_clCreateSubDevicesEXT)( cl_device_id in_device, const cl_device_partition_property_ext *partition_properties, cl_uint num_entries, cl_device_id *out_devices, cl_uint *num_devices); -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clRetainDeviceEXT)( +typedef cl_int(CL_API_CALL *cl_api_clRetainDeviceEXT)( cl_device_id device) CL_API_SUFFIX__VERSION_1_0; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clReleaseDeviceEXT)( +typedef cl_int(CL_API_CALL *cl_api_clReleaseDeviceEXT)( cl_device_id device) CL_API_SUFFIX__VERSION_1_0; /* cl_khr_egl_image */ -typedef CL_API_ENTRY cl_mem(CL_API_CALL *cl_api_clCreateFromEGLImageKHR)( +typedef cl_mem(CL_API_CALL *cl_api_clCreateFromEGLImageKHR)( cl_context context, CLeglDisplayKHR display, CLeglImageKHR image, cl_mem_flags flags, const cl_egl_image_properties_khr *properties, cl_int *errcode_ret); -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueAcquireEGLObjectsKHR)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueAcquireEGLObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueReleaseEGLObjectsKHR)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueReleaseEGLObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); /* cl_khr_egl_event */ -typedef CL_API_ENTRY cl_event(CL_API_CALL *cl_api_clCreateEventFromEGLSyncKHR)( +typedef cl_event(CL_API_CALL *cl_api_clCreateEventFromEGLSyncKHR)( cl_context context, CLeglSyncKHR sync, CLeglDisplayKHR display, cl_int *errcode_ret); #ifdef CL_VERSION_2_1 -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clSetDefaultDeviceCommandQueue)( +typedef cl_int(CL_API_CALL *cl_api_clSetDefaultDeviceCommandQueue)( cl_context context, cl_device_id device, cl_command_queue command_queue) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_program(CL_API_CALL *cl_api_clCreateProgramWithIL)( +typedef cl_program(CL_API_CALL *cl_api_clCreateProgramWithIL)( cl_context context, const void *il, size_t length, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfo)( +typedef cl_int(CL_API_CALL *cl_api_clGetKernelSubGroupInfo)( cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, size_t input_value_size, const void *input_value, size_t param_value_size, void *param_value, size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_kernel(CL_API_CALL *cl_api_clCloneKernel)( +typedef cl_kernel(CL_API_CALL *cl_api_clCloneKernel)( cl_kernel source_kernel, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clEnqueueSVMMigrateMem)( +typedef cl_int(CL_API_CALL *cl_api_clEnqueueSVMMigrateMem)( cl_command_queue command_queue, cl_uint num_svm_pointers, const void **svm_pointers, const size_t *sizes, cl_mem_migration_flags flags, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetDeviceAndHostTimer)( +typedef cl_int(CL_API_CALL *cl_api_clGetDeviceAndHostTimer)( cl_device_id device, cl_ulong *device_timestamp, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; -typedef CL_API_ENTRY cl_int(CL_API_CALL *cl_api_clGetHostTimer)( +typedef cl_int(CL_API_CALL *cl_api_clGetHostTimer)( cl_device_id device, cl_ulong *host_timestamp) CL_API_SUFFIX__VERSION_2_1; #else @@ -1099,7 +1099,7 @@ typedef void *cl_api_clGetHostTimer; #endif -/* Vendor dispatch table struture */ +/* Vendor dispatch table structure */ typedef struct _cl_icd_dispatch { /* OpenCL 1.0 */ diff --git a/include/CL/cl_layer.h b/include/CL/cl_layer.h new file mode 100644 index 000000000..59dae7506 --- /dev/null +++ b/include/CL/cl_layer.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2020 The Khronos Group Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * OpenCL is a trademark of Apple Inc. used under license by Khronos. + */ + +#ifndef OPENCL_CL_LAYER_H +#define OPENCL_CL_LAYER_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef cl_uint cl_layer_info; +typedef cl_uint cl_layer_api_version; +#define CL_LAYER_API_VERSION 0x4240 +#define CL_LAYER_API_VERSION_100 100 + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetLayerInfo(cl_layer_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +typedef cl_int +(CL_API_CALL *pfn_clGetLayerInfo)(cl_layer_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +extern CL_API_ENTRY cl_int CL_API_CALL +clInitLayer(cl_uint num_entries, + const cl_icd_dispatch *target_dispatch, + cl_uint *num_entries_ret, + const cl_icd_dispatch **layer_dispatch_ret); + +typedef cl_int +(CL_API_CALL *pfn_clInitLayer)(cl_uint num_entries, + const cl_icd_dispatch *target_dispatch, + cl_uint *num_entries_ret, + const cl_icd_dispatch **layer_dispatch_ret); + +#ifdef __cplusplus +} +#endif + +#endif /* OPENCL_CL_LAYER_H */ diff --git a/include/CL/cl_platform.h b/include/CL/cl_platform.h index 2d69cc4e5..e7a0d6f47 100644 --- a/include/CL/cl_platform.h +++ b/include/CL/cl_platform.h @@ -24,13 +24,25 @@ extern "C" { #endif #if defined(_WIN32) - #define CL_API_ENTRY - #define CL_API_CALL __stdcall - #define CL_CALLBACK __stdcall + #if !defined(CL_API_ENTRY) + #define CL_API_ENTRY + #endif + #if !defined(CL_API_CALL) + #define CL_API_CALL __stdcall + #endif + #if !defined(CL_CALLBACK) + #define CL_CALLBACK __stdcall + #endif #else - #define CL_API_ENTRY - #define CL_API_CALL - #define CL_CALLBACK + #if !defined(CL_API_ENTRY) + #define CL_API_ENTRY + #endif + #if !defined(CL_API_CALL) + #define CL_API_CALL + #endif + #if !defined(CL_CALLBACK) + #define CL_CALLBACK + #endif #endif /* @@ -41,86 +53,99 @@ extern "C" { * deprecation but is deprecated in versions later than 1.1. */ -#define CL_EXTENSION_WEAK_LINK -#define CL_API_SUFFIX__VERSION_1_0 -#define CL_EXT_SUFFIX__VERSION_1_0 -#define CL_API_SUFFIX__VERSION_1_1 -#define CL_EXT_SUFFIX__VERSION_1_1 -#define CL_API_SUFFIX__VERSION_1_2 -#define CL_EXT_SUFFIX__VERSION_1_2 -#define CL_API_SUFFIX__VERSION_2_0 -#define CL_EXT_SUFFIX__VERSION_2_0 -#define CL_API_SUFFIX__VERSION_2_1 -#define CL_EXT_SUFFIX__VERSION_2_1 -#define CL_API_SUFFIX__VERSION_2_2 -#define CL_EXT_SUFFIX__VERSION_2_2 -#define CL_API_SUFFIX__VERSION_3_0 -#define CL_EXT_SUFFIX__VERSION_3_0 -#define CL_API_SUFFIX__EXPERIMENTAL -#define CL_EXT_SUFFIX__EXPERIMENTAL +#ifndef CL_API_SUFFIX_USER +#define CL_API_SUFFIX_USER +#endif + +#ifndef CL_API_PREFIX_USER +#define CL_API_PREFIX_USER +#endif + +#define CL_API_SUFFIX_COMMON CL_API_SUFFIX_USER +#define CL_API_PREFIX_COMMON CL_API_PREFIX_USER + +#define CL_API_SUFFIX__VERSION_1_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_1_1 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_1_2 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_1 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_2_2 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__VERSION_3_0 CL_API_SUFFIX_COMMON +#define CL_API_SUFFIX__EXPERIMENTAL CL_API_SUFFIX_COMMON #ifdef __GNUC__ - #define CL_EXT_SUFFIX_DEPRECATED __attribute__((deprecated)) - #define CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX_DEPRECATED __attribute__((deprecated)) + #define CL_API_PREFIX_DEPRECATED #elif defined(_WIN32) - #define CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX_DEPRECATED __declspec(deprecated) + #define CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX_DEPRECATED __declspec(deprecated) #else - #define CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS - #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS - #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS - #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_1_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_1_2_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS - #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_0_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_0_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS - #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_1_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_1_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #ifdef CL_USE_DEPRECATED_OPENCL_2_2_APIS - #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON + #define CL_API_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX_COMMON #else - #define CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED CL_EXT_SUFFIX_DEPRECATED - #define CL_EXT_PREFIX__VERSION_2_2_DEPRECATED CL_EXT_PREFIX_DEPRECATED + #define CL_API_SUFFIX__VERSION_2_2_DEPRECATED CL_API_SUFFIX_COMMON CL_API_SUFFIX_DEPRECATED + #define CL_API_PREFIX__VERSION_2_2_DEPRECATED CL_API_PREFIX_COMMON CL_API_PREFIX_DEPRECATED #endif #if (defined (_WIN32) && defined(_MSC_VER)) +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wlanguage-extension-token" +#endif + +/* intptr_t is used in cl.h and provided by stddef.h in Visual C++, but not in clang */ +/* stdint.h was missing before Visual Studio 2010, include it for later versions and for clang */ +#if defined(__clang__) || _MSC_VER >= 1600 + #include +#endif + /* scalar types */ typedef signed __int8 cl_char; typedef unsigned __int8 cl_uchar; @@ -135,6 +160,10 @@ typedef unsigned __int16 cl_half; typedef float cl_float; typedef double cl_double; +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + /* Macro names and corresponding values defined by OpenCL */ #define CL_CHAR_BIT 8 #define CL_SCHAR_MAX 127 @@ -481,25 +510,26 @@ typedef unsigned int cl_GLenum; #if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #define __CL_HAS_ANON_STRUCT__ 1 #define __CL_ANON_STRUCT__ -#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) +#elif defined(_WIN32) && defined(_MSC_VER) && !defined(__STDC__) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ +#elif defined(__GNUC__) && ! defined(__STRICT_ANSI__) +#define __CL_HAS_ANON_STRUCT__ 1 +#define __CL_ANON_STRUCT__ __extension__ +#elif defined(__clang__) #define __CL_HAS_ANON_STRUCT__ 1 #define __CL_ANON_STRUCT__ __extension__ -#elif defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__) - #if _MSC_VER >= 1500 - /* Microsoft Developer Studio 2008 supports anonymous structs, but - * complains by default. */ - #define __CL_HAS_ANON_STRUCT__ 1 - #define __CL_ANON_STRUCT__ - /* Disable warning C4201: nonstandard extension used : nameless - * struct/union */ - #pragma warning( push ) - #pragma warning( disable : 4201 ) - #endif #else #define __CL_HAS_ANON_STRUCT__ 0 #define __CL_ANON_STRUCT__ #endif +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ + /* Disable warning C4201: nonstandard extension used : nameless struct/union */ + #pragma warning( push ) + #pragma warning( disable : 4201 ) +#endif + /* Define alignment keys */ #if defined( __GNUC__ ) || defined(__INTEGRITY) #define CL_ALIGNED(_x) __attribute__ ((aligned(_x))) @@ -1375,10 +1405,8 @@ typedef union } #endif -#if defined( _WIN32) && defined(_MSC_VER) && ! defined(__STDC__) - #if _MSC_VER >=1500 +#if defined(_WIN32) && defined(_MSC_VER) && __CL_HAS_ANON_STRUCT__ #pragma warning( pop ) - #endif #endif #endif /* __CL_PLATFORM_H */ diff --git a/include/CL/cl_va_api_media_sharing_intel.h b/include/CL/cl_va_api_media_sharing_intel.h index 0e7cd4d6f..547e90e88 100644 --- a/include/CL/cl_va_api_media_sharing_intel.h +++ b/include/CL/cl_va_api_media_sharing_intel.h @@ -13,30 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ -/*****************************************************************************\ - -Copyright (c) 2013-2019 Intel Corporation All Rights Reserved. - -THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE -MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -File Name: cl_va_api_media_sharing_intel.h - -Abstract: - -Notes: - -\*****************************************************************************/ - #ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H #define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H @@ -49,6 +25,33 @@ File Name: cl_va_api_media_sharing_intel.h extern "C" { #endif +/*************************************************************** +* cl_intel_sharing_format_query_va_api +***************************************************************/ +#define cl_intel_sharing_format_query_va_api 1 + +/* when cl_intel_va_api_media_sharing is supported */ + +extern CL_API_ENTRY cl_int CL_API_CALL +clGetSupportedVA_APIMediaSurfaceFormatsINTEL( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + VAImageFormat* va_api_formats, + cl_uint* num_surface_formats) ; + +typedef cl_int (CL_API_CALL * +clGetSupportedVA_APIMediaSurfaceFormatsINTEL_fn)( + cl_context context, + cl_mem_flags flags, + cl_mem_object_type image_type, + cl_uint plane, + cl_uint num_entries, + VAImageFormat* va_api_formats, + cl_uint* num_surface_formats) ; + /****************************************** * cl_intel_va_api_media_sharing extension * *******************************************/ @@ -92,16 +95,16 @@ clGetDeviceIDsFromVA_APIMediaAdapterINTEL( cl_va_api_device_set_intel media_adapter_set, cl_uint num_entries, cl_device_id* devices, - cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2; + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)( +typedef cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)( cl_platform_id platform, cl_va_api_device_source_intel media_adapter_type, void* media_adapter, cl_va_api_device_set_intel media_adapter_set, cl_uint num_entries, cl_device_id* devices, - cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2; + cl_uint* num_devices) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_mem CL_API_CALL clCreateFromVA_APIMediaSurfaceINTEL( @@ -109,14 +112,14 @@ clCreateFromVA_APIMediaSurfaceINTEL( cl_mem_flags flags, VASurfaceID* surface, cl_uint plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)( +typedef cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)( cl_context context, cl_mem_flags flags, VASurfaceID* surface, cl_uint plane, - cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2; + cl_int* errcode_ret) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueAcquireVA_APIMediaSurfacesINTEL( @@ -125,15 +128,15 @@ clEnqueueAcquireVA_APIMediaSurfacesINTEL( const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event* event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)( +typedef cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event* event) CL_API_SUFFIX__VERSION_1_2; extern CL_API_ENTRY cl_int CL_API_CALL clEnqueueReleaseVA_APIMediaSurfacesINTEL( @@ -142,15 +145,15 @@ clEnqueueReleaseVA_APIMediaSurfacesINTEL( const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event* event) CL_API_SUFFIX__VERSION_1_2; -typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)( +typedef cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, - cl_event* event) CL_EXT_SUFFIX__VERSION_1_2; + cl_event* event) CL_API_SUFFIX__VERSION_1_2; #ifdef __cplusplus } diff --git a/include/CL/opencl.hpp b/include/CL/opencl.hpp index 834fc104b..3a953c8c8 100644 --- a/include/CL/opencl.hpp +++ b/include/CL/opencl.hpp @@ -165,10 +165,6 @@ * the cl::allocate_pointer functions are not defined and may be * defined by the user before opencl.hpp is included. * - * - CL_HPP_ENABLE_DEVICE_FISSION - * - * Enables device fission for OpenCL 1.2 platforms. - * * - CL_HPP_ENABLE_EXCEPTIONS * * Enable exceptions for use in the C++ bindings header. This is the @@ -194,10 +190,22 @@ * applies to use of cl::Program construction and other program * build variants. * + * - CL_HPP_USE_CL_DEVICE_FISSION + * + * Enable the cl_ext_device_fission extension. + * + * - CL_HPP_USE_CL_IMAGE2D_FROM_BUFFER_KHR + * + * Enable the cl_khr_image2d_from_buffer extension. + * * - CL_HPP_USE_CL_SUB_GROUPS_KHR * * Enable the cl_khr_subgroups extension. * + * - CL_HPP_USE_DX_INTEROP + * + * Enable the cl_khr_d3d10_sharing extension. + * * - CL_HPP_USE_IL_KHR * * Enable the cl_khr_il_program extension. @@ -209,6 +217,10 @@ * bindings, including support for the optional exception feature and * also the supplied vector and string classes, see following sections for * decriptions of these features. + * + * Note: the C++ bindings use std::call_once and therefore may need to be + * compiled using special command-line options (such as "-pthread") on some + * platforms! * * \code #define CL_HPP_ENABLE_EXCEPTIONS @@ -224,28 +236,30 @@ int main(void) { - // Filter for a 2.0 platform and set it as the default + // Filter for a 2.0 or newer platform and set it as the default std::vector platforms; cl::Platform::get(&platforms); cl::Platform plat; for (auto &p : platforms) { std::string platver = p.getInfo(); - if (platver.find("OpenCL 2.") != std::string::npos) { + if (platver.find("OpenCL 2.") != std::string::npos || + platver.find("OpenCL 3.") != std::string::npos) { + // Note: an OpenCL 3.x platform may not support all required features! plat = p; } } - if (plat() == 0) { - std::cout << "No OpenCL 2.0 platform found."; + if (plat() == 0) { + std::cout << "No OpenCL 2.0 or newer platform found.\n"; return -1; } cl::Platform newP = cl::Platform::setDefault(plat); if (newP != plat) { - std::cout << "Error setting default platform."; + std::cout << "Error setting default platform.\n"; return -1; } - // Use C++11 raw string literals for kernel source code + // C++11 raw string literal for the first kernel std::string kernel1{R"CLC( global int globalA; kernel void updateGlobal() @@ -253,6 +267,8 @@ globalA = 75; } )CLC"}; + + // Raw string literal for the second kernel std::string kernel2{R"CLC( typedef struct { global int *bar; } Foo; kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, @@ -279,8 +295,9 @@ } )CLC"}; - // New simpler string interface style - std::vector programStrings {kernel1, kernel2}; + std::vector programStrings; + programStrings.push_back(kernel1); + programStrings.push_back(kernel2); cl::Program vectorAddProgram(programStrings); try { @@ -319,10 +336,9 @@ std::vector>> inputA(numElements, 1, svmAlloc); cl::coarse_svm_vector inputB(numElements, 2, svmAlloc); - // ////////////// - // Traditional cl_mem allocations + std::vector output(numElements, 0xdeadbeef); cl::Buffer outputBuffer(begin(output), end(output), false); cl::Pipe aPipe(sizeof(cl_int), numElements / 2); @@ -346,14 +362,8 @@ // This one was not passed as a parameter vectorAddKernel.setSVMPointers(anSVMInt); - // Hand control of coarse allocations to runtime - cl::enqueueUnmapSVM(anSVMInt); - cl::enqueueUnmapSVM(fooPointer); - cl::unmapSVM(inputB); - cl::unmapSVM(output2); - - cl_int error; - vectorAddKernel( + cl_int error; + vectorAddKernel( cl::EnqueueArgs( cl::NDRange(numElements/2), cl::NDRange(numElements/2)), @@ -364,12 +374,10 @@ 3, aPipe, defaultDeviceQueue, - error + error ); cl::copy(outputBuffer, begin(output), end(output)); - // Grab the SVM output vector using a map - cl::mapSVM(output2); cl::Device d = cl::Device::getDefault(); @@ -548,19 +556,26 @@ // Define deprecated prefixes and suffixes to ensure compilation // in case they are not pre-defined -#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) -#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED) -#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) - -#if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) -#define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) -#if !defined(CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED) -#define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED -#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) +#if !defined(CL_API_PREFIX__VERSION_1_1_DEPRECATED) +#define CL_API_PREFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_API_PREFIX__VERSION_1_1_DEPRECATED) +#if !defined(CL_API_SUFFIX__VERSION_1_1_DEPRECATED) +#define CL_API_SUFFIX__VERSION_1_1_DEPRECATED +#endif // #if !defined(CL_API_SUFFIX__VERSION_1_1_DEPRECATED) + +#if !defined(CL_API_PREFIX__VERSION_1_2_DEPRECATED) +#define CL_API_PREFIX__VERSION_1_2_DEPRECATED +#endif // #if !defined(CL_API_PREFIX__VERSION_1_2_DEPRECATED) +#if !defined(CL_API_SUFFIX__VERSION_1_2_DEPRECATED) +#define CL_API_SUFFIX__VERSION_1_2_DEPRECATED +#endif // #if !defined(CL_API_SUFFIX__VERSION_1_2_DEPRECATED) + +#if !defined(CL_API_PREFIX__VERSION_2_2_DEPRECATED) +#define CL_API_PREFIX__VERSION_2_2_DEPRECATED +#endif // #if !defined(CL_API_PREFIX__VERSION_2_2_DEPRECATED) +#if !defined(CL_API_SUFFIX__VERSION_2_2_DEPRECATED) +#define CL_API_SUFFIX__VERSION_2_2_DEPRECATED +#endif // #if !defined(CL_API_SUFFIX__VERSION_2_2_DEPRECATED) #if !defined(CL_CALLBACK) #define CL_CALLBACK @@ -1317,14 +1332,20 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, string) \ F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ \ + F(cl_kernel_work_group_info, CL_KERNEL_GLOBAL_WORK_SIZE, cl::detail::size_t_array) \ + \ + F(cl_device_info, CL_DEVICE_LINKER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, size_type) \ F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl::Device) \ F(cl_device_info, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, cl_uint) \ F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, cl::vector) \ F(cl_device_info, CL_DEVICE_PARTITION_TYPE, cl::vector) \ F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, size_type) \ + F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, cl_bool) \ F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, string) \ + F(cl_device_info, CL_DEVICE_PRINTF_BUFFER_SIZE, size_type) \ \ F(cl_image_info, CL_IMAGE_ARRAY_SIZE, size_type) \ F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ @@ -1344,7 +1365,11 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_device_info, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, cl_uint) \ - F(cl_device_info, CL_DEVICE_SUB_GROUP_SIZES_INTEL, cl::vector) \ + F(cl_device_info, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, cl_uint ) \ + F(cl_device_info, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, size_type ) \ + F(cl_device_info, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, size_type ) \ F(cl_profiling_info, CL_PROFILING_COMMAND_COMPLETE, cl_ulong) \ F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, cl_bool) \ F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_PTRS, void**) \ @@ -1363,17 +1388,17 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_program_info, CL_PROGRAM_IL_KHR, cl::vector) #define CL_HPP_PARAM_NAME_INFO_2_1_(F) \ - F(cl_platform_info, CL_PLATFORM_HOST_TIMER_RESOLUTION, size_type) \ + F(cl_platform_info, CL_PLATFORM_HOST_TIMER_RESOLUTION, cl_ulong) \ F(cl_program_info, CL_PROGRAM_IL, cl::vector) \ - F(cl_kernel_info, CL_KERNEL_MAX_NUM_SUB_GROUPS, size_type) \ - F(cl_kernel_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) \ F(cl_device_info, CL_DEVICE_MAX_NUM_SUB_GROUPS, cl_uint) \ F(cl_device_info, CL_DEVICE_IL_VERSION, string) \ F(cl_device_info, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, cl_bool) \ F(cl_command_queue_info, CL_QUEUE_DEVICE_DEFAULT, cl::DeviceCommandQueue) \ F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, size_type) \ F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, size_type) \ - F(cl_kernel_sub_group_info, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, cl::detail::size_t_array) + F(cl_kernel_sub_group_info, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, cl::detail::size_t_array) \ + F(cl_kernel_sub_group_info, CL_KERNEL_MAX_NUM_SUB_GROUPS, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) #define CL_HPP_PARAM_NAME_INFO_2_2_(F) \ F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, cl_bool) \ @@ -1459,7 +1484,7 @@ CL_HPP_PARAM_NAME_INFO_3_0_(CL_HPP_DECLARE_PARAM_TRAITS_) CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // #if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) && CL_HPP_TARGET_OPENCL_VERSION < 210 -#if defined(CL_HPP_USE_IL_KHR) +#if defined(CL_HPP_USE_IL_KHR) && CL_HPP_TARGET_OPENCL_VERSION < 210 CL_HPP_PARAM_NAME_INFO_IL_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // #if defined(CL_HPP_USE_IL_KHR) @@ -1497,6 +1522,28 @@ CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_CL3_SHARED_(CL_HPP_DECLARE_PARAM_TR CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_KHRONLY_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_khr_extended_versioning +#if defined(cl_khr_device_uuid) +using uuid_array = array; +using luid_array = array; +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_UUID_KHR, uuid_array) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DRIVER_UUID_KHR, uuid_array) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LUID_VALID_KHR, cl_bool) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LUID_KHR, luid_array) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NODE_MASK_KHR, cl_uint) +#endif + +#if defined(cl_khr_pci_bus_info) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PCI_BUS_INFO_KHR, cl_device_pci_bus_info_khr) +#endif + +#if defined(cl_khr_integer_dot_product) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, cl_device_integer_dot_product_capabilities_khr) +#if defined(CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, cl_device_integer_dot_product_acceleration_properties_khr) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR, cl_device_integer_dot_product_acceleration_properties_khr) +#endif // defined(CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR) +#endif // defined(cl_khr_integer_dot_product) + #ifdef CL_PLATFORM_ICD_SUFFIX_KHR CL_HPP_DECLARE_PARAM_TRAITS_(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, string) #endif @@ -1504,7 +1551,6 @@ CL_HPP_DECLARE_PARAM_TRAITS_(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, strin #ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) #endif - #ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, vector) #endif @@ -1535,6 +1581,9 @@ CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUT #ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) #endif +#ifdef CL_DEVICE_BOARD_NAME_AMD +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_BOARD_NAME_AMD, string) +#endif #ifdef CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, cl_ulong) @@ -1545,6 +1594,9 @@ CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_JOB_SLOTS_ARM, cl_uint) #ifdef CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, cl_bitfield) #endif +#ifdef CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM, vector) +#endif #ifdef CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM, cl_uint) #endif @@ -1914,6 +1966,7 @@ class Wrapper retVal = true; #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + (void)device; return retVal; } @@ -2098,6 +2151,9 @@ struct ImageFormat : public cl_image_format image_channel_data_type = type; } + //! \brief Copy constructor. + ImageFormat(const ImageFormat &other) { *this = other; } + //! \brief Assignment operator. ImageFormat& operator = (const ImageFormat& rhs) { @@ -2351,7 +2407,7 @@ class Device : public detail::Wrapper const cl_device_partition_property_ext * /* properties */, cl_uint /*num_entries*/, cl_device_id * /*out_devices*/, - cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + cl_uint * /*num_devices*/ ) CL_API_SUFFIX__VERSION_1_1; static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateSubDevicesEXT); @@ -2761,8 +2817,8 @@ CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Platform::default_error_ = CL_SUCCESS; * Unload the OpenCL compiler. * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int -UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +inline CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int +UnloadCompiler() CL_API_SUFFIX__VERSION_1_1_DEPRECATED; inline cl_int UnloadCompiler() { @@ -2852,7 +2908,7 @@ class Context */ Context( const vector& devices, - cl_context_properties* properties = NULL, + const cl_context_properties* properties = NULL, void (CL_CALLBACK * notifyFptr)( const char *, const void *, @@ -2887,7 +2943,7 @@ class Context */ Context( const Device& device, - cl_context_properties* properties = NULL, + const cl_context_properties* properties = NULL, void (CL_CALLBACK * notifyFptr)( const char *, const void *, @@ -2917,7 +2973,7 @@ class Context */ Context( cl_device_type type, - cl_context_properties* properties = NULL, + const cl_context_properties* properties = NULL, void (CL_CALLBACK * notifyFptr)( const char *, const void *, @@ -3950,7 +4006,7 @@ class Buffer : public Memory Context context = Context::getDefault(err); if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); } else { object_ = ::clCreateBuffer(context(), flags, size, 0, &error); } @@ -4479,12 +4535,11 @@ class Image1D : public Image cl_int* err = NULL) { cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D, - width, - 0, 0, 0, 0, 0, 0, 0, 0 - }; + + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D; + desc.image_width = width; + object_ = ::clCreateImage( context(), flags, @@ -4567,13 +4622,12 @@ class Image1DBuffer : public Image cl_int* err = NULL) { cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_BUFFER, - width, - 0, 0, 0, 0, 0, 0, 0, - buffer() - }; + + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + desc.image_width = width; + desc.buffer = buffer(); + object_ = ::clCreateImage( context(), flags, @@ -4653,15 +4707,13 @@ class Image1DArray : public Image cl_int* err = NULL) { cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE1D_ARRAY, - width, - 0, 0, // height, depth (unused) - arraySize, - rowPitch, - 0, 0, 0, 0 - }; + + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + desc.image_width = width; + desc.image_array_size = arraySize; + desc.image_row_pitch = rowPitch; + object_ = ::clCreateImage( context(), flags, @@ -4768,15 +4820,12 @@ class Image2D : public Image #if CL_HPP_TARGET_OPENCL_VERSION >= 120 if (useCreateImage) { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - width, - height, - 0, 0, // depth, array size (unused) - row_pitch, - 0, 0, 0, 0 - }; + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + desc.image_row_pitch = row_pitch; + object_ = ::clCreateImage( context(), flags, @@ -4822,17 +4871,13 @@ class Image2D : public Image { cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - width, - height, - 0, 0, // depth, array size (unused) - row_pitch, - 0, 0, 0, - // Use buffer as input to image - sourceBuffer() - }; + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + desc.image_row_pitch = row_pitch; + desc.buffer = sourceBuffer(); + object_ = ::clCreateImage( context(), 0, // flags inherited from buffer @@ -4886,19 +4931,16 @@ class Image2D : public Image // Update only the channel order. // Channel format inherited from source. sourceFormat.image_channel_order = order; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D, - sourceWidth, - sourceHeight, - 0, 0, // depth (unused), array size (unused) - sourceRowPitch, - 0, // slice pitch (unused) - sourceNumMIPLevels, - sourceNumSamples, - // Use buffer as input to image - sourceImage() - }; + + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = sourceWidth; + desc.image_height = sourceHeight; + desc.image_row_pitch = sourceRowPitch; + desc.num_mip_levels = sourceNumMIPLevels; + desc.num_samples = sourceNumSamples; + desc.buffer = sourceImage(); + object_ = ::clCreateImage( context(), 0, // flags should be inherited from mem_object @@ -4978,7 +5020,7 @@ class Image2D : public Image * \see Memory * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D +class CL_API_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D { public: /*! \brief Constructs an Image2DGL in a specified context, from a given @@ -5061,7 +5103,7 @@ class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D return *this; } -} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +} CL_API_SUFFIX__VERSION_1_1_DEPRECATED; #endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS #if CL_HPP_TARGET_OPENCL_VERSION >= 120 @@ -5084,17 +5126,15 @@ class Image2DArray : public Image cl_int* err = NULL) { cl_int error; - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE2D_ARRAY, - width, - height, - 0, // depth (unused) - arraySize, - rowPitch, - slicePitch, - 0, 0, 0 - }; + + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + desc.image_width = width; + desc.image_height = height; + desc.image_array_size = arraySize; + desc.image_row_pitch = rowPitch; + desc.image_slice_pitch = slicePitch; + object_ = ::clCreateImage( context(), flags, @@ -5199,17 +5239,14 @@ class Image3D : public Image #if CL_HPP_TARGET_OPENCL_VERSION >= 120 if (useCreateImage) { - cl_image_desc desc = - { - CL_MEM_OBJECT_IMAGE3D, - width, - height, - depth, - 0, // array size (unused) - row_pitch, - slice_pitch, - 0, 0, 0 - }; + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE3D; + desc.image_width = width; + desc.image_height = height; + desc.image_depth = depth; + desc.image_row_pitch = row_pitch; + desc.image_slice_pitch = slice_pitch; + object_ = ::clCreateImage( context(), flags, @@ -6413,8 +6450,7 @@ class Program : public detail::Wrapper static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = NULL; CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); - return detail::errHandler( - pfn_clCreateProgramWithILKHR( + object_ = pfn_clCreateProgramWithILKHR( context(), static_cast(IL.data()), IL.size(), &error); #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 @@ -6467,8 +6503,7 @@ class Program : public detail::Wrapper static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = NULL; CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); - return detail::errHandler( - pfn_clCreateProgramWithILKHR( + object_ = pfn_clCreateProgramWithILKHR( context(), static_cast(IL.data()), IL.size(), &error); #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 @@ -6696,7 +6731,7 @@ class Program : public detail::Wrapper notifyFptr, data); - BuildLogType buildLog(1); + BuildLogType buildLog(0); buildLog.push_back(std::make_pair(device, getBuildInfo(device))); return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, buildLog); } @@ -6867,9 +6902,9 @@ class Program : public detail::Wrapper * on a callback stack associated with program. The registered user callback * functions are called in the reverse order in which they were registered. */ - CL_EXT_PREFIX__VERSION_2_2_DEPRECATED cl_int setReleaseCallback( + CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int setReleaseCallback( void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), - void * user_data = NULL) CL_EXT_SUFFIX__VERSION_2_2_DEPRECATED + void * user_data = NULL) CL_API_SUFFIX__VERSION_2_2_DEPRECATED { return detail::errHandler( ::clSetProgramReleaseCallback( @@ -7074,6 +7109,11 @@ inline QueueProperties operator|(QueueProperties lhs, QueueProperties rhs) return static_cast(static_cast(lhs) | static_cast(rhs)); } +inline QueueProperties operator&(QueueProperties lhs, QueueProperties rhs) +{ + return static_cast(static_cast(lhs) & static_cast(rhs)); +} + /*! \class CommandQueue * \brief CommandQueue interface for cl_command_queue. */ @@ -8574,10 +8614,10 @@ class CommandQueue : public detail::Wrapper } #if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) - CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask( + CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask( const Kernel& kernel, const vector* events = NULL, - Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED + Event* event = NULL) const CL_API_SUFFIX__VERSION_1_2_DEPRECATED { cl_event tmp; cl_int err = detail::errHandler( @@ -8634,8 +8674,8 @@ class CommandQueue : public detail::Wrapper * Deprecated APIs for 1.2 */ #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueMarker(Event* event = NULL) const CL_API_SUFFIX__VERSION_1_1_DEPRECATED { cl_event tmp; cl_int err = detail::errHandler( @@ -8650,8 +8690,8 @@ class CommandQueue : public detail::Wrapper return err; } - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueWaitForEvents(const vector& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueWaitForEvents(const vector& events) const CL_API_SUFFIX__VERSION_1_1_DEPRECATED { return detail::errHandler( ::clEnqueueWaitForEvents( @@ -8787,8 +8827,8 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( * Deprecated APIs for 1.2 */ #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueBarrier() const CL_API_SUFFIX__VERSION_1_1_DEPRECATED { return detail::errHandler( ::clEnqueueBarrier(object_), @@ -9134,7 +9174,7 @@ Buffer::Buffer( size_type size = sizeof(DataType)*(endIterator - startIterator); if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); } else { object_ = ::clCreateBuffer(context(), flags, size, 0, &error); } @@ -9187,7 +9227,7 @@ Buffer::Buffer( Context context = queue.getInfo(); if (useHostPtr) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); } else { object_ = ::clCreateBuffer(context(), flags, size, 0, &error); @@ -9309,7 +9349,7 @@ inline cl_int enqueueMapSVM( */ template inline cl_int enqueueMapSVM( - cl::pointer ptr, + cl::pointer &ptr, cl_bool blocking, cl_map_flags flags, size_type size, @@ -9333,7 +9373,7 @@ inline cl_int enqueueMapSVM( */ template inline cl_int enqueueMapSVM( - cl::vector container, + cl::vector &container, cl_bool blocking, cl_map_flags flags, const vector* events = NULL, From 905ff026bbf0985d5dc8969838b56e50b16f64e9 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 5 Apr 2023 14:45:01 +0300 Subject: [PATCH 2/8] opencl.hpp: make clGetDeviceInfo recognize Intel Subgroup Size --- include/CL/opencl.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/CL/opencl.hpp b/include/CL/opencl.hpp index 3a953c8c8..686aed66b 100644 --- a/include/CL/opencl.hpp +++ b/include/CL/opencl.hpp @@ -1398,7 +1398,8 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, size_type) \ F(cl_kernel_sub_group_info, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, cl::detail::size_t_array) \ F(cl_kernel_sub_group_info, CL_KERNEL_MAX_NUM_SUB_GROUPS, size_type) \ - F(cl_kernel_sub_group_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) + F(cl_kernel_sub_group_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) \ + F(cl_device_info, CL_DEVICE_SUB_GROUP_SIZES_INTEL, cl::vector) #define CL_HPP_PARAM_NAME_INFO_2_2_(F) \ F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, cl_bool) \ From cdc8282033b32fee7cb5c4ee0df19a04dbef0edb Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Tue, 9 May 2023 13:46:30 +0300 Subject: [PATCH 3/8] add include/CL/cl_ext_pocl.h (required for command buffer extensions) CL/opencl.h: include cl_ext_pocl.h but exclude cl_gl_ext.h (now just redirects to cl_ext.h) --- include/CL/cl_ext_pocl.h | 520 +++++++++++++++++++++++++++++++++++++++ include/CL/opencl.h | 2 +- 2 files changed, 521 insertions(+), 1 deletion(-) create mode 100644 include/CL/cl_ext_pocl.h diff --git a/include/CL/cl_ext_pocl.h b/include/CL/cl_ext_pocl.h new file mode 100644 index 000000000..68b2180d9 --- /dev/null +++ b/include/CL/cl_ext_pocl.h @@ -0,0 +1,520 @@ +/******************************************************************************* + * Copyright (c) 2021 Tampere University + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. + * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +#ifndef __CL_EXT_POCL_H +#define __CL_EXT_POCL_H + +#include +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + +/*********************************** +* cl_pocl_content_size extension * +************************************/ + +#define cl_pocl_content_size 1 + +extern CL_API_ENTRY cl_int CL_API_CALL +clSetContentSizeBufferPoCL( + cl_mem buffer, + cl_mem content_size_buffer) CL_API_SUFFIX__VERSION_1_2; + +typedef CL_API_ENTRY cl_int +(CL_API_CALL *clSetContentSizeBufferPoCL_fn)( + cl_mem buffer, + cl_mem content_size_buffer) CL_API_SUFFIX__VERSION_1_2; + + +/*********************************** +* cl_pocl_svm_rect + +* cl_pocl_command_buffer_svm + +* cl_pocl_command_buffer_host_exec + +* cl_pocl_command_buffer_host_buffer +* extensions +************************************/ + +// SVM copy/fill functions +#define cl_pocl_command_buffer_svm 1 + +// cl_mem & host related functions (clCommandReadBuffer etc) +#define cl_pocl_command_buffer_host_buffer 1 + +// clCommandHostFuncPOCL, clCommandWaitForEventPOCL, clCommandSignalEventPOCL +#define cl_pocl_command_buffer_host_exec 1 + +// clEnqueueSVMMemFillRectPOCL, clEnqueueSVMMemcpyRectPOCL +#define cl_pocl_svm_rect 1 + +/****************************************************/ + +/* cl_device_command_buffer_capabilities_khr - bitfield */ +#define CL_COMMAND_BUFFER_CAPABILITY_PROFILING_POCL (1 << 8) + +/* cl_command_buffer_flags_khr */ +#define CL_COMMAND_BUFFER_PROFILING_POCL (1 << 8) + +/* cl_command_buffer_info_khr */ +#define CL_COMMAND_BUFFER_INFO_PROFILING_POCL 0x1299 + +/* cl_command_type */ +/* To be used by clGetEventInfo: */ +/* TODO use values from an assigned range */ +#define CL_COMMAND_SVM_MEMCPY_RECT_POCL 0x1210 +#define CL_COMMAND_SVM_MEMFILL_RECT_POCL 0x1211 + + +typedef cl_int (CL_API_CALL * +clCommandSVMMemcpyPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *dst_ptr, + const void *src_ptr, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandSVMMemcpyRectPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *dst_ptr, + const void *src_ptr, + const size_t *dst_origin, + const size_t *src_origin, + const size_t *region, + size_t dst_row_pitch, + size_t dst_slice_pitch, + size_t src_row_pitch, + size_t src_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandSVMMemfillPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *svm_ptr, + size_t size, + const void *pattern, + size_t pattern_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + + +typedef cl_int (CL_API_CALL * +clCommandSVMMemfillRectPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *svm_ptr, + const size_t *origin, + const size_t *region, + size_t row_pitch, + size_t slice_pitch, + const void *pattern, + size_t pattern_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + + + +typedef void (*CmdBufferCallbackFn_t)(void* userData); + +typedef cl_int (CL_API_CALL * +clCommandHostFuncPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + CmdBufferCallbackFn_t callback_fn, + void* user_data, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandWaitForEventPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_event Event, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandSignalEventPOCL_fn)( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_event *Event, // output + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + + + +typedef cl_int (CL_API_CALL * +clCommandReadBufferPOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + size_t offset, + size_t size, + void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandReadBufferRectPOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandReadImagePOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const size_t * origin, /* [3] */ + const size_t * region, /* [3] */ + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandWriteBufferPOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + size_t offset, + size_t size, + const void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandWriteBufferRectPOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clCommandWriteImagePOCL_fn)(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const size_t * origin, /*[3]*/ + const size_t * region, /*[3]*/ + size_t row_pitch, + size_t slice_pitch, + const void * ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +typedef cl_int (CL_API_CALL * +clEnqueueSVMMemcpyRectPOCL_fn) (cl_command_queue command_queue, + cl_bool blocking, + void *dst_ptr, + const void *src_ptr, + const size_t *dst_origin, + const size_t *src_origin, + const size_t *region, + size_t dst_row_pitch, + size_t dst_slice_pitch, + size_t src_row_pitch, + size_t src_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event); + +typedef cl_int (CL_API_CALL * +clEnqueueSVMMemFillRectPOCL_fn) (cl_command_queue command_queue, + void * svm_ptr, + const size_t * origin, + const size_t * region, + size_t row_pitch, + size_t slice_pitch, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +#ifndef CL_NO_PROTOTYPES + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandSVMMemcpyPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *dst_ptr, + const void *src_ptr, + size_t size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandSVMMemcpyRectPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *dst_ptr, + const void *src_ptr, + const size_t *dst_origin, + const size_t *src_origin, + const size_t *region, + size_t dst_row_pitch, + size_t dst_slice_pitch, + size_t src_row_pitch, + size_t src_slice_pitch, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandSVMMemfillPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *svm_ptr, + size_t size, + const void *pattern, + size_t pattern_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandSVMMemfillRectPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + void *svm_ptr, + const size_t *origin, + const size_t *region, + size_t row_pitch, + size_t slice_pitch, + const void *pattern, + size_t pattern_size, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + + + + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandHostFuncPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + CmdBufferCallbackFn_t callback_fn, + void* user_data, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandWaitForEventPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_event Event, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandSignalEventPOCL( + cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_event *Event, // output + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandReadBufferPOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + size_t offset, + size_t size, + void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandReadBufferRectPOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandReadImagePOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const size_t * origin, /* [3] */ + const size_t * region, /* [3] */ + size_t row_pitch, + size_t slice_pitch, + void * ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandWriteBufferPOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + size_t offset, + size_t size, + const void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandWriteBufferRectPOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem buffer, + const size_t *buffer_origin, + const size_t *host_origin, + const size_t *region, + size_t buffer_row_pitch, + size_t buffer_slice_pitch, + size_t host_row_pitch, + size_t host_slice_pitch, + const void *ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clCommandWriteImagePOCL(cl_command_buffer_khr command_buffer, + cl_command_queue command_queue, + cl_mem image, + const size_t * origin, /*[3]*/ + const size_t * region, /*[3]*/ + size_t row_pitch, + size_t slice_pitch, + const void * ptr, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr* sync_point_wait_list, + cl_sync_point_khr* sync_point, + cl_mutable_command_khr* mutable_handle); + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemcpyRectPOCL (cl_command_queue command_queue, + cl_bool blocking, + void *dst_ptr, + const void *src_ptr, + const size_t *dst_origin, + const size_t *src_origin, + const size_t *region, + size_t dst_row_pitch, + size_t dst_slice_pitch, + size_t src_row_pitch, + size_t src_slice_pitch, + cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, + cl_event *event); + +extern CL_API_ENTRY cl_int CL_API_CALL +clEnqueueSVMMemFillRectPOCL (cl_command_queue command_queue, + void * svm_ptr, + const size_t * origin, + const size_t * region, + size_t row_pitch, + size_t slice_pitch, + const void * pattern, + size_t pattern_size, + size_t size, + cl_uint num_events_in_wait_list, + const cl_event * event_wait_list, + cl_event * event); + + +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __CL_EXT_POCL_H */ diff --git a/include/CL/opencl.h b/include/CL/opencl.h index 1c4e10c88..c636e0d4e 100644 --- a/include/CL/opencl.h +++ b/include/CL/opencl.h @@ -23,8 +23,8 @@ extern "C" { #include #include -#include #include +#include #ifdef __cplusplus } From 2104c36905be051eaa109eb0b4d0c546f58ad9e1 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Tue, 9 May 2023 14:27:40 +0300 Subject: [PATCH 4/8] src/CHIPBindings.cc: add missing error-checking for Graph API --- src/CHIPBindings.cc | 94 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 2 deletions(-) diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 9c50207f0..221e3b96b 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -280,6 +280,8 @@ static void handleAbortRequest(chipstar::Queue &Q, chipstar::Module &M) { hipError_t hipGraphCreate(hipGraph_t *pGraph, unsigned int flags) { CHIP_TRY + if (!pGraph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraph *Graph = new CHIPGraph(); *pGraph = Graph; @@ -289,6 +291,8 @@ hipError_t hipGraphCreate(hipGraph_t *pGraph, unsigned int flags) { hipError_t hipGraphDestroy(hipGraph_t graph) { CHIP_TRY + if (!graph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); delete graph; RETURN(hipSuccess); @@ -299,6 +303,10 @@ hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t *from, const hipGraphNode_t *to, size_t numDependencies) { CHIP_TRY + if (!graph) + RETURN(hipErrorInvalidHandle); + if (!from || !to) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); if (!FoundNode) @@ -314,6 +322,10 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, const hipGraphNode_t *to, size_t numDependencies) { CHIP_TRY + if (!graph) + RETURN(hipErrorInvalidHandle); + if (!from || !to) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); if (!FoundNode) @@ -327,7 +339,11 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t *from, hipGraphNode_t *to, size_t *numEdges) { CHIP_TRY + if (!graph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); + if (!from || !to || !numEdges) + RETURN(hipErrorInvalidHandle); auto Edges = GRAPH(graph)->getEdges(); if (!to && !from) { *numEdges = Edges.size(); @@ -348,6 +364,10 @@ hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t *from, hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t *nodes, size_t *numNodes) { CHIP_TRY + if (!graph) + RETURN(hipErrorInvalidHandle); + if (!nodes || !numNodes) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto Nodes = GRAPH(graph)->getNodes(); *nodes = *(Nodes.data()); @@ -359,6 +379,10 @@ hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t *nodes, hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t *pRootNodes, size_t *pNumRootNodes) { CHIP_TRY + if (!graph) + RETURN(hipErrorInvalidHandle); + if (!pRootNodes || !pNumRootNodes) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto Nodes = GRAPH(graph)->getRootNodes(); *pRootNodes = *(Nodes.data()); @@ -371,6 +395,10 @@ hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, hipGraphNode_t *pDependencies, size_t *pNumDependencies) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); + if (!pDependencies || !pNumDependencies) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto Deps = NODE(node)->getDependencies(); *pNumDependencies = Deps.size(); @@ -387,6 +415,8 @@ hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipGraphNode_t *pDependentNodes, size_t *pNumDependentNodes) { CHIP_TRY + if (!node || !pDependentNodes || !pNumDependentNodes) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto Deps = NODE(node)->getDependants(); *pNumDependentNodes = Deps.size(); @@ -401,6 +431,8 @@ hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType *pType) { CHIP_TRY + if (!pType || !node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); *pType = NODE(node)->getType(); RETURN(hipSuccess); @@ -410,6 +442,8 @@ hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType *pType) { hipError_t hipGraphDestroyNode(hipGraphNode_t node) { CHIP_TRY CHIPInitialize(); + if (!node) + RETURN(hipErrorInvalidHandle); /** * have to resort to these shenanigans to call the proper derived destructor */ @@ -455,6 +489,8 @@ hipError_t hipGraphDestroyNode(hipGraphNode_t node) { hipError_t hipGraphClone(hipGraph_t *pGraphClone, hipGraph_t originalGraph) { CHIP_TRY + if (!pGraphClone || !originalGraph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraph *CloneGraph = new CHIPGraph(*GRAPH(originalGraph)); *pGraphClone = CloneGraph; @@ -466,6 +502,8 @@ hipError_t hipGraphNodeFindInClone(hipGraphNode_t *pNode, hipGraphNode_t originalNode, hipGraph_t clonedGraph) { CHIP_TRY + if (!pNode || !originalNode || !clonedGraph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto Node = GRAPH(clonedGraph)->getClonedNodeFromOriginal(NODE(originalNode)); *pNode = Node; @@ -477,6 +515,8 @@ hipError_t hipGraphInstantiate(hipGraphExec_t *pGraphExec, hipGraph_t graph, hipGraphNode_t *pErrorNode, char *pLogBuffer, size_t bufferSize) { CHIP_TRY + if (!pGraphExec || !graph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphExec *GraphExec = new CHIPGraphExec(GRAPH(graph)); *pGraphExec = GraphExec; @@ -489,6 +529,8 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t *pGraphExec, hipGraph_t graph, unsigned long long flags) { CHIP_TRY + if (!pGraphExec || !graph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); // flags not yet defined in HIP API. UNIMPLEMENTED(hipErrorNotSupported); @@ -497,6 +539,8 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t *pGraphExec, hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { CHIP_TRY + if (!graphExec || !stream) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto ChipQueue = Backend->findQueue(static_cast(stream)); @@ -508,6 +552,8 @@ hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { hipError_t hipGraphExecDestroy(hipGraphExec_t graphExec) { CHIP_TRY + if (!graphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); delete graphExec; RETURN(hipSuccess); @@ -518,6 +564,8 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, hipGraphNode_t *hErrorNode_out, hipGraphExecUpdateResult *updateResult_out) { CHIP_TRY + if (!hGraphExec || !hGraph || !hErrorNode_out || !updateResult_out) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); // TODO Graphs - hipGraphExecUpdate /** @@ -631,6 +679,8 @@ hipError_t hipGraphAddKernelNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipKernelNodeParams *pNodeParams) { CHIP_TRY + if (!pGraphNode || !graph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNodeKernel *Node = new CHIPGraphNodeKernel{pNodeParams}; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -644,6 +694,8 @@ hipError_t hipGraphAddKernelNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipKernelNodeParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); *pNodeParams = ((CHIPGraphNodeKernel *)node)->getParams(); RETURN(hipSuccess); @@ -653,6 +705,8 @@ hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, const hipKernelNodeParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); ((CHIPGraphNodeKernel *)node)->setParams(*pNodeParams); RETURN(hipSuccess); @@ -663,6 +717,8 @@ hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipKernelNodeParams *pNodeParams) { CHIP_TRY + if (!hGraphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); // Graph obtained from hipGraphExec_t is a clone of the original CHIPGraph *Graph = EXEC(hGraphExec)->getOriginalGraphPtr(); @@ -687,7 +743,7 @@ hipError_t hipGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, // graphs test seems wrong - normally we expect hipErrorInvalidHandle // NULLCHECK(graph, pGraphNode, pCopyParams); if (!graph || !pGraphNode || !pCopyParams) - RETURN(hipErrorInvalidValue); + RETURN(hipErrorInvalidHandle); if (pDependencies == nullptr & numDependencies > 0) CHIPERR_LOG_AND_THROW( "numDependencies is not 0 while pDependencies is null", @@ -884,6 +940,8 @@ hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, size_t offset, hipMemcpyKind kind) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); static_cast(node)->setParams( const_cast(src), symbol, count, offset, kind); @@ -895,6 +953,8 @@ hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol( hipGraphExec_t hGraphExec, hipGraphNode_t node, const void *symbol, const void *src, size_t count, size_t offset, hipMemcpyKind kind) { CHIP_TRY + if (!node || !hGraphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); @@ -918,6 +978,8 @@ hipError_t hipGraphAddMemsetNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipMemsetParams *pMemsetParams) { CHIP_TRY + if (!graph || !pGraphNode) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNodeMemset *Node = new CHIPGraphNodeMemset(pMemsetParams); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -931,6 +993,8 @@ hipError_t hipGraphAddMemsetNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); hipMemsetParams Params = static_cast(node)->getParams(); @@ -942,6 +1006,8 @@ hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, const hipMemsetParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); static_cast(node)->setParams(pNodeParams); RETURN(hipSuccess); @@ -952,6 +1018,8 @@ hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipMemsetParams *pNodeParams) { CHIP_TRY + if (!node || !hGraphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); @@ -974,6 +1042,8 @@ hipError_t hipGraphAddHostNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipHostNodeParams *pNodeParams) { CHIP_TRY + if (!graph || !pGraphNode) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNodeHost *Node = new CHIPGraphNodeHost(pNodeParams); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -987,6 +1057,8 @@ hipError_t hipGraphAddHostNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipHostNodeParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); hipHostNodeParams Params = static_cast(node)->getParams(); @@ -998,6 +1070,8 @@ hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, const hipHostNodeParams *pNodeParams) { CHIP_TRY + if (!node) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); static_cast(node)->setParams(pNodeParams); RETURN(hipSuccess); @@ -1008,6 +1082,8 @@ hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipHostNodeParams *pNodeParams) { CHIP_TRY + if (!node || !hGraphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); @@ -1031,6 +1107,8 @@ hipError_t hipGraphAddChildGraphNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipGraph_t childGraph) { CHIP_TRY + if (!graph || !pGraphNode) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNodeGraph *Node = new CHIPGraphNodeGraph(GRAPH(childGraph)); *pGraphNode = Node; @@ -1044,6 +1122,8 @@ hipError_t hipGraphAddChildGraphNode(hipGraphNode_t *pGraphNode, hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t *pGraph) { CHIP_TRY + if (!node || !pGraph) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); *pGraph = static_cast(node)->getGraph(); RETURN(hipSuccess); @@ -1054,6 +1134,8 @@ hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipGraph_t childGraph) { CHIP_TRY + if (!node || !hGraphExec) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); static_cast(node)->setGraph(GRAPH(childGraph)); RETURN(hipSuccess); @@ -1064,6 +1146,8 @@ hipError_t hipGraphAddEmptyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t *pDependencies, size_t numDependencies) { CHIP_TRY + if (!graph || !pGraphNode) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNodeEmpty *Node = new CHIPGraphNodeEmpty(); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); @@ -1079,6 +1163,8 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipEvent_t event) { CHIP_TRY + if (!graph || !pGraphNode) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); CHIPGraphNodeEventRecord *Node = new CHIPGraphNodeEventRecord(static_cast(event)); @@ -1092,6 +1178,8 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t *pGraphNode, hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t *event_out) { CHIP_TRY + if (!node || !event_out) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto CastNode = static_cast(node); if (!CastNode) @@ -1105,6 +1193,8 @@ hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { CHIP_TRY + if (!node || !event) + RETURN(hipErrorInvalidHandle); CHIPInitialize(); auto CastNode = static_cast(node); if (!CastNode) @@ -2890,9 +2980,9 @@ hipError_t hipArrayCreate(hipArray **Array, hipError_t hipFreeArray(hipArray *Array) { CHIP_TRY CHIPInitialize(); + if (!Array) RETURN(hipErrorInvalidValue); - if (!Array->data) RETURN(hipErrorContextIsDestroyed); From 7be57941476a477e277515eec03d3d0c6b95e005 Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Tue, 9 May 2023 16:27:14 +0300 Subject: [PATCH 5/8] Implement chipstar::GraphNative MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This class is used to implement Graphs that execute "natively" in the backend, using OpenCL command-buffers or LevelZero command-lists and only synchronizing with the host when required. Fallback to original Graph is provided in chipstar::GraphExec::launch() Add more error-checking to the hipGraphXYZ APIs & bugfixes Additionally, samples/graph + samples/graphMatrixMultiply work using the "native graphs" (cl_command_buffer), not the original chip-spv's graph execution. Merged style fixes by Henry. Co-authored-by: Henry Linjamäki --- src/CHIPBackend.cc | 10 +- src/CHIPBackend.hh | 21 +- src/CHIPBindings.cc | 542 ++++++++++++++---------- src/CHIPGraph.cc | 258 ++++++----- src/CHIPGraph.hh | 396 ++++++++++------- src/backend/OpenCL/CHIPBackendOpenCL.cc | 492 ++++++++++++++++++++- src/backend/OpenCL/CHIPBackendOpenCL.hh | 103 ++++- src/common.hh | 17 +- 8 files changed, 1323 insertions(+), 516 deletions(-) diff --git a/src/CHIPBackend.cc b/src/CHIPBackend.cc index 4f14af90d..5165e41b5 100644 --- a/src/CHIPBackend.cc +++ b/src/CHIPBackend.cc @@ -1771,14 +1771,14 @@ void chipstar::Queue::memCopy3DAsync(void *Dst, size_t DPitch, size_t DSPitch, ::Backend->trackEvent(ChipEvent); } -void chipstar::Queue::updateLastNode(CHIPGraphNode *NewNode) { +void chipstar::Queue::updateLastNode(chipstar::GraphNode *NewNode) { if (LastNode_ != nullptr) { NewNode->addDependency(LastNode_); } LastNode_ = NewNode; } -void chipstar::Queue::initCaptureGraph() { CaptureGraph_ = new CHIPGraph(); } +void chipstar::Queue::initCaptureGraph() { CaptureGraph_ = new Graph(); } std::shared_ptr chipstar::Queue::RegisteredVarCopy(chipstar::ExecItem *ExecItem, @@ -1971,6 +1971,6 @@ void chipstar::Queue::addCallback(hipStreamCallback_t Callback, // return false; // } -CHIPGraph *chipstar::Queue::getCaptureGraph() const { - return static_cast(CaptureGraph_); -} \ No newline at end of file +chipstar::Graph *chipstar::Queue::getCaptureGraph() const { + return static_cast(CaptureGraph_); +} diff --git a/src/CHIPBackend.hh b/src/CHIPBackend.hh index 8567dc3f9..4cc90900e 100644 --- a/src/CHIPBackend.hh +++ b/src/CHIPBackend.hh @@ -73,8 +73,6 @@ static inline size_t getChannelByteSize(hipChannelFormatDesc Desc) { template std::string resultToString(T Err); -// class CHIPGraph; -// class CHIPGraphNode; #include "CHIPGraph.hh" /// Describes a memory region to copy from/to. @@ -88,6 +86,8 @@ class Device; class EventMonitor; class Texture; class Context; +class GraphNode; +class GraphNative; class RegionDesc { public: @@ -1188,6 +1188,10 @@ public: ExecItem(dim3 GirdDim, dim3 BlockDim, size_t SharedMem, hipStream_t ChipQueue); + void setupDims(dim3 GridDim, dim3 BlockDim) { + GridDim_ = GridDim; + BlockDim_ = BlockDim; + }; /** * @brief Set the chipstar::Kernel object * @@ -2043,7 +2047,7 @@ protected: std::mutex LastEventMtx; /// @brief node for creating a dependency chain between subsequent record /// events when in graph capture mode - CHIPGraphNode *LastNode_ = nullptr; + GraphNode *LastNode_ = nullptr; int Priority_; /** * @brief Maximum priority that can be had by a queue is 0; Priority range is @@ -2095,7 +2099,7 @@ public: return false; } - void updateLastNode(CHIPGraphNode *NewNode); + void updateLastNode(chipstar::GraphNode *NewNode); void initCaptureGraph(); hipStreamCaptureStatus getCaptureStatus() const { return CaptureStatus_; } @@ -2106,10 +2110,17 @@ public: void setCaptureMode(hipStreamCaptureMode CaptureMode) { CaptureMode_ = CaptureMode; } - CHIPGraph *getCaptureGraph() const; + Graph *getCaptureGraph() const; chipstar::Device *PerThreadQueueForDevice = nullptr; + virtual std::shared_ptr + enqueueNativeGraph(chipstar::GraphNative *NativeGraph) { + return nullptr; + } + virtual chipstar::GraphNative *createNativeGraph() { return nullptr; } + virtual void destroyNativeGraph(chipstar::GraphNative *) { return; } + // I want others to be able to lock this queue? std::mutex QueueMtx; diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 221e3b96b..7334d1f8a 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -57,19 +57,19 @@ #define SVM_ALIGNMENT 128 // TODO Pass as CMAKE Define? -#define GRAPH(x) static_cast(x) +#define GRAPH(x) static_cast(x) -#define NODE(x) static_cast(x) +#define NODE(x) static_cast(x) -#define EXEC(x) static_cast(x) +#define EXEC(x) static_cast(x) -#define NODES(x) reinterpret_cast(x) +#define NODES(x) reinterpret_cast(x) #define DECONST_NODE(x) \ - static_cast(const_cast(x)) + static_cast(const_cast(x)) #define DECONST_NODES(x) \ - reinterpret_cast(const_cast(x)) + reinterpret_cast(const_cast(x)) hipError_t hipFreeArray(hipArray *Array); @@ -281,9 +281,9 @@ static void handleAbortRequest(chipstar::Queue &Q, chipstar::Module &M) { hipError_t hipGraphCreate(hipGraph_t *pGraph, unsigned int flags) { CHIP_TRY if (!pGraph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); - CHIPGraph *Graph = new CHIPGraph(); + chipstar::Graph *Graph = new chipstar::Graph(); *pGraph = Graph; RETURN(hipSuccess); CHIP_CATCH @@ -292,7 +292,7 @@ hipError_t hipGraphCreate(hipGraph_t *pGraph, unsigned int flags) { hipError_t hipGraphDestroy(hipGraph_t graph) { CHIP_TRY if (!graph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); delete graph; RETURN(hipSuccess); @@ -303,12 +303,10 @@ hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t *from, const hipGraphNode_t *to, size_t numDependencies) { CHIP_TRY - if (!graph) - RETURN(hipErrorInvalidHandle); - if (!from || !to) - RETURN(hipErrorInvalidHandle); + if (!graph || !from || !to) + RETURN(hipErrorInvalidValue); CHIPInitialize(); - CHIPGraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); + chipstar::GraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); if (!FoundNode) RETURN(hipErrorInvalidValue); @@ -323,11 +321,11 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, size_t numDependencies) { CHIP_TRY if (!graph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); if (!from || !to) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); - CHIPGraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); + chipstar::GraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); if (!FoundNode) RETURN(hipErrorInvalidValue); @@ -340,17 +338,17 @@ hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t *from, hipGraphNode_t *to, size_t *numEdges) { CHIP_TRY if (!graph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); if (!from || !to || !numEdges) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); auto Edges = GRAPH(graph)->getEdges(); if (!to && !from) { *numEdges = Edges.size(); RETURN(hipSuccess); } - for (int i = 0; i < Edges.size(); i++) { + for (size_t i = 0; i < Edges.size(); i++) { auto Edge = Edges[i]; auto FromNode = Edge.first; auto ToNode = Edge.second; @@ -364,14 +362,18 @@ hipError_t hipGraphGetEdges(hipGraph_t graph, hipGraphNode_t *from, hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t *nodes, size_t *numNodes) { CHIP_TRY - if (!graph) - RETURN(hipErrorInvalidHandle); - if (!nodes || !numNodes) - RETURN(hipErrorInvalidHandle); + if (!graph || !numNodes) + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto Nodes = GRAPH(graph)->getNodes(); - *nodes = *(Nodes.data()); - *numNodes = GRAPH(graph)->getNodes().size(); + if (nodes) { + if (*numNodes > Nodes.size()) + RETURN(hipErrorInvalidValue); + size_t ToCopy = numNodes ? *numNodes : Nodes.size(); + memcpy(nodes, Nodes.data(), ToCopy * sizeof(chipstar::GraphNode *)); + } else { + *numNodes = Nodes.size(); + } RETURN(hipSuccess); CHIP_CATCH } @@ -379,14 +381,19 @@ hipError_t hipGraphGetNodes(hipGraph_t graph, hipGraphNode_t *nodes, hipError_t hipGraphGetRootNodes(hipGraph_t graph, hipGraphNode_t *pRootNodes, size_t *pNumRootNodes) { CHIP_TRY - if (!graph) - RETURN(hipErrorInvalidHandle); - if (!pRootNodes || !pNumRootNodes) - RETURN(hipErrorInvalidHandle); + if (!graph || !pNumRootNodes) + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto Nodes = GRAPH(graph)->getRootNodes(); - *pRootNodes = *(Nodes.data()); - *pNumRootNodes = GRAPH(graph)->getNodes().size(); + if (pRootNodes) { + if (pNumRootNodes && (*pNumRootNodes > Nodes.size())) + RETURN(hipErrorInvalidValue); + size_t ToCopy = pNumRootNodes ? *pNumRootNodes : Nodes.size(); + memcpy(pRootNodes, Nodes.data(), ToCopy * sizeof(chipstar::GraphNode *)); + } else + // numNodes && pRootNodes == nullptr + *pNumRootNodes = Nodes.size(); + RETURN(hipSuccess); CHIP_CATCH } @@ -396,15 +403,15 @@ hipError_t hipGraphNodeGetDependencies(hipGraphNode_t node, size_t *pNumDependencies) { CHIP_TRY if (!node) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); if (!pDependencies || !pNumDependencies) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto Deps = NODE(node)->getDependencies(); *pNumDependencies = Deps.size(); if (!pDependencies) RETURN(hipSuccess); - for (int i = 0; i < Deps.size(); i++) { + for (size_t i = 0; i < Deps.size(); i++) { pDependencies[i] = Deps[i]; } RETURN(hipSuccess); @@ -416,13 +423,13 @@ hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, size_t *pNumDependentNodes) { CHIP_TRY if (!node || !pDependentNodes || !pNumDependentNodes) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto Deps = NODE(node)->getDependants(); *pNumDependentNodes = Deps.size(); if (!pDependentNodes) RETURN(hipSuccess); - for (int i = 0; i < Deps.size(); i++) { + for (size_t i = 0; i < Deps.size(); i++) { pDependentNodes[i] = Deps[i]; } RETURN(hipSuccess); @@ -432,7 +439,7 @@ hipError_t hipGraphNodeGetDependentNodes(hipGraphNode_t node, hipError_t hipGraphNodeGetType(hipGraphNode_t node, hipGraphNodeType *pType) { CHIP_TRY if (!pType || !node) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); *pType = NODE(node)->getType(); RETURN(hipSuccess); @@ -443,41 +450,41 @@ hipError_t hipGraphDestroyNode(hipGraphNode_t node) { CHIP_TRY CHIPInitialize(); if (!node) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); /** * have to resort to these shenanigans to call the proper derived destructor */ auto NodeType = NODE(node)->getType(); switch (NodeType) { case hipGraphNodeTypeKernel: - delete static_cast(node); + delete static_cast(node); break; case hipGraphNodeTypeMemcpy: - delete static_cast(node); + delete static_cast(node); break; case hipGraphNodeTypeMemset: - delete static_cast(node); + delete static_cast(node); break; case hipGraphNodeTypeHost: - delete static_cast(node); + delete static_cast(node); break; case hipGraphNodeTypeGraph: - delete static_cast(node); + delete static_cast(node); break; case hipGraphNodeTypeEmpty: - delete static_cast(node); + delete static_cast(node); break; case hipGraphNodeTypeWaitEvent: - delete static_cast(node); + delete static_cast(node); break; case hipGraphNodeTypeEventRecord: - delete static_cast(node); + delete static_cast(node); break; case hipGraphNodeTypeMemcpyFromSymbol: - delete static_cast(node); + delete static_cast(node); break; case hipGraphNodeTypeMemcpyToSymbol: - delete static_cast(node); + delete static_cast(node); break; default: CHIPERR_LOG_AND_THROW("Unknown graph node type", hipErrorTbd); @@ -490,9 +497,9 @@ hipError_t hipGraphDestroyNode(hipGraphNode_t node) { hipError_t hipGraphClone(hipGraph_t *pGraphClone, hipGraph_t originalGraph) { CHIP_TRY if (!pGraphClone || !originalGraph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); - CHIPGraph *CloneGraph = new CHIPGraph(*GRAPH(originalGraph)); + chipstar::Graph *CloneGraph = new chipstar::Graph(*GRAPH(originalGraph)); *pGraphClone = CloneGraph; RETURN(hipSuccess); CHIP_CATCH @@ -503,7 +510,7 @@ hipError_t hipGraphNodeFindInClone(hipGraphNode_t *pNode, hipGraph_t clonedGraph) { CHIP_TRY if (!pNode || !originalNode || !clonedGraph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto Node = GRAPH(clonedGraph)->getClonedNodeFromOriginal(NODE(originalNode)); *pNode = Node; @@ -516,9 +523,9 @@ hipError_t hipGraphInstantiate(hipGraphExec_t *pGraphExec, hipGraph_t graph, size_t bufferSize) { CHIP_TRY if (!pGraphExec || !graph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); - CHIPGraphExec *GraphExec = new CHIPGraphExec(GRAPH(graph)); + chipstar::GraphExec *GraphExec = new chipstar::GraphExec(GRAPH(graph)); *pGraphExec = GraphExec; RETURN(hipSuccess); @@ -530,7 +537,7 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t *pGraphExec, unsigned long long flags) { CHIP_TRY if (!pGraphExec || !graph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); // flags not yet defined in HIP API. UNIMPLEMENTED(hipErrorNotSupported); @@ -539,8 +546,8 @@ hipError_t hipGraphInstantiateWithFlags(hipGraphExec_t *pGraphExec, hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { CHIP_TRY - if (!graphExec || !stream) - RETURN(hipErrorInvalidHandle); + if (!graphExec) + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto ChipQueue = Backend->findQueue(static_cast(stream)); @@ -553,7 +560,7 @@ hipError_t hipGraphLaunch(hipGraphExec_t graphExec, hipStream_t stream) { hipError_t hipGraphExecDestroy(hipGraphExec_t graphExec) { CHIP_TRY if (!graphExec) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); delete graphExec; RETURN(hipSuccess); @@ -565,7 +572,7 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, hipGraphExecUpdateResult *updateResult_out) { CHIP_TRY if (!hGraphExec || !hGraph || !hErrorNode_out || !updateResult_out) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); // TODO Graphs - hipGraphExecUpdate /** @@ -660,8 +667,8 @@ hipError_t hipGraphExecUpdate(hipGraphExec_t hGraphExec, hipGraph_t hGraph, // 4. if (Node->getType() == hipGraphNodeType::hipGraphNodeTypeKernel && NodeFound->getType() == hipGraphNodeType::hipGraphNodeTypeKernel) { - auto NodeCast = static_cast(Node); - auto NodeFoundCast = static_cast(NodeFound); + auto NodeCast = static_cast(Node); + auto NodeFoundCast = static_cast(NodeFound); if (NodeCast->getParams().func != NodeFoundCast->getParams().func) { *updateResult_out = hipGraphExecUpdateErrorFunctionChanged; *hErrorNode_out = Node; @@ -679,10 +686,11 @@ hipError_t hipGraphAddKernelNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipKernelNodeParams *pNodeParams) { CHIP_TRY - if (!pGraphNode || !graph) - RETURN(hipErrorInvalidHandle); + if (!pGraphNode || !graph || !pNodeParams || pNodeParams->func == nullptr || + pNodeParams->kernelParams == nullptr) + RETURN(hipErrorInvalidValue); CHIPInitialize(); - CHIPGraphNodeKernel *Node = new CHIPGraphNodeKernel{pNodeParams}; + chipstar::GraphNodeKernel *Node = new chipstar::GraphNodeKernel{pNodeParams}; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); *pGraphNode = Node; GRAPH(graph)->addNode(Node); @@ -694,10 +702,13 @@ hipError_t hipGraphAddKernelNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipKernelNodeParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - *pNodeParams = ((CHIPGraphNodeKernel *)node)->getParams(); + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeKernel) + CHIPERR_LOG_AND_THROW("Node is not Kernel", hipErrorInvalidValue); + *pNodeParams = CastNode->getParams(); RETURN(hipSuccess); CHIP_CATCH } @@ -705,10 +716,13 @@ hipError_t hipGraphKernelNodeGetParams(hipGraphNode_t node, hipError_t hipGraphKernelNodeSetParams(hipGraphNode_t node, const hipKernelNodeParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - ((CHIPGraphNodeKernel *)node)->setParams(*pNodeParams); + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeKernel) + CHIPERR_LOG_AND_THROW("Node is not Kernel", hipErrorInvalidValue); + CastNode->setParams(*pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -717,16 +731,18 @@ hipError_t hipGraphExecKernelNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipKernelNodeParams *pNodeParams) { CHIP_TRY - if (!hGraphExec) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); + if (!hGraphExec || !node || !pNodeParams) + RETURN(hipErrorInvalidValue); // Graph obtained from hipGraphExec_t is a clone of the original - CHIPGraph *Graph = EXEC(hGraphExec)->getOriginalGraphPtr(); + chipstar::Graph *Graph = EXEC(hGraphExec)->getOriginalGraphPtr(); // KernelNode here is a handle to the original - CHIPGraphNodeKernel *ExecKernelNode = static_cast( - GRAPH(Graph)->getClonedNodeFromOriginal(NODE(node))); - assert(ExecKernelNode); + chipstar::GraphNodeKernel *ExecKernelNode = + static_cast( + GRAPH(Graph)->getClonedNodeFromOriginal(NODE(node))); + if (ExecKernelNode->getType() != hipGraphNodeTypeKernel) + CHIPERR_LOG_AND_THROW("Node is not Kernel", hipErrorInvalidValue); ExecKernelNode->setParams(*pNodeParams); RETURN(hipSuccess); @@ -740,14 +756,8 @@ hipError_t hipGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, CHIP_TRY CHIPInitialize(); - // graphs test seems wrong - normally we expect hipErrorInvalidHandle - // NULLCHECK(graph, pGraphNode, pCopyParams); if (!graph || !pGraphNode || !pCopyParams) - RETURN(hipErrorInvalidHandle); - if (pDependencies == nullptr & numDependencies > 0) - CHIPERR_LOG_AND_THROW( - "numDependencies is not 0 while pDependencies is null", - hipErrorInvalidValue); + RETURN(hipErrorInvalidValue); if (!pCopyParams->srcArray && !pCopyParams->srcPtr.ptr) CHIPERR_LOG_AND_THROW("all src are null", hipErrorInvalidValue); @@ -763,7 +773,7 @@ hipError_t hipGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, "Passing different element size for hipMemcpy3DParms::srcArray and " "hipMemcpy3DParms::dstArray", hipErrorInvalidValue); - CHIPGraphNodeMemcpy *Node = new CHIPGraphNodeMemcpy(pCopyParams); + chipstar::GraphNodeMemcpy *Node = new chipstar::GraphNodeMemcpy(pCopyParams); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); *pGraphNode = Node; GRAPH(graph)->addNode(Node); @@ -776,9 +786,14 @@ hipError_t hipGraphMemcpyNodeGetParams(hipGraphNode_t node, hipMemcpy3DParms *pNodeParams) { CHIP_TRY CHIPInitialize(); - hipMemcpy3DParms Params = - static_cast(node)->getParams(); - pNodeParams = &Params; + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemcpy) + CHIPERR_LOG_AND_THROW("Node is not Memcpy", hipErrorInvalidValue); + + *pNodeParams = CastNode->getParams(); + RETURN(hipSuccess); CHIP_CATCH } @@ -787,7 +802,14 @@ hipError_t hipGraphMemcpyNodeSetParams(hipGraphNode_t node, const hipMemcpy3DParms *pNodeParams) { CHIP_TRY CHIPInitialize(); - static_cast(node)->setParams(pNodeParams); + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemcpy) + CHIPERR_LOG_AND_THROW("Node is not Memcpy", hipErrorInvalidValue); + + CastNode->setParams(pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -797,16 +819,18 @@ hipError_t hipGraphExecMemcpyNodeSetParams(hipGraphExec_t hGraphExec, hipMemcpy3DParms *pNodeParams) { CHIP_TRY CHIPInitialize(); + if (!hGraphExec || !node || !pNodeParams) + RETURN(hipErrorInvalidValue); + auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemcpy", - hipErrorInvalidValue); + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemcpy) + CHIPERR_LOG_AND_THROW("Node is not Memcpy", hipErrorInvalidValue); CastNode->setParams(const_cast(pNodeParams)); RETURN(hipSuccess); @@ -820,7 +844,11 @@ hipError_t hipGraphAddMemcpyNode1D(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeMemcpy *Node = new CHIPGraphNodeMemcpy(dst, src, count, kind); + if (!graph || !pGraphNode || !dst || !src) + RETURN(hipErrorInvalidValue); + + chipstar::GraphNodeMemcpy *Node = + new chipstar::GraphNodeMemcpy(dst, src, count, kind); *pGraphNode = Node; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); GRAPH(graph)->addNode(Node); @@ -834,10 +862,12 @@ hipError_t hipGraphMemcpyNodeSetParams1D(hipGraphNode_t node, void *dst, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemcpy", - hipErrorInvalidValue); + if (!node || !dst || !src || !count) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemcpy) + CHIPERR_LOG_AND_THROW("Node is not Memcpy", hipErrorInvalidValue); CastNode->setParams(dst, src, count, kind); RETURN(hipSuccess); @@ -850,16 +880,18 @@ hipError_t hipGraphExecMemcpyNodeSetParams1D(hipGraphExec_t hGraphExec, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); + if (!hGraphExec || !node) + RETURN(hipErrorInvalidValue); + auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemcpy", - hipErrorInvalidValue); + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemcpy) + CHIPERR_LOG_AND_THROW("Node is not Memcpy", hipErrorInvalidValue); CastNode->setParams(dst, src, count, kind); RETURN(hipSuccess); @@ -874,8 +906,8 @@ hipError_t hipGraphAddMemcpyNodeFromSymbol(hipGraphNode_t *pGraphNode, size_t offset, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeMemcpyFromSymbol *Node = - new CHIPGraphNodeMemcpyFromSymbol(dst, symbol, count, offset, kind); + chipstar::GraphNodeMemcpyFromSymbol *Node = + new chipstar::GraphNodeMemcpyFromSymbol(dst, symbol, count, offset, kind); *pGraphNode = Node; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); GRAPH(graph)->addNode(Node); @@ -890,8 +922,16 @@ hipError_t hipGraphMemcpyNodeSetParamsFromSymbol(hipGraphNode_t node, void *dst, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - static_cast(node)->setParams( - dst, symbol, count, offset, kind); + if (!symbol) + RETURN(hipErrorInvalidSymbol); + if (!node || !dst || !count) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemcpyFromSymbol) + CHIPERR_LOG_AND_THROW("Node is not MemcpyFromSymbol", hipErrorInvalidValue); + + CastNode->setParams(dst, symbol, count, offset, kind); RETURN(hipSuccess); CHIP_CATCH } @@ -901,16 +941,24 @@ hipError_t hipGraphExecMemcpyNodeSetParamsFromSymbol( const void *symbol, size_t count, size_t offset, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - // Graph obtained from hipGraphExec_t is a clone of the original - CHIPGraph *Graph = EXEC(hGraphExec)->getOriginalGraphPtr(); - // KernelNode here is a handle to the original - CHIPGraphNodeMemcpyFromSymbol *KernelNode = - ((CHIPGraphNodeMemcpyFromSymbol *)node); - CHIPGraphNodeMemcpyFromSymbol *ExecKernelNode = - ((CHIPGraphNodeMemcpyFromSymbol *)GRAPH(Graph)->getClonedNodeFromOriginal( - KernelNode)); + if (!symbol) + RETURN(hipErrorInvalidSymbol); + if (!node || !hGraphExec) + RETURN(hipErrorInvalidValue); + + auto ExecNode = + EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); + if (!ExecNode) + CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", + hipErrorInvalidValue); - ExecKernelNode->setParams(dst, symbol, count, offset, kind); + auto CastNode = static_cast(node); + if (!CastNode) + CHIPERR_LOG_AND_THROW( + "Node provided failed to cast to chipstar::GraphNodeMemcpyFromSymbol", + hipErrorInvalidValue); + + CastNode->setParams(dst, symbol, count, offset, kind); RETURN(hipSuccess); CHIP_CATCH } @@ -924,8 +972,9 @@ hipError_t hipGraphAddMemcpyNodeToSymbol(hipGraphNode_t *pGraphNode, hipMemcpyKind kind) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeMemcpyToSymbol *Node = new CHIPGraphNodeMemcpyToSymbol( - const_cast(src), symbol, count, offset, kind); + chipstar::GraphNodeMemcpyToSymbol *Node = + new chipstar::GraphNodeMemcpyToSymbol(const_cast(src), symbol, + count, offset, kind); *pGraphNode = Node; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); GRAPH(graph)->addNode(Node); @@ -940,11 +989,17 @@ hipError_t hipGraphMemcpyNodeSetParamsToSymbol(hipGraphNode_t node, size_t offset, hipMemcpyKind kind) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); + if (!symbol) + RETURN(hipErrorInvalidSymbol); + if (!node || !src || !count) + RETURN(hipErrorInvalidValue); CHIPInitialize(); - static_cast(node)->setParams( - const_cast(src), symbol, count, offset, kind); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemcpyToSymbol) + CHIPERR_LOG_AND_THROW("Node is not MemcpyToSymbol", hipErrorInvalidValue); + + CastNode->setParams(const_cast(src), symbol, count, offset, kind); RETURN(hipSuccess); CHIP_CATCH } @@ -953,19 +1008,22 @@ hipError_t hipGraphExecMemcpyNodeSetParamsToSymbol( hipGraphExec_t hGraphExec, hipGraphNode_t node, const void *symbol, const void *src, size_t count, size_t offset, hipMemcpyKind kind) { CHIP_TRY - if (!node || !hGraphExec) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); + if (!symbol) + RETURN(hipErrorInvalidSymbol); + if (!node || !hGraphExec) + RETURN(hipErrorInvalidValue); + auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); - auto CastNode = static_cast(node); + auto CastNode = static_cast(node); if (!CastNode) CHIPERR_LOG_AND_THROW( - "Node provided failed to cast to CHIPGraphNodeMemcpyToSymbol", + "Node provided failed to cast to chipstar::GraphNodeMemcpyToSymbol", hipErrorInvalidValue); CastNode->setParams(const_cast(src), symbol, count, offset, kind); @@ -978,10 +1036,15 @@ hipError_t hipGraphAddMemsetNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipMemsetParams *pMemsetParams) { CHIP_TRY - if (!graph || !pGraphNode) - RETURN(hipErrorInvalidHandle); + if (!graph || !pGraphNode || !pMemsetParams || + pMemsetParams->dst == nullptr || pMemsetParams->height == 0) + RETURN(hipErrorInvalidValue); + if (pMemsetParams->elementSize != 1 && pMemsetParams->elementSize != 2 && + pMemsetParams->elementSize != 4) + RETURN(hipErrorInvalidValue); CHIPInitialize(); - CHIPGraphNodeMemset *Node = new CHIPGraphNodeMemset(pMemsetParams); + chipstar::GraphNodeMemset *Node = + new chipstar::GraphNodeMemset(pMemsetParams); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); GRAPH(graph)->addNode(Node); *pGraphNode = Node; @@ -993,12 +1056,15 @@ hipError_t hipGraphAddMemsetNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipMemsetParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - hipMemsetParams Params = - static_cast(node)->getParams(); - *pNodeParams = Params; + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemset) + CHIPERR_LOG_AND_THROW("Node is not MemcpyFromSymbol", hipErrorInvalidValue); + + *pNodeParams = CastNode->getParams(); RETURN(hipSuccess); CHIP_CATCH } @@ -1006,10 +1072,15 @@ hipError_t hipGraphMemsetNodeGetParams(hipGraphNode_t node, hipError_t hipGraphMemsetNodeSetParams(hipGraphNode_t node, const hipMemsetParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - static_cast(node)->setParams(pNodeParams); + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemset) + CHIPERR_LOG_AND_THROW("Node is not MemcpyFromSymbol", hipErrorInvalidValue); + + CastNode->setParams(pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -1018,19 +1089,19 @@ hipError_t hipGraphExecMemsetNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, const hipMemsetParams *pNodeParams) { CHIP_TRY - if (!node || !hGraphExec) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); + if (!node || !hGraphExec) + RETURN(hipErrorInvalidValue); + auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemset", - hipErrorInvalidValue); + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeMemset) + CHIPERR_LOG_AND_THROW("Node is not MemcpyFromSymbol", hipErrorInvalidValue); CastNode->setParams(pNodeParams); RETURN(hipSuccess); @@ -1042,10 +1113,10 @@ hipError_t hipGraphAddHostNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies, const hipHostNodeParams *pNodeParams) { CHIP_TRY - if (!graph || !pGraphNode) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - CHIPGraphNodeHost *Node = new CHIPGraphNodeHost(pNodeParams); + if (!graph || !pGraphNode || !pNodeParams || pNodeParams->fn == nullptr) + RETURN(hipErrorInvalidValue); + chipstar::GraphNodeHost *Node = new chipstar::GraphNodeHost(pNodeParams); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); GRAPH(graph)->addNode(Node); *pGraphNode = Node; @@ -1057,12 +1128,15 @@ hipError_t hipGraphAddHostNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipHostNodeParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - hipHostNodeParams Params = - static_cast(node)->getParams(); - *pNodeParams = Params; + if (!node || !pNodeParams) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeHost) + CHIPERR_LOG_AND_THROW("NodeType is not Host", hipErrorInvalidValue); + + *pNodeParams = CastNode->getParams(); RETURN(hipSuccess); CHIP_CATCH } @@ -1070,10 +1144,15 @@ hipError_t hipGraphHostNodeGetParams(hipGraphNode_t node, hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, const hipHostNodeParams *pNodeParams) { CHIP_TRY - if (!node) - RETURN(hipErrorInvalidHandle); + if (!node || !pNodeParams || pNodeParams->fn == nullptr) + RETURN(hipErrorInvalidValue); CHIPInitialize(); - static_cast(node)->setParams(pNodeParams); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeHost) + CHIPERR_LOG_AND_THROW("NodeType is not Host", hipErrorInvalidValue); + + CastNode->setParams(pNodeParams); RETURN(hipSuccess); CHIP_CATCH } @@ -1083,7 +1162,7 @@ hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, const hipHostNodeParams *pNodeParams) { CHIP_TRY if (!node || !hGraphExec) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(node)); @@ -1091,10 +1170,9 @@ hipError_t hipGraphExecHostNodeSetParams(hipGraphExec_t hGraphExec, CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); - auto CastNode = static_cast(ExecNode); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Node provided failed to cast to CHIPGraphNodeMemset", - hipErrorInvalidValue); + auto CastNode = static_cast(ExecNode); + if (CastNode->getType() != hipGraphNodeTypeHost) + CHIPERR_LOG_AND_THROW("NodeType is not Host", hipErrorInvalidValue); CastNode->setParams(pNodeParams); RETURN(hipSuccess); @@ -1107,10 +1185,11 @@ hipError_t hipGraphAddChildGraphNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipGraph_t childGraph) { CHIP_TRY - if (!graph || !pGraphNode) - RETURN(hipErrorInvalidHandle); + if (!graph || !pGraphNode || !childGraph) + RETURN(hipErrorInvalidValue); CHIPInitialize(); - CHIPGraphNodeGraph *Node = new CHIPGraphNodeGraph(GRAPH(childGraph)); + chipstar::GraphNodeGraph *Node = + new chipstar::GraphNodeGraph(GRAPH(childGraph)); *pGraphNode = Node; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); GRAPH(graph)->addNode(Node); @@ -1123,9 +1202,14 @@ hipError_t hipGraphChildGraphNodeGetGraph(hipGraphNode_t node, hipGraph_t *pGraph) { CHIP_TRY if (!node || !pGraph) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); - *pGraph = static_cast(node)->getGraph(); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeGraph) + CHIPERR_LOG_AND_THROW("Node is not NodeTypeGraph", hipErrorInvalidValue); + + *pGraph = CastNode->getGraph(); RETURN(hipSuccess); CHIP_CATCH } @@ -1134,10 +1218,15 @@ hipError_t hipGraphExecChildGraphNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t node, hipGraph_t childGraph) { CHIP_TRY - if (!node || !hGraphExec) - RETURN(hipErrorInvalidHandle); + if (!node || !hGraphExec || !childGraph) + RETURN(hipErrorInvalidValue); CHIPInitialize(); - static_cast(node)->setGraph(GRAPH(childGraph)); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeGraph) + CHIPERR_LOG_AND_THROW("Node is not NodeTypeGraph", hipErrorInvalidValue); + + CastNode->setGraph(GRAPH(childGraph)); RETURN(hipSuccess); CHIP_CATCH } @@ -1147,9 +1236,9 @@ hipError_t hipGraphAddEmptyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, size_t numDependencies) { CHIP_TRY if (!graph || !pGraphNode) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); - CHIPGraphNodeEmpty *Node = new CHIPGraphNodeEmpty(); + chipstar::GraphNodeEmpty *Node = new chipstar::GraphNodeEmpty(); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); *pGraphNode = Node; GRAPH(graph)->addNode(Node); @@ -1163,11 +1252,11 @@ hipError_t hipGraphAddEventRecordNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipEvent_t event) { CHIP_TRY - if (!graph || !pGraphNode) - RETURN(hipErrorInvalidHandle); + if (!graph || !pGraphNode || !event) + RETURN(hipErrorInvalidValue); CHIPInitialize(); - CHIPGraphNodeEventRecord *Node = - new CHIPGraphNodeEventRecord(static_cast(event)); + chipstar::GraphNodeEventRecord *Node = + new chipstar::GraphNodeEventRecord(static_cast(event)); Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); *pGraphNode = Node; GRAPH(graph)->addNode(Node); @@ -1179,12 +1268,13 @@ hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipEvent_t *event_out) { CHIP_TRY if (!node || !event_out) - RETURN(hipErrorInvalidHandle); + RETURN(hipErrorInvalidValue); CHIPInitialize(); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Failed to cast CHIPGraphNodeEventRecord", - hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeEventRecord) + CHIPERR_LOG_AND_THROW("Node is not EventRecord", hipErrorInvalidValue); + *event_out = CastNode->getEvent(); RETURN(hipSuccess); CHIP_CATCH @@ -1193,13 +1283,14 @@ hipError_t hipGraphEventRecordNodeGetEvent(hipGraphNode_t node, hipError_t hipGraphEventRecordNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { CHIP_TRY - if (!node || !event) - RETURN(hipErrorInvalidHandle); CHIPInitialize(); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW("Failed to cast CHIPGraphNodeEventRecord", - hipErrorInvalidValue); + if (!node || !event) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeEventRecord) + CHIPERR_LOG_AND_THROW("Node is not EventRecord", hipErrorInvalidValue); + CastNode->setEvent(static_cast(event)); RETURN(hipSuccess); CHIP_CATCH @@ -1210,17 +1301,17 @@ hipError_t hipGraphExecEventRecordNodeSetEvent(hipGraphExec_t hGraphExec, hipEvent_t event) { CHIP_TRY CHIPInitialize(); + if (!hNode || !hGraphExec || !event) + RETURN(hipErrorInvalidValue); auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(hNode)); if (!ExecNode) CHIPERR_LOG_AND_THROW("Failed to find the node in hipGraphExec_t", hipErrorInvalidValue); - auto CastNode = static_cast(hNode); - if (!CastNode) - CHIPERR_LOG_AND_THROW( - "Node provided failed to cast to CHIPGraphNodeEventRecord", - hipErrorInvalidValue); + auto CastNode = static_cast(hNode); + if (CastNode->getType() != hipGraphNodeTypeEventRecord) + CHIPERR_LOG_AND_THROW("Node is not EventRecord", hipErrorInvalidValue); CastNode->setEvent(static_cast(event)); RETURN(hipSuccess); @@ -1233,8 +1324,12 @@ hipError_t hipGraphAddEventWaitNode(hipGraphNode_t *pGraphNode, size_t numDependencies, hipEvent_t event) { CHIP_TRY CHIPInitialize(); - CHIPGraphNodeWaitEvent *Node = - new CHIPGraphNodeWaitEvent(static_cast(event)); + if (!graph || !pGraphNode || !event) + RETURN(hipErrorInvalidValue); + + chipstar::GraphNodeWaitEvent *Node = + new chipstar::GraphNodeWaitEvent(static_cast(event)); + *pGraphNode = Node; Node->addDependencies(DECONST_NODES(pDependencies), numDependencies); GRAPH(graph)->addNode(Node); @@ -1247,11 +1342,12 @@ hipError_t hipGraphEventWaitNodeGetEvent(hipGraphNode_t node, hipEvent_t *event_out) { CHIP_TRY CHIPInitialize(); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW( - "Node provided failed to cast to CHIPGraphNodeWaitEvent", - hipErrorInvalidValue); + if (!node || !event_out) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeWaitEvent) + CHIPERR_LOG_AND_THROW("Node is not WaitEvent", hipErrorInvalidValue); *event_out = CastNode->getEvent(); RETURN(hipSuccess); @@ -1262,11 +1358,12 @@ hipError_t hipGraphEventWaitNodeSetEvent(hipGraphNode_t node, hipEvent_t event) { CHIP_TRY CHIPInitialize(); - auto CastNode = static_cast(node); - if (!CastNode) - CHIPERR_LOG_AND_THROW( - "Node provided failed to cast to CHIPGraphNodeWaitEvent", - hipErrorInvalidValue); + if (!node || !event) + RETURN(hipErrorInvalidValue); + + auto CastNode = static_cast(node); + if (CastNode->getType() != hipGraphNodeTypeWaitEvent) + CHIPERR_LOG_AND_THROW("Node is not WaitEvent", hipErrorInvalidValue); CastNode->setEvent(static_cast(event)); RETURN(hipSuccess); @@ -1278,6 +1375,9 @@ hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, hipEvent_t event) { CHIP_TRY CHIPInitialize(); + if (!hNode || !hGraphExec || !event) + RETURN(hipErrorInvalidValue); + auto ExecNode = EXEC(hGraphExec)->getOriginalGraphPtr()->nodeLookup(NODE(hNode)); if (!ExecNode) @@ -1285,11 +1385,9 @@ hipError_t hipGraphExecEventWaitNodeSetEvent(hipGraphExec_t hGraphExec, hipErrorInvalidValue); // TODO Grahs check all of these - somewhere using hNode instead of ExecNode - auto CastNode = static_cast(ExecNode); - if (!CastNode) - CHIPERR_LOG_AND_THROW( - "Node provided failed to cast to CHIPGraphNodeWaitEvent", - hipErrorInvalidValue); + auto CastNode = static_cast(ExecNode); + if (CastNode->getType() != hipGraphNodeTypeWaitEvent) + CHIPERR_LOG_AND_THROW("Node is not WaitEvent", hipErrorInvalidValue); CastNode->setEvent(static_cast(event)); RETURN(hipSuccess); @@ -1367,12 +1465,14 @@ hipError_t hipIpcOpenMemHandle(void **DevPtr, hipIpcMemHandle_t Handle, UNIMPLEMENTED(hipErrorNotSupported); CHIP_CATCH } + hipError_t hipIpcCloseMemHandle(void *DevPtr) { CHIP_TRY CHIPInitialize(); UNIMPLEMENTED(hipErrorNotSupported); CHIP_CATCH } + hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t *Handle, void *DevPtr) { CHIP_TRY CHIPInitialize(); @@ -1432,8 +1532,8 @@ static inline hipError_t hipMemcpyAsyncInternal(void *Dst, const void *Src, auto ChipQueue = Backend->findQueue(static_cast(Stream)); - if (ChipQueue->captureIntoGraph(Dst, Src, SizeBytes, - Kind)) { + if (ChipQueue->captureIntoGraph(Dst, Src, + SizeBytes, Kind)) { return hipSuccess; } @@ -1483,7 +1583,7 @@ hipMemcpy2DAsyncInternal(void *Dst, size_t DPitch, const void *Src, make_hipPitchedPtr(Dst, SPitch, Width, Height), /* struct hipExtent extent */ make_hipExtent(Width, Height, 1), /* enum hipMemcpyKind kind */ Kind}; - if (ChipQueue->captureIntoGraph(Params)) { + if (ChipQueue->captureIntoGraph(Params)) { return hipSuccess; } @@ -2240,7 +2340,7 @@ hipError_t hipStreamWaitEventInternal(hipStream_t Stream, hipEvent_t Event, auto ChipQueue = Backend->findQueue(static_cast(Stream)); - if (ChipQueue->captureIntoGraph(ChipEvent)) { + if (ChipQueue->captureIntoGraph(ChipEvent)) { return hipSuccess; } ERROR_IF((ChipQueue == nullptr), hipErrorInvalidResourceHandle); @@ -2404,7 +2504,7 @@ hipError_t hipEventRecordInternal(hipEvent_t Event, hipStream_t Stream) { auto ChipQueue = Backend->findQueue(static_cast(Stream)); - if (ChipQueue->captureIntoGraph(ChipEvent)) { + if (ChipQueue->captureIntoGraph(ChipEvent)) { return hipSuccess; } @@ -3142,7 +3242,7 @@ static inline hipError_t hipMemsetAsyncInternal(void *Dst, int Value, /* value */ (unsigned int)Value, /* TODO Graphs - why is the arg for memset unsigned? */ /* width */ SizeBytes}; - if (ChipQueue->captureIntoGraph(Params)) { + if (ChipQueue->captureIntoGraph(Params)) { return hipSuccess; } @@ -3195,7 +3295,7 @@ static inline hipError_t hipMemset2DAsyncInternal(void *Dst, size_t Pitch, /* value */ (unsigned int)Value, /* TODO Graphs - why is the arg for memset unsigned? */ /* width */ Width}; - if (ChipQueue->captureIntoGraph(Params)) { + if (ChipQueue->captureIntoGraph(Params)) { return hipSuccess; } @@ -3241,7 +3341,7 @@ static inline hipError_t hipMemset3DAsyncInternal(hipPitchedPtr PitchedDevPtr, /* value */ (unsigned int)Value, /* TODO Graphs - why is the arg for memset unsigned? */ /* width */ Extent.width}; - if (ChipQueue->captureIntoGraph(Params)) { + if (ChipQueue->captureIntoGraph(Params)) { return hipSuccess; } @@ -3372,7 +3472,7 @@ hipError_t hipMemsetD8Async(hipDeviceptr_t Dest, unsigned char Value, /* value */ (unsigned int)Value, /* TODO Graphs - why is the arg for memset unsigned? */ /* width */ Count}; - if (ChipQueue->captureIntoGraph(Params)) { + if (ChipQueue->captureIntoGraph(Params)) { RETURN(hipSuccess); } @@ -3409,7 +3509,7 @@ hipError_t hipMemsetD16Async(hipDeviceptr_t Dest, unsigned short Value, /* value */ (unsigned int)Value, /* TODO Graphs - why is the arg for memset unsigned? */ /* width */ 2 * Count}; - if (ChipQueue->captureIntoGraph(Params)) { + if (ChipQueue->captureIntoGraph(Params)) { RETURN(hipSuccess); } @@ -3449,7 +3549,7 @@ hipError_t hipMemsetD32Async(hipDeviceptr_t Dst, int Value, size_t Count, /* value */ (unsigned int)Value, /* TODO Graphs - why is the arg for memset unsigned? */ /* width */ 4 * Count}; - if (ChipQueue->captureIntoGraph(Params)) { + if (ChipQueue->captureIntoGraph(Params)) { RETURN(hipSuccess); } @@ -3528,7 +3628,7 @@ hipMemcpy2DToArrayAsyncInternal(hipArray *Dst, size_t WOffset, size_t HOffset, /* struct hipPitchedPtr dstPtr */ make_hipPitchedPtr(nullptr, 0, 0, 0), /* struct hipExtent extent */ make_hipExtent(Width, Height, 1), /* enum hipMemcpyKind kind */ Kind}; - if (ChipQueue->captureIntoGraph(Params)) { + if (ChipQueue->captureIntoGraph(Params)) { return hipSuccess; } @@ -3602,7 +3702,7 @@ hipMemcpy2DFromArrayAsyncInternal(void *Dst, size_t DPitch, make_hipPitchedPtr(Dst, DPitch, Width, Height), /* struct hipExtent extent */ make_hipExtent(Width, Height, 1), /* enum hipMemcpyKind kind */ Kind}; - if (ChipQueue->captureIntoGraph(Params)) { + if (ChipQueue->captureIntoGraph(Params)) { return hipSuccess; } @@ -3750,7 +3850,7 @@ hipError_t hipMemcpy3DAsyncInternal(const struct hipMemcpy3DParms *Params, auto ChipQueue = Backend->findQueue(static_cast(Stream)); LOCK(ChipQueue->QueueMtx); - if (ChipQueue->captureIntoGraph(Params)) { + if (ChipQueue->captureIntoGraph(Params)) { return hipSuccess; } @@ -3951,7 +4051,7 @@ hipError_t hipMemcpyToSymbolAsyncInternal(const void *Symbol, const void *Src, hipErrorInvalidMemcpyDirection); auto ChipQueue = Backend->findQueue(static_cast(Stream)); - if (ChipQueue->captureIntoGraph( + if (ChipQueue->captureIntoGraph( const_cast(Src), Symbol, SizeBytes, Offset, Kind)) { return hipSuccess; } @@ -4011,7 +4111,7 @@ hipError_t hipMemcpyFromSymbolAsyncInternal(void *Dst, const void *Symbol, hipErrorInvalidMemcpyDirection); auto ChipQueue = Backend->findQueue(static_cast(Stream)); - if (ChipQueue->captureIntoGraph( + if (ChipQueue->captureIntoGraph( const_cast(Dst), Symbol, SizeBytes, Offset, Kind)) { return hipSuccess; } @@ -4096,7 +4196,7 @@ static inline hipError_t hipLaunchKernelInternal(const void *HostFunction, void **Args, size_t SharedMem, hipStream_t Stream) { auto ChipQueue = Backend->findQueue(static_cast(Stream)); - if (ChipQueue->captureIntoGraph( + if (ChipQueue->captureIntoGraph( HostFunction, GridDim, BlockDim, Args, SharedMem)) { return hipSuccess; } diff --git a/src/CHIPGraph.cc b/src/CHIPGraph.cc index 34d158367..63a3b013e 100644 --- a/src/CHIPGraph.cc +++ b/src/CHIPGraph.cc @@ -22,7 +22,7 @@ /** * @file CHIPGraph.cc * @author Paulius Velesko (pvelesko@pglc.io) - * @brief CHIPGraph Implementation File + * @brief Graph Implementation File * @version 0.1 * @date 2022-11-28 * @@ -32,10 +32,13 @@ #include "CHIPBackend.hh" #include "CHIPBindingsInternal.hh" -// CHIPGraphNode + +using namespace chipstar; + +// GraphNode //************************************************************************************* -void CHIPGraphNode::DFS(std::vector CurrPath, - std::vector> &Paths) { +void GraphNode::DFS(std::vector CurrPath, + std::vector> &Paths) { CurrPath.push_back(this); for (auto &Dep : Dependencies_) { Dep->DFS(CurrPath, Paths); @@ -54,22 +57,32 @@ void CHIPGraphNode::DFS(std::vector CurrPath, return; } -CHIPGraph::CHIPGraph(const CHIPGraph &OriginalGraph) { +void GraphNode::checkDependencies(size_t numDependencies, + GraphNode **pDependencies) { + if (numDependencies > 0 && pDependencies == nullptr) + CHIPERR_LOG_AND_THROW("numDependencies > 0 && pDependencies == nullptr", + hipErrorInvalidValue); + if (numDependencies == 0 && pDependencies != nullptr) + CHIPERR_LOG_AND_THROW("numDependencies == 0 && pDependencies != nullptr", + hipErrorInvalidValue); +} + +Graph::Graph(const Graph &OriginalGraph) { /** * Create another Graph using the copy constructor. * This other graph will contain vectors/sets for dependencies/edges. * The edges, however, are pointers which point nodes in the original graph. * These edges must be remapped to this node. - * 1. Use the overriden CHIPGraphNode operator==() to check if two nodes are + * 1. Use the overriden GraphNode operator==() to check if two nodes are identical * 2. Create a map that maps pointers from old graph to new graph * 3. Remap the cloned graph. * */ std::cout << "\n\n"; - for (CHIPGraphNode *OriginalNode : OriginalGraph.Nodes_) { - CHIPGraphNode *CloneNode = OriginalNode->clone(); + for (GraphNode *OriginalNode : OriginalGraph.Nodes_) { + GraphNode *CloneNode = OriginalNode->clone(); Nodes_.push_back(CloneNode); CloneMap_[OriginalNode] = CloneNode; logDebug("Adding to CloneMap: Original {} {} -> Clone {} {}", @@ -77,24 +90,18 @@ CHIPGraph::CHIPGraph(const CHIPGraph &OriginalGraph) { (void *)CloneNode); } - for (CHIPGraphNode *Node : Nodes_) { + for (GraphNode *Node : Nodes_) { Node->updateDependencies(CloneMap_); Node->updateDependants(CloneMap_); } } -CHIPGraphNodeKernel::CHIPGraphNodeKernel(const CHIPGraphNodeKernel &Other) - : CHIPGraphNode(Other) { - Params_ = Other.Params_; - ExecItem_ = Other.ExecItem_->clone(); -} - -CHIPGraphNode *CHIPGraphNodeKernel::clone() const { - auto NewNode = new CHIPGraphNodeKernel(*this); +GraphNode *GraphNodeKernel::clone() const { + auto NewNode = new GraphNodeKernel(*this); return NewNode; } -void CHIPGraphNodeMemset::execute(chipstar::Queue *Queue) const { +void GraphNodeMemset::execute(chipstar::Queue *Queue) const { const unsigned int Val = Params_.value; size_t Height = std::max(1, Params_.height); size_t Width = std::max(1, Params_.width); @@ -102,7 +109,7 @@ void CHIPGraphNodeMemset::execute(chipstar::Queue *Queue) const { Queue->memFillAsync(Params_.dst, Size, (void *)&Val, Params_.elementSize); } -void CHIPGraphNodeMemcpy::execute(chipstar::Queue *Queue) const { +void GraphNodeMemcpy::execute(chipstar::Queue *Queue) const { if (Dst_ && Src_) { auto Status = hipMemcpyInternal(Dst_, Src_, Count_, Kind_); if (Status != hipSuccess) @@ -115,35 +122,52 @@ void CHIPGraphNodeMemcpy::execute(chipstar::Queue *Queue) const { hipErrorTbd); } } -void CHIPGraphNodeKernel::execute(chipstar::Queue *Queue) const { + +void GraphNodeKernel::setupKernelArgs() const { + ExecItem_->copyArgs(Params_.kernelParams); + ExecItem_->setupAllArgs(); +} + +GraphNodeKernel::~GraphNodeKernel() { delete ExecItem_; } + +void GraphNodeKernel::execute(chipstar::Queue *Queue) const { + // need to call this b/c launchImpl only + // calls ChipOclExecItem->setupAllArgs() without calling copyArgs() first + setupKernelArgs(); Queue->launch(ExecItem_); } -CHIPGraphNodeKernel::CHIPGraphNodeKernel(const hipKernelNodeParams *TheParams) - : CHIPGraphNode(hipGraphNodeTypeKernel) { +GraphNodeKernel::GraphNodeKernel(const GraphNodeKernel &Other) + : GraphNode(Other) { + Params_ = Other.Params_; + Kernel_ = Other.Kernel_; + ExecItem_ = ::Backend->createExecItem(Params_.gridDim, Params_.blockDim, + Params_.sharedMemBytes, nullptr); + ExecItem_->setKernel(Kernel_); +} + +GraphNodeKernel::GraphNodeKernel(const hipKernelNodeParams *TheParams) + : GraphNode(hipGraphNodeTypeKernel) { Params_.blockDim = TheParams->blockDim; Params_.extra = TheParams->extra; Params_.func = TheParams->func; Params_.gridDim = TheParams->gridDim; Params_.kernelParams = TheParams->kernelParams; Params_.sharedMemBytes = TheParams->sharedMemBytes; - auto Dev = Backend->getActiveDevice(); - chipstar::Kernel *ChipKernel = Dev->findKernel(HostPtr(Params_.func)); - if (!ChipKernel) + auto Dev = ::Backend->getActiveDevice(); + + Kernel_ = Dev->findKernel(HostPtr(Params_.func)); + if (!Kernel_) CHIPERR_LOG_AND_THROW("Could not find requested kernel", hipErrorInvalidDeviceFunction); - ExecItem_ = Backend->createExecItem(Params_.gridDim, Params_.blockDim, - Params_.sharedMemBytes, nullptr); - ExecItem_->setKernel(ChipKernel); - - ExecItem_->copyArgs(TheParams->kernelParams); - ExecItem_->setupAllArgs(); + ExecItem_ = ::Backend->createExecItem(Params_.gridDim, Params_.blockDim, + Params_.sharedMemBytes, nullptr); + ExecItem_->setKernel(Kernel_); } -CHIPGraphNodeKernel::CHIPGraphNodeKernel(const void *HostFunction, dim3 GridDim, - dim3 BlockDim, void **Args, - size_t SharedMem) - : CHIPGraphNode(hipGraphNodeTypeKernel) { +GraphNodeKernel::GraphNodeKernel(const void *HostFunction, dim3 GridDim, + dim3 BlockDim, void **Args, size_t SharedMem) + : GraphNode(hipGraphNodeTypeKernel) { Type_ = hipGraphNodeTypeKernel; Params_.blockDim = BlockDim; Params_.extra = nullptr; @@ -151,29 +175,27 @@ CHIPGraphNodeKernel::CHIPGraphNodeKernel(const void *HostFunction, dim3 GridDim, Params_.gridDim = GridDim; Params_.kernelParams = Args; Params_.sharedMemBytes = SharedMem; + auto Dev = ::Backend->getActiveDevice(); - auto Dev = Backend->getActiveDevice(); - chipstar::Kernel *ChipKernel = Dev->findKernel(HostPtr(HostFunction)); - if (!ChipKernel) + Kernel_ = Dev->findKernel(HostPtr(Params_.func)); + if (!Kernel_) CHIPERR_LOG_AND_THROW("Could not find requested kernel", hipErrorInvalidDeviceFunction); - ExecItem_ = Backend->createExecItem(GridDim, BlockDim, SharedMem, nullptr); - ExecItem_->setKernel(ChipKernel); - - ExecItem_->copyArgs(Args); - ExecItem_->setupAllArgs(); + ExecItem_ = ::Backend->createExecItem(GridDim, BlockDim, SharedMem, nullptr); + ExecItem_->setKernel(Kernel_); } -int NodeCounter = 1; -void CHIPGraph::addNode(CHIPGraphNode *Node) { - logDebug("{} CHIPGraph::addNode({})", (void *)this, (void *)Node); +static unsigned NodeCounter = 1; + +void Graph::addNode(GraphNode *Node) { + logDebug("{} Graph::addNode({})", (void *)this, (void *)Node); Node->Msg = "M" + std::to_string(NodeCounter); NodeCounter++; Nodes_.push_back(Node); } -void CHIPGraph::removeNode(CHIPGraphNode *Node) { - logDebug("{} CHIPGraph::removeNode({})", (void *)this, (void *)Node); +void Graph::removeNode(GraphNode *Node) { + logDebug("{} Graph::removeNode({})", (void *)this, (void *)Node); auto Found = std::find(Nodes_.begin(), Nodes_.end(), Node); if (Found == Nodes_.end()) { @@ -184,29 +206,71 @@ void CHIPGraph::removeNode(CHIPGraphNode *Node) { } } -void CHIPGraphExec::launch(chipstar::Queue *Queue) { - logDebug("{} CHIPGraphExec::launch({})", (void *)this, (void *)Queue); - compile(); - auto ExecQueueCopy = ExecQueues_; - while (ExecQueueCopy.size()) { - auto Nodes = ExecQueueCopy.front(); - std::string NodesInThisLevel = ""; - for (auto Node : Nodes) { - NodesInThisLevel += Node->Msg + " "; +bool GraphExec::tryLaunchNative(chipstar::Queue *Queue) { + bool UsedNativeGraph = false; + if (NativeGraph) { + if (NativeGraph->isFinalized()) + logDebug("NativeGraph: launching existing graph"); + else { + logDebug("NativeGraph: constructed but failed to finalize"); + return false; } - logDebug("Executing nodes: {}", NodesInThisLevel); - for (auto Node : Nodes) { - logDebug("Executing {}", Node->Msg); - Node->execute(Queue); - Queue->finish(); + } else { + logDebug("NativeGraph: trying to construct"); + NativeGraph.reset(Queue->createNativeGraph()); + if (!NativeGraph) + return false; + + for (auto &Node : OriginalGraph_->getNodes()) + if (!NativeGraph->addNode(Node)) { + logError("NativeGraph: failed to add node of type: {}", + Node->getType()); + return false; + } + + if (!NativeGraph->finalize()) { + logDebug("NativeGraph: failed to finalize"); + return false; } + } + + assert(NativeGraph->isFinalized()); + if (Queue->enqueueNativeGraph(NativeGraph.get())) { + logDebug("NativeGraph: launched"); + Queue->finish(); + return true; + } + return false; +} - ExecQueueCopy.pop(); +void GraphExec::launch(chipstar::Queue *Queue) { + logDebug("{} GraphExec::launch({})", (void *)this, (void *)Queue); + bool UsedNativeGraph = tryLaunchNative(Queue); + + if (!UsedNativeGraph) { + logDebug("NativeGraph: failed to construct/finalize/launch, using the " + "original code path"); + compile(); + auto ExecQueueCopy = ExecQueues_; + while (ExecQueueCopy.size()) { + auto Nodes = ExecQueueCopy.front(); + std::string NodesInThisLevel = ""; + for (auto Node : Nodes) + NodesInThisLevel += Node->Msg + " "; + logDebug("Executing nodes: {}", NodesInThisLevel); + for (auto Node : Nodes) { + logDebug("Executing {}", Node->Msg); + Node->execute(Queue); + Queue->finish(); + } + + ExecQueueCopy.pop(); + } } } -void unchainUnnecessaryDeps(std::vector Path, - std::vector SubPath) { +void unchainUnnecessaryDeps(std::vector Path, + std::vector SubPath) { assert(Path.size() > SubPath.size()); std::string PathStr = ""; for (auto Node : SubPath) { @@ -218,7 +282,7 @@ void unchainUnnecessaryDeps(std::vector Path, } logDebug("unchainUnnecessaryDeps({}, {})", PathStr, LongerPathStr); - for (int i = 0; i < SubPath.size(); i++) { + for (size_t i = 0; i < SubPath.size(); i++) { if (SubPath[i] != Path[i]) { SubPath[i - 1]->removeDependency(SubPath[i]); break; @@ -226,8 +290,8 @@ void unchainUnnecessaryDeps(std::vector Path, } } -std::vector CHIPGraph::getLeafNodes() { - std::vector LeafNodes; +std::vector Graph::getLeafNodes() { + std::vector LeafNodes; for (auto Node : Nodes_) { // no other node depends on leaf node. if (Node->getDependants().size() == 0) @@ -237,28 +301,28 @@ std::vector CHIPGraph::getLeafNodes() { return LeafNodes; } -void CHIPGraphExec::pruneGraph_() { - std::vector LeafNodes_ = OriginalGraph_->getLeafNodes(); +void GraphExec::pruneGraph_() { + std::vector LeafNodes_ = OriginalGraph_->getLeafNodes(); for (auto LeafNode : LeafNodes_) { // Generate all paths from leaf to root - std::vector CurrPath; - std::vector> Paths; + std::vector CurrPath; + std::vector> Paths; LeafNode->DFS(CurrPath, Paths); if (Paths.size() < 2) { continue; } - std::sort(Paths.begin(), Paths.end(), - [](std::vector PathA, - std::vector PathB) { - return PathA.size() > PathB.size(); - }); + std::sort( + Paths.begin(), Paths.end(), + [](std::vector PathA, std::vector PathB) { + return PathA.size() > PathB.size(); + }); for (auto Path : Paths) { // convert the current path to a set - std::set PathSet(Path.begin(), Path.end()); + std::set PathSet(Path.begin(), Path.end()); // Check other paths to see if they are a subset of this (longer) path for (auto SubPathIter = Paths.begin(); SubPathIter != Paths.end(); @@ -270,7 +334,7 @@ void CHIPGraphExec::pruneGraph_() { } // convert the other path to a set - std::set SubPathSet(SubPath.begin(), SubPath.end()); + std::set SubPathSet(SubPath.begin(), SubPath.end()); // std::string PathStr = ""; // for(auto Node : Path) { // PathStr += Node->Msg + " "; @@ -290,8 +354,8 @@ void CHIPGraphExec::pruneGraph_() { } } -std::vector CHIPGraph::getRootNodes() { - std::vector RootNodes; +std::vector Graph::getRootNodes() { + std::vector RootNodes; for (auto Node : Nodes_) { if (Node->getDependencies().size() == 0) { RootNodes.push_back(Node); @@ -300,13 +364,13 @@ std::vector CHIPGraph::getRootNodes() { return RootNodes; } -void CHIPGraphExec::compile() { +void GraphExec::compile() { ExtractSubGraphs_(); pruneGraph_(); - logDebug("{} CHIPGraphExec::compile()", (void *)this); - std::vector Nodes = OriginalGraph_->getNodes(); + logDebug("{} GraphExec::compile()", (void *)this); + std::vector Nodes = OriginalGraph_->getNodes(); auto RootNodesVec = OriginalGraph_->getRootNodes(); - std::set RootNodes(RootNodesVec.begin(), RootNodesVec.end()); + std::set RootNodes(RootNodesVec.begin(), RootNodesVec.end()); ExecQueues_.push(RootNodes); // Remove root nodes from the set of nodes for (auto Node : RootNodes) { @@ -320,8 +384,8 @@ void CHIPGraphExec::compile() { * find all the nodes that depend only the nodes in the back of the exec * queue. */ - std::set NextSet; - std::set PrevLevelNodes = RootNodes; + std::set NextSet; + std::set PrevLevelNodes = RootNodes; auto NodeIter = Nodes.begin(); while (Nodes.size()) { // while more unnasigned nodes available auto CurrentNodeDeps = (*NodeIter)->getDependencies(); @@ -357,17 +421,17 @@ void CHIPGraphExec::compile() { } } -void CHIPGraphNodeHost::execute(chipstar::Queue *Queue) const { +void GraphNodeHost::execute(chipstar::Queue *Queue) const { Queue->finish(); Params_.fn(Params_.userData); } -void CHIPGraphExec::ExtractSubGraphs_() { +void GraphExec::ExtractSubGraphs_() { auto Nodes = CompiledGraph_.getNodes(); - for (int i = 0; i < Nodes.size(); i++) { + for (size_t i = 0; i < Nodes.size(); i++) { auto Node = Nodes[i]; if (Node->getType() == hipGraphNodeTypeGraph) { - auto SubGraphNode = static_cast(Node); + auto SubGraphNode = static_cast(Node); auto SubGraph = SubGraphNode->getGraph(); // 1. get all the root nodes @@ -396,7 +460,7 @@ void CHIPGraphExec::ExtractSubGraphs_() { } } -void CHIPGraphNodeEventRecord::execute(chipstar::Queue *Queue) const { +void GraphNodeEventRecord::execute(chipstar::Queue *Queue) const { NULLCHECK(Event_); auto Status = hipEventRecordInternal(Event_, Queue); if (Status != hipSuccess) @@ -404,9 +468,9 @@ void CHIPGraphNodeEventRecord::execute(chipstar::Queue *Queue) const { hipErrorTbd); } -void CHIPGraphNodeMemcpyFromSymbol::execute(chipstar::Queue *Queue) const { +void GraphNodeMemcpyFromSymbol::execute(chipstar::Queue *Queue) const { NULLCHECK(Dst_, Symbol_); - auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); + auto ChipQueue = ::Backend->getActiveDevice()->getDefaultQueue(); auto Status = hipMemcpyFromSymbolAsyncInternal(Dst_, Symbol_, SizeBytes_, Offset_, Kind_, ChipQueue); if (Status == hipSuccess) @@ -416,9 +480,9 @@ void CHIPGraphNodeMemcpyFromSymbol::execute(chipstar::Queue *Queue) const { hipErrorTbd); } -void CHIPGraphNodeMemcpyToSymbol::execute(chipstar::Queue *Queue) const { +void GraphNodeMemcpyToSymbol::execute(chipstar::Queue *Queue) const { NULLCHECK(Symbol_, Src_); - auto ChipQueue = Backend->getActiveDevice()->getDefaultQueue(); + auto ChipQueue = ::Backend->getActiveDevice()->getDefaultQueue(); auto Status = hipMemcpyToSymbolAsyncInternal(Symbol_, Src_, SizeBytes_, Offset_, Kind_, ChipQueue); if (Status == hipSuccess) @@ -428,7 +492,7 @@ void CHIPGraphNodeMemcpyToSymbol::execute(chipstar::Queue *Queue) const { hipErrorTbd); } -void CHIPGraphNodeWaitEvent::execute(chipstar::Queue *Queue) const { +void GraphNodeWaitEvent::execute(chipstar::Queue *Queue) const { // current HIP API requires Flags unsigned int Flags = 0; auto Status = hipStreamWaitEventInternal(Queue, Event_, Flags); diff --git a/src/CHIPGraph.hh b/src/CHIPGraph.hh index 191d6b304..40aa3cd71 100644 --- a/src/CHIPGraph.hh +++ b/src/CHIPGraph.hh @@ -41,38 +41,43 @@ namespace chipstar { class Queue; +class Kernel; class Event; class ExecItem; +class Graph; +class GraphNode; } // namespace chipstar -class CHIPGraph; +namespace chipstar { -class CHIPGraphNode : public hipGraphNode { +class GraphNode : public hipGraphNode { protected: hipGraphNodeType Type_; // nodes which depend on this node - std::vector Dependendants_; + std::vector Dependendants_; // nodes on which this node depends - std::vector Dependencies_; + std::vector Dependencies_; /** - * @brief Destroy the CHIPGraphNode object + * @brief Destroy the GraphNode object * Hidden virtual destructor. Should only be called through derived classes. */ - virtual ~CHIPGraphNode() { + virtual ~GraphNode() { Dependendants_.clear(); Dependencies_.clear(); } - CHIPGraphNode(hipGraphNodeType Type) : Type_(Type) {} + GraphNode(hipGraphNodeType Type) : Type_(Type) {} + + void checkDependencies(size_t numDependencies, GraphNode **pDependencies); public: std::string Msg; // TODO Graphs cleanup - CHIPGraphNode(const CHIPGraphNode &Other) + GraphNode(const GraphNode &Other) : Type_(Other.Type_), Dependendants_(Other.Dependendants_), Dependencies_(Other.Dependencies_), Msg(Other.Msg) {} hipGraphNodeType getType() { return Type_; } - virtual CHIPGraphNode *clone() const = 0; + virtual GraphNode *clone() const = 0; /** * @brief Depth-first search of the graph. @@ -82,8 +87,8 @@ public: * @param CurrPath space for the current path * @param Paths space for the paths */ - void DFS(std::vector CurrPath, - std::vector> &Paths); + void DFS(std::vector CurrPath, + std::vector> &Paths); /** * @brief Pure virtual method to be overriden by derived classes. This method @@ -100,7 +105,7 @@ public: * * @param TheNode */ - void addDependant(CHIPGraphNode *TheNode) { + void addDependant(GraphNode *TheNode) { logDebug("{} addDependant() <{} depends on {}>", (void *)this, TheNode->Msg, Msg); Dependendants_.push_back(TheNode); @@ -113,7 +118,7 @@ public: * * @param TheNode */ - void addDependants(std::vector Nodes) { + void addDependants(std::vector Nodes) { for (auto Node : Nodes) { addDependant(Node); } @@ -126,7 +131,10 @@ public: * * @param TheNode */ - void addDependency(CHIPGraphNode *TheNode) { + void addDependency(GraphNode *TheNode) { + if (TheNode == nullptr) + CHIPERR_LOG_AND_THROW("addDependency called with nullptr", + hipErrorInvalidValue); logDebug("{} addDependency() <{} depends on {}>", (void *)this, Msg, TheNode->Msg); Dependencies_.push_back(TheNode); @@ -140,7 +148,11 @@ public: * * @param TheNode */ - void removeDependency(CHIPGraphNode *TheNode) { + void removeDependency(GraphNode *TheNode) { + if (TheNode == nullptr) { + CHIPERR_LOG_AND_THROW("removeDependency called with nullptr", + hipErrorInvalidValue); + } logDebug("{} removeDependency() <{} depends on {}>", (void *)this, Msg, TheNode->Msg); auto FoundNode = @@ -148,7 +160,7 @@ public: if (FoundNode != Dependencies_.end()) { Dependencies_.erase(FoundNode); } else { - CHIPERR_LOG_AND_THROW("Failed to find", hipErrorTbd); + CHIPERR_LOG_AND_THROW("Failed to find", hipErrorInvalidValue); } } @@ -160,8 +172,9 @@ public: * @param Dependencies * @param Count */ - void addDependencies(CHIPGraphNode **Dependencies, int Count) { - for (int i = 0; i < Count; i++) { + void addDependencies(GraphNode **Dependencies, size_t Count) { + checkDependencies(Count, Dependencies); + for (size_t i = 0; i < Count; i++) { addDependency(Dependencies[i]); } } @@ -173,7 +186,7 @@ public: * * @param Dependencies */ - void addDependencies(std::vector Dependencies) { + void addDependencies(std::vector Dependencies) { for (auto Node : Dependencies) { addDependency(Node); } @@ -186,8 +199,9 @@ public: * * @param TheNode */ - void removeDependencies(CHIPGraphNode **Dependencies, int Count) { - for (int i = 0; i < Count; i++) { + void removeDependencies(GraphNode **Dependencies, size_t Count) { + checkDependencies(Count, Dependencies); + for (size_t i = 0; i < Count; i++) { removeDependency(Dependencies[i]); } } @@ -204,8 +218,8 @@ public: * @param CloneMap the map containing relationships of which original node * does each cloned node correspond to. */ - void updateDependencies(std::map CloneMap) { - std::vector NewDeps; + void updateDependencies(std::map &CloneMap) { + std::vector NewDeps; for (auto Dep : Dependencies_) { auto ClonedDep = CloneMap[Dep]; logDebug("{} {} Replacing dependency {} with {}", (void *)this, this->Msg, @@ -229,8 +243,8 @@ public: * @param CloneMap the map containing relationships of which original node * does each cloned node correspond to. */ - void updateDependants(std::map CloneMap) { - std::vector NewDeps; + void updateDependants(std::map CloneMap) { + std::vector NewDeps; for (auto Dep : Dependendants_) { auto ClonedDep = CloneMap[Dep]; logDebug("{} {} Replacing dependant {} with {}", (void *)this, this->Msg, @@ -246,39 +260,54 @@ public: * @brief Get the Dependencies object * nodes which depend on this node * - * @return std::vector + * @return std::vector */ - std::vector getDependencies() const { return Dependencies_; } + std::vector getDependencies() const { return Dependencies_; } /** * @brief Get the Dependants object * nodes on which this node depends * - * @return std::vector + * @return std::vector */ - std::vector getDependants() const { return Dependendants_; } + std::vector getDependants() const { return Dependendants_; } }; -class CHIPGraphNodeKernel : public CHIPGraphNode { +class GraphNodeKernel : public GraphNode { private: hipKernelNodeParams Params_; chipstar::ExecItem *ExecItem_; + chipstar::Kernel *Kernel_; public: - CHIPGraphNodeKernel(const CHIPGraphNodeKernel &Other); + GraphNodeKernel(const GraphNodeKernel &Other); - CHIPGraphNodeKernel(const hipKernelNodeParams *TheParams); + GraphNodeKernel(const hipKernelNodeParams *TheParams); - CHIPGraphNodeKernel(const void *HostFunction, dim3 GridDim, dim3 BlockDim, - void **Args, size_t SharedMem); - - virtual ~CHIPGraphNodeKernel() override {} + GraphNodeKernel(const void *HostFunction, dim3 GridDim, dim3 BlockDim, + void **Args, size_t SharedMem); + virtual ~GraphNodeKernel() override; virtual void execute(chipstar::Queue *Queue) const override; hipKernelNodeParams getParams() const { return Params_; } - void setParams(const hipKernelNodeParams Params) { Params_ = Params; } + /// the Kernel arguments have to be setup either just before launch (when + /// using the execute() path), or if using the CHIPGraphNative then + /// just before calling their graph construction APIs. + /// + /// This is because Kernels in both LevelZero and OpenCL are stateful, + /// and users can add multiple nodes with the same kernel into a Graph. + /// Setting up arguments in GraphNodeKernel ctor would then + /// lead to all nodes using the same (those set up last) arguments. + void setupKernelArgs() const; + chipstar::Kernel *getKernel() const { return Kernel_; } + + void setParams(const hipKernelNodeParams Params) { + // dont allow changing kernel, needs refactoring + CHIPASSERT(Params.func == Params_.func); + Params_ = Params; + } /** * @brief Createa a copy of this node * Must copy over all the arguments @@ -286,12 +315,12 @@ public: * Copying over the dependencies is important because CHIPGraph::clone() uses * them to remap onto new nodes * - * @return CHIPGraphNode* + * @return GraphNode* */ - virtual CHIPGraphNode *clone() const override; + virtual GraphNode *clone() const override; }; -class CHIPGraphNodeMemcpy : public CHIPGraphNode { +class GraphNodeMemcpy : public GraphNode { private: hipMemcpy3DParms Params_; @@ -301,24 +330,24 @@ private: hipMemcpyKind Kind_; public: - CHIPGraphNodeMemcpy(const CHIPGraphNodeMemcpy &Other) - : CHIPGraphNode(Other), Params_(Other.Params_), Dst_(Other.Dst_), + GraphNodeMemcpy(const GraphNodeMemcpy &Other) + : GraphNode(Other), Params_(Other.Params_), Dst_(Other.Dst_), Src_(Other.Src_), Count_(Other.Count_), Kind_(Other.Kind_) {} - CHIPGraphNodeMemcpy(hipMemcpy3DParms Params) - : CHIPGraphNode(hipGraphNodeTypeMemcpy), Params_(Params) {} - CHIPGraphNodeMemcpy(const hipMemcpy3DParms *Params) - : CHIPGraphNode(hipGraphNodeTypeMemcpy) { + GraphNodeMemcpy(hipMemcpy3DParms Params) + : GraphNode(hipGraphNodeTypeMemcpy), Params_(Params), Dst_(nullptr), + Src_(nullptr), Count_(0), Kind_(hipMemcpyKind::hipMemcpyDefault) {} + GraphNodeMemcpy(const hipMemcpy3DParms *Params) + : GraphNode(hipGraphNodeTypeMemcpy) { setParams(Params); } // 1D MemCpy - CHIPGraphNodeMemcpy(void *Dst, const void *Src, size_t Count, - hipMemcpyKind Kind) - : CHIPGraphNode(hipGraphNodeTypeMemcpy), Dst_(Dst), Src_(Src), - Count_(Count), Kind_(Kind) {} + GraphNodeMemcpy(void *Dst, const void *Src, size_t Count, hipMemcpyKind Kind) + : GraphNode(hipGraphNodeTypeMemcpy), Dst_(Dst), Src_(Src), Count_(Count), + Kind_(Kind) {} - virtual ~CHIPGraphNodeMemcpy() override {} + virtual ~GraphNodeMemcpy() override {} hipMemcpy3DParms getParams() { return Params_; } @@ -330,6 +359,14 @@ public: Kind_ = Kind; } + void getParams(void *&Dst, const void *&Src, size_t &Count, + hipMemcpyKind &Kind) { + Dst = Dst_; + Src = Src_; + Count = Count_; + Kind = Kind_; + } + void setParams(const hipMemcpy3DParms *Params) { Params_.srcArray = Params->srcArray; // if(Params->srcArray) @@ -347,55 +384,55 @@ public: virtual void execute(chipstar::Queue *Queue) const override; - virtual CHIPGraphNode *clone() const override { - auto NewNode = new CHIPGraphNodeMemcpy(*this); + virtual GraphNode *clone() const override { + auto NewNode = new GraphNodeMemcpy(*this); return NewNode; } }; -class CHIPGraphNodeMemset : public CHIPGraphNode { +class GraphNodeMemset : public GraphNode { private: hipMemsetParams Params_; public: - CHIPGraphNodeMemset(const CHIPGraphNodeMemset &Other) - : CHIPGraphNode(Other), Params_(Other.Params_) {} + GraphNodeMemset(const GraphNodeMemset &Other) + : GraphNode(Other), Params_(Other.Params_) {} - CHIPGraphNodeMemset(const hipMemsetParams Params) - : CHIPGraphNode(hipGraphNodeTypeMemset), Params_(Params) {} + GraphNodeMemset(const hipMemsetParams Params) + : GraphNode(hipGraphNodeTypeMemset), Params_(Params) {} - CHIPGraphNodeMemset(const hipMemsetParams *Params) - : CHIPGraphNode(hipGraphNodeTypeMemset), Params_(*Params) {} + GraphNodeMemset(const hipMemsetParams *Params) + : GraphNode(hipGraphNodeTypeMemset), Params_(*Params) {} - virtual ~CHIPGraphNodeMemset() override {} + virtual ~GraphNodeMemset() override {} hipMemsetParams getParams() { return Params_; } void setParams(const hipMemsetParams *Params) { Params_ = *Params; } virtual void execute(chipstar::Queue *Queue) const override; - virtual CHIPGraphNode *clone() const override { - auto NewNode = new CHIPGraphNodeMemset(*this); + virtual GraphNode *clone() const override { + auto NewNode = new GraphNodeMemset(*this); return NewNode; } }; -class CHIPGraphNodeHost : public CHIPGraphNode { +class GraphNodeHost : public GraphNode { private: hipHostNodeParams Params_; public: - CHIPGraphNodeHost(const CHIPGraphNodeHost &Other) - : CHIPGraphNode(Other), Params_(Other.Params_) {} + GraphNodeHost(const GraphNodeHost &Other) + : GraphNode(Other), Params_(Other.Params_) {} - CHIPGraphNodeHost(const hipHostNodeParams *Params) - : CHIPGraphNode(hipGraphNodeTypeHost), Params_(*Params) {} + GraphNodeHost(const hipHostNodeParams *Params) + : GraphNode(hipGraphNodeTypeHost), Params_(*Params) {} - virtual ~CHIPGraphNodeHost() override {} + virtual ~GraphNodeHost() override {} virtual void execute(chipstar::Queue *Queue) const override; - virtual CHIPGraphNode *clone() const override { - auto NewNode = new CHIPGraphNodeHost(*this); + virtual GraphNode *clone() const override { + auto NewNode = new GraphNodeHost(*this); return NewNode; } @@ -404,67 +441,67 @@ public: hipHostNodeParams getParams() { return Params_; } }; -class CHIPGraphNodeGraph : public CHIPGraphNode { +class GraphNodeGraph : public GraphNode { private: - CHIPGraph *SubGraph_; + Graph *SubGraph_; public: - CHIPGraphNodeGraph(CHIPGraph *Graph) - : CHIPGraphNode(hipGraphNodeTypeGraph), SubGraph_(Graph) {} + GraphNodeGraph(Graph *Graph) + : GraphNode(hipGraphNodeTypeGraph), SubGraph_(Graph) {} - CHIPGraphNodeGraph(const CHIPGraphNodeGraph &Other) - : CHIPGraphNode(Other), SubGraph_(Other.SubGraph_) {} + GraphNodeGraph(const GraphNodeGraph &Other) + : GraphNode(Other), SubGraph_(Other.SubGraph_) {} - virtual ~CHIPGraphNodeGraph() override {} + virtual ~GraphNodeGraph() override {} virtual void execute(chipstar::Queue *Queue) const override { CHIPERR_LOG_AND_THROW("Attemped to execute GraphNode", hipErrorTbd); } - virtual CHIPGraphNode *clone() const override { - auto NewNode = new CHIPGraphNodeGraph(*this); + virtual GraphNode *clone() const override { + auto NewNode = new GraphNodeGraph(*this); return NewNode; } - void setGraph(CHIPGraph *Graph) { SubGraph_ = Graph; } + void setGraph(Graph *Graph) { SubGraph_ = Graph; } - CHIPGraph *getGraph() { return SubGraph_; } + Graph *getGraph() { return SubGraph_; } }; -class CHIPGraphNodeEmpty : public CHIPGraphNode { +class GraphNodeEmpty : public GraphNode { public: - CHIPGraphNodeEmpty(const CHIPGraphNodeEmpty &Other) : CHIPGraphNode(Other) {} + GraphNodeEmpty(const GraphNodeEmpty &Other) : GraphNode(Other) {} - CHIPGraphNodeEmpty() : CHIPGraphNode(hipGraphNodeTypeEmpty) {} + GraphNodeEmpty() : GraphNode(hipGraphNodeTypeEmpty) {} - virtual ~CHIPGraphNodeEmpty() override {} + virtual ~GraphNodeEmpty() override {} virtual void execute(chipstar::Queue *Queue) const override { logDebug("Executing empty node"); } - virtual CHIPGraphNode *clone() const override { - auto NewNode = new CHIPGraphNodeEmpty(*this); + virtual GraphNode *clone() const override { + auto NewNode = new GraphNodeEmpty(*this); return NewNode; } }; -class CHIPGraphNodeWaitEvent : public CHIPGraphNode { +class GraphNodeWaitEvent : public GraphNode { private: chipstar::Event *Event_; public: - CHIPGraphNodeWaitEvent(chipstar::Event *Event) - : CHIPGraphNode(hipGraphNodeTypeWaitEvent), Event_(Event) {} + GraphNodeWaitEvent(chipstar::Event *Event) + : GraphNode(hipGraphNodeTypeWaitEvent), Event_(Event) {} - CHIPGraphNodeWaitEvent(const CHIPGraphNodeWaitEvent &Other) - : CHIPGraphNode(Other), Event_(Other.Event_) {} + GraphNodeWaitEvent(const GraphNodeWaitEvent &Other) + : GraphNode(Other), Event_(Other.Event_) {} - virtual ~CHIPGraphNodeWaitEvent() override {} + virtual ~GraphNodeWaitEvent() override {} virtual void execute(chipstar::Queue *Queue) const override; - virtual CHIPGraphNode *clone() const override { - auto NewNode = new CHIPGraphNodeWaitEvent(*this); + virtual GraphNode *clone() const override { + auto NewNode = new GraphNodeWaitEvent(*this); return NewNode; } @@ -472,23 +509,23 @@ public: void setEvent(chipstar::Event *Event) { Event_ = Event; } }; -class CHIPGraphNodeEventRecord : public CHIPGraphNode { +class GraphNodeEventRecord : public GraphNode { private: chipstar::Event *Event_; public: - CHIPGraphNodeEventRecord(chipstar::Event *Event) - : CHIPGraphNode(hipGraphNodeTypeEventRecord), Event_(Event){}; + GraphNodeEventRecord(chipstar::Event *Event) + : GraphNode(hipGraphNodeTypeEventRecord), Event_(Event){}; - CHIPGraphNodeEventRecord(const CHIPGraphNodeEventRecord &Other) - : CHIPGraphNode(Other), Event_(Other.Event_) {} + GraphNodeEventRecord(const GraphNodeEventRecord &Other) + : GraphNode(Other), Event_(Other.Event_) {} - virtual ~CHIPGraphNodeEventRecord() override {} + virtual ~GraphNodeEventRecord() override {} virtual void execute(chipstar::Queue *Queue) const override; - virtual CHIPGraphNode *clone() const override { - auto NewNode = new CHIPGraphNodeEventRecord(*this); + virtual GraphNode *clone() const override { + auto NewNode = new GraphNodeEventRecord(*this); return NewNode; } @@ -497,7 +534,7 @@ public: chipstar::Event *getEvent() { return Event_; } }; -class CHIPGraphNodeMemcpyFromSymbol : public CHIPGraphNode { +class GraphNodeMemcpyFromSymbol : public GraphNode { private: void *Dst_; void *Symbol_; @@ -506,18 +543,18 @@ private: hipMemcpyKind Kind_; public: - CHIPGraphNodeMemcpyFromSymbol(void *Dst, const void *Symbol, size_t SizeBytes, - size_t Offset, hipMemcpyKind Kind) - : CHIPGraphNode(hipGraphNodeTypeMemcpyFromSymbol), Dst_(Dst), + GraphNodeMemcpyFromSymbol(void *Dst, const void *Symbol, size_t SizeBytes, + size_t Offset, hipMemcpyKind Kind) + : GraphNode(hipGraphNodeTypeMemcpyFromSymbol), Dst_(Dst), Symbol_(const_cast(Symbol)), SizeBytes_(SizeBytes), Offset_(Offset), Kind_(Kind) {} - CHIPGraphNodeMemcpyFromSymbol(const CHIPGraphNodeMemcpyFromSymbol &Other) - : CHIPGraphNode(Other), Dst_(Other.Dst_), Symbol_(Other.Symbol_), + GraphNodeMemcpyFromSymbol(const GraphNodeMemcpyFromSymbol &Other) + : GraphNode(Other), Dst_(Other.Dst_), Symbol_(Other.Symbol_), SizeBytes_(Other.SizeBytes_), Offset_(Other.Offset_), Kind_(Other.Kind_) {} - virtual ~CHIPGraphNodeMemcpyFromSymbol() override {} + virtual ~GraphNodeMemcpyFromSymbol() override {} virtual void execute(chipstar::Queue *Queue) const override; @@ -527,16 +564,25 @@ public: Symbol_ = const_cast(Symbol); SizeBytes_ = SizeBytes; Offset_ = Offset; + Kind_ = Kind; + } + + void getParams(void *&Dst, const void *&Symbol, size_t &SizeBytes, + size_t &Offset, hipMemcpyKind &Kind) { + Dst = Dst_; + Symbol = Symbol_; + SizeBytes = SizeBytes_; + Offset = Offset_; Kind = Kind_; } - virtual CHIPGraphNode *clone() const override { - auto NewNode = new CHIPGraphNodeMemcpyFromSymbol(*this); + virtual GraphNode *clone() const override { + auto NewNode = new GraphNodeMemcpyFromSymbol(*this); return NewNode; } }; -class CHIPGraphNodeMemcpyToSymbol : public CHIPGraphNode { +class GraphNodeMemcpyToSymbol : public GraphNode { private: void *Src_; void *Symbol_; @@ -545,23 +591,23 @@ private: hipMemcpyKind Kind_; public: - CHIPGraphNodeMemcpyToSymbol(void *Src, const void *Symbol, size_t SizeBytes, - size_t Offset, hipMemcpyKind Kind) - : CHIPGraphNode(hipGraphNodeTypeMemcpyToSymbol), Src_(Src), + GraphNodeMemcpyToSymbol(void *Src, const void *Symbol, size_t SizeBytes, + size_t Offset, hipMemcpyKind Kind) + : GraphNode(hipGraphNodeTypeMemcpyToSymbol), Src_(Src), Symbol_(const_cast(Symbol)), SizeBytes_(SizeBytes), Offset_(Offset), Kind_(Kind) {} - CHIPGraphNodeMemcpyToSymbol(const CHIPGraphNodeMemcpyToSymbol &Other) - : CHIPGraphNode(Other), Src_(Other.Src_), Symbol_(Other.Symbol_), + GraphNodeMemcpyToSymbol(const GraphNodeMemcpyToSymbol &Other) + : GraphNode(Other), Src_(Other.Src_), Symbol_(Other.Symbol_), SizeBytes_(Other.SizeBytes_), Offset_(Other.Offset_), Kind_(Other.Kind_) {} - virtual ~CHIPGraphNodeMemcpyToSymbol() override {} + virtual ~GraphNodeMemcpyToSymbol() override {} virtual void execute(chipstar::Queue *Queue) const override; - virtual CHIPGraphNode *clone() const override { - auto NewNode = new CHIPGraphNodeMemcpyToSymbol(*this); + virtual GraphNode *clone() const override { + auto NewNode = new GraphNodeMemcpyToSymbol(*this); return NewNode; } @@ -571,60 +617,69 @@ public: Symbol_ = const_cast(Symbol); SizeBytes_ = SizeBytes; Offset_ = Offset; + Kind_ = Kind; + } + + void getParams(void *&Src, const void *&Symbol, size_t &SizeBytes, + size_t &Offset, hipMemcpyKind &Kind) { + Src = Src_; + Symbol = Symbol_; + SizeBytes = SizeBytes_; + Offset = Offset_; Kind = Kind_; } }; -class CHIPGraph : public ihipGraph { +class Graph : public ihipGraph { protected: - std::vector Nodes_; + std::vector Nodes_; // Map the pointers Original -> Clone - std::map CloneMap_; + std::map CloneMap_; public: - CHIPGraph(const CHIPGraph &OriginalGraph); - CHIPGraph() {} - void addNode(CHIPGraphNode *TheNode); - void removeNode(CHIPGraphNode *TheNode); + Graph(const Graph &OriginalGraph); + Graph() {} + void addNode(GraphNode *TheNode); + void removeNode(GraphNode *TheNode); /** * @brief Lookup a cloned(instantiated) node using a pointer to the original * node * * @param OriginalNode pointer to the node which was present in CHIPGraph at * the time of instantiation of CHIPGraph to CHIPGraphExec - * @return CHIPGraphNode* pointer to the resulting node in CHIPGraphExec which + * @return GraphNode* pointer to the resulting node in CHIPGraphExec which * corresponds to the original node */ - CHIPGraphNode *nodeLookup(CHIPGraphNode *OriginalNode) { + GraphNode *nodeLookup(GraphNode *OriginalNode) { if (!CloneMap_.count(OriginalNode)) { return nullptr; } return CloneMap_[OriginalNode]; } - std::vector getLeafNodes(); - std::vector getRootNodes(); - CHIPGraphNode *getClonedNodeFromOriginal(CHIPGraphNode *OriginalNode) { + std::vector getLeafNodes(); + std::vector getRootNodes(); + GraphNode *getClonedNodeFromOriginal(GraphNode *OriginalNode) { if (!CloneMap_.count(OriginalNode)) { - CHIPERR_LOG_AND_THROW("Failed to find the node in clone", hipErrorTbd); + CHIPERR_LOG_AND_THROW("Failed to find the node in clone", + hipErrorInvalidValue); } else { return CloneMap_[OriginalNode]; } } - std::vector &getNodes() { return Nodes_; } + std::vector &getNodes() { return Nodes_; } - std::vector> getEdges() { - std::set> Edges; + std::vector> getEdges() { + std::set> Edges; for (auto Node : Nodes_) { for (auto Dep : Node->getDependencies()) { - auto FromToPair = - std::pair(Node, Dep); + auto FromToPair = std::pair(Node, Dep); Edges.insert(FromToPair); } } - return std::vector>( - Edges.begin(), Edges.end()); + return std::vector>(Edges.begin(), + Edges.end()); }; /** @@ -633,9 +688,9 @@ public: * verify that the node exists in this graph and return the non-const handle. * * @param Node the node to find in this graph - * @return CHIPGraphNode* the non-const handle of this found node. + * @return GraphNode* the non-const handle of this found node. */ - CHIPGraphNode *findNode(CHIPGraphNode *Node) { + GraphNode *findNode(GraphNode *Node) { auto FoundNode = std::find(Nodes_.begin(), Nodes_.end(), Node); if (FoundNode != Nodes_.end()) { return *FoundNode; @@ -645,20 +700,34 @@ public: } }; -class CHIPGraphExec : public hipGraphExec { +class GraphNative { +protected: + bool Finalized; + +public: + GraphNative() : Finalized(false){}; + virtual ~GraphNative() {} + bool isFinalized() { return Finalized; } + virtual bool finalize() { return false; } + virtual bool addNode(GraphNode *NewNode) { return false; } +}; + +class GraphExec : public hipGraphExec { protected: - CHIPGraph *OriginalGraph_; - CHIPGraph CompiledGraph_; + Graph *OriginalGraph_; + Graph CompiledGraph_; + + std::unique_ptr NativeGraph; /** * @brief each element in this queue represents represents a sequence of nodes * that can be submitted to one or more queues * */ - std::queue> ExecQueues_; + std::queue> ExecQueues_; /** - * @brief For every CHIPGraphNodeGraph in CompiledGraph_, replace this node + * @brief For every GraphNodeGraph in CompiledGraph_, replace this node * with its contents. * */ @@ -676,19 +745,6 @@ protected: */ void pruneGraph_(); -public: - CHIPGraphExec(CHIPGraph *Graph) - : OriginalGraph_(Graph), /* Copy the pointer to the original graph */ - CompiledGraph_(CHIPGraph(*Graph)) /* invoke the copy constructor to make - a clone of the graph */ - {} - - ~CHIPGraphExec() {} - - void launch(chipstar::Queue *Queue); - - CHIPGraph *getOriginalGraphPtr() const { return OriginalGraph_; } - /** * @brief Optimize and generate ExecQueues_ * @@ -699,6 +755,22 @@ public: * */ void compile(); + +public: + GraphExec(Graph *Graph) + : OriginalGraph_(Graph), /* Copy the pointer to the original graph */ + CompiledGraph_(*Graph) /* invoke the copy constructor to make + a clone of the graph */ + {} + + ~GraphExec() {} + + void launch(chipstar::Queue *Queue); + bool tryLaunchNative(chipstar::Queue *Queue); + + Graph *getOriginalGraphPtr() const { return OriginalGraph_; } }; +} // namespace chipstar + #endif // include guard diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index 2f3a13492..9baa22a0f 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -23,6 +23,7 @@ #include "CHIPBackendOpenCL.hh" #include "Utils.hh" +#include #include #include "Utils.hh" @@ -434,7 +435,7 @@ void CHIPDeviceOpenCL::populateDevicePropertiesImpl() { HipDeviceProps_.warpSize = CHIP_DEFAULT_WARP_SIZE; // Try to check that we support the default warp size. - std::vector Sg = ClDevice->getInfo(); + std::vector Sg = ClDevice->getInfo(); if (std::find(Sg.begin(), Sg.end(), CHIP_DEFAULT_WARP_SIZE) == Sg.end()) { logWarn( "The device might not support subgroup size {}, warp-size sensitive " @@ -606,8 +607,8 @@ void CHIPEventOpenCL::takeOver( std::shared_ptr Other = std::static_pointer_cast(OtherIn); LOCK(EventMtx); // chipstar::Event::Refc_ - this->ClEvent = Other->ClEvent; - this->Msg = Other->Msg; + this->ClEvent = Other.get()->ClEvent; + this->Msg = Other.get()->Msg; } } @@ -862,10 +863,84 @@ void CHIPContextOpenCL::freeImpl(void *Ptr) { SvmMemory.free(Ptr); } -cl::Context *CHIPContextOpenCL::get() { return ClContext; } -CHIPContextOpenCL::CHIPContextOpenCL(cl::Context *CtxIn) { +CHIPContextOpenCL::CHIPContextOpenCL(cl::Context *CtxIn, cl::Device Dev, + cl::Platform Plat) { logTrace("CHIPContextOpenCL Initialized via OpenCL Context pointer."); - ClContext = CtxIn; + std::string DevExts = Dev.getInfo(); + std::memset(&Exts_, 0, sizeof(Exts_)); + SupportsCommandBuffers = + DevExts.find("cl_khr_command_buffer") != std::string::npos; + if (SupportsCommandBuffers) { + logDebug("Device supports cl_khr_command_buffer"); + Exts_.clCreateCommandBufferKHR = + (clCreateCommandBufferKHR_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCreateCommandBufferKHR"); + Exts_.clCommandCopyBufferKHR = + (clCommandCopyBufferKHR_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandCopyBufferKHR"); + Exts_.clCommandCopyBufferRectKHR = (clCommandCopyBufferRectKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clCommandCopyBufferRectKHR"); + Exts_.clCommandFillBufferKHR = + (clCommandFillBufferKHR_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandFillBufferKHR"); + Exts_.clCommandNDRangeKernelKHR = (clCommandNDRangeKernelKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clCommandNDRangeKernelKHR"); + Exts_.clCommandBarrierWithWaitListKHR = + (clCommandBarrierWithWaitListKHR_fn):: + clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandBarrierWithWaitListKHR"); + Exts_.clFinalizeCommandBufferKHR = (clFinalizeCommandBufferKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clFinalizeCommandBufferKHR"); + Exts_.clEnqueueCommandBufferKHR = (clEnqueueCommandBufferKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clEnqueueCommandBufferKHR"); + Exts_.clReleaseCommandBufferKHR = (clReleaseCommandBufferKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clReleaseCommandBufferKHR"); + Exts_.clGetCommandBufferInfoKHR = (clGetCommandBufferInfoKHR_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clGetCommandBufferInfoKHR"); + } +#ifdef cl_pocl_command_buffer_svm + SupportsCommandBuffersSVM = + DevExts.find("cl_pocl_command_buffer_svm") != std::string::npos; + if (SupportsCommandBuffersSVM) { + logDebug("Device supports cl_pocl_command_buffer_svm"); + Exts_.clCommandSVMMemcpyPOCL = + (clCommandSVMMemcpyPOCL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandSVMMemcpyPOCL"); + Exts_.clCommandSVMMemcpyRectPOCL = (clCommandSVMMemcpyRectPOCL_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clCommandSVMMemcpyRectPOCL"); + Exts_.clCommandSVMMemfillPOCL = + (clCommandSVMMemfillPOCL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandSVMMemfillPOCL"); + Exts_.clCommandSVMMemfillRectPOCL = (clCommandSVMMemfillRectPOCL_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clCommandSVMMemfillRectPOCL"); + } +#endif +#ifdef cl_pocl_command_buffer_host_exec + SupportsCommandBuffersHost = + DevExts.find("cl_pocl_command_buffer_host_exec") != std::string::npos; + if (SupportsCommandBuffersHost) { + logDebug("Device supports cl_pocl_command_buffer_host_exec"); + Exts_.clCommandHostFuncPOCL = + (clCommandHostFuncPOCL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandHostFuncPOCL"); + Exts_.clCommandWaitForEventPOCL = (clCommandWaitForEventPOCL_fn):: + clGetExtensionFunctionAddressForPlatform(Plat(), + "clCommandWaitForEventPOCL"); + Exts_.clCommandSignalEventPOCL = + (clCommandSignalEventPOCL_fn)::clGetExtensionFunctionAddressForPlatform( + Plat(), "clCommandSignalEventPOCL"); + } +#endif + + ClContext_ = CtxIn; SvmMemory.init(*CtxIn); } @@ -1291,6 +1366,401 @@ std::shared_ptr CHIPQueueOpenCL::enqueueBarrierImpl( return Event; } +/********************************************************************************/ + +chipstar::GraphNative *CHIPQueueOpenCL::createNativeGraph() { + // should not raise an error if we fail to create a graph, + // because there is a fallback solution + CHIPContextOpenCL *Ctx = (CHIPContextOpenCL *)ChipContext_; + if (!Ctx->supportsCommandBuffers()) + return nullptr; + + cl_command_queue CQ = ClQueue_->get(); + int err = CL_SUCCESS; + cl_command_buffer_khr Res = + Ctx->exts()->clCreateCommandBufferKHR(1, &CQ, 0, &err); + if (Res == nullptr || err != CL_SUCCESS) { + logError("clCreateCommandBufferKHR FAILED with status {}", + resultToString(err)); + return nullptr; + } + + return new CHIPGraphNativeOpenCL(Res, CQ, Ctx->exts()); +} + +std::shared_ptr +CHIPQueueOpenCL::enqueueNativeGraph(chipstar::GraphNative *NativeGraph) { + + std::shared_ptr Event = + ::Backend->createCHIPEvent(ChipContext_); + CHIPContextOpenCL *Ctx = (CHIPContextOpenCL *)ChipContext_; + CHIPGraphNativeOpenCL *G = (CHIPGraphNativeOpenCL *)NativeGraph; + if (!Ctx->supportsCommandBuffers()) + return nullptr; + if (NativeGraph == nullptr) + return nullptr; + cl_command_queue CQ = ClQueue_->get(); + int Status = Ctx->exts()->clEnqueueCommandBufferKHR( + 1, &CQ, G->get(), 0, nullptr, + std::static_pointer_cast(Event)->getNativePtr()); + CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); + + return Event; +} + +void CHIPQueueOpenCL::destroyNativeGraph(chipstar::GraphNative *NativeGraph) { + if (NativeGraph == nullptr) + return; + CHIPGraphNativeOpenCL *G = (CHIPGraphNativeOpenCL *)NativeGraph; + delete G; +} + +bool CHIPGraphNativeOpenCL::addNode(chipstar::GraphNode *NewNode) { + cl_sync_point_khr NewSyncPoint = -1; + + // map the dependent chipstar::GraphNodes to OpenCL syncpoints + const std::vector &Dependencies = + NewNode->getDependencies(); + std::vector SyncPointDeps; + for (auto Node : Dependencies) { + auto Iter = SyncPointMap_.find(Node); + if (Iter == SyncPointMap_.end()) { + logError("Can't find SyncPoint for Node"); + return false; + } + SyncPointDeps.push_back(Iter->second); + } + + hipGraphNodeType NodeType = NewNode->getType(); + bool Res; + switch (NodeType) { + case hipGraphNodeTypeKernel: + Res = addKernelNode((chipstar::GraphNodeKernel *)NewNode, SyncPointDeps, + &NewSyncPoint); + break; + case hipGraphNodeTypeEmpty: + assert(0 && "Empty node should be removed earlier"); + +#ifdef cl_pocl_command_buffer_svm + case hipGraphNodeTypeMemcpy: + Res = addMemcpyNode((chipstar::GraphNodeMemcpy *)NewNode, SyncPointDeps, + &NewSyncPoint); + break; + case hipGraphNodeTypeMemset: + Res = addMemsetNode((chipstar::GraphNodeMemset *)NewNode, SyncPointDeps, + &NewSyncPoint); + break; + case hipGraphNodeTypeMemcpyFromSymbol: + Res = addMemcpyNode((chipstar::GraphNodeMemcpyFromSymbol *)NewNode, + SyncPointDeps, &NewSyncPoint); + break; + case hipGraphNodeTypeMemcpyToSymbol: + Res = addMemcpyNode((chipstar::GraphNodeMemcpyToSymbol *)NewNode, + SyncPointDeps, &NewSyncPoint); + break; +#endif + +#ifdef cl_pocl_command_buffer_host_exec + case hipGraphNodeTypeWaitEvent: + Res = addEventWaitNode((chipstar::GraphNodeWaitEvent *)NewNode, + SyncPointDeps, &NewSyncPoint); + break; + case hipGraphNodeTypeEventRecord: + Res = addEventRecordNode((chipstar::GraphNodeEventRecord *)NewNode, + SyncPointDeps, &NewSyncPoint); + break; + case hipGraphNodeTypeHost: + Res = addHostNode((chipstar::GraphNodeHost *)NewNode, SyncPointDeps, + &NewSyncPoint); + break; +#endif + + default: + Res = false; + } + if (!Res) + return false; + + SyncPointMap_.insert(std::make_pair(NewNode, NewSyncPoint)); + return true; +} + +bool CHIPGraphNativeOpenCL::finalize() { + int Status = Exts_->clFinalizeCommandBufferKHR(Handle_); + if (Status == CL_SUCCESS) { + Finalized = true; + return true; + } + logError("clFinalizeCommandBufferKHR FAILED with status {}", + resultToString(Status)); + return false; +} + +CHIPGraphNativeOpenCL::~CHIPGraphNativeOpenCL() { + if (Handle_ == nullptr) + return; + int Err = Exts_->clReleaseCommandBufferKHR(Handle_); + logError("clReleaseCommandBufferKHR FAILED with status {}", + resultToString(Err)); + assert(Err == CL_SUCCESS); +} + +// TODO finish +bool CHIPGraphNativeOpenCL::addKernelNode( + chipstar::GraphNodeKernel *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + + int Status; + // possibly use: CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR + cl_ndrange_kernel_command_properties_khr Properties[] = {0, 0}; + + // TODO: we should add what CHIPQueue::launch does with Registered (Global) + // Vars + // TODO also look at SpillBuffer handling in: + // CHIPEvent *CHIPQueueOpenCL::launchImpl(CHIPExecItem *ExecItem) { + + // setup the kernel arguments before calling clCommandNDRange + Node->setupKernelArgs(); + + chipstar::Kernel *K = Node->getKernel(); + CHIPKernelOpenCL *CLK = static_cast(K); + + hipKernelNodeParams Params = Node->getParams(); + size_t LWSize[3] = {Params.blockDim.x, Params.blockDim.y, Params.blockDim.z}; + size_t GWSize[3] = {Params.blockDim.x * Params.gridDim.x, + Params.blockDim.y * Params.gridDim.y, + Params.blockDim.z * Params.gridDim.z}; + uint WorkDim = 3; + + assert(Exts_->clCommandNDRangeKernelKHR); + Status = Exts_->clCommandNDRangeKernelKHR( + Handle_, nullptr, Properties, + CLK->get()->get(), // cl_kernel + WorkDim, // cl_uint work_dim + nullptr, // const size_t* global_work_offset, + GWSize, // const size_t* global_work_size, + LWSize, // const size_t* local_work_size, + SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); + return Status == CL_SUCCESS; +} + +#ifdef cl_pocl_command_buffer_svm + +// TODO finish Arrays +bool CHIPGraphNativeOpenCL::addMemcpyNode( + chipstar::GraphNodeMemcpy *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + int Status; + void *Dst; + const void *Src; + size_t Size; + hipMemcpyKind Kind; + hipMemcpy3DParms Params; + + // Although ROCm API ref says that Dst and Src should not overlap, + // HIP seems to handle Dst == Src as a special (no-operation) case. + // This is seen in the test unit/memory/hipMemcpyAllApiNegative. + // Intel GPU OpenCL driver seems to do also so for clEnqueueSVMMemcpy, which + // makes/ it pass, but Intel CPU OpenCL returns CL_​MEM_​COPY_​OVERLAP + // like it should. To unify the behavior, let's convert the special case to + // a marker here, so we can return an event. + + Node->getParams(Dst, Src, Size, Kind); + Params = Node->getParams(); + if (Dst == nullptr || Src == nullptr) { + if (!Exts_->clCommandSVMMemcpyRectPOCL) + return false; + // 3D copy + // TODO handle arrays + assert(Params.dstArray == nullptr && "Arrays not supported yet"); + assert(Params.srcArray == nullptr && "Arrays not supported yet"); + + /* + * The struct passed to cudaMemcpy3D() must specify one of srcArray or + * srcPtr and one of dstArray or dstPtr. Passing more than one non-zero + * source or destination will cause cudaMemcpy3D() to return an error. The + * srcPos and dstPos fields are optional offsets into the source and + * destination objects and are defined in units of each object's elements. + * The element for a host or device pointer is assumed to be unsigned char. + * The extent field defines the dimensions of the transferred area in + * elements. If a CUDA array is participating in the copy, the extent is + * defined in terms of that array's elements. If no CUDA array is + * participating in the copy then the extents are defined in elements of + * unsigned char. + */ + + // TODO: HANDLE FOR ARRAYS: + // The srcPos and dstPos fields are optional offsets into the source & + // destination objects and are defined in units of each object's elements + // ... The element for a host or device pointer is assumed to be unsigned + // char. + size_t src_origin[3] = {Params.srcPos.x, Params.srcPos.y, Params.srcPos.z}; + size_t dst_origin[3] = {Params.dstPos.x, Params.dstPos.y, Params.dstPos.z}; + // If no CUDA array is participating in the copy then the extents + // are defined in elements of unsigned char. + size_t region[3] = {Params.extent.width, Params.extent.height, + Params.extent.depth}; + + // TODO this might be wrong. + size_t src_row_pitch = Params.srcPtr.pitch; + size_t src_slice_pitch = src_row_pitch * Params.srcPtr.ysize; + size_t dst_row_pitch = Params.dstPtr.pitch; + size_t dst_slice_pitch = dst_row_pitch * Params.dstPtr.ysize; + + Status = Exts_->clCommandSVMMemcpyRectPOCL( + Handle_, nullptr, Dst, Src, dst_origin, src_origin, region, + dst_row_pitch, dst_slice_pitch, src_row_pitch, src_slice_pitch, + SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); + } else { + // 1D copy + if (!Exts_->clCommandSVMMemcpyPOCL) + return false; + if (Dst == Src) { + Status = Exts_->clCommandBarrierWithWaitListKHR( + Handle_, nullptr, SyncPointDeps.size(), SyncPointDeps.data(), + SyncPoint, nullptr); + CHIPERR_CHECK_LOG_AND_THROW(Status, CL_SUCCESS, hipErrorTbd); + } else + Status = Exts_->clCommandSVMMemcpyPOCL( + Handle_, nullptr, Dst, Src, Size, SyncPointDeps.size(), + SyncPointDeps.data(), SyncPoint, nullptr); + } + + return Status == CL_SUCCESS; +} + +bool CHIPGraphNativeOpenCL::addMemcpyNode( + chipstar::GraphNodeMemcpyFromSymbol *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + + if (!Exts_->clCommandSVMMemcpyPOCL) + return false; + + void *Dst = nullptr; + void *Src = nullptr; + const void *Symbol; + size_t SizeBytes; + size_t Offset; + hipMemcpyKind Kind; + Node->getParams(Dst, Symbol, SizeBytes, Offset, Kind); + + hipError_t Err = hipGetSymbolAddress(&Src, Symbol); + if (Err != HIP_SUCCESS) + return false; + + int Status = Exts_->clCommandSVMMemcpyPOCL( + Handle_, nullptr, Dst, (const char *)Src + Offset, SizeBytes, + SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); + + return Status == CL_SUCCESS; +} + +bool CHIPGraphNativeOpenCL::addMemcpyNode( + chipstar::GraphNodeMemcpyToSymbol *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + if (!Exts_->clCommandSVMMemcpyPOCL) + return false; + + void *Dst = nullptr; + void *Src = nullptr; + const void *Symbol; + size_t SizeBytes; + size_t Offset; + hipMemcpyKind Kind; + Node->getParams(Src, Symbol, SizeBytes, Offset, Kind); + + hipError_t Err = hipGetSymbolAddress(&Dst, Symbol); + if (Err != HIP_SUCCESS) + return false; + + int Status = Exts_->clCommandSVMMemcpyPOCL( + Handle_, nullptr, (char *)Dst + Offset, Src, SizeBytes, + SyncPointDeps.size(), SyncPointDeps.data(), SyncPoint, nullptr); + return Status == CL_SUCCESS; +} + +bool CHIPGraphNativeOpenCL::addMemsetNode( + chipstar::GraphNodeMemset *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + if (!Exts_->clCommandSVMMemfillRectPOCL) + return false; + + hipMemsetParams Params = Node->getParams(); + + int Status; + size_t Region[3] = {Params.width, Params.height, 1}; + Status = Exts_->clCommandSVMMemfillRectPOCL( + Handle_, nullptr, Params.dst, + nullptr, // origin + Region, // region + Params.pitch, // row pitch + 0, // slice pitch + (const void *)&Params.value, Params.elementSize, SyncPointDeps.size(), + SyncPointDeps.data(), SyncPoint, nullptr); + return Status == CL_SUCCESS; +} +#endif + +#ifdef cl_pocl_command_buffer_host_exec + +bool CHIPGraphNativeOpenCL::addHostNode( + chipstar::GraphNodeHost *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + if (!Exts_->clCommandHostFuncPOCL) + return false; + + hipHostNodeParams Params = Node->getParams(); + + int Status; + Status = Exts_->clCommandHostFuncPOCL( + Handle_, nullptr, Params.fn, Params.userData, SyncPointDeps.size(), + SyncPointDeps.data(), SyncPoint, nullptr); + return Status == CL_SUCCESS; +} + +bool CHIPGraphNativeOpenCL::addEventRecordNode( + chipstar::GraphNodeEventRecord *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + if (!Exts_->clCommandSignalEventPOCL) + return false; + + return false; + // unfinished +#if 0 + CHIPEvent *E = Node->getEvent(); + CHIPEventOpenCL *CLE = static_cast(E); + + int Status; + Status = Exts->clCommandSignalEventPOCL(Handle, nullptr, nullptr, SyncPoint, + nullptr); + return Status == CL_SUCCESS; +#endif +} + +bool CHIPGraphNativeOpenCL::addEventWaitNode( + chipstar::GraphNodeWaitEvent *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint) { + if (!Exts_->clCommandWaitForEventPOCL) + return false; + + chipstar::Event *E = Node->getEvent(); + CHIPEventOpenCL *CLE = static_cast(E); + + int Status; + Status = Exts_->clCommandWaitForEventPOCL( + Handle_, nullptr, CLE->getNativeRef(), SyncPoint, nullptr); + return Status == CL_SUCCESS; +} +#endif + // CHIPExecItemOpenCL //************************************************************************* @@ -1423,6 +1893,7 @@ chipstar::ExecItem *CHIPBackendOpenCL::createExecItem(dim3 GirdDim, new CHIPExecItemOpenCL(GirdDim, BlockDim, SharedMem, ChipQueue); return ExecItem; }; + chipstar::Queue *CHIPBackendOpenCL::createCHIPQueue(chipstar::Device *ChipDev) { CHIPDeviceOpenCL *ChipDevCl = (CHIPDeviceOpenCL *)ChipDev; return new CHIPQueueOpenCL(ChipDevCl, OCL_DEFAULT_QUEUE_PRIORITY); @@ -1524,7 +1995,9 @@ void CHIPBackendOpenCL::initializeImpl(std::string CHIPPlatformStr, // Create queues that have devices each of which has an associated context // TODO Change this to spirv_enabled_devices cl::Context *Ctx = new cl::Context(SpirvDevices); - CHIPContextOpenCL *ChipContext = new CHIPContextOpenCL(Ctx); + + CHIPContextOpenCL *ChipContext = + new CHIPContextOpenCL(Ctx, Device, SelectedPlatform); ::Backend->addContext(ChipContext); // TODO for now only a single device is supported. @@ -1544,11 +2017,12 @@ void CHIPBackendOpenCL::initializeFromNative(const uintptr_t *NativeHandles, cl_device_id DevId = (cl_device_id)NativeHandles[1]; cl_context CtxId = (cl_context)NativeHandles[2]; + cl::Device *Dev = new cl::Device(DevId); + cl::Platform Plat(Dev->getInfo()); cl::Context *Ctx = new cl::Context(CtxId); - CHIPContextOpenCL *ChipContext = new CHIPContextOpenCL(Ctx); + CHIPContextOpenCL *ChipContext = new CHIPContextOpenCL(Ctx, *Dev, Plat); addContext(ChipContext); - cl::Device *Dev = new cl::Device(DevId); CHIPDeviceOpenCL *ChipDev = CHIPDeviceOpenCL::create(Dev, ChipContext, 0); logTrace("CHIPDeviceOpenCL {}", ChipDev->ClDevice->getInfo()); diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.hh b/src/backend/OpenCL/CHIPBackendOpenCL.hh index f1afef403..b7dd2936b 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.hh +++ b/src/backend/OpenCL/CHIPBackendOpenCL.hh @@ -39,7 +39,7 @@ #define CL_HPP_TARGET_OPENCL_VERSION 210 #define CL_HPP_MINIMUM_OPENCL_VERSION 200 -#include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmissing-braces" @@ -137,9 +137,6 @@ public: bool hasPointer(const void *Ptr); bool pointerSize(void *Ptr, size_t *Size); bool pointerInfo(void *Ptr, void **Base, size_t *Size); - int memCopy(void *Dst, const void *Src, size_t Size, cl::CommandQueue &Queue); - int memFill(void *Dst, size_t Size, const void *Pattern, size_t PatternSize, - cl::CommandQueue &Queue); void clear(); size_t getNumAllocations() const { return SvmAllocations_.size(); } @@ -150,12 +147,44 @@ public: } }; +typedef struct { + clCreateCommandBufferKHR_fn clCreateCommandBufferKHR; + clCommandCopyBufferKHR_fn clCommandCopyBufferKHR; + clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR; + clCommandFillBufferKHR_fn clCommandFillBufferKHR; + clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR; + clCommandBarrierWithWaitListKHR_fn clCommandBarrierWithWaitListKHR; + clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR; + clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR; + clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR; + clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR; + +#ifdef cl_pocl_command_buffer_svm + clCommandSVMMemcpyPOCL_fn clCommandSVMMemcpyPOCL; + clCommandSVMMemcpyRectPOCL_fn clCommandSVMMemcpyRectPOCL; + clCommandSVMMemfillPOCL_fn clCommandSVMMemfillPOCL; + clCommandSVMMemfillRectPOCL_fn clCommandSVMMemfillRectPOCL; +#endif + +#ifdef cl_pocl_command_buffer_host_exec + clCommandHostFuncPOCL_fn clCommandHostFuncPOCL; + clCommandWaitForEventPOCL_fn clCommandWaitForEventPOCL; + clCommandSignalEventPOCL_fn clCommandSignalEventPOCL; +#endif + +} CHIPContextClExts; + class CHIPContextOpenCL : public chipstar::Context { + cl::Context *ClContext_; + bool SupportsCommandBuffers; + bool SupportsCommandBuffersSVM; + bool SupportsCommandBuffersHost; + public: - bool allDevicesSupportFineGrainSVM(); + CHIPContextClExts Exts_; SVMemoryRegion SvmMemory; - cl::Context *ClContext; - CHIPContextOpenCL(cl::Context *ClContext); + bool allDevicesSupportFineGrainSVM(); + CHIPContextOpenCL(cl::Context *ClContext, cl::Device Dev, cl::Platform Plat); virtual ~CHIPContextOpenCL() {} void *allocateImpl( size_t Size, size_t Alignment, hipMemoryType MemType, @@ -163,7 +192,11 @@ public: bool isAllocatedPtrMappedToVM(void *Ptr) override { return false; } // TODO virtual void freeImpl(void *Ptr) override; - cl::Context *get(); + cl::Context *get() { return ClContext_; } + bool supportsCommandBuffers() { return SupportsCommandBuffers; } + bool supportsCommandBuffersSVM() { return SupportsCommandBuffersSVM; } + bool supportsCommandBuffersHost() { return SupportsCommandBuffersHost; } + const CHIPContextClExts *exts() { return &Exts_; } }; class CHIPDeviceOpenCL : public chipstar::Device { @@ -258,12 +291,18 @@ public: virtual hipError_t getBackendHandles(uintptr_t *NativeInfo, int *NumHandles) override; + virtual std::shared_ptr enqueueBarrierImpl( const std::vector> &EventsToWaitFor) override; virtual std::shared_ptr enqueueMarkerImpl() override; virtual std::shared_ptr memPrefetchImpl(const void *Ptr, size_t Count) override; + + virtual std::shared_ptr + enqueueNativeGraph(chipstar::GraphNative *NativeGraph) override; + virtual chipstar::GraphNative *createNativeGraph() override; + virtual void destroyNativeGraph(chipstar::GraphNative *NativeGraph) override; }; class CHIPKernelOpenCL : public chipstar::Kernel { @@ -301,7 +340,7 @@ private: public: CHIPExecItemOpenCL(const CHIPExecItemOpenCL &Other) - : CHIPExecItemOpenCL(Other.GridDim_, Other.BlockDim_, Other.SharedMem_, + : chipstar::ExecItem(Other.GridDim_, Other.BlockDim_, Other.SharedMem_, Other.ChipQueue_) { // TOOD Graphs Is this safe? ClKernel_ = Other.ClKernel_; @@ -386,4 +425,50 @@ public: cl_sampler getSampler() const { return Sampler; } }; +class CHIPGraphNativeOpenCL : public chipstar::GraphNative { + cl_command_buffer_khr Handle_; + cl_command_queue CmdQ_; + std::map SyncPointMap_; + const CHIPContextClExts *Exts_; + + bool addKernelNode(chipstar::GraphNodeKernel *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); +#ifdef cl_pocl_command_buffer_svm + bool addMemcpyNode(chipstar::GraphNodeMemcpy *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); + bool addMemcpyNode(chipstar::GraphNodeMemcpyFromSymbol *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); + bool addMemcpyNode(chipstar::GraphNodeMemcpyToSymbol *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); + bool addMemsetNode(chipstar::GraphNodeMemset *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); +#endif + +#ifdef cl_pocl_command_buffer_host_exec + bool addHostNode(chipstar::GraphNodeHost *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); + bool addEventRecordNode(chipstar::GraphNodeEventRecord *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); + bool addEventWaitNode(chipstar::GraphNodeWaitEvent *Node, + std::vector &SyncPointDeps, + cl_sync_point_khr *SyncPoint); +#endif + +public: + CHIPGraphNativeOpenCL(cl_command_buffer_khr H, cl_command_queue CQ, + const CHIPContextClExts *E) + : Handle_(H), CmdQ_(CQ), Exts_(E) {} + virtual ~CHIPGraphNativeOpenCL(); + cl_command_buffer_khr get() const { return Handle_; } + virtual bool finalize() override; + virtual bool addNode(chipstar::GraphNode *NewNode) override; +}; + #endif diff --git a/src/common.hh b/src/common.hh index 1a13eca51..0452cdffd 100644 --- a/src/common.hh +++ b/src/common.hh @@ -26,20 +26,21 @@ #include "CHIPSPVConfig.hh" #include "SPIRVFuncInfo.hh" +#include +#include #include +#include +#include +#include #include -#include +#include +#include #include #include -#include +#include #include #include -#include -#include -#include -#include -#include -#include +#include /// The implementation of ihipEvent_t. The chipstar::Event class inherits this /// so ihipEvent_t pointers may carry chipstar::Event instances. From 5f6a368c843548990f2629d44f580981b0a5bcff Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 10 May 2023 11:03:09 +0300 Subject: [PATCH 6/8] update unit tests with failing/passing Graph tests move graph tests to FAILING_FOR_ALL --- cmake/UnitTests.cmake | 620 +++++------------------------------------- 1 file changed, 61 insertions(+), 559 deletions(-) diff --git a/cmake/UnitTests.cmake b/cmake/UnitTests.cmake index 0a68e5443..e4555d907 100644 --- a/cmake/UnitTests.cmake +++ b/cmake/UnitTests.cmake @@ -153,6 +153,65 @@ list(APPEND FAILING_FOR_ALL "hipStreamSemantics") # SEGFAULT - likely due to mai # OpenCL CPU & GPU and Intel Level Zero (however, your mileage may vary). list(APPEND FAILING_FOR_ALL "TestIndirectCall") +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddDependencies_NegTest") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddEventRecordNode_MultipleRun") # subprocess aborted +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddEventWaitNode_MultipleRun") # subprocess aborted +list(APPEND FAILING_FOR_ALL "Unit_hipGraph_BasicFunctional") # subprocess aborted +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddMemcpyNode_BasicFunctional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddHostNode_ClonedGraphwithHostNode") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemsetNodeSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphDestroyNode_DestroyDependencyNode") # (SEGFAULT) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphGetNodes_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphGetNodes_CapturedStream") # subprocess aborted +list(APPEND FAILING_FOR_ALL "Unit_hipGraphGetRootNodes_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphGetRootNodes_CapturedStream") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddMemcpyNode1D_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # (SEGFAULT) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddChildGraphNode_SingleChildNode") # (SEGFAULT) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphGetEdges_Functionality") # (Timeout) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRemoveDependencies_Func_Manual") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecUpdate_Negative_TypeChange") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecUpdate_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemcpyNodeSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT +list(APPEND FAILING_FOR_ALL "Unit_hipGraphKernelNodeGetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphKernelNodeSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphKernelNodeGetSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecKernelNodeSetParams_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphLaunch_Negative") # (SEGFAULT) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphNodeGetDependentNodes_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphNodeGetDependencies_Functional") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRetainUserObject_Functional_1") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRetainUserObject_Functional_2") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRetainUserObject_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphReleaseUserObject_Negative") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRetainUserObject_Negative_Basic") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipGraphRetainUserObject_Negative_Null_Object") # (Failed) +list(APPEND FAILING_FOR_ALL "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted + # CPU OpenCL Unit Test Failures list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipMultiThreadStreams1_AsyncSame") # MemUnmap CL_RESULT != CL_SUCCESS list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_tex1Dfetch_CheckModes") # SEGFAULT @@ -167,64 +226,6 @@ list(APPEND CPU_OPENCL_FAILED_TESTS "unroll") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "hipConstantTestDeviceSymbol") # Subprocess aborted list(APPEND CPU_OPENCL_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND CPU_OPENCL_FAILED_TESTS "cuda-qrng") # Subprocess aborted -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Subprocess aborted -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed @@ -255,34 +256,6 @@ list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # F list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") # Failed @@ -295,12 +268,6 @@ list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Faile list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - int") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - float") # Failed list(APPEND CPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - double") # Failed @@ -490,64 +457,6 @@ list(APPEND IGPU_OPENCL_FAILED_TESTS "hipStreamSemantics") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "deviceMallocCompile") # Unimplemented list(APPEND IGPU_OPENCL_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "cuda-qrng") # Subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed @@ -578,34 +487,6 @@ list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") # Failed @@ -618,12 +499,6 @@ list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Fail list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - int") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - float") # Failed list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - double") # Failed @@ -811,7 +686,6 @@ list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipStreamAddCallback_StrmSyncTiming") list(APPEND IGPU_OPENCL_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "stream") # SEGFAULT list(APPEND IGPU_OPENCL_FAILED_TESTS "hipKernelLaunchIsNonBlocking") # Timeout -list(APPEND IGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # Subprocess aborted list(APPEND IGPU_OPENCL_FAILED_TESTS "syncthreadsExitedThreads") # Timeout # Timeout or out-of-resources error in the CI which emulates double FPs. list(APPEND IGPU_OPENCL_FAILED_TESTS "TestStlFunctionsDouble") @@ -837,64 +711,6 @@ list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-asyncAPI") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-matrixMul") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS "cuda-qrng") # Subprocess aborted -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Timeout -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed @@ -925,34 +741,6 @@ list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") # Failed @@ -965,12 +753,6 @@ list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Fail list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipMemcpy2DToArray_PinnedMemSameGPU") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipMemcpy2DToArrayAsync_PinnedHostMemSameGpu") # Failed list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipMemcpy2D_H2D-D2D-D2H - int") # Failed @@ -1194,7 +976,6 @@ list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipMemsetFunctional_PartialSet_3D") # list(APPEND DGPU_OPENCL_FAILED_TESTS "hipMultiThreadAddCallback") # Subprocess aborted list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipMemset3DAsync_ConcurrencyMthread") # Timeout list(APPEND DGPU_OPENCL_FAILED_TESTS "hipKernelLaunchIsNonBlocking") # Timeout -list(APPEND DGPU_OPENCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # Subprocess aborted list(APPEND DGPU_OPENCL_FAILED_TESTS "syncthreadsExitedThreads") # Timeout # dGPU Level Zero Unit Test Failures @@ -1203,64 +984,6 @@ list(APPEND DGPU_LEVEL0_FAILED_TESTS "BitonicSort") # Assertion `!Deleted_ && "E list(APPEND DGPU_LEVEL0_FAILED_TESTS "FloydWarshall") # Assertion `!Deleted_ && "Event use after delete!"' failed. list(APPEND DGPU_LEVEL0_FAILED_TESTS "deviceMallocCompile") # Unimplemented list(APPEND DGPU_LEVEL0_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # Subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_BasicFunctional") # Subprocess aborted list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed @@ -1291,33 +1014,6 @@ list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Timeout -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # SEGFAULT -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") # Failed @@ -1330,12 +1026,6 @@ list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Fail list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - int") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - float") # Failed list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - double") # Failed @@ -1523,70 +1213,12 @@ list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipPeekAtLastError_Positive_Threaded" list(APPEND DGPU_LEVEL0_FAILED_TESTS "hipDynamicShared") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipMemFaultStackAllocation_Check") # SEGFAULT list(APPEND DGPU_LEVEL0_FAILED_TESTS "hipKernelLaunchIsNonBlocking") # Timeout -list(APPEND DGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # Subprocess aborted list(APPEND DGPU_LEVEL0_FAILED_TESTS "syncthreadsExitedThreads") # Timeout # iGPU Level Zero Unit Test Failures list(APPEND IGPU_LEVEL0_FAILED_TESTS "hipStreamSemantics") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "deviceMallocCompile") # Unimplemented list(APPEND IGPU_LEVEL0_FAILED_TESTS "cuda-simpleCallback") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # Subprocess aborted -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphDestroyNode_DestroyDependencyNode") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Timeout -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed @@ -1617,33 +1249,6 @@ list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Timeout -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # SEGFAULT -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") # Failed @@ -1656,12 +1261,6 @@ list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Fail list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - int") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - float") # Failed list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - double") # Failed @@ -1849,7 +1448,6 @@ list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipPeekAtLastError_Positive_Threaded" list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipMemFaultStackAllocation_Check") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "hip_sycl_interop") # SEGFAULT list(APPEND IGPU_LEVEL0_FAILED_TESTS "hipKernelLaunchIsNonBlocking") # Timeout -list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # Subprocess aborted list(APPEND IGPU_LEVEL0_FAILED_TESTS "syncthreadsExitedThreads") # Timeout list(APPEND IGPU_LEVEL0_FAILED_TESTS "Unit_hipMemset3DAsync_ConcurrencyMthread") # Flaky. An event related issue. @@ -1886,69 +1484,6 @@ list(APPEND CPU_POCL_FAILED_TESTS "cuda-scan") # Failed list(APPEND CPU_POCL_FAILED_TESTS "cuda-sortnet") # Failed list(APPEND CPU_POCL_FAILED_TESTS "cuda-FDTD3d") # Failed list(APPEND CPU_POCL_FAILED_TESTS "cuda-sobolqrng") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddEmptyNode_NegTest") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddDependencies_NegTest") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddEventRecordNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddEventWaitNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraph_BasicFunctional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode_BasicFunctional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphClone_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphInstantiateWithFlags_DependencyGraph") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddHostNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemory") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalConstMemory") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeFromSymbol_GlobalMemoryWithKernel") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphChildGraphNodeGetGraph_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphNodeFindInClone_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_ClonedGraphwithHostNode") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecHostNodeSetParams_BasicFunc") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalMemory") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_GlobalConstMemory") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNodeToSymbol_MemcpyToSymbolNodeWithKernel") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemsetNodeSetParams_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsToSymbol_Functional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphDestroyNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetNodes_Functional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetNodes_CapturedStream") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetNodes_ParamValidation") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_Functional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_CapturedStream") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetRootNodes_ParamValidation") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphHostNodeSetParams_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemcpyNode1D_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_OrgGraphAsChildGraph") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_CloneChildGraph") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_MultipleChildNodes") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddChildGraphNode_SingleChildNode") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphNodeGetType_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams1D_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetEdges_Functionality") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphGetEdges_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Func_StrmCapture") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_ChangeComputeFunc") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRemoveDependencies_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphInstantiate_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_Basic") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Negative_TypeChange") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecUpdate_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecEventRecordNodeSetEvent_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeSetEvent_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemsetNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemsetNodeSetParams_InvalidParams") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeGetEvent_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphEventRecordNodeSetEvent_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphEventWaitNodeGetEvent_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParams_Functional") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamBeginCapture_hipStreamPerThread") # SEGFAULT list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Negative") # SEGFAULT list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamBeginCapture_Basic") # Failed @@ -1979,35 +1514,6 @@ list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_UniqueID") # Fai list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_ArgValidation") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamEndCapture_Negative") # SEGFAULT list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamEndCapture_Thread_Negative") # Subprocess aborted -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParamsFromSymbol_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_SetAndVerifyMemory") # Timeout -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_VerifyEventNotChanged") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecEventWaitNodeSetEvent_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddMemsetNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphAddKernelNode_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeGetParams_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams_Functional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphKernelNodeSetParams_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphKernelNodeGetSetParams_Functional") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecKernelNodeSetParams_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphLaunch_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphMemcpyNodeSetParams1D_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecMemcpyNodeSetParamsToSymbol_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphNodeGetDependentNodes_ParamValidation") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_Functional") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphNodeGetDependencies_ParamValidation") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphHostNodeGetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_Negative") # SEGFAULT -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_BasicFunc") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphExecChildGraphNodeSetParams_ChildTopology") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_BasicFunctional") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_hipStreamPerThread") # SEGFAULT list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipStreamGetCaptureInfo_v2_UniqueID") # Failed @@ -2020,12 +1526,6 @@ list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipUserObjectCreate_Negative") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipUserObjectRelease_Negative") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipUserObjectRetain_Negative") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipUserObj_Negative_Test") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_1") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Functional_2") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphReleaseUserObject_Negative") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Basic") # Failed -list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipGraphRetainUserObject_Negative_Null_Object") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - int") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - float") # Failed list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipHostRegister_ReferenceFromKernelandhipMemset - double") # Failed @@ -2213,6 +1713,8 @@ list(APPEND CPU_POCL_FAILED_TESTS "Unit_hipDeviceSynchronize_Functional") # Fail list(APPEND CPU_POCL_FAILED_TESTS "syncthreadsExitedThreads") # Timeout list(APPEND CPU_POCL_FAILED_TESTS "hip_sycl_interop") # #terminate called after throwing an instance of 'sycl::_V1::runtime_error' what(): No device of requested type available list(APPEND CPU_POCL_FAILED_TESTS "hip_sycl_interop_no_buffers") # #terminate called after throwing an instance of 'sycl::_V1::runtime_error' what(): No device of requested type available +list(APPEND CPU_POCL_FAILED_TESTS "graphMatrixMultiply") # SEGFAULT inside kernel + # broken tests, they all try to write outside allocated memory; # valgrind + pocl shows: # From 4e2d624ecd99bebcc8dad3cf356ef09e3bd8c0b3 Mon Sep 17 00:00:00 2001 From: Brice Videau Date: Mon, 17 Jul 2023 20:39:13 +0000 Subject: [PATCH 7/8] Fix hipGraphAddDependencies and hipGraphRemoveDependencies semantics. --- src/CHIPBindings.cc | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/src/CHIPBindings.cc b/src/CHIPBindings.cc index 7334d1f8a..4240e4452 100644 --- a/src/CHIPBindings.cc +++ b/src/CHIPBindings.cc @@ -306,11 +306,22 @@ hipError_t hipGraphAddDependencies(hipGraph_t graph, const hipGraphNode_t *from, if (!graph || !from || !to) RETURN(hipErrorInvalidValue); CHIPInitialize(); - chipstar::GraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); - if (!FoundNode) - RETURN(hipErrorInvalidValue); - FoundNode->addDependencies(DECONST_NODES(from), numDependencies); + NULLCHECK(graph); + if (numDependencies) + NULLCHECK(from, to); + chipstar::Graph *G = GRAPH(graph); + for (size_t i = 0; i < numDependencies; i++) { + chipstar::GraphNode *FoundNode = G->findNode(NODE(to[i])); + if (!FoundNode) + RETURN(hipErrorInvalidValue); + FoundNode = G->findNode(NODE(from[i])); + if (!FoundNode) + RETURN(hipErrorInvalidValue); + } + for (size_t i = 0; i < numDependencies; i++) { + G->findNode(NODE(to[i]))->addDependency(G->findNode(NODE(from[i]))); + } RETURN(hipSuccess); CHIP_CATCH } @@ -325,11 +336,21 @@ hipError_t hipGraphRemoveDependencies(hipGraph_t graph, if (!from || !to) RETURN(hipErrorInvalidValue); CHIPInitialize(); - chipstar::GraphNode *FoundNode = GRAPH(graph)->findNode(NODE(*to)); - if (!FoundNode) - RETURN(hipErrorInvalidValue); - - FoundNode->removeDependencies(DECONST_NODES(from), numDependencies); + NULLCHECK(graph); + if (numDependencies) + NULLCHECK(from, to); + chipstar::Graph *G = GRAPH(graph); + for (size_t i = 0; i < numDependencies; i++) { + chipstar::GraphNode *FoundNode = G->findNode(NODE(to[i])); + if (!FoundNode) + RETURN(hipErrorInvalidValue); + FoundNode = G->findNode(NODE(from[i])); + if (!FoundNode) + RETURN(hipErrorInvalidValue); + } + for (size_t i = 0; i < numDependencies; i++) { + G->findNode(NODE(to[i]))->removeDependency(G->findNode(NODE(from[i]))); + } RETURN(hipSuccess); CHIP_CATCH } From 83ee3d8343b09539f8f2a5153d727b85238b767e Mon Sep 17 00:00:00 2001 From: Michal Babej Date: Wed, 2 Aug 2023 15:14:14 +0300 Subject: [PATCH 8/8] fix issue with GraphNodeKernel constructors the constructors were copying the value of Args pointer, instead of making an actual copy of the arguments. This can lead to a crash if the Args pointer lives on the stack, later goes out of scope, and setupAllArgs() is called. --- src/CHIPGraph.cc | 14 ++++++++++++-- src/CHIPGraph.hh | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/CHIPGraph.cc b/src/CHIPGraph.cc index 63a3b013e..2fbbcb971 100644 --- a/src/CHIPGraph.cc +++ b/src/CHIPGraph.cc @@ -152,7 +152,6 @@ GraphNodeKernel::GraphNodeKernel(const hipKernelNodeParams *TheParams) Params_.extra = TheParams->extra; Params_.func = TheParams->func; Params_.gridDim = TheParams->gridDim; - Params_.kernelParams = TheParams->kernelParams; Params_.sharedMemBytes = TheParams->sharedMemBytes; auto Dev = ::Backend->getActiveDevice(); @@ -160,6 +159,11 @@ GraphNodeKernel::GraphNodeKernel(const hipKernelNodeParams *TheParams) if (!Kernel_) CHIPERR_LOG_AND_THROW("Could not find requested kernel", hipErrorInvalidDeviceFunction); + unsigned NumArgs = Kernel_->getFuncInfo()->getNumClientArgs(); + KernelArgs_.resize(NumArgs); + std::memcpy(KernelArgs_.data(), TheParams->kernelParams, sizeof(void*) * NumArgs); + Params_.kernelParams = KernelArgs_.data(); + ExecItem_ = ::Backend->createExecItem(Params_.gridDim, Params_.blockDim, Params_.sharedMemBytes, nullptr); ExecItem_->setKernel(Kernel_); @@ -173,14 +177,20 @@ GraphNodeKernel::GraphNodeKernel(const void *HostFunction, dim3 GridDim, Params_.extra = nullptr; Params_.func = const_cast(HostFunction); Params_.gridDim = GridDim; - Params_.kernelParams = Args; Params_.sharedMemBytes = SharedMem; auto Dev = ::Backend->getActiveDevice(); Kernel_ = Dev->findKernel(HostPtr(Params_.func)); + if (!Kernel_) CHIPERR_LOG_AND_THROW("Could not find requested kernel", hipErrorInvalidDeviceFunction); + + unsigned NumArgs = Kernel_->getFuncInfo()->getNumClientArgs(); + KernelArgs_.resize(NumArgs); + std::memcpy(KernelArgs_.data(), Args, sizeof(void*) * NumArgs); + Params_.kernelParams = KernelArgs_.data(); + ExecItem_ = ::Backend->createExecItem(GridDim, BlockDim, SharedMem, nullptr); ExecItem_->setKernel(Kernel_); } diff --git a/src/CHIPGraph.hh b/src/CHIPGraph.hh index 40aa3cd71..06b567e4f 100644 --- a/src/CHIPGraph.hh +++ b/src/CHIPGraph.hh @@ -276,6 +276,7 @@ public: class GraphNodeKernel : public GraphNode { private: hipKernelNodeParams Params_; + std::vector KernelArgs_; chipstar::ExecItem *ExecItem_; chipstar::Kernel *Kernel_;